Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 123 additions & 4 deletions .github/actions/create_workflow_report/create_workflow_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,10 @@ def get_new_fails_this_pr(

# Combine both types of fails and select only desired columns
desired_columns = ["job_name", "test_name", "test_status", "results_link"]
all_pr_fails = pd.concat([checks_fails, regression_fails], ignore_index=True)[
desired_columns
]
all_pr_fails = pd.concat(
[df for df in [checks_fails, regression_fails] if len(df) > 0],
ignore_index=True,
)[desired_columns]
if len(all_pr_fails) == 0:
return pd.DataFrame()

Expand Down Expand Up @@ -353,7 +354,9 @@ def get_new_fails_this_pr(
base_regression = base_regression.drop(columns=["arch", "status"])

# Combine base results
base_results = pd.concat([base_checks, base_regression], ignore_index=True)
base_results = pd.concat(
[df for df in [base_checks, base_regression] if len(df) > 0], ignore_index=True
)

# Find tests that failed in PR but passed in base
pr_failed_tests = set(zip(all_pr_fails["job_name"], all_pr_fails["test_name"]))
Expand All @@ -370,6 +373,105 @@ def get_new_fails_this_pr(
return new_fails_df


def get_test_instability_scores(client: "Client", branch_name: str) -> pd.DataFrame:
    """
    Calculate per-test instability metrics for a branch.

    Looks at the last 4 weeks of check results for runs whose head or base
    ref is *branch_name*, and counts sudden pass->fail transitions per
    (job, test, version) combination. The instability score is
    ``2 * sudden_fails / runs``, so a test that flips on every other run
    scores close to 1.

    Args:
        client: ClickHouse client providing ``query_dataframe``.
        branch_name: Branch whose history is analyzed.

    Returns:
        DataFrame indexed by (job_name_base, test_name) with an
        ``instability`` column; empty DataFrame on query error or no data.
    """
    # NOTE(review): branch_name is interpolated directly into the SQL string.
    # It comes from CI metadata rather than arbitrary user input, but passing
    # it as a bound query parameter would be safer — confirm upstream source.
    query = f"""WITH test_results AS (
        SELECT check_start_time, head_ref, base_ref, test_name, test_status,
            replaceRegexpOne(check_name, ', [1-9/]*\\)$', '') as job_name_base -- remove group number
        FROM `gh-data`.checks
        WHERE (head_ref = '{branch_name}' OR base_ref = '{branch_name}')
        AND test_status IN ('OK', 'FAIL', 'BROKEN')
        AND check_start_time > now() - INTERVAL 4 WEEK
        ORDER BY check_start_time
    ),
    test_sequences AS (
        SELECT job_name_base, test_name, head_ref, base_ref, groupArray(if(test_status = 'OK', 'pass', 'fail')) AS status_array
        FROM test_results
        GROUP BY head_ref, base_ref, job_name_base, test_name
    ),
    flip_results AS (
        SELECT job_name_base, test_name, base_ref, head_ref,
            length(status_array) AS total_runs,
            arraySum(
                arrayMap(
                    i -> status_array[i - 1] = 'pass' AND status_array[i] = 'fail' ? 1 : 0,
                    arraySlice(arrayEnumerate(status_array), 2)
                )
            ) AS num_pass_to_fail
        FROM test_sequences
        WHERE total_runs >= 3 -- ensure decent sample size for this check-test-branch combination
    )
    SELECT 
        job_name_base,
        test_name,
        if(base_ref = '', head_ref, base_ref) as version,
        sum(total_runs) as runs,
        sum(num_pass_to_fail) as sudden_fails,
        round(2 * sudden_fails / runs, 2) as instability
    FROM flip_results
    GROUP BY job_name_base, test_name, version
    ORDER BY instability DESC
    """

    # Keep the try body minimal: only the network/query call can legitimately
    # fail here, and we don't want a post-processing bug (e.g. a KeyError on
    # an empty, column-less result) reported as a query error.
    try:
        df = client.query_dataframe(query)
    except Exception as e:
        print(f"Error getting test instability scores: {e}")
        return pd.DataFrame()

    if len(df) == 0:
        # An empty result set may not carry the expected columns at all,
        # so bail out before selecting them.
        return pd.DataFrame()

    # Set the index to make it compatible with pandas join operations
    # (join_instability_scores joins on these two keys).
    return df[["job_name_base", "test_name", "instability"]].set_index(
        ["job_name_base", "test_name"]
    )


def join_instability_scores(
    df: pd.DataFrame, instability_scores: pd.DataFrame
) -> pd.DataFrame:
    """
    Left-join instability scores onto *df* by job name and test name.

    The join key on the job side is the check name with any trailing group
    number (e.g. ", 1/4)") stripped off, so grouped runs of the same check
    match a single instability entry.

    Args:
        df: DataFrame with ``job_name`` and ``test_name`` columns.
        instability_scores: DataFrame with a MultiIndex on
            (job_name_base, test_name) and an ``instability`` column.

    Returns:
        *df* with an ``instability`` column added (0 where no score was
        found), or *df* unchanged when either input is empty or the join
        fails.
    """
    # Nothing to do when either side is empty.
    if len(df) == 0 or len(instability_scores) == 0:
        return df

    # Work on a copy so the caller's frame is never mutated.
    augmented = df.copy()

    # Derive the group-less job name used as the join key.
    augmented["job_name_base"] = augmented["job_name"].str.replace(
        r", [1-9/]*\)$", "", regex=True
    )

    try:
        joined = augmented.join(
            instability_scores,
            on=["job_name_base", "test_name"],
            how="left",
        )
    except Exception as e:
        # Best effort: on any join failure fall back to the original frame.
        print(f"Error joining instability scores: {e}")
        return df

    # Drop the helper key and treat unmatched rows as perfectly stable.
    joined = joined.drop(columns=["job_name_base"])
    return joined.fillna({"instability": 0})


@lru_cache
def get_workflow_config() -> dict:
workflow_config_files = glob("./ci/tmp/workflow_config*.json")
Expand Down Expand Up @@ -711,9 +813,11 @@ def create_workflow_report(

if pr_number == 0:
pr_info_html = f"Release ({branch_name})"
base_branch = branch_name
else:
try:
pr_info = get_pr_info_from_number(pr_number)
base_branch = pr_info.get("base", {}).get("ref")
pr_info_html = f"""<a href="https://github.com/{GITHUB_REPO}/pull/{pr_info["number"]}">
#{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")}
</a>"""
Expand All @@ -730,6 +834,21 @@ def create_workflow_report(
fail_results["job_statuses"], pr_number, branch_name, commit_sha
)

checks_instability_scores = get_test_instability_scores(db_client, base_branch)
if len(checks_instability_scores) > 0:
for table in [
"checks_fails",
"checks_errors",
"regression_fails",
"checks_known_fails",
"pr_new_fails",
]:
if table not in fail_results:
continue
fail_results[table] = join_instability_scores(
fail_results[table], checks_instability_scores
)

high_cve_count = 0
if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0:
high_cve_count = (
Expand Down
Loading