22"""
33Check the output of running Sphinx in nit-picky mode (missing references).
44"""
5+ from __future__ import annotations
6+
57import argparse
6- import csv
8+ import itertools
79import os
810import re
11+ import subprocess
912import sys
1013from pathlib import Path
14+ from typing import TextIO
1115
1216# Exclude these whether they're dirty or clean,
1317# because they trigger a rebuild of dirty files.
2428 "venv" ,
2529}
2630
27- PATTERN = re .compile (r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)" )
31+ # Regex pattern to match the parts of a Sphinx warning
32+ WARNING_PATTERN = re .compile (
33+ r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
34+ )
35+
36+ # Regex pattern to match the line numbers in a Git unified diff
37+ DIFF_PATTERN = re .compile (
38+ r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@" ,
39+ flags = re .MULTILINE ,
40+ )
41+
42+
43+ def get_diff_files (ref_a : str , ref_b : str , filter_mode : str = "" ) -> set [Path ]:
44+ """List the files changed between two Git refs, filtered by change type."""
45+ added_files_result = subprocess .run (
46+ [
47+ "git" ,
48+ "diff" ,
49+ f"--diff-filter={ filter_mode } " ,
50+ "--name-only" ,
51+ f"{ ref_a } ...{ ref_b } " ,
52+ "--" ,
53+ ],
54+ stdout = subprocess .PIPE ,
55+ check = True ,
56+ text = True ,
57+ encoding = "UTF-8" ,
58+ )
59+
60+ added_files = added_files_result .stdout .strip ().split ("\n " )
61+ return {Path (file .strip ()) for file in added_files if file .strip ()}
62+
63+
64+ def get_diff_lines (ref_a : str , ref_b : str , file : Path ) -> list [int ]:
65+ """List the lines changed between two Git refs for a specific file."""
66+ diff_output = subprocess .run (
67+ [
68+ "git" ,
69+ "diff" ,
70+ "--unified=0" ,
71+ f"{ ref_a } ...{ ref_b } " ,
72+ "--" ,
73+ str (file ),
74+ ],
75+ stdout = subprocess .PIPE ,
76+ check = True ,
77+ text = True ,
78+ encoding = "UTF-8" ,
79+ )
80+
81+ # Scrape line offsets + lengths from diff and convert to line numbers
82+ line_matches = DIFF_PATTERN .finditer (diff_output .stdout )
83+ # Removed and added line counts are 1 if not printed
84+ line_match_values = [
85+ line_match .groupdict (default = 1 ) for line_match in line_matches
86+ ]
87+ line_ints = [
88+ (int (match_value ["lineb" ]), int (match_value ["added" ]))
89+ for match_value in line_match_values
90+ ]
91+ line_ranges = [
92+ range (line_b , line_b + added ) for line_b , added in line_ints
93+ ]
94+ line_numbers = list (itertools .chain (* line_ranges ))
95+
96+ return line_numbers
97+
98+
99+ def get_para_line_numbers (file_obj : TextIO ) -> list [list [int ]]:
100+ """Get the line numbers of text in a file object, grouped by paragraph."""
101+ paragraphs = []
102+ prev_line = None
103+ for lineno , line in enumerate (file_obj ):
104+ lineno = lineno + 1
105+ if prev_line is None or (line .strip () and not prev_line .strip ()):
106+ paragraph = [lineno - 1 ]
107+ paragraphs .append (paragraph )
108+ paragraph .append (lineno )
109+ prev_line = line
110+ return paragraphs
111+
112+
113+ def filter_and_parse_warnings (
114+ warnings : list [str ], files : set [Path ]
115+ ) -> list [re .Match [str ]]:
116+ """Get the warnings matching passed files and parse them with regex."""
117+ filtered_warnings = [
118+ warning
119+ for warning in warnings
120+ if any (str (file ) in warning for file in files )
121+ ]
122+ warning_matches = [
123+ WARNING_PATTERN .fullmatch (warning .strip ())
124+ for warning in filtered_warnings
125+ ]
126+ non_null_matches = [warning for warning in warning_matches if warning ]
127+ return non_null_matches
128+
129+
130+ def filter_warnings_by_diff (
131+ warnings : list [re .Match [str ]], ref_a : str , ref_b : str , file : Path
132+ ) -> list [re .Match [str ]]:
133+ """Filter the passed per-file warnings to just those on changed lines."""
134+ diff_lines = get_diff_lines (ref_a , ref_b , file )
135+ with file .open (encoding = "UTF-8" ) as file_obj :
136+ paragraphs = get_para_line_numbers (file_obj )
137+ touched_paras = [
138+ para_lines
139+ for para_lines in paragraphs
140+ if set (diff_lines ) & set (para_lines )
141+ ]
142+ touched_para_lines = set (itertools .chain (* touched_paras ))
143+ warnings_infile = [
144+ warning for warning in warnings if str (file ) in warning ["file" ]
145+ ]
146+ warnings_touched = [
147+ warning
148+ for warning in warnings_infile
149+ if int (warning ["line" ]) in touched_para_lines
150+ ]
151+ return warnings_touched
152+
28153
154+ def process_touched_warnings (
155+ warnings : list [str ], ref_a : str , ref_b : str
156+ ) -> list [re .Match [str ]]:
157+ """Filter a list of Sphinx warnings to those affecting touched lines."""
158+ added_files , modified_files = tuple (
159+ get_diff_files (ref_a , ref_b , filter_mode = mode ) for mode in ("A" , "M" )
160+ )
161+
162+ warnings_added = filter_and_parse_warnings (warnings , added_files )
163+ warnings_modified = filter_and_parse_warnings (warnings , modified_files )
164+
165+ modified_files_warned = {
166+ file
167+ for file in modified_files
168+ if any (str (file ) in warning ["file" ] for warning in warnings_modified )
169+ }
29170
30- def check_and_annotate (warnings : list [str ], files_to_check : str ) -> None :
171+ warnings_modified_touched = [
172+ filter_warnings_by_diff (warnings_modified , ref_a , ref_b , file )
173+ for file in modified_files_warned
174+ ]
175+ warnings_touched = warnings_added + list (
176+ itertools .chain (* warnings_modified_touched )
177+ )
178+
179+ return warnings_touched
180+
181+
182+ def annotate_diff (
183+ warnings : list [str ], ref_a : str = "main" , ref_b : str = "HEAD"
184+ ) -> None :
31185 """
32- Convert Sphinx warning messages to GitHub Actions.
186+ Convert Sphinx warning messages to GitHub Actions for changed paragraphs .
33187
34188 Converts lines like:
35189 .../Doc/library/cgi.rst:98: WARNING: reference target not found
36190 to:
37191 ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38192
39- Non-matching lines are echoed unchanged.
40-
41- see:
193+ See:
42194 https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43195 """
44- files_to_check = next (csv .reader ([files_to_check ]))
45- for warning in warnings :
46- if any (filename in warning for filename in files_to_check ):
47- if match := PATTERN .fullmatch (warning ):
48- print ("::warning file={file},line={line}::{msg}" .format_map (match ))
196+ warnings_touched = process_touched_warnings (warnings , ref_a , ref_b )
197+ print ("Emitting doc warnings matching modified lines:" )
198+ for warning in warnings_touched :
199+ print ("::warning file={file},line={line}::{msg}" .format_map (warning ))
200+ print (warning [0 ])
201+ if not warnings_touched :
202+ print ("None" )
49203
50204
51205def fail_if_regression (
@@ -68,7 +222,7 @@ def fail_if_regression(
68222 print (filename )
69223 for warning in warnings :
70224 if filename in warning :
71- if match := PATTERN .fullmatch (warning ):
225+ if match := WARNING_PATTERN .fullmatch (warning ):
72226 print (" {line}: {msg}" .format_map (match ))
73227 return - 1
74228 return 0
@@ -91,12 +245,14 @@ def fail_if_improved(
91245 return 0
92246
93247
94- def main () -> int :
248+ def main (argv : list [ str ] | None = None ) -> int :
95249 parser = argparse .ArgumentParser ()
96250 parser .add_argument (
97- "--check-and-annotate" ,
98- help = "Comma-separated list of files to check, "
99- "and annotate those with warnings on GitHub Actions" ,
251+ "--annotate-diff" ,
252+ nargs = "*" ,
253+ metavar = ("BASE_REF" , "HEAD_REF" ),
254+ help = "Add GitHub Actions annotations on the diff for warnings on "
255+ "lines changed between the given refs (main and HEAD, by default)" ,
100256 )
101257 parser .add_argument (
102258 "--fail-if-regression" ,
@@ -108,13 +264,19 @@ def main() -> int:
108264 action = "store_true" ,
109265 help = "Fail if new files with no nits are found" ,
110266 )
111- args = parser .parse_args ()
267+
268+ args = parser .parse_args (argv )
269+ if args .annotate_diff is not None and len (args .annotate_diff ) > 2 :
270+ parser .error (
271+ "--annotate-diff takes between 0 and 2 ref args, not "
272+ f"{ len (args .annotate_diff )} { tuple (args .annotate_diff )} "
273+ )
112274 exit_code = 0
113275
114276 wrong_directory_msg = "Must run this script from the repo root"
115277 assert Path ("Doc" ).exists () and Path ("Doc" ).is_dir (), wrong_directory_msg
116278
117- with Path ("Doc/sphinx-warnings.txt" ).open () as f :
279+ with Path ("Doc/sphinx-warnings.txt" ).open (encoding = "UTF-8" ) as f :
118280 warnings = f .read ().splitlines ()
119281
120282 cwd = str (Path .cwd ()) + os .path .sep
@@ -124,15 +286,15 @@ def main() -> int:
124286 if "Doc/" in warning
125287 }
126288
127- with Path ("Doc/tools/.nitignore" ).open () as clean_files :
289+ with Path ("Doc/tools/.nitignore" ).open (encoding = "UTF-8" ) as clean_files :
128290 files_with_expected_nits = {
129291 filename .strip ()
130292 for filename in clean_files
131293 if filename .strip () and not filename .startswith ("#" )
132294 }
133295
134- if args .check_and_annotate :
135- check_and_annotate (warnings , args .check_and_annotate )
296+ if args .annotate_diff is not None :
297+ annotate_diff (warnings , * args .annotate_diff )
136298
137299 if args .fail_if_regression :
138300 exit_code += fail_if_regression (
0 commit comments