-
Notifications
You must be signed in to change notification settings - Fork 31.2k
Trigger corresponding pipeline tests if tests/utils/tiny_model_summary.json is modified
#27693
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
465f26c
5f6d1a1
f36cf78
8a68f60
077a690
511c81f
d7f4ec3
659f557
99e2f0d
90401cc
b140508
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,9 +51,11 @@ | |
|
|
||
| import argparse | ||
| import collections | ||
| import importlib.util | ||
| import json | ||
| import os | ||
| import re | ||
| import tempfile | ||
| from contextlib import contextmanager | ||
| from pathlib import Path | ||
| from typing import Dict, List, Optional, Tuple, Union | ||
|
|
@@ -254,6 +256,122 @@ def diff_contains_doc_examples(repo: Repo, branching_point: str, filename: str) | |
| return old_content_clean != new_content_clean | ||
|
|
||
|
|
||
def get_impacted_files_from_tiny_model_summary(diff_with_last_commit: bool = False) -> List[str]:
    """
    Return a list of python modeling files that are impacted by the changes of `tiny_model_summary.json` in between:

    - the current head and the main branch if `diff_with_last_commit=False` (default)
    - the current head and its parent commit otherwise.

    Returns:
        `List[str]`: The list of Python modeling files that are impacted by the changes of `tiny_model_summary.json`.
    """
    repo = Repo(PATH_TO_REPO)

    folder = Path(repo.working_dir)

    # Pick the "old" side of the comparison: either the merge base(s) with `main`, or the parent commit(s) of HEAD.
    if not diff_with_last_commit:
        print(f"main is at {repo.refs.main.commit}")
        print(f"Current head is at {repo.head.commit}")

        # merge_base may return several commits (e.g. criss-cross merges), hence the loop below.
        commits = repo.merge_base(repo.refs.main, repo.head)
        for commit in commits:
            print(f"Branching commit: {commit}")
    else:
        print(f"main is at {repo.head.commit}")
        commits = repo.head.commit.parents
        for commit in commits:
            print(f"Parent commit: {commit}")

    # If the summary file does not exist at the current head, there is nothing to compare against.
    if not os.path.isfile(folder / "tests/utils/tiny_model_summary.json"):
        return []

    files = set()
    for commit in commits:
        # Read the summary as it was at the old commit. `checkout_commit` presumably checks the repo out at
        # `commit` and restores the working tree on exit — TODO confirm against its definition in this file.
        with checkout_commit(repo, commit):
            with open(folder / "tests/utils/tiny_model_summary.json", "r", encoding="utf-8") as f:
                old_content = f.read()

        # Same path, but read outside the context manager, i.e. at the current head.
        with open(folder / "tests/utils/tiny_model_summary.json", "r", encoding="utf-8") as f:
            new_content = f.read()

        # get the content as json object
        old_content = json.loads(old_content)
        new_content = json.loads(new_content)

        old_keys = set(old_content.keys())
        new_keys = set(new_content.keys())

        # get the difference: keys present on only one side, ...
        keys_with_diff = old_keys.symmetric_difference(new_keys)
        common_keys = old_keys.intersection(new_keys)
        # ... plus common keys whose values changed. (if both have the same key, check its content)
        for key in common_keys:
            if old_content[key] != new_content[key]:
                keys_with_diff.add(key)

        # get the model classes. Only keys still present in the new content contribute classes; deleted
        # entries are ignored.
        impacted_model_classes = []
        for key in keys_with_diff:
            if key in new_keys:
                impacted_model_classes.extend(new_content[key]["model_classes"])

        # get the module where the model classes are defined. We want to use the main `__init__` file, but it requires
        # all the framework being installed, which is not ideal for a simple script like test fetcher.
        # So we create a temporary and modified main `__init__` and access its `_import_structure`.
        with open(folder / "src/transformers/__init__.py") as fp:
            lines = fp.readlines()
        new_lines = []
        # Get all the code related to `_import_structure`: keep everything from the literal
        # `_import_structure = {` line up to (excluding) the `# Direct imports for type-checking` marker.
        for line in lines:
            if line == "_import_structure = {\n":
                new_lines.append(line)
            elif line == "# Direct imports for type-checking\n":
                break
            elif len(new_lines) > 0:
                # bypass the framework check so we can get all the information even if frameworks are not available
                line = re.sub(r"is_.+_available\(\)", "True", line)
                line = line.replace("OptionalDependencyNotAvailable", "Exception")
                # Collapse `raise Exception()` calls produced by the substitution above.
                line = line.replace("Exception()", "Exception")
                new_lines.append(line)

        # create and load the temporary module, so we can read `_import_structure` without importing
        # `transformers` itself (which would require torch/tf/flax to be installed).
        with tempfile.TemporaryDirectory() as tmpdirname:
            with open(os.path.join(tmpdirname, "temp_init.py"), "w") as fp:
                fp.write("".join(new_lines))

            spec = importlib.util.spec_from_file_location("temp_init", os.path.join(tmpdirname, "temp_init.py"))
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            # Finally, get `_import_structure` that we need
            import_structure = module._import_structure

            # map model classes to their defined module (invert `module -> [names]` into `name -> module`)
            reversed_structure = {}
            for key, values in import_structure.items():
                for value in values:
                    reversed_structure[value] = key

            # Get the corresponding modeling file path. The framework prefix of the class name ("TF"/"Flax")
            # selects the `modeling_tf_*` / `modeling_flax_*` file; anything else maps to the PyTorch file.
            for model_class in impacted_model_classes:
                module = reversed_structure[model_class]
                framework = ""
                if model_class.startswith("TF"):
                    framework = "tf"
                elif model_class.startswith("Flax"):
                    framework = "flax"
                fn = (
                    f"modeling_{module.split('.')[-1]}.py"
                    if framework == ""
                    else f"modeling_{framework}_{module.split('.')[-1]}.py"
                )
                # Build the repo-relative path: turn the dotted module path into os.path.sep-separated
                # segments, then undo the accidental split of the ".py" extension.
                files.add(
                    f"src.transformers.{module}.{fn}".replace(".", os.path.sep).replace(f"{os.path.sep}py", ".py")
                )

    return sorted(files)
|
|
||
|
|
||
| def get_diff(repo: Repo, base_commit: str, commits: List[str]) -> List[str]: | ||
| """ | ||
| Get the diff between a base commit and one or several commits. | ||
|
|
@@ -949,18 +1067,16 @@ def infer_tests_to_run( | |
| if any(x in modified_files for x in ["setup.py", ".circleci/create_circleci_config.py"]): | ||
| test_files_to_run = ["tests", "examples"] | ||
| repo_utils_launch = True | ||
| # in order to trigger pipeline tests even if no code change at all | ||
| elif "tests/utils/tiny_model_summary.json" in modified_files: | ||
| test_files_to_run = ["tests"] | ||
| repo_utils_launch = any(f.split(os.path.sep)[0] == "utils" for f in modified_files) | ||
|
Comment on lines
-952
to
-955
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this is not working as expected — my bad, sorry |
||
| else: | ||
| # All modified tests need to be run. | ||
| test_files_to_run = [ | ||
| f for f in modified_files if f.startswith("tests") and f.split(os.path.sep)[-1].startswith("test") | ||
| ] | ||
| impacted_files = get_impacted_files_from_tiny_model_summary(diff_with_last_commit=diff_with_last_commit) | ||
|
|
||
| # Then we grab the corresponding test files. | ||
| test_map = create_module_to_test_map(reverse_map=reverse_map, filter_models=filter_models) | ||
| for f in modified_files: | ||
| for f in modified_files + impacted_files: | ||
| if f in test_map: | ||
| test_files_to_run.extend(test_map[f]) | ||
| test_files_to_run = sorted(set(test_files_to_run)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missed in the hotfix #27687