diff --git a/src/sentry/grouping/api.py b/src/sentry/grouping/api.py index 061762b5a7e9d3..2cc8e1f5fd2c8e 100644 --- a/src/sentry/grouping/api.py +++ b/src/sentry/grouping/api.py @@ -37,6 +37,7 @@ HashedChecksumVariant, SaltedComponentVariant, ) +from sentry.issues.auto_source_code_config.constants import DERIVED_ENHANCEMENTS_OPTION_KEY from sentry.models.grouphash import GroupHash if TYPE_CHECKING: @@ -87,6 +88,7 @@ def get_config_dict(self, project: Project) -> GroupingConfig: } def _get_enhancements(self, project: Project) -> str: + derived_enhancements = project.get_option(DERIVED_ENHANCEMENTS_OPTION_KEY) project_enhancements = project.get_option("sentry:grouping_enhancements") config_id = self._get_config_id(project) @@ -100,15 +102,27 @@ def _get_enhancements(self, project: Project) -> str: cache_prefix = self.cache_prefix cache_prefix += f"{LATEST_VERSION}:" cache_key = ( - cache_prefix + md5_text(f"{enhancements_base}|{project_enhancements}").hexdigest() + cache_prefix + + md5_text( + f"{enhancements_base}|{derived_enhancements}|{project_enhancements}" + ).hexdigest() ) enhancements = cache.get(cache_key) if enhancements is not None: return enhancements try: + # Automatic enhancements are always applied first, so they can be overridden by + # project-specific enhancements. + enhancements_string = project_enhancements or "" + if derived_enhancements: + enhancements_string = ( + f"{derived_enhancements}\n{enhancements_string}" + if enhancements_string + else derived_enhancements + ) enhancements = Enhancements.from_config_string( - project_enhancements, bases=[enhancements_base] if enhancements_base else [] + enhancements_string, bases=[enhancements_base] if enhancements_base else [] ).base64_string except InvalidEnhancerConfig: enhancements = get_default_enhancements() @@ -441,7 +455,7 @@ def get_grouping_variants_for_event( def get_contributing_variant_and_component( - variants: dict[str, BaseVariant] + variants: dict[str, BaseVariant], ) -> tuple[BaseVariant, ContributingComponent | None]: if len(variants) == 1: contributing_variant = list(variants.values())[0] diff --git a/src/sentry/issues/auto_source_code_config/code_mapping.py b/src/sentry/issues/auto_source_code_config/code_mapping.py index 564e0c73bc07a2..450c0a7e641d7b 100644 --- a/src/sentry/issues/auto_source_code_config/code_mapping.py +++ b/src/sentry/issues/auto_source_code_config/code_mapping.py @@ -540,8 +540,8 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]: elif source_path.endswith(stack_path): # "Packaged" logic source_prefix = source_path.rpartition(stack_path)[0] return ( - f"{stack_root}{frame_filename.stack_root}/", - f"{source_prefix}{frame_filename.stack_root}/", + f"{stack_root}{frame_filename.stack_root}/".replace("//", "/"), + f"{source_prefix}{frame_filename.stack_root}/".replace("//", "/"), ) elif stack_path.endswith(source_path): stack_prefix = stack_path.rpartition(source_path)[0] @@ -593,9 +593,17 @@ def get_path_from_module(module: str, abs_path: str) -> tuple[str, str]: if "." not in module: raise DoesNotFollowJavaPackageNamingConvention - # If module has a dot, take everything before the last dot - # com.example.foo.Bar$InnerClass -> com/example/foo - stack_root = module.rsplit(".", 1)[0].replace(".", "/") - file_path = f"{stack_root}/{abs_path}" + parts = module.split(".") + + if len(parts) > 2: + # com.example.foo.bar.Baz$InnerClass, Baz.kt -> + # stack_root: com/example/ + # file_path: com/example/foo/bar/Baz.kt + stack_root = "/".join(parts[:2]) + file_path = "/".join(parts[:-1]) + "/" + abs_path + else: + # a.Bar, Bar.kt -> stack_root: a/, file_path: a/Bar.kt + stack_root = parts[0] + "/" + file_path = f"{stack_root}{abs_path}" return stack_root, file_path diff --git a/src/sentry/issues/auto_source_code_config/constants.py b/src/sentry/issues/auto_source_code_config/constants.py index 5c5dcb8182c9b2..594fe82c2c55a9 100644 --- a/src/sentry/issues/auto_source_code_config/constants.py +++ b/src/sentry/issues/auto_source_code_config/constants.py @@ -4,6 +4,7 @@ from typing import Any METRIC_PREFIX = "auto_source_code_config" +DERIVED_ENHANCEMENTS_OPTION_KEY = "sentry:derived_grouping_enhancements" SUPPORTED_INTEGRATIONS = ["github"] # Any new languages should also require updating the stacktraceLink.tsx diff --git a/src/sentry/issues/auto_source_code_config/in_app_stack_trace_rules.py b/src/sentry/issues/auto_source_code_config/in_app_stack_trace_rules.py new file mode 100644 index 00000000000000..3bda0a52476a3e --- /dev/null +++ b/src/sentry/issues/auto_source_code_config/in_app_stack_trace_rules.py @@ -0,0 +1,64 @@ +import logging +from collections.abc import Sequence + +from sentry.issues.auto_source_code_config.code_mapping import CodeMapping +from sentry.issues.auto_source_code_config.utils import PlatformConfig +from sentry.models.project import Project +from sentry.utils import metrics + +from .constants import DERIVED_ENHANCEMENTS_OPTION_KEY, METRIC_PREFIX + +logger = logging.getLogger(__name__) + + +def save_in_app_stack_trace_rules( + project: Project, code_mappings: Sequence[CodeMapping], platform_config: PlatformConfig +) -> list[str]: + """Save in app stack trace rules for a given project""" + rules_from_code_mappings = set() + for code_mapping in code_mappings: + try: + rules_from_code_mappings.add(generate_rule_for_code_mapping(code_mapping)) + except ValueError: + pass + + current_enhancements = project.get_option(DERIVED_ENHANCEMENTS_OPTION_KEY) + current_rules = set(current_enhancements.split("\n")) if current_enhancements else set() + + united_rules = rules_from_code_mappings.union(current_rules) + if not platform_config.is_dry_run_platform() and united_rules != current_rules: + project.update_option(DERIVED_ENHANCEMENTS_OPTION_KEY, "\n".join(sorted(united_rules))) + + new_rules_added = united_rules - current_rules + metrics.incr( + key=f"{METRIC_PREFIX}.in_app_stack_trace_rules.created", + amount=len(new_rules_added), + tags={ + "platform": platform_config.platform, + "dry_run": platform_config.is_dry_run_platform(), + }, + sample_rate=1.0, + ) + return list(new_rules_added) + + +# XXX: This is very Java specific. If we want to support other languages, we need to +# come up with a better way to generate the rule. +def generate_rule_for_code_mapping(code_mapping: CodeMapping) -> str: + """Generate an in-app rule for a given code mapping""" + stacktrace_root = code_mapping.stacktrace_root + if stacktrace_root == "": + raise ValueError("Stacktrace root is empty") + + parts = stacktrace_root.rstrip("/").split("/", 2) + # We only want the first two parts + module = ".".join(parts[:2]) + + if module == "": + raise ValueError("Module is empty") + + # a/ -> a.** + # x/y/ -> x.y.** + # com/example/foo/bar/ -> com.example.** + # uk/co/example/foo/bar/ -> uk.co.** + return f"stack.module:{module}.** +app" diff --git a/src/sentry/issues/auto_source_code_config/task.py b/src/sentry/issues/auto_source_code_config/task.py index f8c842d982c383..dbb3c5851c8926 100644 --- a/src/sentry/issues/auto_source_code_config/task.py +++ b/src/sentry/issues/auto_source_code_config/task.py @@ -25,6 +25,7 @@ from sentry.utils.locking import UnableToAcquireLock from .constants import METRIC_PREFIX +from .in_app_stack_trace_rules import save_in_app_stack_trace_rules from .integration_utils import ( InstallationCannotGetTreesError, InstallationNotFoundError, @@ -192,8 +193,9 @@ def create_configurations( in_app_stack_trace_rules: list[str] = [] if platform_config.creates_in_app_stack_trace_rules(): - # XXX: This will be changed on the next PR - pass + in_app_stack_trace_rules = save_in_app_stack_trace_rules( + project, code_mappings, platform_config + ) # We return this to allow tests running in dry-run mode to assert # what would have been created. diff --git a/src/sentry/models/options/project_option.py b/src/sentry/models/options/project_option.py index d4e855711daafe..ea570a63d029f0 100644 --- a/src/sentry/models/options/project_option.py +++ b/src/sentry/models/options/project_option.py @@ -53,6 +53,7 @@ "sentry:grouping_enhancements_base", "sentry:secondary_grouping_config", "sentry:secondary_grouping_expiry", + "sentry:derived_grouping_enhancements", "sentry:similarity_backfill_completed", "sentry:fingerprinting_rules", "sentry:relay_pii_config", diff --git a/src/sentry/projectoptions/defaults.py b/src/sentry/projectoptions/defaults.py index caebaea21323dd..65adfea73b4d33 100644 --- a/src/sentry/projectoptions/defaults.py +++ b/src/sentry/projectoptions/defaults.py @@ -30,6 +30,7 @@ ) register(key="sentry:grouping_enhancements", default="") +register(key="sentry:derived_grouping_enhancements", default="") # server side fingerprinting defaults. register(key="sentry:fingerprinting_rules", default="") diff --git a/tests/sentry/issues/auto_source_code_config/test_process_event.py b/tests/sentry/issues/auto_source_code_config/test_process_event.py index 2d816cb9edf446..630e414e9f3144 100644 --- a/tests/sentry/issues/auto_source_code_config/test_process_event.py +++ b/tests/sentry/issues/auto_source_code_config/test_process_event.py @@ -5,7 +5,10 @@ from sentry.eventstore.models import GroupEvent from sentry.integrations.models.organization_integration import OrganizationIntegration from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig -from sentry.issues.auto_source_code_config.constants import METRIC_PREFIX +from sentry.issues.auto_source_code_config.constants import ( + DERIVED_ENHANCEMENTS_OPTION_KEY, + METRIC_PREFIX, +) from sentry.issues.auto_source_code_config.integration_utils import InstallationNotFoundError from sentry.issues.auto_source_code_config.task import DeriveCodeMappingsErrorReason, process_event from sentry.issues.auto_source_code_config.utils import PlatformConfig @@ -84,7 +87,7 @@ def _process_and_assert_configuration_changes( platform: str, expected_new_code_mappings: Sequence[ExpectedCodeMapping] | None = None, expected_num_code_mappings: int = 1, - expected_in_app_stack_trace_rules: list[str] | None = None, + expected_new_in_app_stack_trace_rules: list[str] | None = None, ) -> GroupEvent: platform_config = PlatformConfig(platform) dry_run = platform_config.is_dry_run_platform() @@ -98,6 +101,7 @@ def _process_and_assert_configuration_changes( ), patch("sentry.utils.metrics.incr") as mock_incr, ): + starting_enhancements = self.project.get_option(DERIVED_ENHANCEMENTS_OPTION_KEY) starting_repositories_count = Repository.objects.all().count() starting_code_mappings_count = RepositoryProjectPathConfig.objects.all().count() event = self.create_event(frames, platform) @@ -106,11 +110,13 @@ def _process_and_assert_configuration_changes( ) code_mappings = RepositoryProjectPathConfig.objects.all() + current_enhancements = self.project.get_option(DERIVED_ENHANCEMENTS_OPTION_KEY) if dry_run: # If dry run, no configurations should have been created assert starting_code_mappings_count == code_mappings.count() assert starting_repositories_count == Repository.objects.all().count() + assert current_enhancements == starting_enhancements if expected_new_code_mappings: assert len(dry_run_code_mappings) == len(expected_new_code_mappings) @@ -125,8 +131,21 @@ def _process_and_assert_configuration_changes( mock_incr.assert_any_call( key=f"{METRIC_PREFIX}.code_mapping.created", tags=tags, sample_rate=1.0 ) + + if expected_new_in_app_stack_trace_rules: + assert sorted(in_app_stack_trace_rules) == sorted( + expected_new_in_app_stack_trace_rules + ) + assert ( + "\n".join(expected_new_in_app_stack_trace_rules) not in current_enhancements + ) + mock_incr.assert_any_call( + key=f"{METRIC_PREFIX}.in_app_stack_trace_rules.created", + amount=len(expected_new_in_app_stack_trace_rules), + tags=tags, + sample_rate=1.0, + ) else: - assert code_mappings.count() == expected_num_code_mappings if expected_new_code_mappings: assert code_mappings.count() == starting_code_mappings_count + len( expected_new_code_mappings @@ -149,9 +168,19 @@ def _process_and_assert_configuration_changes( key=f"{METRIC_PREFIX}.code_mapping.created", tags=tags, sample_rate=1.0 ) - if expected_in_app_stack_trace_rules is not None: - # XXX: Grab it from the option - assert expected_in_app_stack_trace_rules == in_app_stack_trace_rules + if expected_new_in_app_stack_trace_rules: + expected_enhancements = "\n".join(expected_new_in_app_stack_trace_rules) + assert current_enhancements == ( + f"{starting_enhancements}\n{expected_enhancements}" + if starting_enhancements + else expected_enhancements + ) + mock_incr.assert_any_call( + key=f"{METRIC_PREFIX}.in_app_stack_trace_rules.created", + amount=len(expected_new_in_app_stack_trace_rules), + tags=tags, + sample_rate=1.0, + ) # Returning these to inspect the results return event @@ -581,13 +610,32 @@ def test_auto_source_code_config_no_normalization(self) -> None: class TestJavaDeriveCodeMappings(LanguageSpecificDeriveCodeMappings): platform = "java" - def test_very_short_module_name(self) -> None: + def test_short_packages(self) -> None: # No code mapping will be stored, however, we get what would have been created self._process_and_assert_configuration_changes( - repo_trees={REPO1: ["src/a/Foo.java"]}, - frames=[self.frame(module="a.Foo", abs_path="Foo.java")], + repo_trees={ + REPO1: [ + "src/Foo.java", + "src/a/Bar.java", + "src/x/y/Baz.java", + ] + }, + frames=[ + # This will not create a code mapping because + # the stacktrace root would be empty + self.frame(module="Foo", abs_path="Foo.java", in_app=False), + self.frame(module="a.Bar", abs_path="Bar.java", in_app=False), + self.frame(module="x.y.Baz", abs_path="Baz.java", in_app=False), + ], platform=self.platform, - expected_new_code_mappings=[self.code_mapping("a/", "src/a/")], + expected_new_code_mappings=[ + self.code_mapping("a/", "src/a/"), + self.code_mapping("x/y/", "src/x/y/"), + ], + expected_new_in_app_stack_trace_rules=[ + "stack.module:a.** +app", + "stack.module:x.y.** +app", + ], expected_num_code_mappings=0, ) @@ -595,10 +643,161 @@ def test_handles_dollar_sign_in_module(self) -> None: # No code mapping will be stored, however, we get what would have been created self._process_and_assert_configuration_changes( repo_trees={REPO1: ["src/com/example/foo/Bar.kt"]}, - frames=[self.frame(module="com.example.foo.Bar$InnerClass", abs_path="Bar.kt")], + frames=[ + self.frame(module="com.example.foo.Bar$InnerClass", abs_path="Bar.kt", in_app=False) + ], + platform=self.platform, + expected_new_code_mappings=[self.code_mapping("com/example/", "src/com/example/")], + expected_new_in_app_stack_trace_rules=["stack.module:com.example.** +app"], + expected_num_code_mappings=0, + ) + + def test_multiple_configuration_changes(self) -> None: + # Test case with multiple frames from different packages + self._process_and_assert_configuration_changes( + repo_trees={ + REPO1: [ + "src/com/example/foo/bar/Baz.kt", + "src/com/example/utils/Helper.kt", + "src/org/other/service/Service.kt", + ] + }, + frames=[ + self.frame(module="com.example.foo.bar.Baz", abs_path="Baz.kt", in_app=False), + self.frame(module="com.example.utils.Helper", abs_path="Helper.kt", in_app=False), + self.frame(module="org.other.service.Service", abs_path="Service.kt", in_app=False), + ], platform=self.platform, expected_new_code_mappings=[ - self.code_mapping("com/example/foo/", "src/com/example/foo/") + self.code_mapping(stack_root="com/example/", source_root="src/com/example/"), + self.code_mapping(stack_root="org/other/", source_root="src/org/other/"), ], - expected_num_code_mappings=0, + expected_new_in_app_stack_trace_rules=[ + "stack.module:com.example.** +app", + "stack.module:org.other.** +app", + ], + expected_num_code_mappings=3, ) + + def test_multiple_tlds(self) -> None: + # XXX: Multiple TLDs cause over in-app categorization + # Think of uk.co company using packages from another uk.co company + # They can still use their project rules to exclude the other uk.co packages + frames = [ + self.frame(module="uk.co.example.foo.Bar", abs_path="Bar.kt", in_app=False), + self.frame(module="uk.co.not-example.baz.qux", abs_path="qux.kt", in_app=False), + ] + + # Let's pretend we're not running as dry-run + with patch(f"{CODE_ROOT}.utils.PlatformConfig.is_dry_run_platform", return_value=False): + event = self._process_and_assert_configuration_changes( + repo_trees={REPO1: ["src/uk/co/example/foo/Bar.kt"]}, + frames=frames, + platform=self.platform, + expected_new_code_mappings=[ + # XXX: Notice that we loose "example" + self.code_mapping(stack_root="uk/co/", source_root="src/uk/co/"), + ], + expected_new_in_app_stack_trace_rules=["stack.module:uk.co.** +app"], + ) + assert event.data["metadata"]["in_app_frame_mix"] == "system-only" + + event = self._process_and_assert_configuration_changes( + repo_trees={REPO1: ["src/uk/co/example/foo/Bar.kt"]}, + frames=frames, + platform=self.platform, + ) + # It's in-app-only because even the not-example package is in-app + assert event.data["metadata"]["in_app_frame_mix"] == "in-app-only" + + # The developer can undo our rule + self.project.update_option( + "sentry:grouping_enhancements", + "stack.module:uk.co.not-example.** -app", + ) + event = self._process_and_assert_configuration_changes( + repo_trees={REPO1: ["src/uk/co/example/foo/Bar.kt"]}, + frames=frames, + platform=self.platform, + ) + assert event.data["metadata"]["in_app_frame_mix"] == "mixed" + event_frames = event.data["stacktrace"]["frames"] + assert event_frames[0]["module"] == "uk.co.example.foo.Bar" + assert event_frames[0]["in_app"] is True + assert event_frames[1]["module"] == "uk.co.not-example.baz.qux" + assert event_frames[1]["in_app"] is False + + def test_do_not_clobber_rules(self) -> None: + # Let's pretend we're not running as dry-run + with patch(f"{CODE_ROOT}.utils.PlatformConfig.is_dry_run_platform", return_value=False): + self._process_and_assert_configuration_changes( + repo_trees={REPO1: ["src/a/Bar.java", "src/x/y/Baz.java"]}, + frames=[self.frame(module="a.Bar", abs_path="Bar.java", in_app=False)], + platform=self.platform, + expected_new_code_mappings=[self.code_mapping("a/", "src/a/")], + expected_new_in_app_stack_trace_rules=["stack.module:a.** +app"], + ) + self._process_and_assert_configuration_changes( + repo_trees={REPO1: ["src/a/Bar.java", "src/x/y/Baz.java"]}, + frames=[self.frame(module="x.y.Baz", abs_path="Baz.java", in_app=False)], + platform=self.platform, + expected_new_code_mappings=[self.code_mapping("x/y/", "src/x/y/")], + # Both rules should exist + expected_new_in_app_stack_trace_rules=["stack.module:x.y.** +app"], + ) + + def test_run_without_dry_run(self) -> None: + repo_trees = {REPO1: ["src/com/example/foo/Bar.kt"]} + frames = [ + self.frame(module="com.example.foo.Bar", abs_path="Bar.kt", in_app=False), + self.frame(module="com.other.foo.Bar", abs_path="Bar.kt", in_app=False), + ] + rule = "stack.module:com.example.**" + expected_in_app_rule = f"{rule} +app" + + # Let's pretend we're not running as dry-run + with patch(f"{CODE_ROOT}.utils.PlatformConfig.is_dry_run_platform", return_value=False): + event = self._process_and_assert_configuration_changes( + repo_trees=repo_trees, + frames=frames, + platform=self.platform, + expected_new_code_mappings=[ + self.code_mapping(stack_root="com/example/", source_root="src/com/example/"), + ], + expected_num_code_mappings=1, + expected_new_in_app_stack_trace_rules=[expected_in_app_rule], + ) + # The effects of the configuration changes will be noticed on the second event processing + assert event.data["metadata"]["in_app_frame_mix"] == "system-only" + assert len(event.data["hashes"]) == 1 # Only system hash + system_only_hash = event.data["hashes"][0] + first_enhancements_base64_string = event.data["grouping_config"]["enhancements"] + group_id = event.group_id + + # Running a second time will not create any new configurations, however, + # the rules from the previous run will be applied to the event's stack trace + event = self._process_and_assert_configuration_changes( + repo_trees=repo_trees, frames=frames, platform=self.platform + ) + assert event.group_id == group_id # The new rules did not cause new groups + assert event.data["metadata"]["in_app_frame_mix"] == "mixed" + second_enhancements_hash = event.data["grouping_config"]["enhancements"] + # The enhancements now contain the automatic rule (+app) + assert second_enhancements_hash != first_enhancements_base64_string + assert len(event.data["hashes"]) == 2 + event.data["hashes"].remove(system_only_hash) + in_app_hash = event.data["hashes"][0] + assert in_app_hash != system_only_hash + + # The developer will add a rule to invalidate our automatinc rule (-app) + self.project.update_option("sentry:grouping_enhancements", f"{rule} -app") + event = self._process_and_assert_configuration_changes( + repo_trees=repo_trees, frames=frames, platform=self.platform + ) + # Back to system-only + assert event.data["metadata"]["in_app_frame_mix"] == "system-only" + assert event.group_id == group_id # It still belongs to the same group + assert event.data["hashes"] == [system_only_hash] + # The enhancements now contain the automatic rule (+app) and the developer's rule (-app) + assert event.data["grouping_config"]["enhancements"] != first_enhancements_base64_string + assert event.data["grouping_config"]["enhancements"] != second_enhancements_hash