From 8d7f5bf9b6ccc4ae18325af12c2a5f5b7802d0d5 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 10 Apr 2024 09:05:28 +0200 Subject: [PATCH 01/16] Add visitor to replace project-filter by project-id-filter in query --- .../sentry_metrics/querying/data/parsing.py | 4 ++- .../querying/visitors/__init__.py | 2 ++ .../querying/visitors/query_condition.py | 29 +++++++++++++++++++ .../sentry_metrics/querying/data/test_api.py | 26 +++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/sentry/sentry_metrics/querying/data/parsing.py b/src/sentry/sentry_metrics/querying/data/parsing.py index b60282699527ef..dff586a9a8c3eb 100644 --- a/src/sentry/sentry_metrics/querying/data/parsing.py +++ b/src/sentry/sentry_metrics/querying/data/parsing.py @@ -11,6 +11,7 @@ from sentry.sentry_metrics.querying.visitors import ( EnvironmentsInjectionVisitor, LatestReleaseTransformationVisitor, + ProjectToProjectIDTransformationVisitor, QueryConditionsCompositeVisitor, QueryValidationV2Visitor, VisitableQueryExpression, @@ -80,7 +81,8 @@ def generate_queries( # We transform all `release:latest` filters into the actual latest releases. .add_visitor( QueryConditionsCompositeVisitor( - LatestReleaseTransformationVisitor(self._projects) + LatestReleaseTransformationVisitor(self._projects), + ProjectToProjectIDTransformationVisitor(self._projects), ) ).get() ) diff --git a/src/sentry/sentry_metrics/querying/visitors/__init__.py b/src/sentry/sentry_metrics/querying/visitors/__init__.py index b4af4db58fbeae..bc51e74b87e40a 100644 --- a/src/sentry/sentry_metrics/querying/visitors/__init__.py +++ b/src/sentry/sentry_metrics/querying/visitors/__init__.py @@ -2,6 +2,7 @@ from .query_condition import ( LatestReleaseTransformationVisitor, MappingTransformationVisitor, + ProjectToProjectIDTransformationVisitor, TagsTransformationVisitor, ) from .query_expression import ( @@ -30,4 +31,5 @@ "QueriedMetricsVisitor", "UsedGroupBysVisitor", "UnitsNormalizationVisitor", + "ProjectToProjectIDTransformationVisitor", ] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index 78683adce29e0f..5ce012c6ebca77 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -96,3 +96,32 @@ def _visit_condition(self, condition: Condition) -> QueryCondition: op=condition.op, rhs=condition.rhs, ) + + +class ProjectToProjectIDTransformationVisitor(QueryConditionVisitor[QueryCondition]): + """ + Visitor that transforms all project name conditions in the query to project IDs. Initial + use case of this is to enable the front-end to query projects directly instead of by id + """ + + def __init__(self, projects: Sequence[Project]): + self._projects = projects + + def _visit_condition(self, condition: Condition) -> QueryCondition: + if ( + isinstance(condition.lhs, Column) + and condition.lhs.name == "project" + and isinstance(condition.rhs, str) + ): + return Condition( + lhs=Column(name="project_id"), + op=condition.op, + rhs=self._extract_project_id(condition.rhs), + ) + + return condition + + def _extract_project_id(self, project_name: str) -> str: + for project in self._projects: + if project.slug == project_name: + return project.id diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 43f79be1c0fe07..4c7c2961b82c9c 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -110,6 +110,7 @@ def to_reference_unit( _, _, unit = unit_family_and_unit return unit.convert(value) + # why do we still pipe this through? seems redundant def run_query( self, mql_queries: Sequence[MQLQuery], @@ -1609,3 +1610,28 @@ def test_query_with_basic_formula_and_coefficient_operators(self): assert meta[0][1]["unit_family"] == expected_unit_family assert meta[0][1]["unit"] == expected_unit assert meta[0][1]["scaling_factor"] is None + + @with_feature("organizations:ddm-metrics-api-unit-normalization") + def test_condition_using_project_name_gets_visited_correctly(self) -> None: + mql = self.mql("sum", TransactionMRI.DURATION.value, "project:bar") + query = MQLQuery(mql) + + results = self.run_query( + mql_queries=[query], + start=self.now() - timedelta(minutes=30), + end=self.now() + timedelta(hours=1, minutes=30), + interval=3600, + organization=self.project.organization, + projects=[self.project], + environments=[], + referrer="metrics.data.api", + ) + data = results["data"] + assert len(data) == 1 + assert data[0][0]["by"] == {} + assert data[0][0]["series"] == [ + None, + self.to_reference_unit(12.0), + self.to_reference_unit(9.0), + ] + assert data[0][0]["totals"] == self.to_reference_unit(21.0) From 0d8b2eb1b85c5255ca0e454ae4e0195247ef90a1 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 10 Apr 2024 15:08:54 +0200 Subject: [PATCH 02/16] Add visitor to replace groupby:project by groupby:project_id --- .../sentry_metrics/querying/data/parsing.py | 5 +- .../querying/visitors/__init__.py | 2 + .../querying/visitors/query_condition.py | 4 +- .../querying/visitors/query_expression.py | 28 +++++++++++ .../sentry_metrics/querying/data/test_api.py | 49 +++++++++++++++++++ 5 files changed, 85 insertions(+), 3 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/parsing.py b/src/sentry/sentry_metrics/querying/data/parsing.py index dff586a9a8c3eb..1545df33fffa4a 100644 --- a/src/sentry/sentry_metrics/querying/data/parsing.py +++ b/src/sentry/sentry_metrics/querying/data/parsing.py @@ -10,6 +10,7 @@ from sentry.sentry_metrics.querying.types import QueryExpression, QueryOrder from sentry.sentry_metrics.querying.visitors import ( EnvironmentsInjectionVisitor, + GroupByProjectVisitor, LatestReleaseTransformationVisitor, ProjectToProjectIDTransformationVisitor, QueryConditionsCompositeVisitor, @@ -84,6 +85,8 @@ def generate_queries( LatestReleaseTransformationVisitor(self._projects), ProjectToProjectIDTransformationVisitor(self._projects), ) - ).get() + ) + .add_visitor(GroupByProjectVisitor(self._projects)) + .get() ) yield query_expression, compiled_mql_query.order, compiled_mql_query.limit diff --git a/src/sentry/sentry_metrics/querying/visitors/__init__.py b/src/sentry/sentry_metrics/querying/visitors/__init__.py index bc51e74b87e40a..64fb3940cf4c5b 100644 --- a/src/sentry/sentry_metrics/querying/visitors/__init__.py +++ b/src/sentry/sentry_metrics/querying/visitors/__init__.py @@ -7,6 +7,7 @@ ) from .query_expression import ( EnvironmentsInjectionVisitor, + GroupByProjectVisitor, QueriedMetricsVisitor, QueryConditionsCompositeVisitor, QueryValidationV2Visitor, @@ -32,4 +33,5 @@ "UsedGroupBysVisitor", "UnitsNormalizationVisitor", "ProjectToProjectIDTransformationVisitor", + "GroupByProjectVisitor", ] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index 5ce012c6ebca77..41d8c751cb2427 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -121,7 +121,7 @@ def _visit_condition(self, condition: Condition) -> QueryCondition: return condition - def _extract_project_id(self, project_name: str) -> str: + def _extract_project_id(self, project_slug: str) -> str: for project in self._projects: - if project.slug == project_name: + if project.slug == project_slug: return project.id diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 6a7dc79b652801..788a37d1f7ddbf 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -437,3 +437,31 @@ def _visit_float(self, float_number: float) -> QueryExpression: def _is_numeric_scalar(self, value: QueryExpression) -> bool: return isinstance(value, int) or isinstance(value, float) + + +class GroupByProjectVisitor(QueryExpressionVisitor[QueryExpression]): + def __init__(self, projects): + self._projects = projects + + def _replace_project_by_project_id(self, column: Column): + if column.name == "project": + return Column("project_id") + return column + + def _visit_groupby(self, groupby: list[Column | AliasedExpression] | None = None): + for idx, single_groupby in enumerate(groupby): + if isinstance(single_groupby, Column): + groupby[idx] = self._replace_project_by_project_id(single_groupby) + + if isinstance(single_groupby, AliasedExpression): + groupby[idx] = self._replace_project_by_project_id(single_groupby.exp) + + def _visit_formula(self, formula: Formula) -> QueryExpression: + if formula.groupby is not None: + self._visit_groupby(formula.groupby) + return formula + + def _visit_timeseries(self, timeseries: Timeseries) -> QueryExpression: + if timeseries.groupby is not None: + self._visit_groupby(timeseries.groupby) + return timeseries diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 4c7c2961b82c9c..5a68a4fbb2d091 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -1635,3 +1635,52 @@ def test_condition_using_project_name_gets_visited_correctly(self) -> None: self.to_reference_unit(9.0), ] assert data[0][0]["totals"] == self.to_reference_unit(21.0) + + @with_feature("organizations:ddm-metrics-api-unit-normalization") + def test_groupby_using_project_name_gets_visited_correctly(self) -> None: + self.new_project = self.create_project(name="Bar Again") + for value, transaction, platform, env, time in ( + (1, "/hello", "android", "prod", self.now()), + (3, "/hello", "android", "prod", self.now()), + (5, "/hello", "android", "prod", self.now()), + (2, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (5, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + ): + self.store_metric( + self.new_project.organization.id, + self.new_project.id, + "distribution", + TransactionMRI.DURATION.value, + { + "transaction": transaction, + "platform": platform, + "environment": env, + }, + self.ts(time), + value, + UseCaseID.TRANSACTIONS, + ) + + mql = self.mql("avg", TransactionMRI.DURATION.value, group_by="project") + query = MQLQuery(mql) + + results = self.run_query( + mql_queries=[query], + start=self.now() - timedelta(minutes=30), + end=self.now() + timedelta(hours=1, minutes=30), + interval=3600, + organization=self.project.organization, + projects=[self.project, self.new_project], + environments=[], + referrer="metrics.data.api", + ) + data = results["data"] + assert len(data) == 1 + assert data[0][0]["by"] == {"project_id": self.new_project.id} + assert data[0][0]["series"] == [ + None, + self.to_reference_unit(3.0), + self.to_reference_unit(5.0), + ] + assert data[0][0]["totals"] == self.to_reference_unit(4.0) From f98aa999fccea3013aa5d99e296aace2dc70cfe5 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 11 Apr 2024 17:09:03 +0200 Subject: [PATCH 03/16] wip: generalize --- .../sentry_metrics/querying/data/api.py | 21 ++- .../sentry_metrics/querying/data/parsing.py | 9 +- .../querying/data/preparation/base.py | 2 + .../querying/visitors/modulator.py | 19 +++ .../querying/visitors/query_modulator.py | 128 ++++++++++++++++++ 5 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 src/sentry/sentry_metrics/querying/visitors/modulator.py create mode 100644 src/sentry/sentry_metrics/querying/visitors/query_modulator.py diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 5cc57e34de4448..861df0b1efdc9c 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -19,6 +19,9 @@ ) from sentry.sentry_metrics.querying.data.query import MQLQueriesResult, MQLQuery from sentry.sentry_metrics.querying.types import QueryType +from sentry.sentry_metrics.querying.visitors.query_modulator import QueryModulationStep + +modulators = [] def run_queries( @@ -51,22 +54,28 @@ def run_queries( ) intermediate_queries = [] + compiled_mql_queries = [] # We parse the query plan and obtain a series of queries. parser = QueryParser(projects=projects, environments=environments, mql_queries=mql_queries) - for query_expression, query_order, query_limit in parser.generate_queries(): + + for query_expression, compiled_mql_query in parser.generate_queries(): intermediate_queries.append( IntermediateQuery( metrics_query=base_query.set_query(query_expression), - order=query_order, - limit=query_limit, + order=compiled_mql_query.order, + limit=compiled_mql_query.limit, ) ) + compiled_mql_queries.append(compiled_mql_query) preparation_steps = [] if features.has( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None ): preparation_steps.append(UnitsNormalizationStep()) + preparation_steps.append(QueryModulationStep(modulators, projects)) + + # replace names and values in query # We run a series of preparation steps which operate on the entire list of queries. intermediate_queries = run_preparation_steps(intermediate_queries, *preparation_steps) @@ -78,5 +87,11 @@ def run_queries( results = executor.execute() + # this is where the query results are adapted to transform into the format that the query expected + for result in results: + for element in result.series: + # we don't actually see in the series_query whether there was a project-slug-filter in the original query -> we have to share some state. + pass + # We wrap the result in a class that exposes some utils methods to operate on results. return MQLQueriesResult(cast(list[QueryResult], results)) diff --git a/src/sentry/sentry_metrics/querying/data/parsing.py b/src/sentry/sentry_metrics/querying/data/parsing.py index 1545df33fffa4a..d590edab3dffcf 100644 --- a/src/sentry/sentry_metrics/querying/data/parsing.py +++ b/src/sentry/sentry_metrics/querying/data/parsing.py @@ -10,9 +10,7 @@ from sentry.sentry_metrics.querying.types import QueryExpression, QueryOrder from sentry.sentry_metrics.querying.visitors import ( EnvironmentsInjectionVisitor, - GroupByProjectVisitor, LatestReleaseTransformationVisitor, - ProjectToProjectIDTransformationVisitor, QueryConditionsCompositeVisitor, QueryValidationV2Visitor, VisitableQueryExpression, @@ -83,10 +81,7 @@ def generate_queries( .add_visitor( QueryConditionsCompositeVisitor( LatestReleaseTransformationVisitor(self._projects), - ProjectToProjectIDTransformationVisitor(self._projects), ) - ) - .add_visitor(GroupByProjectVisitor(self._projects)) - .get() + ).get() ) - yield query_expression, compiled_mql_query.order, compiled_mql_query.limit + yield query_expression, compiled_mql_query diff --git a/src/sentry/sentry_metrics/querying/data/preparation/base.py b/src/sentry/sentry_metrics/querying/data/preparation/base.py index 51874593102460..643227d20055ec 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/base.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/base.py @@ -5,6 +5,7 @@ from sentry.sentry_metrics.querying.types import QueryOrder from sentry.sentry_metrics.querying.units import MeasurementUnit, UnitFamily +from sentry.sentry_metrics.querying.visitors.query_modulator import Modulator @dataclass(frozen=True) @@ -27,6 +28,7 @@ class IntermediateQuery: unit_family: UnitFamily | None = None unit: MeasurementUnit | None = None scaling_factor: float | None = None + modulators: list[Modulator] | None = None class PreparationStep(ABC): diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py new file mode 100644 index 00000000000000..de0bac79b5cc2c --- /dev/null +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -0,0 +1,19 @@ +import abc +from collections.abc import Sequence + +from snuba_sdk import Formula + +from sentry.models.project import Project +from sentry.sentry_metrics.querying.visitors.query_modulator import ModulationMetadata + + +class Modulator(abc.ABC): + def __init__(self, from_key: str, to_key: str, from_value: str): + self.from_key = from_key + self.to_key = to_key + self.from_value = from_value + + def modulate( + self, formula: Formula, projects: Sequence[Project] + ) -> tuple[ModulationMetadata, Formula]: + return formula diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py new file mode 100644 index 00000000000000..eed30753cf856d --- /dev/null +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -0,0 +1,128 @@ +from collections.abc import Sequence +from dataclasses import replace + +from snuba_sdk import AliasedExpression, BooleanCondition, Column, Condition, Formula +from snuba_sdk.expressions import ScalarType + +from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery, PreparationStep +from sentry.sentry_metrics.querying.visitors import QueryConditionVisitor, QueryExpressionVisitor +from sentry.sentry_metrics.querying.visitors.base import TVisited +from sentry.sentry_metrics.querying.visitors.modulator import Modulator + + +class ModulationMetadata: + def __init__(self, from_key: str, to_key: str, from_value: str, to_value: str): + self.from_key = from_key + self.to_key = to_key + self.from_value = from_value + self.to_value = to_value + + +def find_modulator(modulators, from_key: str) -> Modulator: + for modulator in modulators: + if modulator.from_key == from_key: + return modulator + + +class ModulatorConditionVisitor(QueryConditionVisitor): + + # A AND B AND C + # BooleanCondition(AND, [BooleanCondition(AND, [Condition(A), Condition(B)], Condition(C)])] + + def _visit_condition(self, condition: Condition) -> TVisited: + lhs = condition.lhs + rhs = condition.rhs + + if isinstance(lhs, Column): + modulator = find_modulator(lhs.name) + lhs = Column(modulator.to_key) + + if isinstance(rhs, ScalarType): + new_value = modulator.get_new_value(self._projects, rhs) + rhs = new_value + + return Condition(lhs=lhs, op=condition.op, rhs=rhs) + + return condition + + def _visit_boolean_condition(self, boolean_condition: BooleanCondition) -> TVisited: + conditions = [] + for condition in boolean_condition.conditions: + conditions.append(self.visit(condition)) + + return BooleanCondition(op=boolean_condition.op, conditions=conditions) + + +class ModulatorVisitor(QueryExpressionVisitor): + """ + Visitor that recursively transforms the QueryExpression components to modulate certain attributes to be queried + by API that need to be translated for Snuba to be able to query the data. + """ + + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self._projects = projects + self.modulators = modulators + + def _visit_formula(self, formula: Formula): + formula = super()._visit_formula(formula) + + filters = ModulatorConditionVisitor().visit_group(formula.filters) + formula = formula.set_filters(filters) + + groupby = formula.groupby + if groupby: + groups = [] + for group in groupby: + new_group = group + if isinstance(group, Column): + modulator = find_modulator(group.name) + new_group = Column(name=modulator.to_key) + elif isinstance(group, AliasedExpression): + modulator = find_modulator(group.exp.name) + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + + groups.append(new_group) + + formula = formula.set_groupby(groupby) + return formula + + +class QueryModulationStep(PreparationStep): + def __init__(self, modulators: list[Modulator]): + self.modulators = modulators + + def _get_modulated_intermediate_query( + self, intermediate_query: IntermediateQuery + ) -> IntermediateQuery: + modulators, modulated_query = ModulatorVisitor(self.modulators).visit( + intermediate_query.metrics_query.query + ) + if modulators: + return replace( + intermediate_query, + metrics_query=intermediate_query.metrics_query.set_query(modulated_query), + modulators=modulators, + ) + + return modulated_query + + def run(self, intermediate_queries: list[IntermediateQuery]) -> list[IntermediateQuery]: + modulated_intermediate_queries = [] + + for intermediate_query in intermediate_queries: + modulated_intermediate_queries.append( + self._get_modulated_intermediate_query(intermediate_query) + ) + + return modulated_intermediate_queries + + +class QueryModulatorMetadata: + def __init__(self, from_key, to_key, from_value, to_value): + self.from_key = from_key + self.to_key = to_key + self.from_value = from_value + self.to_value = to_value From 8ccc13c861110aaf83cda27863e78dc0d3fa9db3 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 12 Apr 2024 15:19:33 +0200 Subject: [PATCH 04/16] wip: implement first running version of modulator-visitor based on flexibly defined modulators --- .../sentry_metrics/querying/data/api.py | 5 +- .../querying/data/preparation/base.py | 2 +- .../querying/visitors/modulator.py | 13 ++-- .../querying/visitors/query_modulator.py | 69 ++++++++++++------- 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 861df0b1efdc9c..03b8879e71e009 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -19,9 +19,10 @@ ) from sentry.sentry_metrics.querying.data.query import MQLQueriesResult, MQLQuery from sentry.sentry_metrics.querying.types import QueryType +from sentry.sentry_metrics.querying.visitors.modulator import Modulator from sentry.sentry_metrics.querying.visitors.query_modulator import QueryModulationStep -modulators = [] +modulators = [Modulator(from_key="project", to_key="project_id")] def run_queries( @@ -73,7 +74,7 @@ def run_queries( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None ): preparation_steps.append(UnitsNormalizationStep()) - preparation_steps.append(QueryModulationStep(modulators, projects)) + preparation_steps.append(QueryModulationStep(projects, modulators)) # replace names and values in query diff --git a/src/sentry/sentry_metrics/querying/data/preparation/base.py b/src/sentry/sentry_metrics/querying/data/preparation/base.py index 643227d20055ec..f3ca584aee0f0f 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/base.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/base.py @@ -5,7 +5,7 @@ from sentry.sentry_metrics.querying.types import QueryOrder from sentry.sentry_metrics.querying.units import MeasurementUnit, UnitFamily -from sentry.sentry_metrics.querying.visitors.query_modulator import Modulator +from sentry.sentry_metrics.querying.visitors.modulator import Modulator @dataclass(frozen=True) diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index de0bac79b5cc2c..c03db096b88239 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -1,17 +1,22 @@ -import abc from collections.abc import Sequence from snuba_sdk import Formula from sentry.models.project import Project -from sentry.sentry_metrics.querying.visitors.query_modulator import ModulationMetadata -class Modulator(abc.ABC): - def __init__(self, from_key: str, to_key: str, from_value: str): +class ModulationMetadata: + def __init__(self, from_key: str, to_key: str, from_value: str, to_value: str): self.from_key = from_key self.to_key = to_key self.from_value = from_value + self.to_value = to_value + + +class Modulator: + def __init__(self, from_key: str, to_key: str): + self.from_key = from_key + self.to_key = to_key def modulate( self, formula: Formula, projects: Sequence[Project] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py index eed30753cf856d..8eddca3f3b31ca 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -1,7 +1,7 @@ from collections.abc import Sequence from dataclasses import replace -from snuba_sdk import AliasedExpression, BooleanCondition, Column, Condition, Formula +from snuba_sdk import AliasedExpression, BooleanCondition, Column, Condition, Formula, Timeseries from snuba_sdk.expressions import ScalarType from sentry.models.project import Project @@ -11,14 +11,6 @@ from sentry.sentry_metrics.querying.visitors.modulator import Modulator -class ModulationMetadata: - def __init__(self, from_key: str, to_key: str, from_value: str, to_value: str): - self.from_key = from_key - self.to_key = to_key - self.from_value = from_value - self.to_value = to_value - - def find_modulator(modulators, from_key: str) -> Modulator: for modulator in modulators: if modulator.from_key == from_key: @@ -26,10 +18,6 @@ def find_modulator(modulators, from_key: str) -> Modulator: class ModulatorConditionVisitor(QueryConditionVisitor): - - # A AND B AND C - # BooleanCondition(AND, [BooleanCondition(AND, [Condition(A), Condition(B)], Condition(C)])] - def _visit_condition(self, condition: Condition) -> TVisited: lhs = condition.lhs rhs = condition.rhs @@ -63,10 +51,10 @@ class ModulatorVisitor(QueryExpressionVisitor): def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): self._projects = projects self.modulators = modulators + self.applied_modulators = [] - def _visit_formula(self, formula: Formula): + def _visit_formula(self, formula: Formula) -> TVisited: formula = super()._visit_formula(formula) - filters = ModulatorConditionVisitor().visit_group(formula.filters) formula = formula.set_filters(filters) @@ -76,30 +64,61 @@ def _visit_formula(self, formula: Formula): for group in groupby: new_group = group if isinstance(group, Column): - modulator = find_modulator(group.name) - new_group = Column(name=modulator.to_key) + modulator = find_modulator(self.modulators, group.name) + if modulator: + new_group = Column(name=modulator.to_key) + self.applied_modulators.append(modulator) elif isinstance(group, AliasedExpression): - modulator = find_modulator(group.exp.name) - new_group = AliasedExpression( - exp=Column(name=modulator.to_key), alias=group.alias - ) + modulator = find_modulator(self.modulators, group.exp.name) + if modulator: + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + self.applied_modulators.append(modulator) groups.append(new_group) formula = formula.set_groupby(groupby) return formula + def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: + timeseries = super()._visit_timeseries(timeseries) + filters = ModulatorConditionVisitor().visit_group(timeseries.filters) + timeseries.set_filters(filters) + groupby = timeseries.groupby + if groupby: + groups = [] + for group in groupby: + new_group = group + if isinstance(group, Column): + modulator = find_modulator(self.modulators, group.name) + if modulator: + new_group = Column(name=modulator.to_key) + self.applied_modulators.append(modulator) + elif isinstance(group, AliasedExpression): + modulator = find_modulator(self.modulators, group.exp.name) + if modulator: + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + self.applied_modulators.append(modulator) + groups.append(new_group) + timeseries = timeseries.set_groupby(groups) + return timeseries + class QueryModulationStep(PreparationStep): - def __init__(self, modulators: list[Modulator]): + def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): + self.projects = projects self.modulators = modulators def _get_modulated_intermediate_query( self, intermediate_query: IntermediateQuery ) -> IntermediateQuery: - modulators, modulated_query = ModulatorVisitor(self.modulators).visit( - intermediate_query.metrics_query.query - ) + # the ModulatorVisitor isn't doing what it should be yet: the modulators, which are enacted on the timeseries of the formula, are not added to the modulators list. + visitor = ModulatorVisitor(self.projects, self.modulators) + modulated_query = visitor.visit(intermediate_query.metrics_query.query) + modulators = visitor.applied_modulators if modulators: return replace( intermediate_query, From 12b9481aa01d5681bab75ecaa115bfea3c9af647 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 09:33:36 +0200 Subject: [PATCH 05/16] add query post-processing steps --- .../sentry_metrics/querying/data/api.py | 15 ++-- .../sentry_metrics/querying/data/execution.py | 10 ++- .../querying/data/postprocessing/__init__.py | 0 .../querying/data/postprocessing/base.py | 37 +++++++++ .../data/postprocessing/demodulation.py | 48 ++++++++++++ .../querying/data/preparation/modulation.py | 39 ++++++++++ .../sentry_metrics/querying/visitors/base.py | 2 +- .../querying/visitors/modulator.py | 39 +++++++++- .../querying/visitors/query_modulator.py | 75 ++++++------------- .../sentry_metrics/querying/data/test_api.py | 8 +- 10 files changed, 201 insertions(+), 72 deletions(-) create mode 100644 src/sentry/sentry_metrics/querying/data/postprocessing/__init__.py create mode 100644 src/sentry/sentry_metrics/querying/data/postprocessing/base.py create mode 100644 src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py create mode 100644 src/sentry/sentry_metrics/querying/data/preparation/modulation.py diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 03b8879e71e009..4a80f3ec9352a8 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -10,19 +10,21 @@ from sentry.models.project import Project from sentry.sentry_metrics.querying.data.execution import QueryExecutor, QueryResult from sentry.sentry_metrics.querying.data.parsing import QueryParser +from sentry.sentry_metrics.querying.data.postprocessing.base import run_postprocessing_steps +from sentry.sentry_metrics.querying.data.postprocessing.demodulation import QueryDemodulationStep from sentry.sentry_metrics.querying.data.preparation.base import ( IntermediateQuery, run_preparation_steps, ) +from sentry.sentry_metrics.querying.data.preparation.modulation import QueryModulationStep from sentry.sentry_metrics.querying.data.preparation.units_normalization import ( UnitsNormalizationStep, ) from sentry.sentry_metrics.querying.data.query import MQLQueriesResult, MQLQuery from sentry.sentry_metrics.querying.types import QueryType -from sentry.sentry_metrics.querying.visitors.modulator import Modulator -from sentry.sentry_metrics.querying.visitors.query_modulator import QueryModulationStep +from sentry.sentry_metrics.querying.visitors.modulator import Project2ProjectIDModulator -modulators = [Modulator(from_key="project", to_key="project_id")] +modulators = [Project2ProjectIDModulator(from_key="project", to_key="project_id")] def run_queries( @@ -87,12 +89,7 @@ def run_queries( executor.schedule(intermediate_query=intermediate_query, query_type=query_type) results = executor.execute() - - # this is where the query results are adapted to transform into the format that the query expected - for result in results: - for element in result.series: - # we don't actually see in the series_query whether there was a project-slug-filter in the original query -> we have to share some state. - pass + results = run_postprocessing_steps(results, QueryDemodulationStep(projects, modulators)) # We wrap the result in a class that exposes some utils methods to operate on results. return MQLQueriesResult(cast(list[QueryResult], results)) diff --git a/src/sentry/sentry_metrics/querying/data/execution.py b/src/sentry/sentry_metrics/querying/data/execution.py index 3ad70da3e44fd6..7d7f4d8300b5f1 100644 --- a/src/sentry/sentry_metrics/querying/data/execution.py +++ b/src/sentry/sentry_metrics/querying/data/execution.py @@ -318,7 +318,7 @@ def _align_date_range(cls, metrics_query: MetricsQuery) -> tuple[MetricsQuery, i return metrics_query, None -@dataclass(frozen=True) +@dataclass class QueryResult: """ Represents the result of a ScheduledQuery containing its associated series and totals results. @@ -447,10 +447,18 @@ def modified_end(self) -> datetime: def series(self) -> Sequence[Mapping[str, Any]]: return self.result["series"]["data"] + @series.setter + def series(self, value: Sequence[Mapping[str, Any]]) -> None: + self.result["series"]["data"] = value + @property def totals(self) -> Sequence[Mapping[str, Any]]: return self.result["totals"]["data"] + @totals.setter + def totals(self, value: Sequence[Mapping[str, Any]]) -> None: + self.result["totals"]["data"] = value + @property def meta(self) -> Sequence[Mapping[str, str]]: # By default, we extract the metadata from the totals query, if that is not there we extract from the series diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/__init__.py b/src/sentry/sentry_metrics/querying/data/postprocessing/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/base.py b/src/sentry/sentry_metrics/querying/data/postprocessing/base.py new file mode 100644 index 00000000000000..d16f8f056d94e9 --- /dev/null +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/base.py @@ -0,0 +1,37 @@ +from abc import ABC, abstractmethod + +from sentry.sentry_metrics.querying.data.execution import QueryResult + + +class PostProcessingStep(ABC): + """ + Represents an abstract step that post-processes a collection of QueryResult objects. + + The post-processing of these objects might include transforming them or just obtaining some intermediate data that + is useful to compute other things before returning the results. + """ + + @abstractmethod + def run(self, query_results: list[QueryResult]) -> list[QueryResult]: + """ + Runs the post-processing steps on a list of query results. + + Returns: + A list of post-processed query results. + """ + raise NotImplementedError + + +def run_postprocessing_steps(query_results: list[QueryResult], *steps) -> list[QueryResult]: + """ + Takes a series of query results and steps and runs the post-processing steps one after each other in order they are + supplied in. + + Returns: + A list of query results after running the post-processing steps. + """ + for step in steps: + if isinstance(step, PostProcessingStep): + query_results = step.run(query_results=query_results) + + return query_results diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py new file mode 100644 index 00000000000000..609bde2d41f80f --- /dev/null +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -0,0 +1,48 @@ +from collections.abc import Mapping, Sequence +from typing import Any + +from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.execution import QueryResult +from sentry.sentry_metrics.querying.data.postprocessing.base import PostProcessingStep +from sentry.sentry_metrics.querying.visitors.modulator import Modulator + + +class QueryDemodulationStep(PostProcessingStep): + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self.projects = projects + self.modulators = modulators + + def run(self, query_results: list[QueryResult]) -> list[QueryResult]: + for query_result in query_results: + if hasattr(query_result, "totals"): + query_result.totals = self._demodulate_data(query_result.totals) + if hasattr(query_result, "series"): + query_result.series = self._demodulate_data(query_result.series) + + # query_result.group_bys = self._demodulate_groupbys(query_result.group_bys) + + return query_results + + def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mapping[str, Any]]: + for element in data: + updated_element = dict() + keys_to_delete = [] + for result_key in element.keys(): + for modulator in self.modulators: + if modulator.to_key == result_key: + original_value = modulator.demodulate(result_key, self.projects) + updated_element[modulator.from_key] = original_value + keys_to_delete.append(result_key) + + for key in keys_to_delete: + del element[key] + element.update(updated_element) + + return data + + def _demodulate_groupbys(self, group_bys: list[str]) -> list[str]: + for idx, group in enumerate(group_bys): + for modulator in self.modulators: + group_bys[idx] = modulator.demodulate(group, self.projects) + + return group_bys diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py new file mode 100644 index 00000000000000..00e26896afe5e4 --- /dev/null +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -0,0 +1,39 @@ +from collections.abc import Sequence +from dataclasses import replace + +from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery, PreparationStep +from sentry.sentry_metrics.querying.visitors.modulator import Modulator +from sentry.sentry_metrics.querying.visitors.query_modulator import ModulatorVisitor + + +class QueryModulationStep(PreparationStep): + def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): + self.projects = projects + self.modulators = modulators + + def _get_modulated_intermediate_query( + self, intermediate_query: IntermediateQuery + ) -> IntermediateQuery: + # the ModulatorVisitor isn't doing what it should be yet: the modulators, which are enacted on the timeseries of the formula, are not added to the modulators list. + visitor = ModulatorVisitor(self.projects, self.modulators) + modulated_query = visitor.visit(intermediate_query.metrics_query.query) + modulators = visitor.applied_modulators + if modulators: + return replace( + intermediate_query, + metrics_query=intermediate_query.metrics_query.set_query(modulated_query), + modulators=modulators, + ) + + return intermediate_query + + def run(self, intermediate_queries: list[IntermediateQuery]) -> list[IntermediateQuery]: + modulated_intermediate_queries = [] + + for intermediate_query in intermediate_queries: + modulated_intermediate_queries.append( + self._get_modulated_intermediate_query(intermediate_query) + ) + + return modulated_intermediate_queries diff --git a/src/sentry/sentry_metrics/querying/visitors/base.py b/src/sentry/sentry_metrics/querying/visitors/base.py index e2217d303c21d4..1d82a2489f7bad 100644 --- a/src/sentry/sentry_metrics/querying/visitors/base.py +++ b/src/sentry/sentry_metrics/querying/visitors/base.py @@ -39,7 +39,7 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula.set_parameters(parameters) def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - raise timeseries + return timeseries def _visit_int(self, int_number: float) -> TVisited: return int_number # type: ignore[return-value] diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index c03db096b88239..224b3c8d61eaa6 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -1,4 +1,7 @@ +import abc +from collections import defaultdict from collections.abc import Sequence +from typing import Any from snuba_sdk import Formula @@ -13,12 +16,40 @@ def __init__(self, from_key: str, to_key: str, from_value: str, to_value: str): self.to_value = to_value -class Modulator: +class Modulator(abc.ABC): def __init__(self, from_key: str, to_key: str): self.from_key = from_key self.to_key = to_key + self.value_map: dict[Any, Any] = defaultdict(lambda: None) - def modulate( - self, formula: Formula, projects: Sequence[Project] - ) -> tuple[ModulationMetadata, Formula]: + def __hash__(self): + return hash((self.from_key, self.to_key)) + + @abc.abstractmethod + def modulate(self, formula: Formula, **kwargs) -> tuple[ModulationMetadata, Formula]: + return formula + + @abc.abstractmethod + def demodulate(self, formula: Formula, **kwargs) -> Formula: + return formula + + +class Project2ProjectIDModulator(Modulator): + def modulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: + # if no value is stored for this formula, add it to the map, then & otherwise return the existing formula + # the value should not change for the same key, so we don't re-store in case it is already in there + if formula not in self.value_map: + self.value_map[formula] = None + for project in projects: + if project.slug == formula: + self.value_map[formula] = project.id return formula + + def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: + if formula not in self.value_map: + # If the from_value was not set during modulation, e.g. because only a groupby statement was used in the + # query, retrieve the value mapping on demodulation in order to inject it into the query result. + for project in projects: + self.value_map[project.id] = project.slug + + return self.value_map[project.id] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py index 8eddca3f3b31ca..e45f17554d72a9 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -1,36 +1,39 @@ from collections.abc import Sequence -from dataclasses import replace from snuba_sdk import AliasedExpression, BooleanCondition, Column, Condition, Formula, Timeseries from snuba_sdk.expressions import ScalarType from sentry.models.project import Project -from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery, PreparationStep from sentry.sentry_metrics.querying.visitors import QueryConditionVisitor, QueryExpressionVisitor from sentry.sentry_metrics.querying.visitors.base import TVisited from sentry.sentry_metrics.querying.visitors.modulator import Modulator -def find_modulator(modulators, from_key: str) -> Modulator: +def find_modulator(modulators: Sequence[Modulator], from_key: str) -> Modulator: for modulator in modulators: if modulator.from_key == from_key: return modulator class ModulatorConditionVisitor(QueryConditionVisitor): + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self._projects = projects + self.modulators = modulators + self.applied_modulators = [] + def _visit_condition(self, condition: Condition) -> TVisited: lhs = condition.lhs rhs = condition.rhs if isinstance(lhs, Column): - modulator = find_modulator(lhs.name) - lhs = Column(modulator.to_key) + modulator = find_modulator(self.modulators, lhs.name) + if modulator: + lhs = Column(modulator.to_key) - if isinstance(rhs, ScalarType): - new_value = modulator.get_new_value(self._projects, rhs) - rhs = new_value - - return Condition(lhs=lhs, op=condition.op, rhs=rhs) + if isinstance(rhs, ScalarType): + new_value = modulator.modulate(rhs, self._projects) + rhs = new_value + return Condition(lhs=lhs, op=condition.op, rhs=rhs) return condition @@ -55,7 +58,9 @@ def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]) def _visit_formula(self, formula: Formula) -> TVisited: formula = super()._visit_formula(formula) - filters = ModulatorConditionVisitor().visit_group(formula.filters) + filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( + formula.filters + ) formula = formula.set_filters(filters) groupby = formula.groupby @@ -78,12 +83,14 @@ def _visit_formula(self, formula: Formula) -> TVisited: groups.append(new_group) - formula = formula.set_groupby(groupby) + formula = formula.set_groupby(groups) return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: timeseries = super()._visit_timeseries(timeseries) - filters = ModulatorConditionVisitor().visit_group(timeseries.filters) + filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( + timeseries.filters + ) timeseries.set_filters(filters) groupby = timeseries.groupby if groupby: @@ -103,45 +110,5 @@ def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: ) self.applied_modulators.append(modulator) groups.append(new_group) - timeseries = timeseries.set_groupby(groups) + timeseries = timeseries.set_groupby(groups) return timeseries - - -class QueryModulationStep(PreparationStep): - def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): - self.projects = projects - self.modulators = modulators - - def _get_modulated_intermediate_query( - self, intermediate_query: IntermediateQuery - ) -> IntermediateQuery: - # the ModulatorVisitor isn't doing what it should be yet: the modulators, which are enacted on the timeseries of the formula, are not added to the modulators list. - visitor = ModulatorVisitor(self.projects, self.modulators) - modulated_query = visitor.visit(intermediate_query.metrics_query.query) - modulators = visitor.applied_modulators - if modulators: - return replace( - intermediate_query, - metrics_query=intermediate_query.metrics_query.set_query(modulated_query), - modulators=modulators, - ) - - return modulated_query - - def run(self, intermediate_queries: list[IntermediateQuery]) -> list[IntermediateQuery]: - modulated_intermediate_queries = [] - - for intermediate_query in intermediate_queries: - modulated_intermediate_queries.append( - self._get_modulated_intermediate_query(intermediate_query) - ) - - return modulated_intermediate_queries - - -class QueryModulatorMetadata: - def __init__(self, from_key, to_key, from_value, to_value): - self.from_key = from_key - self.to_key = to_key - self.from_value = from_value - self.to_value = to_value diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 5a68a4fbb2d091..1f7f33657ad5d5 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -123,7 +123,7 @@ def run_query( referrer: str, query_type: QueryType = QueryType.TOTALS_AND_SERIES, ) -> Mapping[str, Any]: - return run_queries( + untransformed = run_queries( mql_queries, start, end, @@ -133,7 +133,9 @@ def run_query( environments, referrer, query_type, - ).apply_transformer(self.query_transformer) + ) + transformed = untransformed.apply_transformer(self.query_transformer) + return transformed @with_feature("organizations:ddm-metrics-api-unit-normalization") def test_query_with_no_formulas(self) -> None: @@ -1677,7 +1679,7 @@ def test_groupby_using_project_name_gets_visited_correctly(self) -> None: ) data = results["data"] assert len(data) == 1 - assert data[0][0]["by"] == {"project_id": self.new_project.id} + assert data[0][0]["by"] == {"project": self.new_project.slug} assert data[0][0]["series"] == [ None, self.to_reference_unit(3.0), From f7b216126a06264e12e184679d47c217702271bd Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 11:13:52 +0200 Subject: [PATCH 06/16] wip: fix bugs & restructure --- .../sentry_metrics/querying/data/execution.py | 4 ++ .../data/postprocessing/demodulation.py | 4 +- .../querying/data/preparation/modulation.py | 14 ++---- .../querying/visitors/modulator.py | 2 +- .../querying/visitors/query_modulator.py | 47 ++++++++++--------- 5 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/execution.py b/src/sentry/sentry_metrics/querying/data/execution.py index 7d7f4d8300b5f1..6009f214639a09 100644 --- a/src/sentry/sentry_metrics/querying/data/execution.py +++ b/src/sentry/sentry_metrics/querying/data/execution.py @@ -445,6 +445,8 @@ def modified_end(self) -> datetime: @property def series(self) -> Sequence[Mapping[str, Any]]: + if "series" not in self.result: + return [] return self.result["series"]["data"] @series.setter @@ -453,6 +455,8 @@ def series(self, value: Sequence[Mapping[str, Any]]) -> None: @property def totals(self) -> Sequence[Mapping[str, Any]]: + if "totals" not in self.result: + return [] return self.result["totals"]["data"] @totals.setter diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 609bde2d41f80f..48714b6b713ca7 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -14,9 +14,9 @@ def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]) def run(self, query_results: list[QueryResult]) -> list[QueryResult]: for query_result in query_results: - if hasattr(query_result, "totals"): + if query_result.totals: query_result.totals = self._demodulate_data(query_result.totals) - if hasattr(query_result, "series"): + if query_result.series: query_result.series = self._demodulate_data(query_result.series) # query_result.group_bys = self._demodulate_groupbys(query_result.group_bys) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py index 00e26896afe5e4..c5e0645032942c 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -15,18 +15,14 @@ def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): def _get_modulated_intermediate_query( self, intermediate_query: IntermediateQuery ) -> IntermediateQuery: - # the ModulatorVisitor isn't doing what it should be yet: the modulators, which are enacted on the timeseries of the formula, are not added to the modulators list. visitor = ModulatorVisitor(self.projects, self.modulators) modulated_query = visitor.visit(intermediate_query.metrics_query.query) - modulators = visitor.applied_modulators - if modulators: - return replace( - intermediate_query, - metrics_query=intermediate_query.metrics_query.set_query(modulated_query), - modulators=modulators, - ) - return intermediate_query + return replace( + intermediate_query, + metrics_query=intermediate_query.metrics_query.set_query(modulated_query), + modulators=visitor.applied_modulators, + ) def run(self, intermediate_queries: list[IntermediateQuery]) -> list[IntermediateQuery]: modulated_intermediate_queries = [] diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index 224b3c8d61eaa6..33e1fb07a20623 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -43,7 +43,7 @@ def modulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: for project in projects: if project.slug == formula: self.value_map[formula] = project.id - return formula + return self.value_map[formula] def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: if formula not in self.value_map: diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py index e45f17554d72a9..b7a44f00322688 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -28,12 +28,12 @@ def _visit_condition(self, condition: Condition) -> TVisited: if isinstance(lhs, Column): modulator = find_modulator(self.modulators, lhs.name) if modulator: - lhs = Column(modulator.to_key) + new_lhs = Column(modulator.to_key) + self.applied_modulators.append(modulator) if isinstance(rhs, ScalarType): - new_value = modulator.modulate(rhs, self._projects) - rhs = new_value - return Condition(lhs=lhs, op=condition.op, rhs=rhs) + new_rhs = modulator.modulate(rhs, self._projects) + return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) return condition @@ -42,7 +42,7 @@ def _visit_boolean_condition(self, boolean_condition: BooleanCondition) -> TVisi for condition in boolean_condition.conditions: conditions.append(self.visit(condition)) - return BooleanCondition(op=boolean_condition.op, conditions=conditions) + return BooleanCondition(op=boolean_condition.op, conditions=conditions) class ModulatorVisitor(QueryExpressionVisitor): @@ -57,16 +57,19 @@ def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]) self.applied_modulators = [] def _visit_formula(self, formula: Formula) -> TVisited: + # visit the params recursively formula = super()._visit_formula(formula) + + # visit the filters filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( formula.filters ) formula = formula.set_filters(filters) - groupby = formula.groupby - if groupby: - groups = [] - for group in groupby: + # visit the groupby + if formula.groupby: + new_group_bys = [] + for group in formula.groupby: new_group = group if isinstance(group, Column): modulator = find_modulator(self.modulators, group.name) @@ -79,23 +82,25 @@ def _visit_formula(self, formula: Formula) -> TVisited: new_group = AliasedExpression( exp=Column(name=modulator.to_key), alias=group.alias ) - self.applied_modulators.append(modulator) - - groups.append(new_group) - - formula = formula.set_groupby(groups) + self.applied_modulators.append(modulator) + new_group_bys.append(new_group) + formula = formula.set_groupby(new_group_bys) return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: + # visit the parameters recursively timeseries = super()._visit_timeseries(timeseries) + + # visit the filters filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( timeseries.filters ) - timeseries.set_filters(filters) - groupby = timeseries.groupby - if groupby: - groups = [] - for group in groupby: + timeseries = timeseries.set_filters(filters) + + # visit the groupby + if timeseries.groupby: + new_group_bys = [] + for group in timeseries.groupby: new_group = group if isinstance(group, Column): modulator = find_modulator(self.modulators, group.name) @@ -109,6 +114,6 @@ def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: exp=Column(name=modulator.to_key), alias=group.alias ) self.applied_modulators.append(modulator) - groups.append(new_group) - timeseries = timeseries.set_groupby(groups) + new_group_bys.append(new_group) + timeseries = timeseries.set_groupby(new_group_bys) return timeseries From 9e8652fb509b34892c1226b66257f12c67799dd3 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 13:34:41 +0200 Subject: [PATCH 07/16] wip: everything works before cleanup --- .../sentry_metrics/querying/data/execution.py | 11 +++++++++-- .../data/postprocessing/demodulation.py | 2 +- .../querying/data/preparation/base.py | 4 ++-- .../querying/visitors/modulator.py | 5 +++-- .../querying/visitors/query_expression.py | 19 +++++++++++++++++-- .../querying/visitors/query_modulator.py | 12 +++++++++--- 6 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/execution.py b/src/sentry/sentry_metrics/querying/data/execution.py index 6009f214639a09..4e6b381483b2bd 100644 --- a/src/sentry/sentry_metrics/querying/data/execution.py +++ b/src/sentry/sentry_metrics/querying/data/execution.py @@ -1,5 +1,5 @@ from collections.abc import Mapping, Sequence -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from datetime import datetime from enum import Enum from typing import Any, Union, cast @@ -24,6 +24,7 @@ TimeseriesConditionInjectionVisitor, UsedGroupBysVisitor, ) +from sentry.sentry_metrics.querying.visitors.modulator import Modulator from sentry.sentry_metrics.visibility import get_metrics_blocking_state from sentry.snuba.dataset import Dataset from sentry.snuba.metrics_layer.query import bulk_run_query @@ -145,6 +146,7 @@ class ScheduledQuery: unit_family: UnitFamily | None = None unit: MeasurementUnit | None = None scaling_factor: float | None = None + modulators: list[Modulator] = field(default_factory=list) def initialize( self, @@ -476,7 +478,11 @@ def group_bys(self) -> list[str]: # that we can correctly render groups in case they are not returned from the db because of missing data. # # Sorting of the groups is done to maintain consistency across function calls. - return sorted(UsedGroupBysVisitor().visit(self._any_query().metrics_query.query)) + scheduled_query = self._any_query() + modulators = scheduled_query.modulators + return sorted( + UsedGroupBysVisitor(modulators=modulators).visit(scheduled_query.metrics_query.query) + ) @property def interval(self) -> int | None: @@ -824,6 +830,7 @@ def schedule(self, intermediate_query: IntermediateQuery, query_type: QueryType) unit_family=intermediate_query.unit_family, unit=intermediate_query.unit, scaling_factor=intermediate_query.scaling_factor, + modulators=intermediate_query.modulators, ) # In case the user chooses to run also a series query, we will duplicate the query and chain it after totals. diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 48714b6b713ca7..7d89e53090e9ce 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -30,7 +30,7 @@ def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mappin for result_key in element.keys(): for modulator in self.modulators: if modulator.to_key == result_key: - original_value = modulator.demodulate(result_key, self.projects) + original_value = modulator.demodulate(element[result_key], self.projects) updated_element[modulator.from_key] = original_value keys_to_delete.append(result_key) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/base.py b/src/sentry/sentry_metrics/querying/data/preparation/base.py index f3ca584aee0f0f..08793f4aa6392f 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/base.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from snuba_sdk import MetricsQuery @@ -28,7 +28,7 @@ class IntermediateQuery: unit_family: UnitFamily | None = None unit: MeasurementUnit | None = None scaling_factor: float | None = None - modulators: list[Modulator] | None = None + modulators: list[Modulator] = field(default_factory=list) class PreparationStep(ABC): diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index 33e1fb07a20623..1bf2e38bdbd69a 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -50,6 +50,7 @@ def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: # If the from_value was not set during modulation, e.g. because only a groupby statement was used in the # query, retrieve the value mapping on demodulation in order to inject it into the query result. for project in projects: - self.value_map[project.id] = project.slug + if project.id == formula: + self.value_map[formula] = project.slug - return self.value_map[project.id] + return self.value_map[formula] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 788a37d1f7ddbf..8dea329bd3a8e8 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -20,6 +20,8 @@ QueryConditionVisitor, QueryExpressionVisitor, ) +from sentry.sentry_metrics.querying.visitors.modulator import Modulator +from sentry.sentry_metrics.querying.visitors.query_modulator import find_modulator from sentry.snuba.metrics import parse_mri @@ -245,6 +247,9 @@ class UsedGroupBysVisitor(QueryExpressionVisitor[set[str]]): Visitor that recursively computes all the groups of the `QueryExpression`. """ + def __init__(self, modulators: list[Modulator] = None): + self.modulators = modulators or [] + def _visit_formula(self, formula: Formula) -> set[str]: group_bys: set[str] = set() @@ -271,10 +276,20 @@ def _group_bys_as_string(self, group_bys: list[Column | AliasedExpression] | Non string_group_bys = set() for group_by in group_bys: + modulator = None if isinstance(group_by, AliasedExpression): - string_group_bys.add(group_by.exp.name) + modulator = find_modulator(modulators=self.modulators, to_key=group_by.exp.name) elif isinstance(group_by, Column): - string_group_bys.add(group_by.name) + modulator = find_modulator(modulators=self.modulators, to_key=group_by.name) + + if modulator: + string_group_bys.add(modulator.from_key) + + else: + if isinstance(group_by, AliasedExpression): + string_group_bys.add(group_by.exp.name) + elif isinstance(group_by, Column): + string_group_bys.add(group_by.name) return string_group_bys diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py index b7a44f00322688..a7b906d1c4f712 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -9,10 +9,16 @@ from sentry.sentry_metrics.querying.visitors.modulator import Modulator -def find_modulator(modulators: Sequence[Modulator], from_key: str) -> Modulator: +def find_modulator( + modulators: Sequence[Modulator], from_key: str = None, to_key: str = None +) -> Modulator: for modulator in modulators: - if modulator.from_key == from_key: - return modulator + if from_key: + if modulator.from_key == from_key: + return modulator + if to_key: + if modulator.to_key == to_key: + return modulator class ModulatorConditionVisitor(QueryConditionVisitor): From 0aadd600f46fdab6c050d7b6a9c166f56436d974 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 14:45:01 +0200 Subject: [PATCH 08/16] wip: cleanup --- .../sentry_metrics/querying/data/api.py | 10 ++----- .../sentry_metrics/querying/data/parsing.py | 4 +-- .../data/postprocessing/demodulation.py | 9 ------ .../querying/visitors/__init__.py | 2 -- .../sentry_metrics/querying/visitors/base.py | 2 +- .../querying/visitors/modulator.py | 4 --- .../querying/visitors/query_expression.py | 28 ------------------- .../querying/visitors/query_modulator.py | 8 ------ .../sentry_metrics/querying/data/test_api.py | 6 ++-- 9 files changed, 8 insertions(+), 65 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 4a80f3ec9352a8..ad048916f6829a 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -57,19 +57,17 @@ def run_queries( ) intermediate_queries = [] - compiled_mql_queries = [] # We parse the query plan and obtain a series of queries. parser = QueryParser(projects=projects, environments=environments, mql_queries=mql_queries) - for query_expression, compiled_mql_query in parser.generate_queries(): + for query_expression, query_order, query_limit in parser.generate_queries(): intermediate_queries.append( IntermediateQuery( metrics_query=base_query.set_query(query_expression), - order=compiled_mql_query.order, - limit=compiled_mql_query.limit, + order=query_order, + limit=query_limit, ) ) - compiled_mql_queries.append(compiled_mql_query) preparation_steps = [] if features.has( @@ -78,8 +76,6 @@ def run_queries( preparation_steps.append(UnitsNormalizationStep()) preparation_steps.append(QueryModulationStep(projects, modulators)) - # replace names and values in query - # We run a series of preparation steps which operate on the entire list of queries. intermediate_queries = run_preparation_steps(intermediate_queries, *preparation_steps) diff --git a/src/sentry/sentry_metrics/querying/data/parsing.py b/src/sentry/sentry_metrics/querying/data/parsing.py index d590edab3dffcf..b60282699527ef 100644 --- a/src/sentry/sentry_metrics/querying/data/parsing.py +++ b/src/sentry/sentry_metrics/querying/data/parsing.py @@ -80,8 +80,8 @@ def generate_queries( # We transform all `release:latest` filters into the actual latest releases. .add_visitor( QueryConditionsCompositeVisitor( - LatestReleaseTransformationVisitor(self._projects), + LatestReleaseTransformationVisitor(self._projects) ) ).get() ) - yield query_expression, compiled_mql_query + yield query_expression, compiled_mql_query.order, compiled_mql_query.limit diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 7d89e53090e9ce..c7d2e49639cc88 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -19,8 +19,6 @@ def run(self, query_results: list[QueryResult]) -> list[QueryResult]: if query_result.series: query_result.series = self._demodulate_data(query_result.series) - # query_result.group_bys = self._demodulate_groupbys(query_result.group_bys) - return query_results def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mapping[str, Any]]: @@ -39,10 +37,3 @@ def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mappin element.update(updated_element) return data - - def _demodulate_groupbys(self, group_bys: list[str]) -> list[str]: - for idx, group in enumerate(group_bys): - for modulator in self.modulators: - group_bys[idx] = modulator.demodulate(group, self.projects) - - return group_bys diff --git a/src/sentry/sentry_metrics/querying/visitors/__init__.py b/src/sentry/sentry_metrics/querying/visitors/__init__.py index 64fb3940cf4c5b..bc51e74b87e40a 100644 --- a/src/sentry/sentry_metrics/querying/visitors/__init__.py +++ b/src/sentry/sentry_metrics/querying/visitors/__init__.py @@ -7,7 +7,6 @@ ) from .query_expression import ( EnvironmentsInjectionVisitor, - GroupByProjectVisitor, QueriedMetricsVisitor, QueryConditionsCompositeVisitor, QueryValidationV2Visitor, @@ -33,5 +32,4 @@ "UsedGroupBysVisitor", "UnitsNormalizationVisitor", "ProjectToProjectIDTransformationVisitor", - "GroupByProjectVisitor", ] diff --git a/src/sentry/sentry_metrics/querying/visitors/base.py b/src/sentry/sentry_metrics/querying/visitors/base.py index 1d82a2489f7bad..e2217d303c21d4 100644 --- a/src/sentry/sentry_metrics/querying/visitors/base.py +++ b/src/sentry/sentry_metrics/querying/visitors/base.py @@ -39,7 +39,7 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula.set_parameters(parameters) def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - return timeseries + raise timeseries def _visit_int(self, int_number: float) -> TVisited: return int_number # type: ignore[return-value] diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index 1bf2e38bdbd69a..8399d38631ba6b 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -36,8 +36,6 @@ def demodulate(self, formula: Formula, **kwargs) -> Formula: class Project2ProjectIDModulator(Modulator): def modulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: - # if no value is stored for this formula, add it to the map, then & otherwise return the existing formula - # the value should not change for the same key, so we don't re-store in case it is already in there if formula not in self.value_map: self.value_map[formula] = None for project in projects: @@ -47,8 +45,6 @@ def modulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: if formula not in self.value_map: - # If the from_value was not set during modulation, e.g. because only a groupby statement was used in the - # query, retrieve the value mapping on demodulation in order to inject it into the query result. for project in projects: if project.id == formula: self.value_map[formula] = project.slug diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 8dea329bd3a8e8..00f39e59efa44b 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -452,31 +452,3 @@ def _visit_float(self, float_number: float) -> QueryExpression: def _is_numeric_scalar(self, value: QueryExpression) -> bool: return isinstance(value, int) or isinstance(value, float) - - -class GroupByProjectVisitor(QueryExpressionVisitor[QueryExpression]): - def __init__(self, projects): - self._projects = projects - - def _replace_project_by_project_id(self, column: Column): - if column.name == "project": - return Column("project_id") - return column - - def _visit_groupby(self, groupby: list[Column | AliasedExpression] | None = None): - for idx, single_groupby in enumerate(groupby): - if isinstance(single_groupby, Column): - groupby[idx] = self._replace_project_by_project_id(single_groupby) - - if isinstance(single_groupby, AliasedExpression): - groupby[idx] = self._replace_project_by_project_id(single_groupby.exp) - - def _visit_formula(self, formula: Formula) -> QueryExpression: - if formula.groupby is not None: - self._visit_groupby(formula.groupby) - return formula - - def _visit_timeseries(self, timeseries: Timeseries) -> QueryExpression: - if timeseries.groupby is not None: - self._visit_groupby(timeseries.groupby) - return timeseries diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py index a7b906d1c4f712..aff265f1a5410d 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py @@ -63,16 +63,13 @@ def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]) self.applied_modulators = [] def _visit_formula(self, formula: Formula) -> TVisited: - # visit the params recursively formula = super()._visit_formula(formula) - # visit the filters filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( formula.filters ) formula = formula.set_filters(filters) - # visit the groupby if formula.groupby: new_group_bys = [] for group in formula.groupby: @@ -94,16 +91,11 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - # visit the parameters recursively - timeseries = super()._visit_timeseries(timeseries) - - # visit the filters filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( timeseries.filters ) timeseries = timeseries.set_filters(filters) - # visit the groupby if timeseries.groupby: new_group_bys = [] for group in timeseries.groupby: diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 1f7f33657ad5d5..9d996319561a6e 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -123,7 +123,7 @@ def run_query( referrer: str, query_type: QueryType = QueryType.TOTALS_AND_SERIES, ) -> Mapping[str, Any]: - untransformed = run_queries( + return run_queries( mql_queries, start, end, @@ -133,9 +133,7 @@ def run_query( environments, referrer, query_type, - ) - transformed = untransformed.apply_transformer(self.query_transformer) - return transformed + ).apply_transformer(self.query_transformer) @with_feature("organizations:ddm-metrics-api-unit-normalization") def test_query_with_no_formulas(self) -> None: From 1287f0a19b6b1adfb48e2b4cc3f5087a44b656fa Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 14:59:45 +0200 Subject: [PATCH 09/16] wip: remove ModulationMetadata - not needed --- .../sentry_metrics/querying/visitors/modulator.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/visitors/modulator.py index 8399d38631ba6b..bd4a1f5a2cb422 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/visitors/modulator.py @@ -8,14 +8,6 @@ from sentry.models.project import Project -class ModulationMetadata: - def __init__(self, from_key: str, to_key: str, from_value: str, to_value: str): - self.from_key = from_key - self.to_key = to_key - self.from_value = from_value - self.to_value = to_value - - class Modulator(abc.ABC): def __init__(self, from_key: str, to_key: str): self.from_key = from_key @@ -26,7 +18,7 @@ def __hash__(self): return hash((self.from_key, self.to_key)) @abc.abstractmethod - def modulate(self, formula: Formula, **kwargs) -> tuple[ModulationMetadata, Formula]: + def modulate(self, formula: Formula, **kwargs) -> Formula: return formula @abc.abstractmethod From 420d0df8b88788544148886cd1486a828748fda0 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 15:08:42 +0200 Subject: [PATCH 10/16] wip: move things around --- .../sentry_metrics/querying/data/api.py | 2 +- .../sentry_metrics/querying/data/execution.py | 2 +- .../querying/data/modulation/__init__.py | 0 .../modulation}/modulator.py | 12 ++ .../data/postprocessing/demodulation.py | 2 +- .../querying/data/preparation/base.py | 2 +- .../querying/data/preparation/modulation.py | 4 +- .../querying/visitors/query_condition.py | 34 ++++- .../querying/visitors/query_expression.py | 72 ++++++++++- .../querying/visitors/query_modulator.py | 117 ------------------ 10 files changed, 121 insertions(+), 126 deletions(-) create mode 100644 src/sentry/sentry_metrics/querying/data/modulation/__init__.py rename src/sentry/sentry_metrics/querying/{visitors => data/modulation}/modulator.py (80%) delete mode 100644 src/sentry/sentry_metrics/querying/visitors/query_modulator.py diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index ad048916f6829a..86ec461d7e2315 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -9,6 +9,7 @@ from sentry.models.organization import Organization from sentry.models.project import Project from sentry.sentry_metrics.querying.data.execution import QueryExecutor, QueryResult +from sentry.sentry_metrics.querying.data.modulation.modulator import Project2ProjectIDModulator from sentry.sentry_metrics.querying.data.parsing import QueryParser from sentry.sentry_metrics.querying.data.postprocessing.base import run_postprocessing_steps from sentry.sentry_metrics.querying.data.postprocessing.demodulation import QueryDemodulationStep @@ -22,7 +23,6 @@ ) from sentry.sentry_metrics.querying.data.query import MQLQueriesResult, MQLQuery from sentry.sentry_metrics.querying.types import QueryType -from sentry.sentry_metrics.querying.visitors.modulator import Project2ProjectIDModulator modulators = [Project2ProjectIDModulator(from_key="project", to_key="project_id")] diff --git a/src/sentry/sentry_metrics/querying/data/execution.py b/src/sentry/sentry_metrics/querying/data/execution.py index 4e6b381483b2bd..054e63c27b06a1 100644 --- a/src/sentry/sentry_metrics/querying/data/execution.py +++ b/src/sentry/sentry_metrics/querying/data/execution.py @@ -11,6 +11,7 @@ from sentry.models.organization import Organization from sentry.models.project import Project from sentry.sentry_metrics.querying.constants import SNUBA_QUERY_LIMIT +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery from sentry.sentry_metrics.querying.data.utils import adjust_time_bounds_with_interval from sentry.sentry_metrics.querying.errors import ( @@ -24,7 +25,6 @@ TimeseriesConditionInjectionVisitor, UsedGroupBysVisitor, ) -from sentry.sentry_metrics.querying.visitors.modulator import Modulator from sentry.sentry_metrics.visibility import get_metrics_blocking_state from sentry.snuba.dataset import Dataset from sentry.snuba.metrics_layer.query import bulk_run_query diff --git a/src/sentry/sentry_metrics/querying/data/modulation/__init__.py b/src/sentry/sentry_metrics/querying/data/modulation/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/src/sentry/sentry_metrics/querying/visitors/modulator.py b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py similarity index 80% rename from src/sentry/sentry_metrics/querying/visitors/modulator.py rename to src/sentry/sentry_metrics/querying/data/modulation/modulator.py index bd4a1f5a2cb422..9b4cec12a3a24e 100644 --- a/src/sentry/sentry_metrics/querying/visitors/modulator.py +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py @@ -42,3 +42,15 @@ def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: self.value_map[formula] = project.slug return self.value_map[formula] + + +def find_modulator( + modulators: Sequence[Modulator], from_key: str = None, to_key: str = None +) -> Modulator: + for modulator in modulators: + if from_key: + if modulator.from_key == from_key: + return modulator + if to_key: + if modulator.to_key == to_key: + return modulator diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index c7d2e49639cc88..4f452ea60ea450 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -3,8 +3,8 @@ from sentry.models.project import Project from sentry.sentry_metrics.querying.data.execution import QueryResult +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator from sentry.sentry_metrics.querying.data.postprocessing.base import PostProcessingStep -from sentry.sentry_metrics.querying.visitors.modulator import Modulator class QueryDemodulationStep(PostProcessingStep): diff --git a/src/sentry/sentry_metrics/querying/data/preparation/base.py b/src/sentry/sentry_metrics/querying/data/preparation/base.py index 08793f4aa6392f..39a2519bab571c 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/base.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/base.py @@ -3,9 +3,9 @@ from snuba_sdk import MetricsQuery +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator from sentry.sentry_metrics.querying.types import QueryOrder from sentry.sentry_metrics.querying.units import MeasurementUnit, UnitFamily -from sentry.sentry_metrics.querying.visitors.modulator import Modulator @dataclass(frozen=True) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py index c5e0645032942c..eade4318751538 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -2,9 +2,9 @@ from dataclasses import replace from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery, PreparationStep -from sentry.sentry_metrics.querying.visitors.modulator import Modulator -from sentry.sentry_metrics.querying.visitors.query_modulator import ModulatorVisitor +from sentry.sentry_metrics.querying.visitors.query_expression import ModulatorVisitor class QueryModulationStep(PreparationStep): diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index 41d8c751cb2427..c05d5e4c016d2b 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -1,12 +1,14 @@ from collections.abc import Mapping, Sequence from snuba_sdk import BooleanCondition, BooleanOp, Column, Condition, Op +from snuba_sdk.expressions import ScalarType from sentry.api.serializers import bulk_fetch_project_latest_releases from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import LatestReleaseNotFoundError from sentry.sentry_metrics.querying.types import QueryCondition -from sentry.sentry_metrics.querying.visitors.base import QueryConditionVisitor +from sentry.sentry_metrics.querying.visitors.base import QueryConditionVisitor, TVisited class LatestReleaseTransformationVisitor(QueryConditionVisitor[QueryCondition]): @@ -125,3 +127,33 @@ def _extract_project_id(self, project_slug: str) -> str: for project in self._projects: if project.slug == project_slug: return project.id + + +class ModulatorConditionVisitor(QueryConditionVisitor): + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self._projects = projects + self.modulators = modulators + self.applied_modulators = [] + + def _visit_condition(self, condition: Condition) -> TVisited: + lhs = condition.lhs + rhs = condition.rhs + + if isinstance(lhs, Column): + modulator = find_modulator(self.modulators, lhs.name) + if modulator: + new_lhs = Column(modulator.to_key) + self.applied_modulators.append(modulator) + + if isinstance(rhs, ScalarType): + new_rhs = modulator.modulate(rhs, self._projects) + return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) + + return condition + + def _visit_boolean_condition(self, boolean_condition: BooleanCondition) -> TVisited: + conditions = [] + for condition in boolean_condition.conditions: + conditions.append(self.visit(condition)) + + return BooleanCondition(op=boolean_condition.op, conditions=conditions) diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 00f39e59efa44b..d21a202ea744ba 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -4,7 +4,9 @@ from snuba_sdk.conditions import ConditionGroup from sentry.models.environment import Environment +from sentry.models.project import Project from sentry.sentry_metrics.querying.constants import COEFFICIENT_OPERATORS +from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import InvalidMetricsQueryError from sentry.sentry_metrics.querying.types import QueryExpression from sentry.sentry_metrics.querying.units import ( @@ -19,9 +21,9 @@ from sentry.sentry_metrics.querying.visitors.base import ( QueryConditionVisitor, QueryExpressionVisitor, + TVisited, ) -from sentry.sentry_metrics.querying.visitors.modulator import Modulator -from sentry.sentry_metrics.querying.visitors.query_modulator import find_modulator +from sentry.sentry_metrics.querying.visitors.query_condition import ModulatorConditionVisitor from sentry.snuba.metrics import parse_mri @@ -452,3 +454,69 @@ def _visit_float(self, float_number: float) -> QueryExpression: def _is_numeric_scalar(self, value: QueryExpression) -> bool: return isinstance(value, int) or isinstance(value, float) + + +class ModulatorVisitor(QueryExpressionVisitor): + """ + Visitor that recursively transforms the QueryExpression components to modulate certain attributes to be queried + by API that need to be translated for Snuba to be able to query the data. + """ + + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self._projects = projects + self.modulators = modulators + self.applied_modulators = [] + + def _visit_formula(self, formula: Formula) -> TVisited: + formula = super()._visit_formula(formula) + + filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( + formula.filters + ) + formula = formula.set_filters(filters) + + if formula.groupby: + new_group_bys = [] + for group in formula.groupby: + new_group = group + if isinstance(group, Column): + modulator = find_modulator(self.modulators, group.name) + if modulator: + new_group = Column(name=modulator.to_key) + self.applied_modulators.append(modulator) + elif isinstance(group, AliasedExpression): + modulator = find_modulator(self.modulators, group.exp.name) + if modulator: + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + self.applied_modulators.append(modulator) + new_group_bys.append(new_group) + formula = formula.set_groupby(new_group_bys) + return formula + + def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: + filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( + timeseries.filters + ) + timeseries = timeseries.set_filters(filters) + + if timeseries.groupby: + new_group_bys = [] + for group in timeseries.groupby: + new_group = group + if isinstance(group, Column): + modulator = find_modulator(self.modulators, group.name) + if modulator: + new_group = Column(name=modulator.to_key) + self.applied_modulators.append(modulator) + elif isinstance(group, AliasedExpression): + modulator = find_modulator(self.modulators, group.exp.name) + if modulator: + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + self.applied_modulators.append(modulator) + new_group_bys.append(new_group) + timeseries = timeseries.set_groupby(new_group_bys) + return timeseries diff --git a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py b/src/sentry/sentry_metrics/querying/visitors/query_modulator.py deleted file mode 100644 index aff265f1a5410d..00000000000000 --- a/src/sentry/sentry_metrics/querying/visitors/query_modulator.py +++ /dev/null @@ -1,117 +0,0 @@ -from collections.abc import Sequence - -from snuba_sdk import AliasedExpression, BooleanCondition, Column, Condition, Formula, Timeseries -from snuba_sdk.expressions import ScalarType - -from sentry.models.project import Project -from sentry.sentry_metrics.querying.visitors import QueryConditionVisitor, QueryExpressionVisitor -from sentry.sentry_metrics.querying.visitors.base import TVisited -from sentry.sentry_metrics.querying.visitors.modulator import Modulator - - -def find_modulator( - modulators: Sequence[Modulator], from_key: str = None, to_key: str = None -) -> Modulator: - for modulator in modulators: - if from_key: - if modulator.from_key == from_key: - return modulator - if to_key: - if modulator.to_key == to_key: - return modulator - - -class ModulatorConditionVisitor(QueryConditionVisitor): - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): - self._projects = projects - self.modulators = modulators - self.applied_modulators = [] - - def _visit_condition(self, condition: Condition) -> TVisited: - lhs = condition.lhs - rhs = condition.rhs - - if isinstance(lhs, Column): - modulator = find_modulator(self.modulators, lhs.name) - if modulator: - new_lhs = Column(modulator.to_key) - self.applied_modulators.append(modulator) - - if isinstance(rhs, ScalarType): - new_rhs = modulator.modulate(rhs, self._projects) - return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) - - return condition - - def _visit_boolean_condition(self, boolean_condition: BooleanCondition) -> TVisited: - conditions = [] - for condition in boolean_condition.conditions: - conditions.append(self.visit(condition)) - - return BooleanCondition(op=boolean_condition.op, conditions=conditions) - - -class ModulatorVisitor(QueryExpressionVisitor): - """ - Visitor that recursively transforms the QueryExpression components to modulate certain attributes to be queried - by API that need to be translated for Snuba to be able to query the data. - """ - - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): - self._projects = projects - self.modulators = modulators - self.applied_modulators = [] - - def _visit_formula(self, formula: Formula) -> TVisited: - formula = super()._visit_formula(formula) - - filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( - formula.filters - ) - formula = formula.set_filters(filters) - - if formula.groupby: - new_group_bys = [] - for group in formula.groupby: - new_group = group - if isinstance(group, Column): - modulator = find_modulator(self.modulators, group.name) - if modulator: - new_group = Column(name=modulator.to_key) - self.applied_modulators.append(modulator) - elif isinstance(group, AliasedExpression): - modulator = find_modulator(self.modulators, group.exp.name) - if modulator: - new_group = AliasedExpression( - exp=Column(name=modulator.to_key), alias=group.alias - ) - self.applied_modulators.append(modulator) - new_group_bys.append(new_group) - formula = formula.set_groupby(new_group_bys) - return formula - - def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( - timeseries.filters - ) - timeseries = timeseries.set_filters(filters) - - if timeseries.groupby: - new_group_bys = [] - for group in timeseries.groupby: - new_group = group - if isinstance(group, Column): - modulator = find_modulator(self.modulators, group.name) - if modulator: - new_group = Column(name=modulator.to_key) - self.applied_modulators.append(modulator) - elif isinstance(group, AliasedExpression): - modulator = find_modulator(self.modulators, group.exp.name) - if modulator: - new_group = AliasedExpression( - exp=Column(name=modulator.to_key), alias=group.alias - ) - self.applied_modulators.append(modulator) - new_group_bys.append(new_group) - timeseries = timeseries.set_groupby(new_group_bys) - return timeseries From 5e6066cc93f527352bf6436d7b41d2d3bd05cbed Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 15:36:44 +0200 Subject: [PATCH 11/16] wip: inject projects directly into the visitor that needs it instead of passing it around --- .../sentry_metrics/querying/data/api.py | 5 ++- .../querying/data/modulation/modulator.py | 16 ++++++--- .../data/postprocessing/demodulation.py | 2 +- .../querying/data/preparation/modulation.py | 2 +- .../querying/visitors/__init__.py | 2 -- .../querying/visitors/query_condition.py | 34 ++----------------- .../querying/visitors/query_expression.py | 12 ++----- 7 files changed, 21 insertions(+), 52 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 86ec461d7e2315..dce3ac5694998e 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -24,8 +24,6 @@ from sentry.sentry_metrics.querying.data.query import MQLQueriesResult, MQLQuery from sentry.sentry_metrics.querying.types import QueryType -modulators = [Project2ProjectIDModulator(from_key="project", to_key="project_id")] - def run_queries( mql_queries: Sequence[MQLQuery], @@ -59,7 +57,6 @@ def run_queries( intermediate_queries = [] # We parse the query plan and obtain a series of queries. parser = QueryParser(projects=projects, environments=environments, mql_queries=mql_queries) - for query_expression, query_order, query_limit in parser.generate_queries(): intermediate_queries.append( IntermediateQuery( @@ -70,6 +67,8 @@ def run_queries( ) preparation_steps = [] + modulators = [Project2ProjectIDModulator(projects)] + if features.has( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None ): diff --git a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py index 9b4cec12a3a24e..67ed7f371514e5 100644 --- a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py @@ -27,17 +27,25 @@ def demodulate(self, formula: Formula, **kwargs) -> Formula: class Project2ProjectIDModulator(Modulator): - def modulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: + def __init__( + self, projects: Sequence[Project], from_key: str = "project", to_key: str = "project_id" + ): + self.projects = projects + self.from_key = from_key + self.to_key = to_key + self.value_map: dict[Any, Any] = defaultdict(lambda: None) + + def modulate(self, formula: Formula) -> Formula: if formula not in self.value_map: self.value_map[formula] = None - for project in projects: + for project in self.projects: if project.slug == formula: self.value_map[formula] = project.id return self.value_map[formula] - def demodulate(self, formula: Formula, projects: Sequence[Project]) -> Formula: + def demodulate(self, formula: Formula) -> Formula: if formula not in self.value_map: - for project in projects: + for project in self.projects: if project.id == formula: self.value_map[formula] = project.slug diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 4f452ea60ea450..5e0a5d2a2f1bca 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -28,7 +28,7 @@ def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mappin for result_key in element.keys(): for modulator in self.modulators: if modulator.to_key == result_key: - original_value = modulator.demodulate(element[result_key], self.projects) + original_value = modulator.demodulate(element[result_key]) updated_element[modulator.from_key] = original_value keys_to_delete.append(result_key) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py index eade4318751538..6d20b1e9cbcac8 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -15,7 +15,7 @@ def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): def _get_modulated_intermediate_query( self, intermediate_query: IntermediateQuery ) -> IntermediateQuery: - visitor = ModulatorVisitor(self.projects, self.modulators) + visitor = ModulatorVisitor(self.modulators) modulated_query = visitor.visit(intermediate_query.metrics_query.query) return replace( diff --git a/src/sentry/sentry_metrics/querying/visitors/__init__.py b/src/sentry/sentry_metrics/querying/visitors/__init__.py index bc51e74b87e40a..b4af4db58fbeae 100644 --- a/src/sentry/sentry_metrics/querying/visitors/__init__.py +++ b/src/sentry/sentry_metrics/querying/visitors/__init__.py @@ -2,7 +2,6 @@ from .query_condition import ( LatestReleaseTransformationVisitor, MappingTransformationVisitor, - ProjectToProjectIDTransformationVisitor, TagsTransformationVisitor, ) from .query_expression import ( @@ -31,5 +30,4 @@ "QueriedMetricsVisitor", "UsedGroupBysVisitor", "UnitsNormalizationVisitor", - "ProjectToProjectIDTransformationVisitor", ] diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index c05d5e4c016d2b..a6c02c2b85ae54 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -100,38 +100,8 @@ def _visit_condition(self, condition: Condition) -> QueryCondition: ) -class ProjectToProjectIDTransformationVisitor(QueryConditionVisitor[QueryCondition]): - """ - Visitor that transforms all project name conditions in the query to project IDs. Initial - use case of this is to enable the front-end to query projects directly instead of by id - """ - - def __init__(self, projects: Sequence[Project]): - self._projects = projects - - def _visit_condition(self, condition: Condition) -> QueryCondition: - if ( - isinstance(condition.lhs, Column) - and condition.lhs.name == "project" - and isinstance(condition.rhs, str) - ): - return Condition( - lhs=Column(name="project_id"), - op=condition.op, - rhs=self._extract_project_id(condition.rhs), - ) - - return condition - - def _extract_project_id(self, project_slug: str) -> str: - for project in self._projects: - if project.slug == project_slug: - return project.id - - class ModulatorConditionVisitor(QueryConditionVisitor): - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): - self._projects = projects + def __init__(self, modulators: Sequence[Modulator]): self.modulators = modulators self.applied_modulators = [] @@ -146,7 +116,7 @@ def _visit_condition(self, condition: Condition) -> TVisited: self.applied_modulators.append(modulator) if isinstance(rhs, ScalarType): - new_rhs = modulator.modulate(rhs, self._projects) + new_rhs = modulator.modulate(rhs) return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) return condition diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index d21a202ea744ba..84b438c90fcd9d 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -4,7 +4,6 @@ from snuba_sdk.conditions import ConditionGroup from sentry.models.environment import Environment -from sentry.models.project import Project from sentry.sentry_metrics.querying.constants import COEFFICIENT_OPERATORS from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import InvalidMetricsQueryError @@ -462,17 +461,14 @@ class ModulatorVisitor(QueryExpressionVisitor): by API that need to be translated for Snuba to be able to query the data. """ - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): - self._projects = projects + def __init__(self, modulators: Sequence[Modulator]): self.modulators = modulators self.applied_modulators = [] def _visit_formula(self, formula: Formula) -> TVisited: formula = super()._visit_formula(formula) - filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( - formula.filters - ) + filters = ModulatorConditionVisitor(self.modulators).visit_group(formula.filters) formula = formula.set_filters(filters) if formula.groupby: @@ -496,9 +492,7 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - filters = ModulatorConditionVisitor(self._projects, self.modulators).visit_group( - timeseries.filters - ) + filters = ModulatorConditionVisitor(self.modulators).visit_group(timeseries.filters) timeseries = timeseries.set_filters(filters) if timeseries.groupby: From 9b6e9ec95b07f9f51ae21c5afcd504389d0a70e0 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 15:42:33 +0200 Subject: [PATCH 12/16] wip: move groupby modulation into function for ModulatorVisitor --- .../sentry_metrics/querying/data/api.py | 3 +- .../querying/visitors/query_expression.py | 55 ++++++++----------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index dce3ac5694998e..3947dd787f492a 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -73,7 +73,8 @@ def run_queries( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None ): preparation_steps.append(UnitsNormalizationStep()) - preparation_steps.append(QueryModulationStep(projects, modulators)) + + preparation_steps.append(QueryModulationStep(projects, modulators)) # We run a series of preparation steps which operate on the entire list of queries. intermediate_queries = run_preparation_steps(intermediate_queries, *preparation_steps) diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 84b438c90fcd9d..4e8d652a3f886e 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -472,23 +472,9 @@ def _visit_formula(self, formula: Formula) -> TVisited: formula = formula.set_filters(filters) if formula.groupby: - new_group_bys = [] - for group in formula.groupby: - new_group = group - if isinstance(group, Column): - modulator = find_modulator(self.modulators, group.name) - if modulator: - new_group = Column(name=modulator.to_key) - self.applied_modulators.append(modulator) - elif isinstance(group, AliasedExpression): - modulator = find_modulator(self.modulators, group.exp.name) - if modulator: - new_group = AliasedExpression( - exp=Column(name=modulator.to_key), alias=group.alias - ) - self.applied_modulators.append(modulator) - new_group_bys.append(new_group) + new_group_bys = self._modulate_groupby(formula.groupby) formula = formula.set_groupby(new_group_bys) + return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: @@ -496,21 +482,26 @@ def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: timeseries = timeseries.set_filters(filters) if timeseries.groupby: - new_group_bys = [] - for group in timeseries.groupby: - new_group = group - if isinstance(group, Column): - modulator = find_modulator(self.modulators, group.name) - if modulator: - new_group = Column(name=modulator.to_key) - self.applied_modulators.append(modulator) - elif isinstance(group, AliasedExpression): - modulator = find_modulator(self.modulators, group.exp.name) - if modulator: - new_group = AliasedExpression( - exp=Column(name=modulator.to_key), alias=group.alias - ) - self.applied_modulators.append(modulator) - new_group_bys.append(new_group) + new_group_bys = self._modulate_groupby(timeseries.groupby) timeseries = timeseries.set_groupby(new_group_bys) + return timeseries + + def _modulate_groupby(self, groupby: list[Column | AliasedExpression] | None = None): + new_group_bys = [] + for group in groupby: + new_group = group + if isinstance(group, Column): + modulator = find_modulator(self.modulators, group.name) + if modulator: + new_group = Column(name=modulator.to_key) + self.applied_modulators.append(modulator) + elif isinstance(group, AliasedExpression): + modulator = find_modulator(self.modulators, group.exp.name) + if modulator: + new_group = AliasedExpression( + exp=Column(name=modulator.to_key), alias=group.alias + ) + self.applied_modulators.append(modulator) + new_group_bys.append(new_group) + return new_group_bys From 02016d9a99482926cee525117d99d8ae389488f0 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 17:08:36 +0200 Subject: [PATCH 13/16] add tests & refactor --- .../sentry_metrics/querying/data/api.py | 2 +- .../querying/data/modulation/modulator.py | 13 +-- .../data/postprocessing/demodulation.py | 2 +- .../querying/data/preparation/modulation.py | 2 +- .../querying/visitors/query_condition.py | 12 +- .../querying/visitors/query_expression.py | 12 +- .../sentry_metrics/querying/data/test_api.py | 103 +++++++++++++++++- 7 files changed, 122 insertions(+), 24 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 3947dd787f492a..1b5cc0204d26e1 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -67,7 +67,7 @@ def run_queries( ) preparation_steps = [] - modulators = [Project2ProjectIDModulator(projects)] + modulators = [Project2ProjectIDModulator()] if features.has( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None diff --git a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py index 67ed7f371514e5..cd65d802124100 100644 --- a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py @@ -27,25 +27,22 @@ def demodulate(self, formula: Formula, **kwargs) -> Formula: class Project2ProjectIDModulator(Modulator): - def __init__( - self, projects: Sequence[Project], from_key: str = "project", to_key: str = "project_id" - ): - self.projects = projects + def __init__(self, from_key: str = "project", to_key: str = "project_id"): self.from_key = from_key self.to_key = to_key self.value_map: dict[Any, Any] = defaultdict(lambda: None) - def modulate(self, formula: Formula) -> Formula: + def modulate(self, projects: Sequence[Project], formula: Formula) -> Formula: if formula not in self.value_map: self.value_map[formula] = None - for project in self.projects: + for project in projects: if project.slug == formula: self.value_map[formula] = project.id return self.value_map[formula] - def demodulate(self, formula: Formula) -> Formula: + def demodulate(self, projects: Sequence[Project], formula: Formula) -> Formula: if formula not in self.value_map: - for project in self.projects: + for project in projects: if project.id == formula: self.value_map[formula] = project.slug diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 5e0a5d2a2f1bca..9a06e39eaf4a5f 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -28,7 +28,7 @@ def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mappin for result_key in element.keys(): for modulator in self.modulators: if modulator.to_key == result_key: - original_value = modulator.demodulate(element[result_key]) + original_value = modulator.demodulate(self.projects, element[result_key]) updated_element[modulator.from_key] = original_value keys_to_delete.append(result_key) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py index 6d20b1e9cbcac8..eade4318751538 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -15,7 +15,7 @@ def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): def _get_modulated_intermediate_query( self, intermediate_query: IntermediateQuery ) -> IntermediateQuery: - visitor = ModulatorVisitor(self.modulators) + visitor = ModulatorVisitor(self.projects, self.modulators) modulated_query = visitor.visit(intermediate_query.metrics_query.query) return replace( diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index a6c02c2b85ae54..bfd919917df360 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -1,7 +1,6 @@ from collections.abc import Mapping, Sequence from snuba_sdk import BooleanCondition, BooleanOp, Column, Condition, Op -from snuba_sdk.expressions import ScalarType from sentry.api.serializers import bulk_fetch_project_latest_releases from sentry.models.project import Project @@ -101,7 +100,8 @@ def _visit_condition(self, condition: Condition) -> QueryCondition: class ModulatorConditionVisitor(QueryConditionVisitor): - def __init__(self, modulators: Sequence[Modulator]): + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self.projects = projects self.modulators = modulators self.applied_modulators = [] @@ -115,9 +115,11 @@ def _visit_condition(self, condition: Condition) -> TVisited: new_lhs = Column(modulator.to_key) self.applied_modulators.append(modulator) - if isinstance(rhs, ScalarType): - new_rhs = modulator.modulate(rhs) - return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) + if isinstance(rhs, list): + new_rhs = [modulator.modulate(self.projects, element) for element in rhs] + else: + new_rhs = modulator.modulate(self.projects, rhs) + return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) return condition diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 4e8d652a3f886e..81cc9aa713e930 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -4,6 +4,7 @@ from snuba_sdk.conditions import ConditionGroup from sentry.models.environment import Environment +from sentry.models.project import Project from sentry.sentry_metrics.querying.constants import COEFFICIENT_OPERATORS from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import InvalidMetricsQueryError @@ -461,14 +462,17 @@ class ModulatorVisitor(QueryExpressionVisitor): by API that need to be translated for Snuba to be able to query the data. """ - def __init__(self, modulators: Sequence[Modulator]): + def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + self.projects = projects self.modulators = modulators self.applied_modulators = [] def _visit_formula(self, formula: Formula) -> TVisited: formula = super()._visit_formula(formula) - filters = ModulatorConditionVisitor(self.modulators).visit_group(formula.filters) + filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( + formula.filters + ) formula = formula.set_filters(filters) if formula.groupby: @@ -478,7 +482,9 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: - filters = ModulatorConditionVisitor(self.modulators).visit_group(timeseries.filters) + filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( + timeseries.filters + ) timeseries = timeseries.set_filters(filters) if timeseries.groupby: diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 9d996319561a6e..226c506b723090 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -1638,7 +1638,7 @@ def test_condition_using_project_name_gets_visited_correctly(self) -> None: @with_feature("organizations:ddm-metrics-api-unit-normalization") def test_groupby_using_project_name_gets_visited_correctly(self) -> None: - self.new_project = self.create_project(name="Bar Again") + self.new_project_1 = self.create_project(name="Bar Again") for value, transaction, platform, env, time in ( (1, "/hello", "android", "prod", self.now()), (3, "/hello", "android", "prod", self.now()), @@ -1648,8 +1648,8 @@ def test_groupby_using_project_name_gets_visited_correctly(self) -> None: (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), ): self.store_metric( - self.new_project.organization.id, - self.new_project.id, + self.new_project_1.organization.id, + self.new_project_1.id, "distribution", TransactionMRI.DURATION.value, { @@ -1671,16 +1671,109 @@ def test_groupby_using_project_name_gets_visited_correctly(self) -> None: end=self.now() + timedelta(hours=1, minutes=30), interval=3600, organization=self.project.organization, - projects=[self.project, self.new_project], + projects=[self.project, self.new_project_1], environments=[], referrer="metrics.data.api", ) data = results["data"] assert len(data) == 1 - assert data[0][0]["by"] == {"project": self.new_project.slug} + assert data[0][0]["by"] == {"project": self.new_project_1.slug} assert data[0][0]["series"] == [ None, self.to_reference_unit(3.0), self.to_reference_unit(5.0), ] assert data[0][0]["totals"] == self.to_reference_unit(4.0) + + @with_feature("organizations:ddm-metrics-api-unit-normalization") + def test_groupby_and_filter_by_project_name(self) -> None: + self.new_project_1 = self.create_project(name="Bar Again") + self.new_project_2 = self.create_project(name="Bar Yet Again") + for value, transaction, platform, env, time in ( + (1, "/hello", "android", "prod", self.now()), + (3, "/hello", "android", "prod", self.now()), + (5, "/hello", "android", "prod", self.now()), + (2, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (5, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + ): + self.store_metric( + self.new_project_1.organization.id, + self.new_project_1.id, + "distribution", + TransactionMRI.DURATION.value, + { + "transaction": transaction, + "platform": platform, + "environment": env, + }, + self.ts(time), + value, + UseCaseID.TRANSACTIONS, + ) + + for value, transaction, platform, env, time in ( + (1, "/hello", "android", "prod", self.now()), + (3, "/hello", "android", "prod", self.now()), + (5, "/hello", "android", "prod", self.now()), + (2, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (5, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + ): + self.store_metric( + self.new_project_2.organization.id, + self.new_project_2.id, + "distribution", + TransactionMRI.DURATION.value, + { + "transaction": transaction, + "platform": platform, + "environment": env, + }, + self.ts(time), + value, + UseCaseID.TRANSACTIONS, + ) + + mqls = [ + self.mql( + "avg", + TransactionMRI.DURATION.value, + group_by="project", + filters="project:[bar,bar-again]", + ), + self.mql( + "avg", + TransactionMRI.DURATION.value, + group_by="project", + filters="!project:bar-yet-again", + ), + self.mql( + "avg", + TransactionMRI.DURATION.value, + group_by="project", + filters="project:bar or project:bar-again", + ), + ] + for mql in mqls: + query = MQLQuery(mql) + + results = self.run_query( + mql_queries=[query], + start=self.now() - timedelta(minutes=30), + end=self.now() + timedelta(hours=1, minutes=30), + interval=3600, + organization=self.project.organization, + projects=[self.project, self.new_project_1, self.new_project_2], + environments=[], + referrer="metrics.data.api", + ) + data = results["data"] + assert len(data) == 1 + assert data[0][0]["by"] == {"project": self.new_project_1.slug} + assert data[0][0]["series"] == [ + None, + self.to_reference_unit(3.0), + self.to_reference_unit(5.0), + ] + assert data[0][0]["totals"] == self.to_reference_unit(4.0) From 0bd36b1e8031c7ca738e882a362110cbfd7d7da4 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 16 Apr 2024 17:54:58 +0200 Subject: [PATCH 14/16] wip: fix mypy errors --- src/sentry/sentry_metrics/querying/data/api.py | 15 +++++++++------ .../sentry_metrics/querying/data/execution.py | 4 ++-- .../querying/data/modulation/modulator.py | 10 ++++++---- .../querying/visitors/query_expression.py | 11 ++++++----- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index 1b5cc0204d26e1..c83731f6247a52 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -1,6 +1,5 @@ from collections.abc import Sequence from datetime import datetime -from typing import cast from snuba_sdk import MetricsQuery, MetricsScope, Rollup @@ -8,13 +7,17 @@ from sentry.models.environment import Environment from sentry.models.organization import Organization from sentry.models.project import Project -from sentry.sentry_metrics.querying.data.execution import QueryExecutor, QueryResult -from sentry.sentry_metrics.querying.data.modulation.modulator import Project2ProjectIDModulator +from sentry.sentry_metrics.querying.data.execution import QueryExecutor +from sentry.sentry_metrics.querying.data.modulation.modulator import ( + Modulator, + Project2ProjectIDModulator, +) from sentry.sentry_metrics.querying.data.parsing import QueryParser from sentry.sentry_metrics.querying.data.postprocessing.base import run_postprocessing_steps from sentry.sentry_metrics.querying.data.postprocessing.demodulation import QueryDemodulationStep from sentry.sentry_metrics.querying.data.preparation.base import ( IntermediateQuery, + PreparationStep, run_preparation_steps, ) from sentry.sentry_metrics.querying.data.preparation.modulation import QueryModulationStep @@ -66,8 +69,8 @@ def run_queries( ) ) - preparation_steps = [] - modulators = [Project2ProjectIDModulator()] + preparation_steps: list[PreparationStep] = [] + modulators: list[Modulator] = [Project2ProjectIDModulator()] if features.has( "organizations:ddm-metrics-api-unit-normalization", organization=organization, actor=None @@ -88,4 +91,4 @@ def run_queries( results = run_postprocessing_steps(results, QueryDemodulationStep(projects, modulators)) # We wrap the result in a class that exposes some utils methods to operate on results. - return MQLQueriesResult(cast(list[QueryResult], results)) + return MQLQueriesResult(results) diff --git a/src/sentry/sentry_metrics/querying/data/execution.py b/src/sentry/sentry_metrics/querying/data/execution.py index 054e63c27b06a1..d3500553d3ee25 100644 --- a/src/sentry/sentry_metrics/querying/data/execution.py +++ b/src/sentry/sentry_metrics/querying/data/execution.py @@ -791,7 +791,7 @@ def _execution_loop(self): while continue_execution: continue_execution = self._bulk_execute() - def execute(self) -> Sequence[QueryResult]: + def execute(self) -> list[QueryResult]: """ Executes the scheduled queries in the execution loop. @@ -815,7 +815,7 @@ def execute(self) -> Sequence[QueryResult]: "Not all queries were executed in the execution loop" ) - return cast(Sequence[QueryResult], self._query_results) + return cast(list[QueryResult], self._query_results) def schedule(self, intermediate_query: IntermediateQuery, query_type: QueryType): """ diff --git a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py index cd65d802124100..f1f2d201b9f7da 100644 --- a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py @@ -18,11 +18,11 @@ def __hash__(self): return hash((self.from_key, self.to_key)) @abc.abstractmethod - def modulate(self, formula: Formula, **kwargs) -> Formula: + def modulate(self, projects: Sequence[Project], formula: Formula, **kwargs) -> Formula: return formula @abc.abstractmethod - def demodulate(self, formula: Formula, **kwargs) -> Formula: + def demodulate(self, projects: Sequence[Project], formula: Formula, **kwargs) -> Formula: return formula @@ -50,8 +50,8 @@ def demodulate(self, projects: Sequence[Project], formula: Formula) -> Formula: def find_modulator( - modulators: Sequence[Modulator], from_key: str = None, to_key: str = None -) -> Modulator: + modulators: Sequence[Modulator], from_key: str | None = None, to_key: str | None = None +) -> Modulator | None: for modulator in modulators: if from_key: if modulator.from_key == from_key: @@ -59,3 +59,5 @@ def find_modulator( if to_key: if modulator.to_key == to_key: return modulator + + return None diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 81cc9aa713e930..25eb662c6d069a 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -21,7 +21,6 @@ from sentry.sentry_metrics.querying.visitors.base import ( QueryConditionVisitor, QueryExpressionVisitor, - TVisited, ) from sentry.sentry_metrics.querying.visitors.query_condition import ModulatorConditionVisitor from sentry.snuba.metrics import parse_mri @@ -465,9 +464,9 @@ class ModulatorVisitor(QueryExpressionVisitor): def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): self.projects = projects self.modulators = modulators - self.applied_modulators = [] + self.applied_modulators: list[Modulator] = [] - def _visit_formula(self, formula: Formula) -> TVisited: + def _visit_formula(self, formula: Formula) -> Formula: formula = super()._visit_formula(formula) filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( @@ -481,7 +480,7 @@ def _visit_formula(self, formula: Formula) -> TVisited: return formula - def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: + def _visit_timeseries(self, timeseries: Timeseries) -> Timeseries: filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( timeseries.filters ) @@ -493,7 +492,9 @@ def _visit_timeseries(self, timeseries: Timeseries) -> TVisited: return timeseries - def _modulate_groupby(self, groupby: list[Column | AliasedExpression] | None = None): + def _modulate_groupby( + self, groupby: list[Column | AliasedExpression] | None = None + ) -> list[Column | AliasedExpression]: new_group_bys = [] for group in groupby: new_group = group From 304d798ff4569c2a7d28566a8a465df311459dc3 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 17 Apr 2024 10:58:21 +0200 Subject: [PATCH 15/16] add more tests & refactor --- .../sentry_metrics/querying/data/api.py | 10 +- .../data/modulation/modulation_value_map.py | 13 ++ .../querying/data/modulation/modulator.py | 51 +++++-- .../data/postprocessing/demodulation.py | 26 +++- .../querying/data/preparation/modulation.py | 13 +- .../querying/visitors/query_condition.py | 18 ++- .../querying/visitors/query_expression.py | 23 ++- .../sentry_metrics/querying/data/test_api.py | 132 ++++++++++++++++-- 8 files changed, 237 insertions(+), 49 deletions(-) create mode 100644 src/sentry/sentry_metrics/querying/data/modulation/modulation_value_map.py diff --git a/src/sentry/sentry_metrics/querying/data/api.py b/src/sentry/sentry_metrics/querying/data/api.py index c83731f6247a52..a9cefc4c0fff2e 100644 --- a/src/sentry/sentry_metrics/querying/data/api.py +++ b/src/sentry/sentry_metrics/querying/data/api.py @@ -8,6 +8,9 @@ from sentry.models.organization import Organization from sentry.models.project import Project from sentry.sentry_metrics.querying.data.execution import QueryExecutor +from sentry.sentry_metrics.querying.data.modulation.modulation_value_map import ( + QueryModulationValueMap, +) from sentry.sentry_metrics.querying.data.modulation.modulator import ( Modulator, Project2ProjectIDModulator, @@ -70,6 +73,7 @@ def run_queries( ) preparation_steps: list[PreparationStep] = [] + modulation_value_map = QueryModulationValueMap() modulators: list[Modulator] = [Project2ProjectIDModulator()] if features.has( @@ -77,7 +81,7 @@ def run_queries( ): preparation_steps.append(UnitsNormalizationStep()) - preparation_steps.append(QueryModulationStep(projects, modulators)) + preparation_steps.append(QueryModulationStep(projects, modulators, modulation_value_map)) # We run a series of preparation steps which operate on the entire list of queries. intermediate_queries = run_preparation_steps(intermediate_queries, *preparation_steps) @@ -88,7 +92,9 @@ def run_queries( executor.schedule(intermediate_query=intermediate_query, query_type=query_type) results = executor.execute() - results = run_postprocessing_steps(results, QueryDemodulationStep(projects, modulators)) + results = run_postprocessing_steps( + results, QueryDemodulationStep(projects, modulators, modulation_value_map) + ) # We wrap the result in a class that exposes some utils methods to operate on results. return MQLQueriesResult(results) diff --git a/src/sentry/sentry_metrics/querying/data/modulation/modulation_value_map.py b/src/sentry/sentry_metrics/querying/data/modulation/modulation_value_map.py new file mode 100644 index 00000000000000..cab03e1e4dbaee --- /dev/null +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulation_value_map.py @@ -0,0 +1,13 @@ +class QueryModulationValueMap(dict): + def __init__(self, *args, **kwargs): + self.update(*args, **kwargs) + + def __getitem__(self, key): + return dict.__getitem__(self, key) + + def __setitem__(self, key, value): + dict.__setitem__(self, key, value) + + def update(self, *args, **kwargs): + for key, value in dict(*args, **kwargs).items(): + self[key] = value diff --git a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py index f1f2d201b9f7da..2e6ecab11bf0e1 100644 --- a/src/sentry/sentry_metrics/querying/data/modulation/modulator.py +++ b/src/sentry/sentry_metrics/querying/data/modulation/modulator.py @@ -1,28 +1,40 @@ import abc -from collections import defaultdict from collections.abc import Sequence -from typing import Any from snuba_sdk import Formula from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.modulation.modulation_value_map import ( + QueryModulationValueMap, +) class Modulator(abc.ABC): def __init__(self, from_key: str, to_key: str): self.from_key = from_key self.to_key = to_key - self.value_map: dict[Any, Any] = defaultdict(lambda: None) def __hash__(self): return hash((self.from_key, self.to_key)) @abc.abstractmethod - def modulate(self, projects: Sequence[Project], formula: Formula, **kwargs) -> Formula: + def modulate( + self, + projects: Sequence[Project], + value_map: QueryModulationValueMap, + formula: Formula, + **kwargs, + ) -> Formula: return formula @abc.abstractmethod - def demodulate(self, projects: Sequence[Project], formula: Formula, **kwargs) -> Formula: + def demodulate( + self, + projects: Sequence[Project], + value_map: QueryModulationValueMap, + formula: Formula, + **kwargs, + ) -> Formula: return formula @@ -30,23 +42,32 @@ class Project2ProjectIDModulator(Modulator): def __init__(self, from_key: str = "project", to_key: str = "project_id"): self.from_key = from_key self.to_key = to_key - self.value_map: dict[Any, Any] = defaultdict(lambda: None) - def modulate(self, projects: Sequence[Project], formula: Formula) -> Formula: - if formula not in self.value_map: - self.value_map[formula] = None + def modulate( + self, + projects: Sequence[Project], + value_map: QueryModulationValueMap, + formula: Formula, + ) -> Formula: + if formula not in value_map: + value_map[formula] = None for project in projects: if project.slug == formula: - self.value_map[formula] = project.id - return self.value_map[formula] + value_map[formula] = project.id + return value_map[formula] - def demodulate(self, projects: Sequence[Project], formula: Formula) -> Formula: - if formula not in self.value_map: + def demodulate( + self, + projects: Sequence[Project], + value_map: QueryModulationValueMap, + formula: Formula, + ) -> Formula: + if formula not in value_map: for project in projects: if project.id == formula: - self.value_map[formula] = project.slug + value_map[formula] = project.slug - return self.value_map[formula] + return value_map[formula] def find_modulator( diff --git a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py index 9a06e39eaf4a5f..85fb6455f20289 100644 --- a/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py +++ b/src/sentry/sentry_metrics/querying/data/postprocessing/demodulation.py @@ -8,27 +8,41 @@ class QueryDemodulationStep(PostProcessingStep): - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + def __init__( + self, + projects: Sequence[Project], + modulators: Sequence[Modulator], + value_map: dict[Any, Any], + ): self.projects = projects self.modulators = modulators + self.value_map = value_map def run(self, query_results: list[QueryResult]) -> list[QueryResult]: for query_result in query_results: if query_result.totals: - query_result.totals = self._demodulate_data(query_result.totals) + query_result.totals = self._demodulate_data( + query_result.totals, query_result.totals_query.modulators + ) if query_result.series: - query_result.series = self._demodulate_data(query_result.series) + query_result.series = self._demodulate_data( + query_result.series, query_result.series_query.modulators + ) return query_results - def _demodulate_data(self, data: Sequence[Mapping[str, Any]]) -> Sequence[Mapping[str, Any]]: + def _demodulate_data( + self, data: Sequence[Mapping[str, Any]], modulators: list[Modulator] + ) -> Sequence[Mapping[str, Any]]: for element in data: updated_element = dict() keys_to_delete = [] for result_key in element.keys(): - for modulator in self.modulators: + for modulator in modulators: if modulator.to_key == result_key: - original_value = modulator.demodulate(self.projects, element[result_key]) + original_value = modulator.demodulate( + self.projects, self.value_map, element[result_key] + ) updated_element[modulator.from_key] = original_value keys_to_delete.append(result_key) diff --git a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py index eade4318751538..a0b005b2d8a8ba 100644 --- a/src/sentry/sentry_metrics/querying/data/preparation/modulation.py +++ b/src/sentry/sentry_metrics/querying/data/preparation/modulation.py @@ -2,20 +2,29 @@ from dataclasses import replace from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.modulation.modulation_value_map import ( + QueryModulationValueMap, +) from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator from sentry.sentry_metrics.querying.data.preparation.base import IntermediateQuery, PreparationStep from sentry.sentry_metrics.querying.visitors.query_expression import ModulatorVisitor class QueryModulationStep(PreparationStep): - def __init__(self, projects: Sequence[Project], modulators: list[Modulator]): + def __init__( + self, + projects: Sequence[Project], + modulators: list[Modulator], + value_map: QueryModulationValueMap, + ): self.projects = projects self.modulators = modulators + self.value_map = value_map def _get_modulated_intermediate_query( self, intermediate_query: IntermediateQuery ) -> IntermediateQuery: - visitor = ModulatorVisitor(self.projects, self.modulators) + visitor = ModulatorVisitor(self.projects, self.modulators, self.value_map) modulated_query = visitor.visit(intermediate_query.metrics_query.query) return replace( diff --git a/src/sentry/sentry_metrics/querying/visitors/query_condition.py b/src/sentry/sentry_metrics/querying/visitors/query_condition.py index bfd919917df360..4c5852f3d9179d 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_condition.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_condition.py @@ -4,6 +4,9 @@ from sentry.api.serializers import bulk_fetch_project_latest_releases from sentry.models.project import Project +from sentry.sentry_metrics.querying.data.modulation.modulation_value_map import ( + QueryModulationValueMap, +) from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import LatestReleaseNotFoundError from sentry.sentry_metrics.querying.types import QueryCondition @@ -100,10 +103,16 @@ def _visit_condition(self, condition: Condition) -> QueryCondition: class ModulatorConditionVisitor(QueryConditionVisitor): - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + def __init__( + self, + projects: Sequence[Project], + modulators: Sequence[Modulator], + value_map: QueryModulationValueMap, + ): self.projects = projects self.modulators = modulators self.applied_modulators = [] + self.value_map = value_map def _visit_condition(self, condition: Condition) -> TVisited: lhs = condition.lhs @@ -116,9 +125,12 @@ def _visit_condition(self, condition: Condition) -> TVisited: self.applied_modulators.append(modulator) if isinstance(rhs, list): - new_rhs = [modulator.modulate(self.projects, element) for element in rhs] + new_rhs = [ + modulator.modulate(self.projects, self.value_map, element) + for element in rhs + ] else: - new_rhs = modulator.modulate(self.projects, rhs) + new_rhs = modulator.modulate(self.projects, self.value_map, rhs) return Condition(lhs=new_lhs, op=condition.op, rhs=new_rhs) return condition diff --git a/src/sentry/sentry_metrics/querying/visitors/query_expression.py b/src/sentry/sentry_metrics/querying/visitors/query_expression.py index 25eb662c6d069a..ed05ec28eb9e39 100644 --- a/src/sentry/sentry_metrics/querying/visitors/query_expression.py +++ b/src/sentry/sentry_metrics/querying/visitors/query_expression.py @@ -6,6 +6,9 @@ from sentry.models.environment import Environment from sentry.models.project import Project from sentry.sentry_metrics.querying.constants import COEFFICIENT_OPERATORS +from sentry.sentry_metrics.querying.data.modulation.modulation_value_map import ( + QueryModulationValueMap, +) from sentry.sentry_metrics.querying.data.modulation.modulator import Modulator, find_modulator from sentry.sentry_metrics.querying.errors import InvalidMetricsQueryError from sentry.sentry_metrics.querying.types import QueryExpression @@ -461,17 +464,23 @@ class ModulatorVisitor(QueryExpressionVisitor): by API that need to be translated for Snuba to be able to query the data. """ - def __init__(self, projects: Sequence[Project], modulators: Sequence[Modulator]): + def __init__( + self, + projects: Sequence[Project], + modulators: Sequence[Modulator], + value_map: QueryModulationValueMap, + ): self.projects = projects self.modulators = modulators + self.value_map = value_map self.applied_modulators: list[Modulator] = [] def _visit_formula(self, formula: Formula) -> Formula: formula = super()._visit_formula(formula) - filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( - formula.filters - ) + filters = ModulatorConditionVisitor( + self.projects, self.modulators, self.value_map + ).visit_group(formula.filters) formula = formula.set_filters(filters) if formula.groupby: @@ -481,9 +490,9 @@ def _visit_formula(self, formula: Formula) -> Formula: return formula def _visit_timeseries(self, timeseries: Timeseries) -> Timeseries: - filters = ModulatorConditionVisitor(self.projects, self.modulators).visit_group( - timeseries.filters - ) + filters = ModulatorConditionVisitor( + self.projects, self.modulators, self.value_map + ).visit_group(timeseries.filters) timeseries = timeseries.set_filters(filters) if timeseries.groupby: diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index 226c506b723090..df22840f3c87ad 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -1638,7 +1638,7 @@ def test_condition_using_project_name_gets_visited_correctly(self) -> None: @with_feature("organizations:ddm-metrics-api-unit-normalization") def test_groupby_using_project_name_gets_visited_correctly(self) -> None: - self.new_project_1 = self.create_project(name="Bar Again") + self.new_project = self.create_project(name="Bar Again") for value, transaction, platform, env, time in ( (1, "/hello", "android", "prod", self.now()), (3, "/hello", "android", "prod", self.now()), @@ -1648,8 +1648,8 @@ def test_groupby_using_project_name_gets_visited_correctly(self) -> None: (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), ): self.store_metric( - self.new_project_1.organization.id, - self.new_project_1.id, + self.new_project.organization.id, + self.new_project.id, "distribution", TransactionMRI.DURATION.value, { @@ -1671,19 +1671,20 @@ def test_groupby_using_project_name_gets_visited_correctly(self) -> None: end=self.now() + timedelta(hours=1, minutes=30), interval=3600, organization=self.project.organization, - projects=[self.project, self.new_project_1], + projects=[self.project, self.new_project], environments=[], referrer="metrics.data.api", ) - data = results["data"] - assert len(data) == 1 - assert data[0][0]["by"] == {"project": self.new_project_1.slug} - assert data[0][0]["series"] == [ + data = results["data"][0] + data = sorted(data, key=lambda x: x["by"]["project"]) + assert len(data) == 2 + assert data[1]["by"] == {"project": self.new_project.slug} + assert data[1]["series"] == [ None, self.to_reference_unit(3.0), self.to_reference_unit(5.0), ] - assert data[0][0]["totals"] == self.to_reference_unit(4.0) + assert data[1]["totals"] == self.to_reference_unit(4.0) @with_feature("organizations:ddm-metrics-api-unit-normalization") def test_groupby_and_filter_by_project_name(self) -> None: @@ -1768,12 +1769,115 @@ def test_groupby_and_filter_by_project_name(self) -> None: environments=[], referrer="metrics.data.api", ) - data = results["data"] - assert len(data) == 1 - assert data[0][0]["by"] == {"project": self.new_project_1.slug} - assert data[0][0]["series"] == [ + data = results["data"][0] + assert len(data) == 2 + data = sorted(data, key=lambda x: x["by"]["project"]) + assert data[1]["by"] == {"project": self.new_project_1.slug} + assert data[1]["series"] == [ None, self.to_reference_unit(3.0), self.to_reference_unit(5.0), ] - assert data[0][0]["totals"] == self.to_reference_unit(4.0) + assert data[1]["totals"] == self.to_reference_unit(4.0) + + @with_feature("organizations:ddm-metrics-api-unit-normalization") + def test_groupby_using_project_id_does_not_get_demodulated_into_project(self) -> None: + self.new_project = self.create_project(name="Bar Again") + for value, transaction, platform, env, time in ( + (1, "/hello", "android", "prod", self.now()), + (3, "/hello", "android", "prod", self.now()), + (5, "/hello", "android", "prod", self.now()), + (2, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (5, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + ): + self.store_metric( + self.new_project.organization.id, + self.new_project.id, + "distribution", + TransactionMRI.DURATION.value, + { + "transaction": transaction, + "platform": platform, + "environment": env, + }, + self.ts(time), + value, + UseCaseID.TRANSACTIONS, + ) + + mql = self.mql("avg", TransactionMRI.DURATION.value, group_by="project_id") + query = MQLQuery(mql) + + results = self.run_query( + mql_queries=[query], + start=self.now() - timedelta(minutes=30), + end=self.now() + timedelta(hours=1, minutes=30), + interval=3600, + organization=self.project.organization, + projects=[self.project, self.new_project], + environments=[], + referrer="metrics.data.api", + ) + data = results["data"][0] + assert len(data) == 2 + data = sorted(data, key=lambda x: x["by"]["project_id"]) + assert data[0]["by"] == {"project_id": self.project.id} + assert data[1]["by"] == {"project_id": self.new_project.id} + assert data[1]["series"] == [ + None, + self.to_reference_unit(3.0), + self.to_reference_unit(5.0), + ] + assert data[1]["totals"] == self.to_reference_unit(4.0) + + @with_feature("organizations:ddm-metrics-api-unit-normalization") + def test_only_specific_queries_get_modulated_and_demodulated(self) -> None: + self.new_project = self.create_project(name="Bar Again") + for value, transaction, platform, env, time in ( + (1, "/hello", "android", "prod", self.now()), + (3, "/hello", "android", "prod", self.now()), + (5, "/hello", "android", "prod", self.now()), + (2, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (5, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + (8, "/hello", "android", "prod", self.now() + timedelta(hours=1, minutes=30)), + ): + self.store_metric( + self.new_project.organization.id, + self.new_project.id, + "distribution", + TransactionMRI.DURATION.value, + { + "transaction": transaction, + "platform": platform, + "environment": env, + }, + self.ts(time), + value, + UseCaseID.TRANSACTIONS, + ) + + mql_1 = self.mql("avg", TransactionMRI.DURATION.value, group_by="project_id") + mql_2 = self.mql("avg", TransactionMRI.DURATION.value, group_by="project") + query_1 = MQLQuery(mql_1) + query_2 = MQLQuery(mql_2) + + results = self.run_query( + mql_queries=[query_1, query_2], + start=self.now() - timedelta(minutes=30), + end=self.now() + timedelta(hours=1, minutes=30), + interval=3600, + organization=self.project.organization, + projects=[self.project, self.new_project], + environments=[], + referrer="metrics.data.api", + ) + data_1 = results["data"][0] + data_1 = sorted(data_1, key=lambda x: x["by"]["project_id"]) + data_2 = results["data"][1] + data_2 = sorted(data_2, key=lambda x: x["by"]["project"]) + + assert data_1[0]["by"] == {"project_id": self.project.id} + assert data_1[1]["by"] == {"project_id": self.new_project.id} + assert data_2[0]["by"] == {"project": self.project.slug} + assert data_2[1]["by"] == {"project": self.new_project.slug} From 10cf8efcae693cdd43f7e46a667a90e09ebfeace Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 17 Apr 2024 10:59:22 +0200 Subject: [PATCH 16/16] remove comment --- tests/sentry/sentry_metrics/querying/data/test_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sentry/sentry_metrics/querying/data/test_api.py b/tests/sentry/sentry_metrics/querying/data/test_api.py index df22840f3c87ad..218b780dfc3397 100644 --- a/tests/sentry/sentry_metrics/querying/data/test_api.py +++ b/tests/sentry/sentry_metrics/querying/data/test_api.py @@ -110,7 +110,6 @@ def to_reference_unit( _, _, unit = unit_family_and_unit return unit.convert(value) - # why do we still pipe this through? seems redundant def run_query( self, mql_queries: Sequence[MQLQuery],