Skip to content

Commit 4c5824d

Browse files
authored
feat(tags): Add dataset param to OrganizationTagKeyValues endpoint (#74525)
This PR adds a `dataset` parameter to the OrganizationTagKeyValues endpoint, allowing full control over which dataset is queried. Our frontend currently queries the Discover dataset to retrieve tag values for the issue stream; we need this control so that it can query only the Events and IssuePlatform datasets.
1 parent 8995f0e commit 4c5824d

File tree

4 files changed

+150
-5
lines changed

4 files changed

+150
-5
lines changed

src/sentry/api/endpoints/organization_tagkey_values.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from sentry.api.paginator import SequencePaginator
1111
from sentry.api.serializers import serialize
1212
from sentry.api.utils import handle_query_errors
13+
from sentry.snuba.dataset import Dataset
1314
from sentry.tagstore.base import TAG_KEY_RE
1415

1516

@@ -31,6 +32,13 @@ def get(self, request: Request, organization, key) -> Response:
3132

3233
sentry_sdk.set_tag("query.tag_key", key)
3334

35+
dataset = None
36+
if request.GET.get("dataset"):
37+
try:
38+
dataset = Dataset(request.GET.get("dataset"))
39+
except ValueError:
40+
raise ParseError(detail="Invalid dataset parameter")
41+
3442
try:
3543
# still used by events v1 which doesn't require global views
3644
filter_params = self.get_snuba_params(request, organization, check_global_views=False)
@@ -47,6 +55,7 @@ def get(self, request: Request, organization, key) -> Response:
4755
key,
4856
filter_params["start"],
4957
filter_params["end"],
58+
dataset=dataset,
5059
query=request.GET.get("query"),
5160
order_by=validate_sort_field(request.GET.get("sort", "-last_seen")),
5261
include_transactions=request.GET.get("includeTransactions") == "1",

src/sentry/tagstore/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ def get_tag_value_paginator_for_projects(
232232
key,
233233
start=None,
234234
end=None,
235+
dataset: Dataset | None = None,
235236
query=None,
236237
order_by="-last_seen",
237238
include_transactions: bool = False,

src/sentry/tagstore/snuba/backend.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,7 @@ def get_tag_value_paginator_for_projects(
11761176
key,
11771177
start=None,
11781178
end=None,
1179+
dataset: Dataset = None,
11791180
query: str | None = None,
11801181
order_by="-last_seen",
11811182
include_transactions: bool = False,
@@ -1193,11 +1194,12 @@ def get_tag_value_paginator_for_projects(
11931194
if order_by == "-count":
11941195
order_by = "-times_seen"
11951196

1196-
dataset = Dataset.Events
1197-
if include_transactions:
1198-
dataset = Dataset.Discover
1199-
if include_replays:
1200-
dataset = Dataset.Replays
1197+
if not dataset:
1198+
dataset = Dataset.Events
1199+
if include_transactions:
1200+
dataset = Dataset.Discover
1201+
if include_replays:
1202+
dataset = Dataset.Replays
12011203

12021204
snuba_key = snuba.get_snuba_column_name(key, dataset=dataset)
12031205

tests/snuba/api/endpoints/test_organization_tagkey_values.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
from sentry.replays.testutils import mock_replay
99
from sentry.search.events.constants import RELEASE_ALIAS, SEMVER_ALIAS
10+
from sentry.snuba.dataset import Dataset
1011
from sentry.testutils.cases import APITestCase, ReplaysSnubaTestCase, SnubaTestCase
1112
from sentry.testutils.helpers.datetime import before_now, iso_format
1213
from sentry.utils.samples import load_data
14+
from tests.sentry.issues.test_utils import OccurrenceTestMixin
1315

1416

1517
class OrganizationTagKeyTestCase(APITestCase, SnubaTestCase):
@@ -808,3 +810,134 @@ def test_schema(self):
808810
"name",
809811
"value",
810812
]
813+
814+
815+
class DatasetParamOrganizationTagKeyValuesTest(OrganizationTagKeyTestCase, OccurrenceTestMixin):
    """Exercise the ``dataset`` query parameter of the tag-key-values endpoint.

    Each test stores data and then checks which tag values are reported when
    the request names the Events, Discover, or IssuePlatform dataset.
    """

    def run_dataset_test(self, key, expected, dataset: Dataset, **kwargs):
        """Run the inherited ``run_test`` with ``dataset`` added to the query string.

        Args:
            key: Tag key whose values are requested.
            expected: Expected result, forwarded unchanged to ``run_test``.
            dataset: Dataset whose ``value`` is sent as the ``dataset`` param.
            **kwargs: Extra keyword arguments forwarded to ``run_test``. A
                caller-supplied ``qs_params`` dict is copied rather than
                mutated, so it can be reused across calls.
        """
        # Every test here targets an explicit dataset, so always send it;
        # it overrides any "dataset" entry the caller may have supplied.
        kwargs["qs_params"] = {**kwargs.get("qs_params", {}), "dataset": dataset.value}
        super().run_test(key, expected, **kwargs)

    def test_dataset_events(self):
        """Stored events show up via Events and Discover, not IssuePlatform."""
        berries = ("raspberry", "blueberry", "banana", "banana")
        for letter, berry in zip("abcd", berries):
            self.store_event(
                data={
                    "event_id": letter * 32,
                    "tags": {"berry": berry},
                    "timestamp": iso_format(self.min_ago),
                },
                project_id=self.project.id,
            )

        # Should appear in Events and Discover datasets, but not IssuePlatform
        for dataset in (Dataset.Events, Dataset.Discover):
            self.run_dataset_test(
                "berry",
                expected=[("raspberry", 1), ("blueberry", 1), ("banana", 2)],
                dataset=dataset,
            )
        self.run_dataset_test(
            "berry",
            expected=[],
            dataset=Dataset.IssuePlatform,
        )

    def test_dataset_issue_platform(self):
        """Occurrence tags show up only via IssuePlatform; event tags do not."""
        self.store_event(
            data={
                "event_id": "a" * 32,
                "tags": {"stone_fruit": "peach"},
                "timestamp": iso_format(self.min_ago),
            },
            project_id=self.project.id,
        )
        self.process_occurrence(
            event_id=uuid.uuid4().hex,
            project_id=self.project.id,
            event_data={
                "title": "some problem",
                "platform": "python",
                "tags": {"stone_fruit": "cherry"},
                "timestamp": iso_format(self.min_ago),
                "received": iso_format(self.min_ago),
            },
        )

        # (stone_fruit: cherry) should appear in IssuePlatform dataset,
        # but (stone_fruit: peach) should not
        self.run_dataset_test(
            "stone_fruit",
            expected=[("cherry", 1)],
            dataset=Dataset.IssuePlatform,
        )
        # The stored event's tag is what Events and Discover report.
        for dataset in (Dataset.Events, Dataset.Discover):
            self.run_dataset_test(
                "stone_fruit",
                expected=[("peach", 1)],
                dataset=dataset,
            )

    def test_dataset_discover(self):
        """Transaction tags show up only via the Discover dataset."""
        event = load_data("transaction")
        event["tags"].extend([["fake_fruit", "tomato"]])
        event.update(
            {
                "transaction": "example_transaction",
                "event_id": uuid.uuid4().hex,
                "start_timestamp": iso_format(self.min_ago),
                "timestamp": iso_format(self.min_ago),
            }
        )
        event["measurements"]["lcp"]["value"] = 5000
        self.store_event(data=event, project_id=self.project.id)

        # The transaction's tag is expected to be absent from these two datasets.
        for dataset in (Dataset.IssuePlatform, Dataset.Events):
            self.run_dataset_test(
                "fake_fruit",
                expected=[],
                dataset=dataset,
            )
        self.run_dataset_test(
            "fake_fruit",
            expected=[("tomato", 1)],
            dataset=Dataset.Discover,
        )

0 commit comments

Comments
 (0)