Skip to content

Commit 617d44b

Browse files
authored
ref(seer-grouping): Adjust types for group_id to hash switch (#70070)
This makes a few changes and additions to our types for Seer similar issue requests, in preparation for switching from sending the group id to sending the group hash. Specifically: - Make `group_id` optional in Seer request and response types, and add an optional `group_hash` property to both. - Add a `SeerSimilarIssueData` dataclass, to hold data from Seer about a single similar issue along with the issue's group id. Notes: - Though right now the contents of `SeerSimilarIssueData` have the same shape as those of `SimilarIssuesEmbeddingsData`, I chose to create a new type rather than reuse the existing one because once we make the `group_id` to `group_hash` switch, they will differ, in that the data which comes back from Seer will have the group's hash and the data we pass around will have the group's id. - I changed the name of `SimilarIssuesEmbeddingsData` to `RawSeerSimilarIssueData` to match the new dataclass. I'm not wedded to these names (both `SeerSimilarIssueData` and `RawSeerSimilarIssueData`) and open to suggestions here, but I specifically moved away from the `SimilarIssuesEmbeddingsXXXXX` pattern because a) it made them easier to distinguish from the request and response types, b) I needed a name which indicated that the data is about a single similar issue rather than all of the similar issues, and simply changing "Issues" to "Issue" wasn't obvious enough, and c) in `SimilarIssuesEmbeddingsXXXXX`, the "Issues" is really part of the "similar issues" descriptor on "embeddings", not something naming the contents of the data structure, so I needed "issue" to appear later on in the phrase. I could have gone with `SimilarIssuesEmbeddingsIssueData` and `RawSimilarIssuesEmbeddingsIssueData`, but those seemed a little cumbersome. I could be talked into it, though. - To keep things manageable, I'm going to do the switch to actually using `SeerSimilarIssueData` in a separate PR. [UPDATE: Done in #70240.]
1 parent 44b2e82 commit 617d44b

File tree

3 files changed

+23
-9
lines changed

3 files changed

+23
-9
lines changed

src/sentry/api/endpoints/group_similar_issues_embeddings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from sentry.models.group import Group
1717
from sentry.models.user import User
1818
from sentry.seer.utils import (
19-
SimilarIssuesEmbeddingsData,
19+
RawSeerSimilarIssueData,
2020
SimilarIssuesEmbeddingsRequest,
2121
get_similar_issues_embeddings,
2222
)
@@ -108,7 +108,7 @@ class GroupSimilarIssuesEmbeddingsEndpoint(GroupEndpoint):
108108

109109
def get_formatted_results(
110110
self,
111-
similar_issues_data: Sequence[SimilarIssuesEmbeddingsData],
111+
similar_issues_data: Sequence[RawSeerSimilarIssueData],
112112
user: User | AnonymousUser,
113113
) -> Sequence[tuple[Mapping[str, Any], Mapping[str, Any]] | None]:
114114
"""

src/sentry/seer/utils.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from dataclasses import dataclass
23
from typing import NotRequired, TypedDict
34

45
import sentry_sdk
@@ -85,23 +86,36 @@ def detect_breakpoints(breakpoint_request) -> BreakpointResponse:
8586

8687

8788
class SimilarIssuesEmbeddingsRequest(TypedDict):
88-
group_id: int
8989
project_id: int
9090
stacktrace: str
9191
message: str
9292
k: NotRequired[int] # how many neighbors to find
9393
threshold: NotRequired[float]
94+
group_id: NotRequired[int] # TODO: Remove this once we stop sending it to seer
95+
group_hash: NotRequired[str] # TODO: Make this required once id -> hash change is done
9496

9597

96-
class SimilarIssuesEmbeddingsData(TypedDict):
97-
parent_group_id: int
98+
class RawSeerSimilarIssueData(TypedDict):
9899
stacktrace_distance: float
99100
message_distance: float
100101
should_group: bool
102+
parent_group_id: NotRequired[int] # TODO: Remove this once seer stops sending it
103+
parent_group_hash: NotRequired[str] # TODO: Make this required once id -> hash change is done
101104

102105

103106
class SimilarIssuesEmbeddingsResponse(TypedDict):
104-
responses: list[SimilarIssuesEmbeddingsData]
107+
responses: list[RawSeerSimilarIssueData]
108+
109+
110+
# Like the data that comes back from seer, but guaranteed to have a parent group id
111+
@dataclass
112+
class SeerSimilarIssueData:
113+
stacktrace_distance: float
114+
message_distance: float
115+
should_group: bool
116+
parent_group_id: int
117+
# TODO: See if we end up needing the hash here
118+
parent_group_hash: str | None = None
105119

106120

107121
def get_similar_issues_embeddings(

tests/sentry/api/endpoints/test_group_similar_issues_embeddings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
)
1212
from sentry.api.serializers.base import serialize
1313
from sentry.models.group import Group
14-
from sentry.seer.utils import SimilarIssuesEmbeddingsData, SimilarIssuesEmbeddingsResponse
14+
from sentry.seer.utils import RawSeerSimilarIssueData, SimilarIssuesEmbeddingsResponse
1515
from sentry.testutils.cases import APITestCase
1616
from sentry.testutils.helpers.features import with_feature
1717
from sentry.utils import json
@@ -652,13 +652,13 @@ def test_get_stacktrace_string_no_exception(self):
652652

653653
def test_get_formatted_results(self):
654654
new_group = self.create_group(project=self.project)
655-
response_1: SimilarIssuesEmbeddingsData = {
655+
response_1: RawSeerSimilarIssueData = {
656656
"message_distance": 0.05,
657657
"parent_group_id": self.similar_group.id,
658658
"should_group": True,
659659
"stacktrace_distance": 0.01,
660660
}
661-
response_2: SimilarIssuesEmbeddingsData = {
661+
response_2: RawSeerSimilarIssueData = {
662662
"message_distance": 0.49,
663663
"parent_group_id": new_group.id,
664664
"should_group": False,

0 commit comments

Comments
 (0)