Commit f5c74ad

ref(similarity): Manually filter times seen in backfill query (#76188)
Manually filter times seen in the backfill query to prevent the query from timing out. Only end the backfill task for the project if the length of the unfiltered batch is 0. Remove the retry on OperationalError.
1 parent 3465d84 commit f5c74ad
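
The commit message describes two related changes: the times_seen filter moves out of the SQL WHERE clause (where it was pushing Postgres toward a bad query plan and timing out) and into application memory, and the task now stops only when the unfiltered batch from Postgres is empty, since a batch can be filtered away entirely without the project being done. Below is a minimal, self-contained sketch of that pattern; the helper name, status constants, and row shape are placeholders for illustration, not Sentry code.

from datetime import UTC, datetime, timedelta

# Placeholder status values for this sketch only (Sentry uses GroupStatus constants).
PENDING_DELETION = 4
DELETION_IN_PROGRESS = 5


def filter_batch_in_memory(raw_rows):
    """Filter (id, data, status, last_seen, times_seen) rows in Python instead of SQL.

    Returns the filtered batch, the last raw group id, and the raw batch length so the
    caller can tell "Postgres returned nothing" apart from "everything was filtered out".
    """
    filtered, last_raw_id, raw_len = [], None, 0
    cutoff = datetime.now(UTC) - timedelta(days=90)
    for group_id, data, status, last_seen, times_seen in raw_rows:
        if (
            status not in (PENDING_DELETION, DELETION_IN_PROGRESS)
            and last_seen > cutoff
            and times_seen > 1
        ):
            filtered.append((group_id, data))
        # Track the cursor and the count even for rows that fail the filter.
        last_raw_id = group_id
        raw_len += 1
    return filtered, last_raw_id, raw_len


# Both rows are invalid, yet raw_len is 2, so the backfill would continue
# from group id 9 instead of stopping.
rows = [(10, {}, PENDING_DELETION, datetime.now(UTC), 5), (9, {}, 0, datetime.now(UTC), 1)]
print(filter_batch_in_memory(rows))  # ([], 9, 2)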

2 files changed: +143, -66 lines

src/sentry/tasks/embeddings_grouping/utils.py

Lines changed: 28 additions & 45 deletions
@@ -7,7 +7,6 @@
 
 import sentry_sdk
 from django.db.models import Q
-from django.db.utils import OperationalError
 from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable
 from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request
 
@@ -122,58 +121,40 @@ def initialize_backfill(
 
 
 def _make_postgres_call_with_filter(group_id_filter: Q, project_id: int, batch_size: int):
+    """
+    Return the filtered batch of group ids to be backfilled, the last group id in the raw batch,
+    and the length of the raw batch.
+    """
+
     groups_to_backfill_batch_raw = (
         Group.objects.filter(
             group_id_filter,
             project_id=project_id,
             type=ErrorGroupType.type_id,
-            times_seen__gt=1,
         )
-        .values_list("id", "data", "status", "last_seen")
+        .values_list("id", "data", "status", "last_seen", "times_seen")
         .order_by("-id")[:batch_size]
     )
 
-    # Filter out groups that are pending deletion in memory so postgres won't make a bad query plan
+    # Filter out groups that are pending deletion and have times_seen > 1 in memory so postgres won't make a bad query plan
     # Get the last queried group id while we are iterating; even if it's not valid to be backfilled
     # we want to keep the value to be used as an filter for the next batch
-    groups_to_backfill_batch, batch_end_group_id = [], None
+    groups_to_backfill_batch, batch_raw_end_group_id, backfill_batch_raw_length = [], None, 0
     for group in groups_to_backfill_batch_raw:
-        if group[2] not in [
-            GroupStatus.PENDING_DELETION,
-            GroupStatus.DELETION_IN_PROGRESS,
-        ] and group[3] > datetime.now(UTC) - timedelta(days=90):
+        if (
+            group[2]
+            not in [
+                GroupStatus.PENDING_DELETION,
+                GroupStatus.DELETION_IN_PROGRESS,
+            ]
+            and group[3] > datetime.now(UTC) - timedelta(days=90)
+            and group[4] > 1
+        ):
             groups_to_backfill_batch.append((group[0], group[1]))
-        batch_end_group_id = group[0]
-
-    return groups_to_backfill_batch, batch_end_group_id
-
-
-def _make_postgres_call_with_retry(group_id_filter: Q, project_id: int, batch_size: int):
-    """
-    Try postgres query. If it has an operational error, retry with a decreased batch size.
-    """
-    try:
-        groups_to_backfill_batch, batch_end_group_id = _make_postgres_call_with_filter(
-            group_id_filter, project_id, batch_size
-        )
-    except OperationalError:
-        batch_size = batch_size // 2
-        try:
-            logger.info(
-                "tasks.backfill_seer_grouping_records.postgres_query_retry",
-                extra={"project_id": project_id, "batch_size": batch_size},
-            )
-            groups_to_backfill_batch, batch_end_group_id = _make_postgres_call_with_filter(
-                group_id_filter, project_id, batch_size
-            )
-        except OperationalError:
-            logger.exception(
-                "tasks.backfill_seer_grouping_records.postgres_query_operational_error",
-                extra={"project_id": project_id, "batch_size": batch_size},
-            )
-            raise
+        batch_raw_end_group_id = group[0]
+        backfill_batch_raw_length += 1
 
-    return (groups_to_backfill_batch, batch_size, batch_end_group_id)
+    return groups_to_backfill_batch, batch_raw_end_group_id, backfill_batch_raw_length
 
 
 @sentry_sdk.tracing.trace
@@ -184,20 +165,22 @@ def get_current_batch_groups_from_postgres(
     if last_processed_group_id is not None:
        group_id_filter = Q(id__lt=last_processed_group_id)
 
-    (groups_to_backfill_batch, batch_size, batch_end_group_id) = _make_postgres_call_with_retry(
-        group_id_filter, project.id, batch_size
-    )
-    total_groups_to_backfill_length = len(groups_to_backfill_batch)
+    (
+        groups_to_backfill_batch,
+        batch_end_group_id,
+        backfill_batch_raw_length,
+    ) = _make_postgres_call_with_filter(group_id_filter, project.id, batch_size)
 
     logger.info(
         "backfill_seer_grouping_records.batch",
         extra={
             "project_id": project.id,
-            "batch_len": total_groups_to_backfill_length,
+            "batch_len": len(groups_to_backfill_batch),
             "last_processed_group_id": batch_end_group_id,
         },
     )
-    if total_groups_to_backfill_length == 0:
+
+    if backfill_batch_raw_length == 0:
        logger.info(
            "backfill_seer_grouping_records.no_more_groups",
            extra={"project_id": project.id},
tests/sentry/tasks/test_backfill_seer_grouping_records.py

Lines changed: 115 additions & 21 deletions
@@ -9,7 +9,6 @@
 
 import pytest
 from django.db.models import Q
-from django.db.utils import OperationalError
 from django.test import override_settings
 from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable
 from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request
@@ -1737,28 +1736,120 @@ def test_backfill_seer_grouping_records_manually_skip_project(self, mock_logger)
 
     @with_feature("projects:similarity-embeddings-backfill")
     @patch("sentry.tasks.embeddings_grouping.utils.logger")
-    @patch(
-        "sentry.tasks.embeddings_grouping.utils._make_postgres_call_with_filter",
-        side_effect=OperationalError,
-    )
-    def test_backfill_seer_grouping_records_postgres_exception(
-        self, mock_make_postgres_call_with_filter, mock_logger
+    @patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
+    def test_backfill_seer_grouping_records_empty_batch(
+        self, mock_post_bulk_grouping_records, mock_logger
     ):
         """
-        Test log after postgres query retries with decreased batch size
+        Test that if a backfill batch is empty due to the filtering of invalid groups, the backfill
+        task continues and calls the next batch.
         """
+        mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
+        project_invalid_batch = self.create_project(organization=self.organization)
         batch_size = options.get("embeddings-grouping.seer.backfill-batch-size")
-        with pytest.raises(Exception), TaskRunner():
-            backfill_seer_grouping_records_for_project(self.project.id, None)
 
-        mock_logger.info.assert_called_with(
-            "tasks.backfill_seer_grouping_records.postgres_query_retry",
-            extra={"project_id": self.project.id, "batch_size": batch_size // 2},
-        )
-        mock_logger.exception.assert_called_with(
-            "tasks.backfill_seer_grouping_records.postgres_query_operational_error",
-            extra={"project_id": self.project.id, "batch_size": batch_size // 2},
-        )
+        # Create batch size valid groups
+        function_names = [f"another_function_{str(i)}" for i in range(batch_size)]
+        type_names = [f"AnotherError{str(i)}" for i in range(batch_size)]
+        value_names = ["error with value" for _ in range(batch_size)]
+        group_ids = []
+        for i in range(batch_size):
+            data = {
+                "exception": self.create_exception_values(
+                    function_names[i], type_names[i], value_names[i]
+                ),
+                "title": "title",
+                "timestamp": iso_format(before_now(seconds=10)),
+            }
+            event = self.store_event(
+                data=data, project_id=project_invalid_batch.id, assert_no_errors=False
+            )
+            event.group.times_seen = 2
+            # event.group.data["metadata"] = copy.deepcopy(default_metadata)
+            event.group.save()
+            group_ids.append(event.group.id)
+        group_ids.sort()
+
+        # Create batch size invalid groups (some with times seen == 1, others pending deletion)
+        function_names = [f"function_{str(i)}" for i in range(batch_size)]
+        type_names = [f"Error{str(i)}" for i in range(batch_size)]
+        value_names = ["error with value" for _ in range(batch_size)]
+        group_ids_invalid = []
+        for i in range(batch_size):
+            data = {
+                "exception": self.create_exception_values(
+                    function_names[i], type_names[i], value_names[i]
+                ),
+                "title": "title",
+                "timestamp": iso_format(before_now(seconds=10)),
+            }
+            event = self.store_event(
+                data=data, project_id=project_invalid_batch.id, assert_no_errors=False
+            )
+            event.group.times_seen = 1 if i < batch_size / 2 else 2
+            event.group.status = (
+                GroupStatus.PENDING_DELETION if i >= batch_size / 2 else GroupStatus.UNRESOLVED
+            )
+            event.group.save()
+            group_ids_invalid.append(event.group.id)
+        group_ids_invalid.sort()
+
+        with TaskRunner():
+            backfill_seer_grouping_records_for_project(project_invalid_batch.id, None)
+
+        expected_call_args_list = [
+            call(
+                "backfill_seer_grouping_records.start",
+                extra={"project_id": project_invalid_batch.id, "last_processed_index": None},
+            ),
+            call(
+                "backfill_seer_grouping_records.batch",
+                extra={
+                    "project_id": project_invalid_batch.id,
+                    "batch_len": 0,
+                    "last_processed_group_id": group_ids_invalid[0],
+                },
+            ),
+            call(
+                "backfill_seer_grouping_records.start",
+                extra={
+                    "project_id": project_invalid_batch.id,
+                    "last_processed_index": group_ids_invalid[0],
+                },
+            ),
+            call(
+                "backfill_seer_grouping_records.batch",
+                extra={
+                    "project_id": project_invalid_batch.id,
+                    "batch_len": batch_size,
+                    "last_processed_group_id": group_ids[0],
+                },
+            ),
+            call(
+                "backfill_seer_grouping_records.bulk_update",
+                extra={"project_id": project_invalid_batch.id, "num_updated": batch_size},
+            ),
+            call(
+                "backfill_seer_grouping_records.start",
+                extra={
+                    "project_id": project_invalid_batch.id,
+                    "last_processed_index": group_ids[0],
+                },
+            ),
+            call(
+                "backfill_seer_grouping_records.batch",
+                extra={
+                    "project_id": project_invalid_batch.id,
+                    "batch_len": 0,
+                    "last_processed_group_id": None,
+                },
+            ),
+            call(
+                "backfill_seer_grouping_records.no_more_groups",
+                extra={"project_id": project_invalid_batch.id},
+            ),
+        ]
+        assert mock_logger.info.call_args_list == expected_call_args_list
 
     def test_make_postgres_call_with_filter_invalid(self):
         """
@@ -1779,8 +1870,11 @@ def test_make_postgres_call_with_filter_invalid(self):
         event.group.save()
         deleted_group = event.group
 
-        groups_to_backfill_batch, batch_end_group_id = _make_postgres_call_with_filter(
-            Q(), self.project.id, batch_size
-        )
+        (
+            groups_to_backfill_batch,
+            batch_end_group_id,
+            backfill_batch_raw_length,
+        ) = _make_postgres_call_with_filter(Q(), self.project.id, batch_size)
         assert groups_to_backfill_batch == []
         assert batch_end_group_id == deleted_group.id
+        assert backfill_batch_raw_length == 1
