From 897c7b2d277cce149536aca94329441a07c5da70 Mon Sep 17 00:00:00 2001
From: Joshua Ferge
Date: Wed, 15 May 2024 08:21:00 -0700
Subject: [PATCH] dev(similarity): add more telemetry to backfill

---
 .../tasks/backfill_seer_grouping_records.py | 33 ++++++++++++-------
 .../test_backfill_seer_grouping_records.py  |  8 ++---
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/sentry/tasks/backfill_seer_grouping_records.py b/src/sentry/tasks/backfill_seer_grouping_records.py
index 5876518d6d6213..1cb423d3052ac6 100644
--- a/src/sentry/tasks/backfill_seer_grouping_records.py
+++ b/src/sentry/tasks/backfill_seer_grouping_records.py
@@ -33,7 +33,6 @@
 from sentry.utils.snuba import bulk_snuba_queries
 
 BATCH_SIZE = 20
-SEER_BACKFILL_DELAY_PER_RECORD = 0.1
 BACKFILL_NAME = "backfill_grouping_records"
 LAST_PROCESSED_REDIS_KEY = "grouping_record_backfill.last_processed_id"
 
@@ -67,6 +66,14 @@ def backfill_seer_grouping_records(
     Task to backfill seer grouping_records table.
     Pass in last_processed_id = 0 if running project for the first time, else None
     """
+    logger.info(
+        "backfill_seer_grouping_records.start",
+        extra={
+            "project_id": project_id,
+            "last_processed_id": last_processed_id,
+            "dry_run": dry_run,
+        },
+    )
     project = Project.objects.get_from_cache(id=project_id)
     if not features.has("projects:similarity-embeddings-backfill", project):
         return
@@ -141,13 +148,14 @@
             project, rows, group_id_message_batch, group_hashes_dict
         )
 
-        response = post_bulk_grouping_records(
-            CreateGroupingRecordsRequest(
-                group_id_list=group_id_batch,
-                data=data["data"],
-                stacktrace_list=data["stacktrace_list"],
+        with metrics.timer(f"{BACKFILL_NAME}.post_bulk_grouping_records", sample_rate=1.0):
+            response = post_bulk_grouping_records(
+                CreateGroupingRecordsRequest(
+                    group_id_list=group_id_batch,
+                    data=data["data"],
+                    stacktrace_list=data["stacktrace_list"],
+                )
             )
-        )
         if response["success"]:
             groups = Group.objects.filter(project_id=project.id, id__in=group_id_batch)
             for group in groups:
@@ -174,7 +182,6 @@
         )  # needed for typing
         backfill_seer_grouping_records.apply_async(
             args=[project.id, last_processed_id],
-            countdown=BATCH_SIZE * SEER_BACKFILL_DELAY_PER_RECORD,
         )
         return
 
@@ -204,7 +211,9 @@ def lookup_group_data_stacktrace_bulk_with_fallback(
                     "group_id": group_id,
                     "event_id": event_id,
                 }
-                logger.info("tasks.backfill_seer_grouping_records.event_lookup_error", extra=extra)
+                logger.exception(
+                    "tasks.backfill_seer_grouping_records.event_lookup_error", extra=extra
+                )
                 continue
             except KeyError:
                 extra = {
@@ -212,7 +221,7 @@
                     "organization_id": project.organization.id,
                     "project_id": project.id,
                     "group_id": group_id,
                 }
-                logger.info("tasks.backfill_seer_grouping_records.no_group_hash", extra=extra)
+                logger.exception("tasks.backfill_seer_grouping_records.no_group_hash", extra=extra)
                 continue
     return bulk_group_data_stacktraces
@@ -249,7 +258,7 @@
             "group_data": json.dumps(rows),
             "error": e.message,
         }
-        logger.info(
+        logger.exception(
             "tasks.backfill_seer_grouping_records.bulk_event_lookup_exception",
             extra=extra,
         )
@@ -322,7 +331,7 @@
             "event_id": event_id,
             "error": e.message,
         }
-        logger.info(
+        logger.exception(
             "tasks.backfill_seer_grouping_records.event_lookup_exception", extra=extra
         )
 
diff --git a/tests/sentry/tasks/test_backfill_seer_grouping_records.py b/tests/sentry/tasks/test_backfill_seer_grouping_records.py
index 1078e3f5421e1d..0ad6f845e7729b 100644
--- a/tests/sentry/tasks/test_backfill_seer_grouping_records.py
+++ b/tests/sentry/tasks/test_backfill_seer_grouping_records.py
@@ -172,7 +172,7 @@ def test_lookup_group_data_stacktrace_single_exceptions(self, mock_logger, mock_
             self.group_hashes[event.group.id],
         )
         assert (group_data, stacktrace_string) == (None, "")
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.event_lookup_exception",
             extra={
                 "organization_id": self.project.organization.id,
@@ -261,7 +261,7 @@ def test_lookup_group_data_stacktrace_bulk_exceptions(self, mock_logger, mock_ge
         assert invalid_event_ids == set()
         assert bulk_group_data_stacktraces["data"] == []
         assert bulk_group_data_stacktraces["stacktrace_list"] == []
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.bulk_event_lookup_exception",
             extra={
                 "organization_id": self.project.organization.id,
@@ -510,7 +510,7 @@ def test_lookup_group_data_stacktrace_bulk_with_fallback_no_hash(
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
         assert bulk_group_data_stacktraces["data"] == expected_group_data
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.no_group_hash",
             extra={
                 "organization_id": self.project.organization.id,
@@ -550,7 +550,7 @@ def test_lookup_group_data_stacktrace_bulk_with_fallback_event_lookup_error(self
         ]
         assert bulk_group_data_stacktraces["data"] == expected_group_data
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.event_lookup_error",
             extra={
                 "organization_id": self.project.organization.id,
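
The notes below are reviewer sketches, not part of the patch; the diff above applies without them.

The logger.info to logger.exception change in the lookup-error paths is more than a rename: logger.exception logs at ERROR severity and attaches the traceback of the exception currently being handled, so these backfill failures surface in error tooling instead of passing as plain info lines. A minimal stdlib-only sketch; lookup_event and the event_id value are hypothetical stand-ins for the task's event lookup:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("sentry.tasks.backfill_seer_grouping_records")

def lookup_event(event_id: str) -> None:
    # Hypothetical stand-in for the event lookup that raises in the task.
    raise KeyError(event_id)

try:
    lookup_event("abc123")
except KeyError:
    # logger.exception must run inside an except block: it records the
    # active traceback and logs at ERROR level, which the old
    # logger.info(..., extra=...) call did not.
    logger.exception(
        "tasks.backfill_seer_grouping_records.event_lookup_error",
        extra={"event_id": "abc123"},
    )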
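Wrapping post_bulk_grouping_records in metrics.timer times each bulk POST of grouping records to Seer, and sample_rate=1.0 reports every call rather than a sample. A rough sketch of the context-manager timing pattern, using a local stand-in timer because sentry.utils.metrics and its backend are not shown in this patch:

import time
from contextlib import contextmanager

BACKFILL_NAME = "backfill_grouping_records"

@contextmanager
def timer(key: str, sample_rate: float = 1.0):
    # Stand-in for sentry.utils.metrics.timer: measures the wrapped
    # block and emits one timing metric for it.
    start = time.monotonic()
    try:
        yield
    finally:
        elapsed = time.monotonic() - start
        print(f"timing {key}: {elapsed:.3f}s (sample_rate={sample_rate})")

with timer(f"{BACKFILL_NAME}.post_bulk_grouping_records", sample_rate=1.0):
    time.sleep(0.05)  # stands in for the HTTP round trip to Seer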
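Deleting SEER_BACKFILL_DELAY_PER_RECORD also deletes the countdown from the task's self-rechaining apply_async call; with the old constants that delay was BATCH_SIZE * 0.1 = 2.0 seconds per batch, so each follow-up batch is now scheduled immediately. A hedged sketch of the rechaining pattern; the app name, broker URL, and batch bookkeeping are placeholders, not Sentry's actual setup:

from celery import Celery

app = Celery("backfill_sketch", broker="memory://")  # placeholder broker

BATCH_SIZE = 20

@app.task
def backfill_batch(project_id: int, last_processed_id: int) -> None:
    # ... process up to BATCH_SIZE groups after last_processed_id ...
    next_id = last_processed_id + BATCH_SIZE  # simplified bookkeeping
    # Re-enqueue the next batch. The patch removes the former
    # countdown=BATCH_SIZE * SEER_BACKFILL_DELAY_PER_RECORD argument,
    # which had delayed each re-enqueued batch by 2.0 seconds.
    backfill_batch.apply_async(args=[project_id, next_id])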
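On the test side, every mock_logger.info.assert_called_with becomes mock_logger.exception.assert_called_with to match the new logging calls. A self-contained sketch of that assertion pattern; handle_missing_hash is a toy stand-in for the fallback helper under test:

import logging
from unittest import mock

logger = logging.getLogger(__name__)

def handle_missing_hash(group_id: int) -> None:
    # Toy version of the fallback path that hits a missing group hash.
    try:
        raise KeyError(group_id)
    except KeyError:
        logger.exception(
            "tasks.backfill_seer_grouping_records.no_group_hash",
            extra={"group_id": group_id},
        )

with mock.patch.object(logger, "exception") as mock_exception:
    handle_missing_hash(1)
    # Mirrors the updated assertions: exception, not info, must receive
    # the log key and the structured extra payload.
    mock_exception.assert_called_with(
        "tasks.backfill_seer_grouping_records.no_group_hash",
        extra={"group_id": 1},
    )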