diff --git a/src/sentry/issues/grouptype.py b/src/sentry/issues/grouptype.py index e687ebe5b3ec34..b6b189309e141f 100644 --- a/src/sentry/issues/grouptype.py +++ b/src/sentry/issues/grouptype.py @@ -247,10 +247,6 @@ class PerformanceGroupTypeDefaults: noise_config = NoiseConfig() -class CronGroupTypeDefaults: - notification_config = NotificationConfig(context=[]) - - class ReplayGroupTypeDefaults: notification_config = NotificationConfig(context=[]) @@ -518,36 +514,27 @@ class ProfileFunctionRegressionType(GroupType): @dataclass(frozen=True) -class MonitorCheckInFailure(CronGroupTypeDefaults, GroupType): +class MonitorIncidentType(GroupType): type_id = 4001 slug = "monitor_check_in_failure" - description = "Monitor Check In Failed" + description = "Crons Monitor Failed" category = GroupCategory.CRON.value released = True creation_quota = Quota(3600, 60, 60_000) # 60,000 per hour, sliding window of 60 seconds default_priority = PriorityLevel.HIGH + notification_config = NotificationConfig(context=[]) @dataclass(frozen=True) -class MonitorCheckInTimeout(CronGroupTypeDefaults, GroupType): +class MonitorCheckInTimeoutDeprecated(MonitorIncidentType, GroupType): + # This is deprecated, only kept around for its type_id type_id = 4002 - slug = "monitor_check_in_timeout" - description = "Monitor Check In Timeout" - category = GroupCategory.CRON.value - released = True - creation_quota = Quota(3600, 60, 60_000) # 60,000 per hour, sliding window of 60 seconds - default_priority = PriorityLevel.HIGH @dataclass(frozen=True) -class MonitorCheckInMissed(CronGroupTypeDefaults, GroupType): +class MonitorCheckInMissedDeprecated(MonitorIncidentType, GroupType): + # This is deprecated, only kept around for its type_id type_id = 4003 - slug = "monitor_check_in_missed" - description = "Monitor Check In Missed" - category = GroupCategory.CRON.value - released = True - creation_quota = Quota(3600, 60, 60_000) # 60,000 per hour, sliding window of 60 seconds - default_priority = 
PriorityLevel.HIGH @dataclass(frozen=True) diff --git a/src/sentry/monitors/constants.py b/src/sentry/monitors/constants.py index ca6b15bb4a9b27..8f06d3589dff4a 100644 --- a/src/sentry/monitors/constants.py +++ b/src/sentry/monitors/constants.py @@ -7,9 +7,6 @@ # current limit is 28 days MAX_TIMEOUT = 40_320 -# Format to use in the issue subtitle for the missed check-in timestamp -SUBTITLE_DATETIME_FORMAT = "%b %d, %I:%M %p %Z" - # maximum value for incident + recovery thresholds to be set # affects the performance of recent check-ins query # lowering this may invalidate monitors + block check-ins diff --git a/src/sentry/monitors/logic/mark_failed.py b/src/sentry/monitors/logic/mark_failed.py index f6df630b628bfd..73b20e3e69d4e1 100644 --- a/src/sentry/monitors/logic/mark_failed.py +++ b/src/sentry/monitors/logic/mark_failed.py @@ -7,13 +7,8 @@ from django.db.models import Q from sentry import features -from sentry.issues.grouptype import ( - MonitorCheckInFailure, - MonitorCheckInMissed, - MonitorCheckInTimeout, -) +from sentry.issues.grouptype import MonitorIncidentType from sentry.models.organization import Organization -from sentry.monitors.constants import SUBTITLE_DATETIME_FORMAT, TIMEOUT from sentry.monitors.models import ( CheckInStatus, MonitorCheckIn, @@ -243,10 +238,8 @@ def create_issue_platform_occurrence( monitor_env = failed_checkin.monitor_environment current_timestamp = datetime.now(timezone.utc) - occurrence_data = get_occurrence_data(failed_checkin) - # Get last successful check-in to show in evidence display - last_successful_checkin_timestamp = "None" + last_successful_checkin_timestamp = "Never" last_successful_checkin = monitor_env.get_last_successful_checkin() if last_successful_checkin: last_successful_checkin_timestamp = last_successful_checkin.date_added.isoformat() @@ -257,11 +250,11 @@ def create_issue_platform_occurrence( project_id=monitor_env.monitor.project_id, event_id=uuid.uuid4().hex, fingerprint=[incident.grouphash], - 
type=occurrence_data["group_type"], + type=MonitorIncidentType, issue_title=f"Monitor failure: {monitor_env.monitor.name}", - subtitle=occurrence_data["subtitle"], + subtitle="Your monitor has reached its failure threshold.", evidence_display=[ - IssueEvidence(name="Failure reason", value=occurrence_data["reason"], important=True), + IssueEvidence(name="Failure reason", value="incident", important=True), IssueEvidence( name="Environment", value=monitor_env.get_environment().name, important=False ), @@ -272,9 +265,9 @@ def create_issue_platform_occurrence( ), ], evidence_data={}, - culprit=occurrence_data["reason"], + culprit="incident", detection_time=current_timestamp, - level=occurrence_data["level"], + level="error", assignee=monitor_env.monitor.owner_actor, ) @@ -324,36 +317,3 @@ def get_monitor_environment_context(monitor_environment: MonitorEnvironment): "status": monitor_environment.get_status_display(), "type": monitor_environment.monitor.get_type_display(), } - - -def get_occurrence_data(checkin: MonitorCheckIn): - if checkin.status == CheckInStatus.MISSED: - expected_time = ( - checkin.expected_time.astimezone(checkin.monitor.timezone).strftime( - SUBTITLE_DATETIME_FORMAT - ) - if checkin.expected_time - else "the expected time" - ) - return { - "group_type": MonitorCheckInMissed, - "level": "warning", - "reason": "missed_checkin", - "subtitle": f"No check-in reported on {expected_time}.", - } - - if checkin.status == CheckInStatus.TIMEOUT: - duration = (checkin.monitor.config or {}).get("max_runtime") or TIMEOUT - return { - "group_type": MonitorCheckInTimeout, - "level": "error", - "reason": "duration", - "subtitle": f"Check-in exceeded maximum duration of {duration} minutes.", - } - - return { - "group_type": MonitorCheckInFailure, - "level": "error", - "reason": "error", - "subtitle": "An error occurred during the latest check-in.", - } diff --git a/tests/sentry/integrations/slack/notifications/test_issue_alert.py 
b/tests/sentry/integrations/slack/notifications/test_issue_alert.py index 4d56a3046f1901..55b50db848401b 100644 --- a/tests/sentry/integrations/slack/notifications/test_issue_alert.py +++ b/tests/sentry/integrations/slack/notifications/test_issue_alert.py @@ -11,7 +11,7 @@ from sentry.digests.backends.redis import RedisBackend from sentry.digests.notifications import event_to_record from sentry.integrations.slack.message_builder.issues import get_tags -from sentry.issues.grouptype import MonitorCheckInFailure +from sentry.issues.grouptype import MonitorIncidentType from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence from sentry.models.identity import Identity, IdentityStatus from sentry.models.integrations.external_actor import ExternalActor @@ -157,7 +157,7 @@ def test_crons_issue_alert_user_block(self): IssueEvidence("Evidence 2", "Value 2", False), IssueEvidence("Evidence 3", "Value 3", False), ], - MonitorCheckInFailure, + MonitorIncidentType, datetime.now(UTC), "info", "/api/123", @@ -165,7 +165,7 @@ def test_crons_issue_alert_user_block(self): occurrence.save() event.occurrence = occurrence - event.group.type = MonitorCheckInFailure.type_id + event.group.type = MonitorIncidentType.type_id notification = AlertRuleNotification( Notification(event=event, rule=self.rule), ActionTargetType.MEMBER, self.user.id ) diff --git a/tests/sentry/integrations/slack/test_message_builder.py b/tests/sentry/integrations/slack/test_message_builder.py index adb06be87ea502..7da9cfc267173a 100644 --- a/tests/sentry/integrations/slack/test_message_builder.py +++ b/tests/sentry/integrations/slack/test_message_builder.py @@ -26,7 +26,7 @@ from sentry.issues.grouptype import ( ErrorGroupType, FeedbackGroup, - MonitorCheckInFailure, + MonitorIncidentType, PerformanceP95EndpointRegressionGroupType, ProfileFileIOGroupType, ) @@ -1321,7 +1321,7 @@ def setUp(self): type=PerformanceP95EndpointRegressionGroupType.type_id ) - self.cron_issue = 
self.create_group(type=MonitorCheckInFailure.type_id) + self.cron_issue = self.create_group(type=MonitorIncidentType.type_id) self.feedback_issue = self.create_group( type=FeedbackGroup.type_id, substatus=GroupSubStatus.NEW ) diff --git a/tests/sentry/issues/test_ingest.py b/tests/sentry/issues/test_ingest.py index d1600b7f4a01e9..cb12627d233684 100644 --- a/tests/sentry/issues/test_ingest.py +++ b/tests/sentry/issues/test_ingest.py @@ -14,7 +14,7 @@ GroupCategory, GroupType, GroupTypeRegistry, - MonitorCheckInFailure, + MonitorIncidentType, NoiseConfig, ) from sentry.issues.ingest import ( @@ -248,7 +248,7 @@ def test_existing_group_different_category(self) -> None: new_event = self.store_event(data={}, project_id=self.project.id) new_occurrence = self.build_occurrence( - fingerprint=["some-fingerprint"], type=MonitorCheckInFailure.type_id + fingerprint=["some-fingerprint"], type=MonitorIncidentType.type_id ) with mock.patch("sentry.issues.ingest.logger") as logger: assert save_issue_from_occurrence(new_occurrence, new_event, None) is None diff --git a/tests/sentry/mail/test_adapter.py b/tests/sentry/mail/test_adapter.py index 2e2d6d9e8d0226..94e3d21653528e 100644 --- a/tests/sentry/mail/test_adapter.py +++ b/tests/sentry/mail/test_adapter.py @@ -17,7 +17,7 @@ from sentry.api.serializers.models.userreport import UserReportWithGroupSerializer from sentry.digests.notifications import build_digest, event_to_record from sentry.event_manager import EventManager, get_event_type -from sentry.issues.grouptype import MonitorCheckInFailure +from sentry.issues.grouptype import MonitorIncidentType from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence from sentry.mail import build_subject_prefix, mail_adapter from sentry.models.activity import Activity @@ -328,7 +328,7 @@ def test_simple_notification_generic(self): IssueEvidence("Evidence 2", "Value 2", False), IssueEvidence("Evidence 3", "Value 3", False), ], - MonitorCheckInFailure, + MonitorIncidentType, 
timezone.now(), "info", "/api/123", @@ -336,7 +336,7 @@ def test_simple_notification_generic(self): occurrence.save() event.occurrence = occurrence - event.group.type = MonitorCheckInFailure.type_id + event.group.type = MonitorIncidentType.type_id rule = Rule.objects.create(project=self.project, label="my rule") ProjectOwnership.objects.create(project_id=self.project.id, fallthrough=True) @@ -384,7 +384,7 @@ def test_simple_notification_generic_no_evidence(self): "1234", {"Test": 123}, [], # no evidence - MonitorCheckInFailure, + MonitorIncidentType, timezone.now(), "info", "/api/123", @@ -392,7 +392,7 @@ def test_simple_notification_generic_no_evidence(self): occurrence.save() event.occurrence = occurrence - event.group.type = MonitorCheckInFailure.type_id + event.group.type = MonitorIncidentType.type_id rule = Rule.objects.create(project=self.project, label="my rule") ProjectOwnership.objects.create(project_id=self.project.id, fallthrough=True) diff --git a/tests/sentry/migrations/test_0692_backfill_group_priority_again.py b/tests/sentry/migrations/test_0692_backfill_group_priority_again.py index 10355f1b98d0de..facbbd7b658420 100644 --- a/tests/sentry/migrations/test_0692_backfill_group_priority_again.py +++ b/tests/sentry/migrations/test_0692_backfill_group_priority_again.py @@ -6,7 +6,7 @@ from sentry.issues.grouptype import ( ErrorGroupType, FeedbackGroup, - MonitorCheckInFailure, + MonitorIncidentType, PerformanceConsecutiveHTTPQueriesGroupType, PerformanceP95EndpointRegressionGroupType, ReplayDeadClickType, @@ -114,7 +114,7 @@ def _create_groups_to_backfill(self, project: Project) -> None: { "status": GroupStatus.UNRESOLVED, "substatus": GroupSubStatus.ESCALATING, - "type": MonitorCheckInFailure.type_id, + "type": MonitorIncidentType.type_id, }, PriorityLevel.HIGH, ), @@ -181,7 +181,7 @@ def _create_groups_to_backfill(self, project: Project) -> None: ( "cron group with log level WARNING", { - "type": MonitorCheckInFailure.type_id, + "type": 
MonitorIncidentType.type_id, "level": logging.WARNING, }, PriorityLevel.MEDIUM, @@ -190,7 +190,7 @@ def _create_groups_to_backfill(self, project: Project) -> None: "cron group with log level ERROR", { "substatus": GroupSubStatus.ONGOING, - "type": MonitorCheckInFailure.type_id, + "type": MonitorIncidentType.type_id, "level": logging.ERROR, }, PriorityLevel.HIGH, @@ -198,7 +198,7 @@ def _create_groups_to_backfill(self, project: Project) -> None: ( "cron group with log level DEBUG", { - "type": MonitorCheckInFailure.type_id, + "type": MonitorIncidentType.type_id, "level": logging.DEBUG, }, PriorityLevel.HIGH, diff --git a/tests/sentry/monitors/logic/test_mark_failed.py b/tests/sentry/monitors/logic/test_mark_failed.py index df8f44dcb9b601..2d13f9069f5ecb 100644 --- a/tests/sentry/monitors/logic/test_mark_failed.py +++ b/tests/sentry/monitors/logic/test_mark_failed.py @@ -5,15 +5,10 @@ from django.utils import timezone -from sentry.issues.grouptype import ( - MonitorCheckInFailure, - MonitorCheckInMissed, - MonitorCheckInTimeout, -) +from sentry.issues.grouptype import MonitorIncidentType from sentry.issues.ingest import process_occurrence_data from sentry.models.groupassignee import GroupAssignee from sentry.models.grouphash import GroupHash -from sentry.monitors.constants import SUBTITLE_DATETIME_FORMAT from sentry.monitors.logic.mark_failed import mark_failed from sentry.monitors.models import ( CheckInStatus, @@ -283,11 +278,11 @@ def test_mark_failed_default_params_issue_platform(self, mock_produce_occurrence "project_id": self.project.id, "fingerprint": [monitor_incidents[0].grouphash], "issue_title": f"Monitor failure: {monitor.name}", - "subtitle": "An error occurred during the latest check-in.", + "subtitle": "Your monitor has reached its failure threshold.", "resource_id": None, "evidence_data": {}, "evidence_display": [ - {"name": "Failure reason", "value": "error", "important": True}, + {"name": "Failure reason", "value": "incident", "important": True}, { 
"name": "Environment", "value": monitor_environment.get_environment().name, @@ -299,9 +294,9 @@ def test_mark_failed_default_params_issue_platform(self, mock_produce_occurrence "important": False, }, ], - "type": MonitorCheckInFailure.type_id, + "type": MonitorIncidentType.type_id, "level": "error", - "culprit": "error", + "culprit": "incident", }, ) == dict(occurrence) @@ -341,228 +336,6 @@ def test_mark_failed_default_params_issue_platform(self, mock_produce_occurrence }, ) == dict(event) - @with_feature("organizations:issue-platform") - @patch("sentry.issues.producer.produce_occurrence_to_kafka") - def test_mark_failed_with_timeout_reason_issue_platform(self, mock_produce_occurrence_to_kafka): - monitor = Monitor.objects.create( - name="test monitor", - organization_id=self.organization.id, - project_id=self.project.id, - type=MonitorType.CRON_JOB, - config={ - "schedule": [1, "month"], - "schedule_type": ScheduleType.INTERVAL, - "max_runtime": 10, - "checkin_margin": None, - }, - ) - monitor_environment = MonitorEnvironment.objects.create( - monitor=monitor, - environment_id=self.environment.id, - status=monitor.status, - ) - successful_check_in = MonitorCheckIn.objects.create( - monitor=monitor, - monitor_environment=monitor_environment, - project_id=self.project.id, - status=CheckInStatus.OK, - ) - last_checkin = timezone.now() - - failed_checkin = MonitorCheckIn.objects.create( - monitor=monitor, - monitor_environment=monitor_environment, - project_id=self.project.id, - status=CheckInStatus.TIMEOUT, - date_added=last_checkin, - duration=monitor.config.get("max_runtime"), - ) - assert mark_failed(failed_checkin, ts=failed_checkin.date_added) - - monitor_environment.refresh_from_db() - assert monitor_environment.status == MonitorStatus.ERROR - - monitor_incidents = MonitorIncident.objects.filter(monitor_environment=monitor_environment) - assert len(monitor_incidents) == 1 - - assert len(mock_produce_occurrence_to_kafka.mock_calls) == 1 - - kwargs = 
mock_produce_occurrence_to_kafka.call_args.kwargs - occurrence = kwargs["occurrence"] - event = kwargs["event_data"] - occurrence = occurrence.to_dict() - - assert dict( - occurrence, - **{ - "project_id": self.project.id, - "fingerprint": [monitor_incidents[0].grouphash], - "issue_title": f"Monitor failure: {monitor.name}", - "subtitle": "Check-in exceeded maximum duration of 10 minutes.", - "resource_id": None, - "evidence_data": {}, - "evidence_display": [ - {"name": "Failure reason", "value": "duration", "important": True}, - { - "name": "Environment", - "value": monitor_environment.get_environment().name, - "important": False, - }, - { - "name": "Last successful check-in", - "value": successful_check_in.date_added.isoformat(), - "important": False, - }, - ], - "type": MonitorCheckInTimeout.type_id, - "level": "error", - "culprit": "duration", - }, - ) == dict(occurrence) - - assert dict( - event, - **{ - "contexts": { - "monitor": { - "status": "error", - "type": "cron_job", - "config": { - "schedule_type": 2, - "schedule": [1, "month"], - "max_runtime": 10, - "checkin_margin": None, - }, - "id": str(monitor.guid), - "name": monitor.name, - "slug": str(monitor.slug), - } - }, - "environment": monitor_environment.get_environment().name, - "event_id": occurrence["event_id"], - "fingerprint": [monitor_incidents[0].grouphash], - "platform": "other", - "project_id": monitor.project_id, - "sdk": None, - "tags": { - "monitor.id": str(monitor.guid), - "monitor.slug": str(monitor.slug), - "monitor.incident": str(monitor_incidents[0].id), - }, - }, - ) == dict(event) - - @with_feature("organizations:issue-platform") - @patch("sentry.issues.producer.produce_occurrence_to_kafka") - def test_mark_failed_with_missed_reason_issue_platform(self, mock_produce_occurrence_to_kafka): - last_checkin = timezone.now().replace(second=0, microsecond=0) - next_checkin = last_checkin + timedelta(hours=1) - - monitor = Monitor.objects.create( - name="test monitor", - 
organization_id=self.organization.id, - project_id=self.project.id, - type=MonitorType.CRON_JOB, - config={ - "schedule": [1, "hour"], - "schedule_type": ScheduleType.INTERVAL, - "max_runtime": None, - "checkin_margin": None, - }, - ) - monitor_environment = MonitorEnvironment.objects.create( - monitor=monitor, - environment_id=self.environment.id, - last_checkin=last_checkin, - next_checkin=next_checkin, - next_checkin_latest=next_checkin + timedelta(minutes=1), - status=monitor.status, - ) - - failed_checkin = MonitorCheckIn.objects.create( - monitor=monitor, - monitor_environment=monitor_environment, - project_id=self.project.id, - status=CheckInStatus.MISSED, - expected_time=next_checkin, - date_added=next_checkin + timedelta(minutes=1), - ) - assert mark_failed(failed_checkin, ts=failed_checkin.date_added) - - monitor.refresh_from_db() - monitor_environment.refresh_from_db() - assert monitor_environment.status == MonitorStatus.ERROR - - monitor_incidents = MonitorIncident.objects.filter(monitor_environment=monitor_environment) - assert len(monitor_incidents) == 1 - - assert len(mock_produce_occurrence_to_kafka.mock_calls) == 1 - - kwargs = mock_produce_occurrence_to_kafka.call_args.kwargs - occurrence = kwargs["occurrence"] - event = kwargs["event_data"] - occurrence = occurrence.to_dict() - - assert dict( - occurrence, - **{ - "project_id": self.project.id, - "fingerprint": [monitor_incidents[0].grouphash], - "issue_title": f"Monitor failure: {monitor.name}", - "subtitle": f"No check-in reported on {next_checkin.strftime(SUBTITLE_DATETIME_FORMAT)}.", - "resource_id": None, - "evidence_data": {}, - "evidence_display": [ - {"name": "Failure reason", "value": "missed_checkin", "important": True}, - { - "name": "Environment", - "value": monitor_environment.get_environment().name, - "important": False, - }, - { - "name": "Last successful check-in", - "value": "None", - "important": False, - }, - ], - "type": MonitorCheckInMissed.type_id, - "level": "warning", - 
"culprit": "missed_checkin", - }, - ) == dict(occurrence) - - assert dict( - event, - **{ - "contexts": { - "monitor": { - "status": "error", - "type": "cron_job", - "config": { - "schedule_type": 2, - "schedule": [1, "hour"], - "max_runtime": None, - "checkin_margin": None, - }, - "id": str(monitor.guid), - "name": monitor.name, - "slug": str(monitor.slug), - } - }, - "environment": monitor_environment.get_environment().name, - "event_id": occurrence["event_id"], - "fingerprint": [monitor_incidents[0].grouphash], - "platform": "other", - "project_id": monitor.project_id, - "sdk": None, - "tags": { - "monitor.id": str(monitor.guid), - "monitor.slug": str(monitor.slug), - "monitor.incident": str(monitor_incidents[0].id), - }, - }, - ) == dict(event) - @with_feature("organizations:issue-platform") @patch("sentry.issues.producer.produce_occurrence_to_kafka") def test_mark_failed_muted(self, mock_produce_occurrence_to_kafka): diff --git a/tests/sentry/tasks/test_weekly_reports.py b/tests/sentry/tasks/test_weekly_reports.py index 83621236bd4163..971d100129a18a 100644 --- a/tests/sentry/tasks/test_weekly_reports.py +++ b/tests/sentry/tasks/test_weekly_reports.py @@ -11,7 +11,7 @@ from django.utils import timezone from sentry.constants import DataCategory -from sentry.issues.grouptype import MonitorCheckInFailure, PerformanceNPlusOneGroupType +from sentry.issues.grouptype import MonitorIncidentType, PerformanceNPlusOneGroupType from sentry.models.group import GroupStatus from sentry.models.grouphistory import GroupHistoryStatus from sentry.models.notificationsettingoption import NotificationSettingOption @@ -381,7 +381,7 @@ def test_message_builder_simple(self, message_builder, record): self.create_performance_issue(fingerprint=f"{PerformanceNPlusOneGroupType.type_id}-group2") # store a crons issue just to make sure it's not counted in key_performance_issues - self.create_group(type=MonitorCheckInFailure.type_id) + self.create_group(type=MonitorIncidentType.type_id) 
prepare_organization_report(self.now.timestamp(), ONE_DAY * 7, self.organization.id) for call_args in message_builder.call_args_list: @@ -458,7 +458,7 @@ def test_message_builder_filter_resolved(self, message_builder, record): self.create_performance_issue(fingerprint=f"{PerformanceNPlusOneGroupType.type_id}-group2") # store a crons issue just to make sure it's not counted in key_performance_issues - self.create_group(type=MonitorCheckInFailure.type_id) + self.create_group(type=MonitorIncidentType.type_id) prepare_organization_report(self.now.timestamp(), ONE_DAY * 7, self.organization.id) for call_args in message_builder.call_args_list: