diff --git a/docs/notebooks/demo_miniDifiValidation.ipynb b/docs/notebooks/demo_miniDifiValidation.ipynb index 56bc55fe..aea80c2a 100644 --- a/docs/notebooks/demo_miniDifiValidation.ipynb +++ b/docs/notebooks/demo_miniDifiValidation.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "878bd071-d694-4bdf-91d2-16e29e090df8", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ " tshifted = df[\"midPointTai\"] - night_start_utc_days\n", " return tshifted.astype(int)\n", "\n", - "def has_tracklet(df, minlen_arcsec=1, maxdt_minutes=90):\n", + "def has_tracklet(df, minlen_arcsec=1, maxdt_minutes=90, min_observations=2):\n", " # this computes whether a given night has /any/ observations close enough to be\n", " # made into a tracklet.\n", " #\n", @@ -94,7 +94,12 @@ "\n", " maxdt = maxdt_minutes / 60. / 24.\n", " minlen = minlen_arcsec / 3600.\n", + " detections = []\n", "\n", + " if len(df)< min_observations:\n", + " return False\n", + " if min_observations == 1: # for edge case where one observation is needed for a tracklet\n", + " return True\n", " for i in range(len(df)):\n", " for j in range(i+1, len(df)):\n", " t0, t1 = df[\"midPointTai\"].iloc[i], df[\"midPointTai\"].iloc[j]\n", @@ -118,8 +123,11 @@ "\n", " # this combo will make a tracklet; return true!\n", " ##print(f'dt = {dt*24*60:.2f} min, sep = {dist*3600:.2f}\"')\n", - " return True\n", - " \n", + " detections.append(i)\n", + " detections.append(j)\n", + " if len(set(detections)) >= min_observations:\n", + " return True\n", + "\n", " return False\n", "\n", "def nights_with_tracklets(df):\n", @@ -129,7 +137,7 @@ " assert len(df[\"ssObjectId\"].unique()) == 1, \"This function must be run on a single object\"\n", "\n", " # Filter the DataFrame to only keep rows where the 'night' value appears 2 or more times\n", - " df_filtered = df[df.groupby('night')['night'].transform('count') >= 2].reset_index()\n", + " df_filtered = df[df.groupby('night')['night'].transform('count') >= 1].reset_index()\n", "\n", " # Now, for each night compute if it has a tracklet by calling has_tracklet()\n", " nightHasTracklet = df_filtered.groupby(\"night\").apply(has_tracklet, include_groups=False)\n", @@ -223,8 +231,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 20.1 s, sys: 48.9 ms, total: 20.2 s\n", - "Wall time: 20.3 s\n" + "CPU times: user 11.4 s, sys: 36.9 ms, total: 11.5 s\n", + "Wall time: 11.5 s\n" ] }, { @@ -276,8 +284,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.81 s, sys: 358 ms, total: 5.16 s\n", - "Wall time: 10.5 s\n" + "CPU times: user 1.1 s, sys: 190 ms, total: 1.29 s\n", + "Wall time: 1.47 s\n" ] } ], @@ -297,7 +305,7 @@ " )\n", "\n", "# run minidifi\n", - "minidifi = md.linkObservations(obsv, seed=42, p=1)" + "minidifi = md.linkObservations(obsv, seed=42, p=1, objectId=\"ssObjectId\", maxdt_minutes=90, minlen_arcsec=1, min_observations=2, window=14, nlink=3, night_start_utc_days=17.0 / 24.0)" ] }, { @@ -390,14 +398,6 @@ "print(\"Differences between miniDifi and brute force (note: empty is good!):\")\n", "res[~res[\"equal\"]]" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f785000", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/sorcha/modules/PPLinkingFilter.py b/src/sorcha/modules/PPLinkingFilter.py index 87489381..3f842713 100755 --- a/src/sorcha/modules/PPLinkingFilter.py +++ b/src/sorcha/modules/PPLinkingFilter.py @@ -76,6 +76,7 @@ def PPLinkingFilter( objectId="ssObjectId", maxdt_minutes=maximum_time * 24 * 60, minlen_arcsec=minimum_separation, + min_observations=min_observations, window=tracklet_interval, nlink=min_tracklets, p=detection_efficiency, diff --git a/src/sorcha/modules/PPMiniDifi.py b/src/sorcha/modules/PPMiniDifi.py index 926e314c..6bf71fea 100755 --- a/src/sorcha/modules/PPMiniDifi.py +++ b/src/sorcha/modules/PPMiniDifi.py @@ -45,7 +45,7 @@ def haversine_np(lon1, lat1, lon2, lat2): @njit(cache=True) -def hasTracklet(mjd, ra, dec, maxdt_minutes, minlen_arcsec): +def hasTracklet(mjd, ra, dec, maxdt_minutes, minlen_arcsec, min_observations): """ Given a set of observations in one night, calculate it has at least onedetectable tracklet. @@ -67,6 +67,9 @@ def hasTracklet(mjd, ra, dec, maxdt_minutes, minlen_arcsec): minlen_arcsec : float Minimum allowable distance separation between observations [Units: arcsec] + min_observations (int): + the minimum number of observations in a night required to form a tracklet. + Returns -------- : boolean @@ -78,25 +81,26 @@ def hasTracklet(mjd, ra, dec, maxdt_minutes, minlen_arcsec): ## tracklets by taking all observations in a night and computing ## all of theirs pairwise distances, then selecting on that. nobs = len(ra) - if nobs < 2: + if nobs < min_observations: return False - + if min_observations == 1: # for edge case where one observation is needed for a tracklet + return True maxdt = maxdt_minutes / (60 * 24) minlen = minlen_arcsec / 3600 - for i in range(nobs): for j in range(nobs): diff = mjd[i] - mjd[j] if diff > 0 and diff < maxdt: sep = haversine_np(ra[i], dec[i], ra[j], dec[j]) if sep > minlen: - return True + if (i + 1) - (j) >= min_observations: + return True return False @njit(cache=True) -def trackletsInNights(night, mjd, ra, dec, maxdt_minutes, minlen_arcsec): +def trackletsInNights(night, mjd, ra, dec, maxdt_minutes, minlen_arcsec, min_observations): """ Calculate, for a given set of observations sorted by observation time, whether or not it has at least one discoverable tracklet in each night. @@ -121,6 +125,9 @@ def trackletsInNights(night, mjd, ra, dec, maxdt_minutes, minlen_arcsec): minlen_arcsec : float Minimum allowable distance separation between observations [Units: arcsec] + min_observations (int): + the minimum number of observations in a night required to form a tracklet. + Returns -------- nights : float or array of floats @@ -139,7 +146,7 @@ def trackletsInNights(night, mjd, ra, dec, maxdt_minutes, minlen_arcsec): # for each night, test if it has a tracklet b = 0 for k, e in enumerate(i): - hasTrk[k] = hasTracklet(mjd[b:e], ra[b:e], dec[b:e], maxdt_minutes, minlen_arcsec) + hasTrk[k] = hasTracklet(mjd[b:e], ra[b:e], dec[b:e], maxdt_minutes, minlen_arcsec, min_observations) b = e return nights, hasTrk @@ -232,7 +239,9 @@ def discoveryOpportunities(nights, nightHasTracklets, window, nlink, p, rng): return discIdx, disc -def linkObject(obsv, seed, maxdt_minutes, minlen_arcsec, window, nlink, p, night_start_utc_days): +def linkObject( + obsv, seed, maxdt_minutes, minlen_arcsec, min_observations, window, nlink, p, night_start_utc_days +): """ For a set of observations of a single object, calculate if there are any tracklets, if there are enough tracklets to form a discovery window, and then report back all of @@ -262,6 +271,9 @@ def linkObject(obsv, seed, maxdt_minutes, minlen_arcsec, window, nlink, p, night minlen_arcsec : float Minimum allowable distance separation between observations [Units: arcsec] + min_observations (int): + the minimum number of observations in a night required to form a tracklet. + window : float Number of tracklets required with <= this window to complete a detection @@ -313,7 +325,9 @@ def linkObject(obsv, seed, maxdt_minutes, minlen_arcsec, window, nlink, p, night seed %= 0xFFFF_FFFF rng = np.random.default_rng(seed) - nights, hasTrk = trackletsInNights(night, mjd, ra, dec, maxdt_minutes, minlen_arcsec) + nights, hasTrk = trackletsInNights( + night, mjd, ra, dec, maxdt_minutes, minlen_arcsec, min_observations + ) discIdx, discNights = discoveryOpportunities(nights, hasTrk, window, nlink, p, rng) if discIdx != -1: discoveryChances = len(discNights) diff --git a/src/sorcha/utilities/sorchaConfigs.py b/src/sorcha/utilities/sorchaConfigs.py index f1a7ea78..ad4ef9f0 100644 --- a/src/sorcha/utilities/sorchaConfigs.py +++ b/src/sorcha/utilities/sorchaConfigs.py @@ -532,6 +532,13 @@ def _validate_linkingfilter_configs(self): logging.error("ERROR: ssp_number_observations is zero or negative.") sys.exit("ERROR: ssp_number_observations is zero or negative.") + if self.ssp_number_observations > 1 and self.ssp_separation_threshold == 0.0: + logging.error("ERROR: ssp_separation_threshold is zero.") + sys.exit("ERROR: ssp_separation_threshold is zero.") + if self.ssp_separation_threshold < 0.0: + logging.error("ERROR: ssp_separation_threshold is negative.") + sys.exit("ERROR: ssp_separation_threshold is negative.") + if self.ssp_number_tracklets < 1: logging.error("ERROR: ssp_number_tracklets is zero or less.") sys.exit("ERROR: ssp_number_tracklets is zero or less.") @@ -544,10 +551,6 @@ def _validate_linkingfilter_configs(self): logging.error("ERROR: ssp_detection_efficiency out of bounds (should be between 0 and 1).") sys.exit("ERROR: ssp_detection_efficiency out of bounds (should be between 0 and 1).") - if self.ssp_separation_threshold <= 0.0: - logging.error("ERROR: ssp_separation_threshold is zero or negative.") - sys.exit("ERROR: ssp_separation_threshold is zero or negative.") - if self.ssp_maximum_time < 0: logging.error("ERROR: ssp_maximum_time is negative.") sys.exit("ERROR: ssp_maximum_time is negative.") diff --git a/tests/sorcha/test_PPLinkingFilter.py b/tests/sorcha/test_PPLinkingFilter.py index 5858b7f8..dba2fdf8 100755 --- a/tests/sorcha/test_PPLinkingFilter.py +++ b/tests/sorcha/test_PPLinkingFilter.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np + from sorcha.utilities.dataUtilitiesForTests import get_test_filepath @@ -111,6 +112,7 @@ def test_PPLinkingFilter_discoveryChances(): objectId="ssObjectId", maxdt_minutes=max_time_separation * 24 * 60, minlen_arcsec=min_angular_separation, + min_observations = min_observations, window=min_tracklet_window, nlink=min_tracklets, p=detection_efficiency, @@ -158,6 +160,7 @@ def test_PPLinkingFilter_nlink1(): objectId="ssObjectId", maxdt_minutes=max_time_separation * 24 * 60, minlen_arcsec=min_angular_separation, + min_observations = min_observations, window=min_tracklet_window, nlink=min_tracklets, p=detection_efficiency, @@ -393,3 +396,220 @@ def test_PPLinkingFilter_nodrop(): assert all(~linked_observations[linked_observations["ObjID"] == "unlinked_object"]["object_linked"]) assert len(linked_observations[linked_observations["ObjID"] == "linked_object"]) == 6 assert len(linked_observations[linked_observations["ObjID"] == "unlinked_object"]) == 6 + + +def test_PPLinkingFilter_minobs(): + """ + Testing min_observations for one night to consider a tracklet + """ + from sorcha.modules.PPLinkingFilter import PPLinkingFilter + + # min obs 5 and 5 detections on 1 night. should return all 5 observations + min_observations = 5 + min_angular_separation = 0.5 + max_time_separation = 0.0625 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + obj_id = ["pretend_object"] * 5 + field_id = np.arange(1, 6) + t0 = 60000.0 + times = np.asarray([0.03, 0.06,0.07,0.08,0.09]) + t0 + ra = [142, 142.1,142.2,142.3,142.4] + dec = [8, 8.1,8.2,8.3,8.4] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + + assert len(linked_observations) == 5 + + + # min obs 5 and 5 detections on 1 night, but only 4 observations can link into a tracklet. + # should return 0 + min_observations = 5 + min_angular_separation = 0.5 + max_time_separation = 0.0625 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + obj_id = ["pretend_object"] * 5 + field_id = np.arange(1, 6) + t0 = 60000.0 + times = np.asarray([0.03, 0.06,0.07,0.08,0.15]) + t0 #last time has difference > max_time_separation so unlinkable + ra = [142, 142.1,142.2,142.3,142.15] + dec = [8, 8.1,8.2,8.3,8.4] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + + assert len(linked_observations) == 0 + + + # min obs 6 and 5 detections on 1 night. Should return empty dataframe + + + min_observations = 6 + min_angular_separation = 0.5 + max_time_separation = 0.0625 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + # create object that should link 6 observations on one night to tracklet but only give it 5 observations for that night. + obj_id = ["pretend_object"] * 5 + field_id = np.arange(1, 6) + t0 = 60000.0 + times = np.asarray([0.03, 0.06,0.07,0.08,0.09]) + t0 + ra = [142, 142.1,142.2,142.3,142.4] + dec = [8, 8.1,8.2,8.3,8.4] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + + assert len(linked_observations) == 0 + + # min obs 1 and 2 detections on two different nights + + min_observations = 1 + min_angular_separation = 0 + max_time_separation = 0 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + # create object that should link 1 observation on one night to tracklet. + obj_id = ["pretend_object"] * 2 + field_id = np.arange(1,3) + t0 = 60000.0 + times = np.asarray([0.03,1000]) + t0 + ra = [142,152] + dec = [8,9] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + print(linked_observations) + assert len(linked_observations) == 2 + + + + # min obs 4 and 4 detections on 1 night, but only 2 observations can link into a tracklet either side. + # should return 0 + min_observations = 4 + min_angular_separation = 0.5 + max_time_separation = 0.0625 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + obj_id = ["pretend_object"] * 4 + field_id = np.arange(1, 5) + t0 = 60000.0 + times = np.asarray([0.03, 0.04,0.15,0.16]) + t0 # obs split in two with time difference > max_time_separation so unlinkable + ra = [142, 142.1,142.5,142.6] + dec = [8, 8.1,8.5,8.6] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + + assert len(linked_observations) == 0 + + # min obs 4 and 6 detections on 1 night, where only 4 observations can link into a tracklet. + # should return 6 + min_observations = 4 + min_angular_separation = 0.5 + max_time_separation = 0.0625 + min_tracklets = 1 + min_tracklet_window = 15 + detection_efficiency = 1 + night_start_utc = 17.0 + + obj_id = ["pretend_object"] * 6 + field_id = np.arange(1, 7) + t0 = 60000.0 + times = np.asarray([0.03, 0.04,0.15,0.16,0.17,0.18]) + t0 # obs split in two with time difference > max_time_separation 2 on left and 4 on right + ra = [142, 142.1,142.5,142.6,142.7,142.8] + dec = [8, 8.1,8.5,8.6,8.7,8.8] + + observations = pd.DataFrame( + {"ObjID": obj_id, "FieldID": field_id, "fieldMJD_TAI": times, "RA_deg": ra, "Dec_deg": dec} + ) + + linked_observations = PPLinkingFilter( + observations, + detection_efficiency, + min_observations, + min_tracklets, + min_tracklet_window, + min_angular_separation, + max_time_separation, + night_start_utc, + ) + + assert len(linked_observations) == 6 \ No newline at end of file diff --git a/tests/sorcha/test_sorchaConfigs.py b/tests/sorcha/test_sorchaConfigs.py index 989f8f4c..0ff73c60 100644 --- a/tests/sorcha/test_sorchaConfigs.py +++ b/tests/sorcha/test_sorchaConfigs.py @@ -725,12 +725,23 @@ def test_linkingfilter_bounds(key_name): test_configs = linkingfilterConfigs(**linkingfilter_configs) assert error_text.value.code == f"ERROR: {key_name} is negative." - elif key_name == "ssp_separation_threshold" or key_name == "ssp_number_observations": + elif key_name == "ssp_number_observations": linkingfilter_configs[key_name] = -5 with pytest.raises(SystemExit) as error_text: test_configs = linkingfilterConfigs(**linkingfilter_configs) assert error_text.value.code == f"ERROR: {key_name} is zero or negative." + elif key_name == "ssp_separation_threshold": + linkingfilter_configs[key_name] = -5 + with pytest.raises(SystemExit) as error_text: + test_configs = linkingfilterConfigs(**linkingfilter_configs) + assert error_text.value.code == f"ERROR: {key_name} is negative." + + linkingfilter_configs[key_name] = 0 + with pytest.raises(SystemExit) as error_text: + test_configs = linkingfilterConfigs(**linkingfilter_configs) + assert error_text.value.code == f"ERROR: {key_name} is zero." + elif key_name == "ssp_number_tracklets": linkingfilter_configs[key_name] = -5 with pytest.raises(SystemExit) as error_text: