Swapping out pandas

bsipocz · bsipocz · commit 85d7d4cf8772 · 2025-03-31T19:44:59.000-07:00
diff --git a/tutorials/euclid_access/1_Euclid_intro_MER_images.md b/tutorials/euclid_access/1_Euclid_intro_MER_images.md
@@ -65,7 +65,6 @@ Each MER image is approximately 1.47 GB. Downloading can take some time.
 import re
 
 import numpy as np
-import pandas as pd
 
 import matplotlib.pyplot as plt
 from matplotlib.patches import Ellipse
@@ -80,9 +79,6 @@ from astropy import units as u
 
 from astroquery.ipac.irsa import Irsa
 import sep
-
-# Copy-on-write is more performant and avoids unexpected modifications of the original DataFrame.
-pd.options.mode.copy_on_write = True
 ```
 
 ## 1. Search for multiwavelength Euclid Q1 MER mosaics that cover the star HD 168151
diff --git a/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md b/tutorials/euclid_access/5_Euclid_intro_SPE_catalog.md
@@ -50,7 +50,7 @@ If you have questions about this notebook, please contact the [IRSA helpdesk](ht
 
 ```{code-cell} ipython3
 # Uncomment the next line to install dependencies if needed
-# !pip install matplotlib pandas astropy 'astroquery>=0.4.10'
+# !pip install matplotlib astropy 'astroquery>=0.4.10'
 ```
 
 ```{code-cell} ipython3
@@ -59,11 +59,10 @@ import urllib
 
 import matplotlib.pyplot as plt
 import numpy as np
-import pandas as pd
 
 from astropy.coordinates import SkyCoord
 from astropy.io import fits
-from astropy.table import Table
+from astropy.table import QTable
 from astropy import units as u
 from astropy.utils.data import download_file
 from astropy.visualization import ImageNormalize, PercentileInterval, AsinhStretch
@@ -80,41 +79,41 @@ search_radius = 10 * u.arcsec
 coord = SkyCoord.from_name('HD 168151')
 ```
 
-### Use IRSA to search for all Euclid data on this target
+```{tip}
+The IRSA SIA collections can be listed using the ``list_collections`` method; we can filter on the ones containing "euclid" in the collection name:
 
-This searches specifically in the euclid_DpdMerBksMosaic "collection" which is the MER images and catalogs.
+    Irsa.list_collections(filter='euclid')
+```
 
-```{code-cell} ipython3
-im_table = Irsa.query_sia(pos=(coord, search_radius), collection='euclid_DpdMerBksMosaic')
++++
 
-## Convert the table to pandas dataframe
-df_im_irsa=im_table.to_pandas()
-```
+### Use IRSA to search for all Euclid data on this target
+
+This searches specifically in the ``euclid_DpdMerBksMosaic`` collection, which contains the MER images and catalogs.
 
 ```{code-cell} ipython3
-## Change the settings so we can see all the columns in the dataframe and the full column width
-## (to see the full long URL)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.max_colwidth', None)
+image_table = Irsa.query_sia(pos=(coord, search_radius), collection='euclid_DpdMerBksMosaic')
 ```
 
-#### This dataframe contains other non-Euclid datasets that have been "Euclidized", meaning they have been put on the same pixel scale as the Euclid data. For this example we just want to look at the Euclid data, so select Euclid for the facility name, and choose science as the data product subtype.
+This table lists all MER mosaic images available at this search position. These mosaics include the Euclid VIS, Y, J, H images, as well as images from ground-based telescopes that have been put on the same pixel scale. For more information, see the [Euclid documentation at IPAC](https://euclid.caltech.edu/page/euclid-faq-tech/).
 
-```{code-cell} ipython3
-df_im_euclid=df_im_irsa[ (df_im_irsa['dataproduct_subtype']=='science') &  (df_im_irsa['facility_name']=='Euclid')]
+Note that various image types are returned as well; we select only the `science` images from these:
 
-df_im_euclid.head()
+```{code-cell} ipython3
+science_images = image_table[image_table['dataproduct_subtype'] == 'science']
+science_images
 ```
 
-## Choose the VIS image and pull the filename:
+### Choose the VIS image and pull the Tile ID
 
-```{code-cell} ipython3
-filename=df_im_euclid[df_im_euclid['energy_bandpassname']=='VIS']['access_url'].to_list()[0]
++++
 
-# ## Extract the tileID from the filename
-tileID=re.search(r'TILE\s*(\d{9})', filename).group(1)
+Extract the tile ID from the ``obs_id`` column. The values in this column are a combination of the 9-digit tile ID and the abbreviation of the instrument.
+
+```{code-cell} ipython3
+tileID = science_images[science_images['energy_bandpassname'] == 'VIS']['obs_id'][0][:9]
 
-print('The MER tile ID for this object is :',tileID)
+print(f'The MER tile ID for this object is : {tileID}')
 ```
 
 ## 2. Download SPE catalog from IRSA directly to this notebook
@@ -137,10 +136,14 @@ table_lines = 'euclid_q1_spe_lines_line_features'
 - List the column names
 
 ```{code-cell} ipython3
-columns_info = Irsa.list_columns(catalog=table_lines)
+columns_info = Irsa.list_columns(catalog=table_mer)
 print(len(columns_info))
 ```
 
+```{code-cell} ipython3
+Irsa.list_columns(catalog=table_1dspectra, full=True)??
+```
+
 ```{code-cell} ipython3
 # Full list of columns and their description
 columns_info
@@ -159,28 +162,25 @@ We specify the following conditions on our search:
 Finally we sort the data by descending spe_line_snr_gf to have the largest SNR H-alpha lines detected at the top.
 
 ```{code-cell} ipython3
-adql = f"SELECT DISTINCT mer.object_id,mer.ra, mer.dec, mer.tileid, mer.flux_y_templfit, \
-lines.spe_line_snr_gf,lines.spe_line_snr_di, lines.spe_line_name, lines.spe_line_central_wl_gf,\
-lines.spe_line_ew_gf, galaxy.spe_z_err, galaxy.spe_z,galaxy.spe_z_prob, lines.spe_line_flux_gf, lines.spe_line_flux_err_gf \
-FROM {table_mer} AS mer \
-JOIN {table_lines} AS lines \
-ON mer.object_id = lines.object_id \
-JOIN {table_galaxy_candidates} AS galaxy \
-ON lines.object_id = galaxy.object_id AND lines.spe_rank = galaxy.spe_rank \
-WHERE lines.spe_line_snr_gf >5 \
-AND lines.spe_line_name = 'Halpha' \
-AND mer.tileid = {tileID} \
-AND galaxy.spe_z_prob > 0.99 \
-AND galaxy.spe_z BETWEEN 1.4 AND 1.6 \
-AND lines.spe_line_flux_gf > 2E-16 \
-ORDER BY lines.spe_line_snr_gf DESC \
-"
+adql_query = ("SELECT DISTINCT mer.object_id,mer.ra, mer.dec, mer.tileid, mer.flux_y_templfit, "
+    "lines.spe_line_snr_gf,lines.spe_line_snr_di, lines.spe_line_name, lines.spe_line_central_wl_gf, "
+    "lines.spe_line_ew_gf, galaxy.spe_z_err, galaxy.spe_z,galaxy.spe_z_prob, "
+    "lines.spe_line_flux_gf, lines.spe_line_flux_err_gf "
+    f"FROM {table_mer} AS mer "
+    f"JOIN {table_lines} AS lines "
+    "ON mer.object_id = lines.object_id "
+    f"JOIN {table_galaxy_candidates} AS galaxy "
+    "ON lines.object_id = galaxy.object_id AND lines.spe_rank = galaxy.spe_rank "
+    "WHERE lines.spe_line_snr_gf >5 "
+    "AND lines.spe_line_name = 'Halpha' "
+    f"AND mer.tileid = {tileID} "
+    "AND galaxy.spe_z_prob > 0.99 "
+    "AND galaxy.spe_z BETWEEN 1.4 AND 1.6 "
+    "AND lines.spe_line_flux_gf > 2E-16 "
+    "ORDER BY lines.spe_line_snr_gf DESC ")
 
 # Use TAP with this ADQL string
-result = Irsa.query_tap(adql)
-
-# Convert table to pandas dataframe and drop duplicates
-result_table = result.to_qtable()
+result_table = Irsa.query_tap(adql_query).to_qtable()
 
 result_table['spe_line_flux_gf'].info.format = ".8e"  # Scientific notation with 8 decimal places
 result_table['spe_line_flux_err_gf'].info.format = ".8e"
@@ -202,34 +202,31 @@ obj_tab
 ```{code-cell} ipython3
 adql_object = f"SELECT *  FROM {table_1dspectra}  WHERE objectid = {obj_id}"
 
-result2 = Irsa.query_tap(adql_object)
-df2 = result2.to_table().to_pandas()
-df2
+result_table2 = Irsa.query_tap(adql_object).to_qtable()
 ```
 
 ### The following steps to read in the spectrum follows the 3_Euclid_intro_1D_spectra notebook.
 
 This involves reading in the spectrum without readin in the full FITS file, just pulling the extension we want.
 
 ```{code-cell} ipython3
-file_uri = urllib.parse.urljoin(Irsa.tap_url, result2['uri'][0])
+file_uri = urllib.parse.urljoin(Irsa.tap_url, result_table2['uri'][0])
 file_uri
 ```
 
 ```{code-cell} ipython3
 with fits.open(file_uri) as hdul:
-    hdu = hdul[df2['hdu'].iloc[0]]
-    dat = Table.read(hdu, format='fits', hdu=1)
-    df_obj_irsa = dat.to_pandas()
+    spectrum = QTable.read(hdul[result_table2['hdu'][0]], format='fits')
+    spec_header = hdul[result_table2['hdu'][0]].header
 ```
 
 ### Now the data are read in, plot the spectrum with the H-alpha line labeled
 
 Divide by 10000 to convert from Angstrom to micron
 
 ```{code-cell} ipython3
-wavelengths = obj_tab['spe_line_central_wl_gf']/10000.
-line_names = obj_tab['spe_line_name']
+wavelengths = object_spectra['spe_line_central_wl_gf']/10000.
+line_names = object_spectra['spe_line_name']
 snr_gf = obj_tab['spe_line_snr_gf']
 
 plt.plot(df_obj_irsa['WAVELENGTH']/10000., df_obj_irsa['SIGNAL'])