diff --git a/environmental-data-science/Nexrad_Demo.ipynb b/environmental-data-science/Nexrad_Demo.ipynb new file mode 100644 index 0000000..76296cf --- /dev/null +++ b/environmental-data-science/Nexrad_Demo.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bebb3140", + "metadata": {}, + "source": [ + "

An introduction to S3, Boto and Nexrad on S3

" + ] + }, + { + "cell_type": "markdown", + "id": "be36a17d", + "metadata": {}, + "source": [ + "Adapted from AMS Short Course which was adapted and thank to the first tutorial by Valliappa Lakshmanan, formerly at Climate Corp now at Google.\n", + "\n", + "Amazon Simple Storage Service (Amazon S3) is object storage with a simple web service interface to store and retrieve any amount of data from anywhere on the web. It is designed to deliver 99.999999999% durability, and scale past trillions of objects worldwide.\n", + "\n", + "Boto3 is a Python package that provides interfaces to Amazon Web Services." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c49bcb45-c276-4b7f-85e3-93ab363ffe0f", + "metadata": {}, + "outputs": [], + "source": [ + "# install cartopy in the default conda env:\n", + "# source activate default\n", + "# conda install python=3.8\n", + "\n", + "# install cartopy in the default conda env:\n", + "# conda install cartopy\n", + "# restart the kernel" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "74f207ea-e299-43c5-8064-fc56ef88a8ac", + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install jupyter netcdf4 numpy matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88f9b171", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "## You are using the Python ARM Radar Toolkit (Py-ART), an open source\n", + "## library for working with weather radar data. Py-ART is partly\n", + "## supported by the U.S. Department of Energy as part of the Atmospheric\n", + "## Radiation Measurement (ARM) Climate Research Facility, an Office of\n", + "## Science user facility.\n", + "##\n", + "## If you use this software to prepare a publication, please cite:\n", + "##\n", + "## JJ Helmus and SM Collis, JORS 2016, doi: 10.5334/jors.119\n", + "\n" + ] + }, + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pooch'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_50/2322843766.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdatetime\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtimedelta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpyart\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmatplotlib\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtempfile\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/default/lib/python3.8/site-packages/pyart/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfilters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 55\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtesting\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 56\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maux_io\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/default/lib/python3.8/site-packages/pyart/testing/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mtmpdirs\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mInTemporaryDirectory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0msample_objects\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmake_normal_storm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mexample_data\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mget_test_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0m__all__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0ms\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'_'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/default/lib/python3.8/site-packages/pyart/testing/example_data.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpkg_resources\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpooch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m DATASETS = pooch.create(\n\u001b[1;32m 5\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpooch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mos_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'pyart-datasets'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pooch'" + ] + } + ], + "source": [ + "import boto3\n", + "from boto.s3.connection import S3Connection\n", + "from datetime import timedelta, datetime\n", + "import os\n", + "import pyart\n", + "from matplotlib import pyplot as plt\n", + "import tempfile\n", + "import numpy as np\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72f54797", + "metadata": {}, + "outputs": [], + "source": [ + "conn = S3Connection(anon = True)\n", + "bucket = conn.get_bucket('noaa-nexrad-level2')" + ] + }, + { + "cell_type": "markdown", + "id": "bad1368d", + "metadata": {}, + "source": [ + "From https://aws.amazon.com/noaa-big-data/nexrad/ :\n", + "\n", + "The NEXRAD Level II archive data is hosted in the “noaa-nexrad-level2” Amazon S3 bucket in S3’s US East region. The address for the public bucket is:\n", + "\n", + " http://noaa-nexrad-level2.s3.amazonaws.com\n", + "\n", + " https://noaa-nexrad-level2.s3.amazonaws.com\n", + "\n", + "Each volume scan file is its own object in Amazon S3. The basic data format is the following:\n", + " /////\n", + "\n", + "Where:\n", + "\n", + " is the year the data was collected\n", + " is the month of the year the data was collected\n", + " is the day of the month the data was collected\n", + " is the NEXRAD ground station (map of ground stations)\n", + " is the name of the file containing the data. \n", + "These are compressed files (compressed with gzip). The file name has more precise timestamp information.\n", + "\n", + "All files in the archive use the same compressed format (.gz). The data file names are, for example, KAKQ20010101_080138.gz. The file naming convention is:\n", + "\n", + " GGGGYYYYMMDD_TTTTTT\n", + "\n", + "Where:\n", + "\n", + " GGGG = Ground station ID (map of ground stations) \n", + " YYYY = year \n", + " MM = month \n", + " DD = day \n", + " TTTTTT = time when data started to be collected (GMT)\n", + "\n", + "Note that the 2015 files have an additional field on the file name. It adds “_V06” to the end of the file name. An example is KABX20150303_001050_V06.gz." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b16d69a2", + "metadata": {}, + "outputs": [], + "source": [ + "my_pref = '2011/05/20/KVNX/'\n", + "bucket_list = list(bucket.list(prefix = my_pref))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2231cc46", + "metadata": {}, + "outputs": [], + "source": [ + "home_dir = os.path.expanduser('~')\n", + "bucket_list[0].get_contents_to_filename(os.path.join(home_dir,'nexrad_tempfile'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ebf044a", + "metadata": {}, + "outputs": [], + "source": [ + "radar = pyart.io.read(os.path.join(home_dir,'nexrad_tempfile'))\n", + "# print(radar.info())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "927850b7", + "metadata": {}, + "outputs": [], + "source": [ + "my_figure = plt.figure(figsize = [10,8])\n", + "my_display = pyart.graph.RadarDisplay(radar)\n", + "my_display.plot_ppi('reflectivity', 0, vmin = -12, vmax = 64)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "713ff4eb", + "metadata": {}, + "outputs": [], + "source": [ + "#Helper function for the search\n", + "def _nearestDate(dates, pivot):\n", + " return min(dates, key=lambda x: abs(x - pivot))\n", + "\n", + "def get_radar_from_aws(site, datetime_t):\n", + " \"\"\"\n", + " Get the closest volume of NEXRAD data to a particular datetime.\n", + " Parameters\n", + " ----------\n", + " site : string\n", + " four letter radar designation\n", + " datetime_t : datetime\n", + " desired date time\n", + " Returns\n", + " -------\n", + " radar : Py-ART Radar Object\n", + " Radar closest to the queried datetime\n", + " \"\"\"\n", + "\n", + " #First create the query string for the bucket knowing\n", + " #how NOAA and AWS store the data\n", + "\n", + " my_pref = datetime_t.strftime('%Y/%m/%d/') + site\n", + "\n", + " #Connect to the bucket\n", + "\n", + " conn = S3Connection(anon = True)\n", + " bucket = conn.get_bucket('noaa-nexrad-level2')\n", + "\n", + " #Get a list of files\n", + "\n", + " bucket_list = list(bucket.list(prefix = my_pref))\n", + "\n", + " #we are going to create a list of keys and datetimes to allow easy searching\n", + "\n", + " keys = []\n", + " datetimes = []\n", + "\n", + " #populate the list\n", + "\n", + " for i in range(len(bucket_list)):\n", + " this_str = str(bucket_list[i].key)\n", + " if 'gz' in this_str:\n", + " endme = this_str[-22:-4]\n", + " fmt = '%Y%m%d_%H%M%S_V0'\n", + " dt = datetime.strptime(endme, fmt)\n", + " datetimes.append(dt)\n", + " keys.append(bucket_list[i])\n", + "\n", + " if this_str[-3::] == 'V06':\n", + " endme = this_str[-19::]\n", + " fmt = '%Y%m%d_%H%M%S_V06'\n", + " dt = datetime.strptime(endme, fmt)\n", + " datetimes.append(dt)\n", + " keys.append(bucket_list[i])\n", + " \n", + " #find the closest available radar to your datetime\n", + "\n", + " closest_datetime = _nearestDate(datetimes, datetime_t)\n", + " index = datetimes.index(closest_datetime)\n", + "\n", + " localfile = tempfile.NamedTemporaryFile()\n", + " keys[index].get_contents_to_filename(localfile.name)\n", + " radar = pyart.io.read(localfile.name)\n", + " return radar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc8ba436", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "# there is a float deprecation warning that we'll ignore for now\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "base_date = \"20161006_192700\"\n", + "fmt = '%Y%m%d_%H%M%S' \n", + "b_d = datetime.strptime(base_date, fmt)\n", + "\n", + "my_radar = get_radar_from_aws('KAMX',b_d )\n", + "max_lat = 27\n", + "min_lat = 24\n", + "min_lon = -81\n", + "max_lon = -77\n", + "\n", + "lal = np.arange(min_lat, max_lat, .5)\n", + "lol = np.arange(min_lon, max_lon, .5)\n", + "\n", + "display = pyart.graph.RadarMapDisplay(my_radar)\n", + "fig = plt.figure(figsize = [10,8])\n", + "display.plot_ppi_map('reflectivity', sweep = 0, resolution = '110m',\n", + " vmin = -8, vmax = 64, mask_outside = False,\n", + " cmap = pyart.graph.cm.NWSRef,\n", + " min_lat = min_lat, min_lon = min_lon,\n", + " max_lat = max_lat, max_lon = max_lon,\n", + " lat_lines = lal, lon_lines = lol)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88f85458", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "default:Python", + "language": "python", + "name": "conda-env-default-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/environmental-data-science/README.md b/environmental-data-science/README.md new file mode 100644 index 0000000..7e370c6 --- /dev/null +++ b/environmental-data-science/README.md @@ -0,0 +1,11 @@ +# Updated introduction to S3, Boto and Nexrad on S3 +Adapated from and thank you to Scott Collis, ARM precipitation radar instrument translator (see here) and the original tutorial by Valliappa Lakshmanan, formerly at Climate Corp now at Google. This version is updated to use AWS SageMaker Studio Lab and Python 3.8. + +
+ +## AWS SageMaker Studio Lab +You can sign up for SageMaker Studio Lab and use it for free without an AWS account. You can run for 4 hours with GPU or 12 hours with CPU and then logout and log back in for another session. Your data and notebooks are persisted. After clicking the launch button below, choose "download whole repo" and then "build conda environment" when prompted. + +When it's done installing and configuring the conda environment, open the "Nexrad_Demo.ipynb" notebook. Click-Enter to run each row and wait a moment to see the results of each line before proceeding to the next. The line marker should change to a number when it's successfully run that line, ie "[5]" means that it has run line 5. + +Open In SageMaker Studio Lab

diff --git a/environmental-data-science/environment.yml b/environmental-data-science/environment.yml new file mode 100644 index 0000000..c857364 --- /dev/null +++ b/environmental-data-science/environment.yml @@ -0,0 +1,17 @@ +name: aws_nexrad_smsl +channels: +- conda-forge +- defaults +dependencies: +- python>=3.8 +- numpy +- matplotlib +- jupyter +- netcdf4 +- xarray +- scipy +- boto +- boto3 +- arm_pyart +- cartopy +- pooch