From a42f9bf23c76a1e847cf36d113fc78093a980966 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 11:11:18 +0100 Subject: [PATCH 01/22] fix: amending path for notebook --- examples/examples.ipynb | 2466 ++++++++++++++++++++------------------- 1 file changed, 1241 insertions(+), 1225 deletions(-) diff --git a/examples/examples.ipynb b/examples/examples.ipynb index fd5e32a5..898f48b1 100644 --- a/examples/examples.ipynb +++ b/examples/examples.ipynb @@ -3,1305 +3,283 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, - "outputs": [], "source": [ + "sys.path.append('../')\n", + "\n", "from change_detection import functions as chg\n", "%load_ext autoreload\n", "%autoreload 2" - ] + ], + "outputs": [ + { + "output_type": "error", + "ename": "ModuleNotFoundError", + "evalue": "No module named 'change_detection'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mchange_detection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfunctions\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mchg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'load_ext'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'autoreload'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'autoreload'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m'2'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'change_detection'" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Run from csv" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], + "execution_count": null, "source": [ "csv_test = chg.ChangeDetection('csv_test',\n", " csv_name='csv_test_file.csv')\n", "csv_test.run()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Test single SQL query" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": false - }, - "outputs": [], + "execution_count": null, "source": [ "bq_test = chg.ChangeDetection('bq_test')\n", "bq_test.run()" - ] + ], + "outputs": [], + "metadata": { + "scrolled": false + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Single measure" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "scrolled": true - }, - "outputs": [], + "execution_count": null, "source": [ "lp = chg.ChangeDetection('practice_data_lpfentanylir',\n", " measure=True)\n", "lp.run()" - ] + ], + "outputs": [], + "metadata": { + "scrolled": true + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Measures - low-priority CCG level" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "scrolled": true - }, - "outputs": [], + "execution_count": null, "source": [ "from change_detection import *\n", "lp = chg.ChangeDetection('ccg_data_lp%',\n", " measure=True)\n", "lp.run()" - ] + ], + "outputs": [], + "metadata": { + "scrolled": true + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Measures - low-priority practice level" - ] + ], + "metadata": {} }, { "cell_type": "code", - 
"execution_count": 45, - "metadata": { - "scrolled": true - }, - "outputs": [], + "execution_count": null, "source": [ "lp = chg.ChangeDetection('practice_data_lp%',\n", " measure=True)\n", "lp.run()" - ] + ], + "outputs": [], + "metadata": { + "scrolled": true + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### Measures - opioids practice level" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], + "execution_count": null, "source": [ "from change_detection import *\n", "opioids = chg.ChangeDetection('practice_data_opi%',\n", " measure=True)\n", "opioids.run()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, + "execution_count": null, + "source": [ + "opioids.concatenate_outputs()" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is.nbreakis.tfirstis.tfirst.pknownis.tfirst.pknown.offsis.tfirst.offsis.tfirst.bigis.slope.mais.slope.ma.propis.slope.ma.prop.levis.intlev.initlevis.intlev.finallevis.intlev.levdis.intlev.levdprop
measurename
practice_data_opioidomeA81057129.029.0NaNNaN29.0382.0682000.0056290.53125067490.23376579716.416179-12226.182414-0.181155
A830370NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A88601213.013.013.013.013.0-12361.812484-0.3115380.77490952041.73910136089.14215615952.5969450.306535
A890220NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B810420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B8111919.09.09.09.09.0593.2104790.0210980.51923127523.56231658370.507229-30846.944914-1.120747
B81619126.026.026.026.026.0424.0998670.0106130.51428639534.54813554378.043484-14843.495349-0.375456
B82024110.010.010.010.010.0479.7535220.0063620.50980474924.43647199391.866112-24467.429641-0.326561
B83614232.032.044.044.044.0-568.787763-0.0394260.72281713817.81517011457.1025552360.7126150.170846
B840140NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B850410NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B850510NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B856190NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B860040NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C810060NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C81054239.039.044.044.044.0-261.074522-0.0105580.62501923922.92481923505.218309417.7065110.017461
C820190NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820660NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820790NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C82095126.026.026.026.026.0189.3727970.0092410.51428620302.42347926930.471366-6628.047888-0.326466
C826270NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C8266713.03.03.03.03.0116.3770070.0046280.51724125030.27879031780.145175-6749.866385-0.269668
C84034138.038.038.038.038.0390.8000150.0082750.52173946837.94694255826.347292-8988.400350-0.191904
C8403612.02.02.02.02.0-263.270779-0.0036240.50847572901.75948557368.78349915532.9759860.213067
C84103214.014.036.036.036.0-701.880711-0.0311500.68106221875.55343618783.8498633091.7035720.141331
C85628110.010.010.010.010.0-162.735962-0.0063070.50980425963.69658417664.1624978299.5340870.319659
C86015230.030.031.031.031.0-399.383730-0.0051840.57710068523.60600265063.3422883460.2637150.050497
C86022215.015.015.015.015.03959.7430300.0720640.55751450988.03111158090.525890-7102.494779-0.139297
C880230NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.............................................
practice_data_opioidspercentJ820750NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
J8216528.08.027.027.08.0-0.007368-0.0327390.6838800.2324140.2216400.0107730.046355
K81068219.019.031.031.031.0-0.005371-0.0301840.5823960.1663700.1387030.0276660.166294
K816360NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
K82603221.021.023.023.023.0-0.004457-0.0188500.5405620.1577670.1412780.0164890.104514
K84065229.029.029.029.029.0-0.198854-0.8507600.5109780.4325920.0434280.3891640.899610
L8164415.05.0NaNNaN5.0-0.001525-0.0153600.5087720.1008430.0138910.0869520.862252
L84025219.019.0NaNNaN19.0-0.185777-3.0326990.8647890.2470350.0322120.2148230.869607
L84068217.017.0NaNNaN17.0-0.015393-0.0451540.5066750.3562930.0524900.3038040.852679
L850440NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M826200NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M8312125.05.05.05.05.0-0.029839-0.1697700.5042430.2197060.0421780.1775280.808026
M8507813.03.03.03.03.0-0.002478-0.0205100.5084750.123279-0.0229040.1461831.185788
M851180NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M8567162.02.04.04.011.00.1428571.90243946.681374-0.0018320.001229-0.0030601.670902
M85781254.054.054.054.054.00.0568880.9411415.4768820.0035580.013945-0.010387-2.919479
M86612115.015.015.015.015.0-0.011348-0.0222720.5106380.520876-0.0124820.5333581.023963
M886300NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M920420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N820670NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N821080NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N85032213.013.0NaNNaN13.0-0.014965-0.0671820.5039330.2377130.1486250.0890880.374770
N8563423.03.0NaNNaN3.0-0.010103-0.0168250.5694930.6105840.5396220.0709620.116220
P81655240.040.040.040.040.00.0349650.9735580.8982460.0009500.039876-0.038926-40.988946
P8402629.09.0NaNNaN9.0-0.012740-0.0303765.6523400.4321460.4298920.0022540.005216
P85622124.024.024.024.024.0-0.007682-0.0302090.5263160.261974-0.0299390.2919141.114284
P886060NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Y003520NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Y02622245.045.048.048.048.0-0.043245-0.2167920.5384780.051206-0.1094120.1606183.136675
Y02636229.029.039.039.029.00.0205140.2278782.4915090.0695090.077742-0.008234-0.118455
\n", - "

300 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " is.nbreak is.tfirst is.tfirst.pknown \\\n", - "measure name \n", - "practice_data_opioidome A81057 1 29.0 29.0 \n", - " A83037 0 NaN NaN \n", - " A88601 2 13.0 13.0 \n", - " A89022 0 NaN NaN \n", - " B81042 0 NaN NaN \n", - " B81119 1 9.0 9.0 \n", - " B81619 1 26.0 26.0 \n", - " B82024 1 10.0 10.0 \n", - " B83614 2 32.0 32.0 \n", - " B84014 0 NaN NaN \n", - " B85041 0 NaN NaN \n", - " B85051 0 NaN NaN \n", - " B85619 0 NaN NaN \n", - " B86004 0 NaN NaN \n", - " C81006 0 NaN NaN \n", - " C81054 2 39.0 39.0 \n", - " C82019 0 NaN NaN \n", - " C82042 0 NaN NaN \n", - " C82066 0 NaN NaN \n", - " C82079 0 NaN NaN \n", - " C82095 1 26.0 26.0 \n", - " C82627 0 NaN NaN \n", - " C82667 1 3.0 3.0 \n", - " C84034 1 38.0 38.0 \n", - " C84036 1 2.0 2.0 \n", - " C84103 2 14.0 14.0 \n", - " C85628 1 10.0 10.0 \n", - " C86015 2 30.0 30.0 \n", - " C86022 2 15.0 15.0 \n", - " C88023 0 NaN NaN \n", - "... ... ... ... \n", - "practice_data_opioidspercent J82075 0 NaN NaN \n", - " J82165 2 8.0 8.0 \n", - " K81068 2 19.0 19.0 \n", - " K81636 0 NaN NaN \n", - " K82603 2 21.0 21.0 \n", - " K84065 2 29.0 29.0 \n", - " L81644 1 5.0 5.0 \n", - " L84025 2 19.0 19.0 \n", - " L84068 2 17.0 17.0 \n", - " L85044 0 NaN NaN \n", - " M82620 0 NaN NaN \n", - " M83121 2 5.0 5.0 \n", - " M85078 1 3.0 3.0 \n", - " M85118 0 NaN NaN \n", - " M85671 6 2.0 2.0 \n", - " M85781 2 54.0 54.0 \n", - " M86612 1 15.0 15.0 \n", - " M88630 0 NaN NaN \n", - " M92042 0 NaN NaN \n", - " N82067 0 NaN NaN \n", - " N82108 0 NaN NaN \n", - " N85032 2 13.0 13.0 \n", - " N85634 2 3.0 3.0 \n", - " P81655 2 40.0 40.0 \n", - " P84026 2 9.0 9.0 \n", - " P85622 1 24.0 24.0 \n", - " P88606 0 NaN NaN \n", - " Y00352 0 NaN NaN \n", - " Y02622 2 45.0 45.0 \n", - " Y02636 2 29.0 29.0 \n", - "\n", - " is.tfirst.pknown.offs is.tfirst.offs \\\n", - "measure name \n", - "practice_data_opioidome A81057 NaN NaN \n", - " A83037 NaN NaN \n", - " A88601 13.0 13.0 \n", - " A89022 NaN NaN \n", - " B81042 NaN NaN \n", 
- " B81119 9.0 9.0 \n", - " B81619 26.0 26.0 \n", - " B82024 10.0 10.0 \n", - " B83614 44.0 44.0 \n", - " B84014 NaN NaN \n", - " B85041 NaN NaN \n", - " B85051 NaN NaN \n", - " B85619 NaN NaN \n", - " B86004 NaN NaN \n", - " C81006 NaN NaN \n", - " C81054 44.0 44.0 \n", - " C82019 NaN NaN \n", - " C82042 NaN NaN \n", - " C82066 NaN NaN \n", - " C82079 NaN NaN \n", - " C82095 26.0 26.0 \n", - " C82627 NaN NaN \n", - " C82667 3.0 3.0 \n", - " C84034 38.0 38.0 \n", - " C84036 2.0 2.0 \n", - " C84103 36.0 36.0 \n", - " C85628 10.0 10.0 \n", - " C86015 31.0 31.0 \n", - " C86022 15.0 15.0 \n", - " C88023 NaN NaN \n", - "... ... ... \n", - "practice_data_opioidspercent J82075 NaN NaN \n", - " J82165 27.0 27.0 \n", - " K81068 31.0 31.0 \n", - " K81636 NaN NaN \n", - " K82603 23.0 23.0 \n", - " K84065 29.0 29.0 \n", - " L81644 NaN NaN \n", - " L84025 NaN NaN \n", - " L84068 NaN NaN \n", - " L85044 NaN NaN \n", - " M82620 NaN NaN \n", - " M83121 5.0 5.0 \n", - " M85078 3.0 3.0 \n", - " M85118 NaN NaN \n", - " M85671 4.0 4.0 \n", - " M85781 54.0 54.0 \n", - " M86612 15.0 15.0 \n", - " M88630 NaN NaN \n", - " M92042 NaN NaN \n", - " N82067 NaN NaN \n", - " N82108 NaN NaN \n", - " N85032 NaN NaN \n", - " N85634 NaN NaN \n", - " P81655 40.0 40.0 \n", - " P84026 NaN NaN \n", - " P85622 24.0 24.0 \n", - " P88606 NaN NaN \n", - " Y00352 NaN NaN \n", - " Y02622 48.0 48.0 \n", - " Y02636 39.0 39.0 \n", + " is.tfirst.pknown.offs is.tfirst.offs \\\n", + "measure name \n", + "practice_data_opioidome A81057 NaN NaN \n", + " A83037 NaN NaN \n", + " A88601 13.0 13.0 \n", + " A89022 NaN NaN \n", + " B81042 NaN NaN \n", + " B81119 9.0 9.0 \n", + " B81619 26.0 26.0 \n", + " B82024 10.0 10.0 \n", + " B83614 44.0 44.0 \n", + " B84014 NaN NaN \n", + " B85041 NaN NaN \n", + " B85051 NaN NaN \n", + " B85619 NaN NaN \n", + " B86004 NaN NaN \n", + " C81006 NaN NaN \n", + " C81054 44.0 44.0 \n", + " C82019 NaN NaN \n", + " C82042 NaN NaN \n", + " C82066 NaN NaN \n", + " C82079 NaN NaN \n", + " 
C82095 26.0 26.0 \n", + " C82627 NaN NaN \n", + " C82667 3.0 3.0 \n", + " C84034 38.0 38.0 \n", + " C84036 2.0 2.0 \n", + " C84103 36.0 36.0 \n", + " C85628 10.0 10.0 \n", + " C86015 31.0 31.0 \n", + " C86022 15.0 15.0 \n", + " C88023 NaN NaN \n", + "... ... ... \n", + "practice_data_opioidspercent J82075 NaN NaN \n", + " J82165 27.0 27.0 \n", + " K81068 31.0 31.0 \n", + " K81636 NaN NaN \n", + " K82603 23.0 23.0 \n", + " K84065 29.0 29.0 \n", + " L81644 NaN NaN \n", + " L84025 NaN NaN \n", + " L84068 NaN NaN \n", + " L85044 NaN NaN \n", + " M82620 NaN NaN \n", + " M83121 5.0 5.0 \n", + " M85078 3.0 3.0 \n", + " M85118 NaN NaN \n", + " M85671 4.0 4.0 \n", + " M85781 54.0 54.0 \n", + " M86612 15.0 15.0 \n", + " M88630 NaN NaN \n", + " M92042 NaN NaN \n", + " N82067 NaN NaN \n", + " N82108 NaN NaN \n", + " N85032 NaN NaN \n", + " N85634 NaN NaN \n", + " P81655 40.0 40.0 \n", + " P84026 NaN NaN \n", + " P85622 24.0 24.0 \n", + " P88606 NaN NaN \n", + " Y00352 NaN NaN \n", + " Y02622 48.0 48.0 \n", + " Y02636 39.0 39.0 \n", "\n", " is.tfirst.big is.slope.ma \\\n", "measure name \n", @@ -1560,23 +538,1058 @@ " Y02636 -0.008234 -0.118455 \n", "\n", "[300 rows x 13 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is.nbreakis.tfirstis.tfirst.pknownis.tfirst.pknown.offsis.tfirst.offsis.tfirst.bigis.slope.mais.slope.ma.propis.slope.ma.prop.levis.intlev.initlevis.intlev.finallevis.intlev.levdis.intlev.levdprop
measurename
practice_data_opioidomeA81057129.029.0NaNNaN29.0382.0682000.0056290.53125067490.23376579716.416179-12226.182414-0.181155
A830370NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A88601213.013.013.013.013.0-12361.812484-0.3115380.77490952041.73910136089.14215615952.5969450.306535
A890220NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B810420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B8111919.09.09.09.09.0593.2104790.0210980.51923127523.56231658370.507229-30846.944914-1.120747
B81619126.026.026.026.026.0424.0998670.0106130.51428639534.54813554378.043484-14843.495349-0.375456
B82024110.010.010.010.010.0479.7535220.0063620.50980474924.43647199391.866112-24467.429641-0.326561
B83614232.032.044.044.044.0-568.787763-0.0394260.72281713817.81517011457.1025552360.7126150.170846
B840140NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B850410NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B850510NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B856190NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
B860040NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C810060NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C81054239.039.044.044.044.0-261.074522-0.0105580.62501923922.92481923505.218309417.7065110.017461
C820190NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820660NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C820790NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C82095126.026.026.026.026.0189.3727970.0092410.51428620302.42347926930.471366-6628.047888-0.326466
C826270NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
C8266713.03.03.03.03.0116.3770070.0046280.51724125030.27879031780.145175-6749.866385-0.269668
C84034138.038.038.038.038.0390.8000150.0082750.52173946837.94694255826.347292-8988.400350-0.191904
C8403612.02.02.02.02.0-263.270779-0.0036240.50847572901.75948557368.78349915532.9759860.213067
C84103214.014.036.036.036.0-701.880711-0.0311500.68106221875.55343618783.8498633091.7035720.141331
C85628110.010.010.010.010.0-162.735962-0.0063070.50980425963.69658417664.1624978299.5340870.319659
C86015230.030.031.031.031.0-399.383730-0.0051840.57710068523.60600265063.3422883460.2637150.050497
C86022215.015.015.015.015.03959.7430300.0720640.55751450988.03111158090.525890-7102.494779-0.139297
C880230NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.............................................
practice_data_opioidspercentJ820750NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
J8216528.08.027.027.08.0-0.007368-0.0327390.6838800.2324140.2216400.0107730.046355
K81068219.019.031.031.031.0-0.005371-0.0301840.5823960.1663700.1387030.0276660.166294
K816360NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
K82603221.021.023.023.023.0-0.004457-0.0188500.5405620.1577670.1412780.0164890.104514
K84065229.029.029.029.029.0-0.198854-0.8507600.5109780.4325920.0434280.3891640.899610
L8164415.05.0NaNNaN5.0-0.001525-0.0153600.5087720.1008430.0138910.0869520.862252
L84025219.019.0NaNNaN19.0-0.185777-3.0326990.8647890.2470350.0322120.2148230.869607
L84068217.017.0NaNNaN17.0-0.015393-0.0451540.5066750.3562930.0524900.3038040.852679
L850440NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M826200NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M8312125.05.05.05.05.0-0.029839-0.1697700.5042430.2197060.0421780.1775280.808026
M8507813.03.03.03.03.0-0.002478-0.0205100.5084750.123279-0.0229040.1461831.185788
M851180NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M8567162.02.04.04.011.00.1428571.90243946.681374-0.0018320.001229-0.0030601.670902
M85781254.054.054.054.054.00.0568880.9411415.4768820.0035580.013945-0.010387-2.919479
M86612115.015.015.015.015.0-0.011348-0.0222720.5106380.520876-0.0124820.5333581.023963
M886300NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
M920420NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N820670NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N821080NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
N85032213.013.0NaNNaN13.0-0.014965-0.0671820.5039330.2377130.1486250.0890880.374770
N8563423.03.0NaNNaN3.0-0.010103-0.0168250.5694930.6105840.5396220.0709620.116220
P81655240.040.040.040.040.00.0349650.9735580.8982460.0009500.039876-0.038926-40.988946
P8402629.09.0NaNNaN9.0-0.012740-0.0303765.6523400.4321460.4298920.0022540.005216
P85622124.024.024.024.024.0-0.007682-0.0302090.5263160.261974-0.0299390.2919141.114284
P886060NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Y003520NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Y02622245.045.048.048.048.0-0.043245-0.2167920.5384780.051206-0.1094120.1606183.136675
Y02636229.029.039.039.029.00.0205140.2278782.4915090.0695090.077742-0.008234-0.118455
\n", + "

300 rows × 13 columns

\n", + "
" ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], - "source": [ - "opioids.concatenate_outputs()" - ] + "metadata": { + "scrolled": true + } } ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.11 64-bit ('base': conda)" }, "language_info": { "codemirror_mode": { @@ -1588,9 +1601,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.8.11" + }, + "interpreter": { + "hash": "1f7c46737145f996545461311d61f0ce1c36e41df7ffe8325325ea1e61b99178" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From 11e1d82ad513e91f9378f9e1da71c46d0351f1a5 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 11:35:56 +0100 Subject: [PATCH 02/22] feat: example.py to run examples with logging --- examples/examples.py | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 examples/examples.py diff --git a/examples/examples.py b/examples/examples.py new file mode 100644 index 00000000..4d50e4d1 --- /dev/null +++ b/examples/examples.py @@ -0,0 +1,65 @@ + +import logging +import multiprocessing +import sys + +sys.path.append('../') + +from change_detection import functions as chg + +if __name__ == '__main__': + multiprocessing.log_to_stderr() + logger = multiprocessing.get_logger() + logger.setLevel(logging.INFO) + + csv_test = chg.ChangeDetection('csv_test', + csv_name='csv_test_file.csv') + + csv_test.run() + +# # %% [markdown] +# # ### Test single SQL query + +# # %% +# bq_test = chg.ChangeDetection('bq_test') +# bq_test.run() + +# # %% [markdown] +# # ### Single measure + +# # %% +# lp = chg.ChangeDetection('practice_data_lpfentanylir', +# measure=True) +# lp.run() + +# # %% [markdown] +# # ### Measures - low-priority CCG level 
+ +# # %% +# from change_detection import * +# lp = chg.ChangeDetection('ccg_data_lp%', +# measure=True) +# lp.run() + +# # %% [markdown] +# # ### Measures - low-priority practice level + +# # %% +# lp = chg.ChangeDetection('practice_data_lp%', +# measure=True) +# lp.run() + +# # %% [markdown] +# # ### Measures - opioids practice level + +# # %% +# from change_detection import * +# opioids = chg.ChangeDetection('practice_data_opi%', +# measure=True) +# opioids.run() + + +# # %% +# opioids.concatenate_outputs() + + From c8550ac8d5cf8e21cc4181fa4ea3e8487dce93ee Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 11:47:33 +0100 Subject: [PATCH 03/22] feat: reporting process ID --- change_detection/functions.py | 6 ++++++ examples/examples.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index a88b6ac1..872619c0 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -6,6 +6,7 @@ import pandas as pd import numpy as np from ebmdatalab import bq +import sys ''' required R packages: @@ -337,6 +338,11 @@ def run(self): p1.start() p2 = Process(target = self.detect_change) p2.start() + + if self.verbose == True: + print("*** Process initiated: ", end='') + print( p2 ) + sys.stdout.flush() def concatenate_outputs(self, folder_suffix=''): assert self.measure, "Not to be used on single outputs" diff --git a/examples/examples.py b/examples/examples.py index 4d50e4d1..2d8c60ec 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -13,7 +13,9 @@ logger.setLevel(logging.INFO) csv_test = chg.ChangeDetection('csv_test', - csv_name='csv_test_file.csv') + verbose=True, + overwrite=True, + csv_name='csv_test_file.csv') csv_test.run() From df3e61fffd89d0e6abac6030d62c8e3074ecae08 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 13:30:11 
+0100 Subject: [PATCH 04/22] fix: dealing with unexpected column names --- change_detection/functions.py | 61 ++++++++++++++++++++++++++++++----- examples/examples.py | 15 ++++++--- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 872619c0..ea5d9ab3 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -46,6 +46,10 @@ def __init__(self, sample=False, measure=False, custom_measure=False, + code_variable = 'code', + numerator_variable = 'numerator', + denominator_variable = 'denominator', + date_variable = 'month', direction='both', use_cache=True, csv_name='bq_cache.csv', @@ -60,6 +64,14 @@ def __init__(self, self.sample = sample self.measure = measure self.custom_measure = custom_measure + self.code_variable = code_variable + self.numerator_variable = numerator_variable + self.denominator_variable = denominator_variable + self.date_variable = date_variable + self.expected_columns = {"code": self.code_variable, + "month": self.date_variable, + "numerator": self.numerator_variable, + "denominator": self.denominator_variable} self.direction = direction self.use_cache = use_cache self.csv_name = csv_name @@ -155,6 +167,26 @@ def get_data(self): csv_path=csv_path, use_cache=self.use_cache) + def amend_column_names(self,df): + for expected_name, actual_name in self.expected_columns.items(): + if (expected_name != actual_name): + if (self.verbose): + print(f"Replacing column '{actual_name}' with expected column '{expected_name}'") + df[expected_name] = df[actual_name] + return df + + def check_column_names(self,df): + check_message = [] + + columns_missing = np.setdiff1d( + list(self.expected_columns.values()), + df.columns).tolist() + + for c in columns_missing: + check_message.append(f"!!! 
Expected column '{c}' is missing") + + return check_message + def shape_dataframe(self): ''' Returns data in a dataframe in the format needed for `r_detect()` @@ -169,6 +201,12 @@ def shape_dataframe(self): time.sleep(0.5) #time.sleep(3) input_df = pd.read_csv(csv_path) + input_df = self.amend_column_names(input_df) + column_check_message = self.check_column_names(input_df) + + if ( len( column_check_message ) > 0 ): + raise NameError( '\n'.join(column_check_message) ) + input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) ## R script requires this header format: @@ -313,16 +351,23 @@ def run_if_needed(self, out_path): self.concatenate_split_dfs() def detect_change(self): - if self.measure: - for measure_name in self.measure_list: - folder_name = os.path.join(self.name, measure_name) - self.working_dir = self.get_working_dir(folder_name) + + try: + if self.measure: + for measure_name in self.measure_list: + folder_name = os.path.join(self.name, measure_name) + self.working_dir = self.get_working_dir(folder_name) + out_path = os.path.join(self.working_dir, 'r_output.csv') + self.run_if_needed(out_path) + else: + self.working_dir = self.get_working_dir(self.name) out_path = os.path.join(self.working_dir, 'r_output.csv') self.run_if_needed(out_path) - else: - self.working_dir = self.get_working_dir(self.name) - out_path = os.path.join(self.working_dir, 'r_output.csv') - self.run_if_needed(out_path) + + except NameError as e: + print(f"Columns of {self.csv_name} are not as expected") + print(f"You may have to specify the column names using the numberator_variable and/or denominator_variable") + sys.stdout.flush() def clear(self): os.system( 'cls' ) diff --git a/examples/examples.py b/examples/examples.py index 2d8c60ec..65eb8e7f 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -12,11 +12,18 @@ logger = multiprocessing.get_logger() logger.setLevel(logging.INFO) - csv_test = 
chg.ChangeDetection('csv_test', - verbose=True, - overwrite=True, - csv_name='csv_test_file.csv') + # csv_test = chg.ChangeDetection('csv_test', + # verbose=True, + # overwrite=True, + # csv_name='csv_test_file.csv') + csv_test = chg.ChangeDetection('csv_test', + verbose=True, + numerator_variable="indicator_numerator", + denominator_variable="indicator_denominator", + overwrite=True, + csv_name='csv_test_file_column-fix-required.csv') + csv_test.run() # # %% [markdown] From 2cd1442f398f69285aecc636e965a25f593633b9 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:40:57 +0100 Subject: [PATCH 05/22] fix: checking date format of input file --- change_detection/functions.py | 29 ++++++++++++++++++++++++++--- examples/examples.py | 9 ++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index ea5d9ab3..14f26e54 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -201,12 +201,31 @@ def shape_dataframe(self): time.sleep(0.5) #time.sleep(3) input_df = pd.read_csv(csv_path) + + ############################################################# + ### Checking input formatting ### + ############################################################# + + ### If the user nas specified other column + ### names via the [code/date/numerator/denominator}_variable + ### arguments, then these are replaced with the expected + ### column names in amend_column_names. input_df = self.amend_column_names(input_df) + + ### If expected column names are still missing, an + ### exception will be thrown. 
column_check_message = self.check_column_names(input_df) - if ( len( column_check_message ) > 0 ): raise NameError( '\n'.join(column_check_message) ) + ### Check the format of the date + try: + pd.to_datetime(input_df['month'], + format="%Y-%m-%d", + errors='raise') + except ValueError as e: + raise ValueError( f"Field '{self.date_variable}' should be of the format YYYY-MM-DD" ) + input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) ## R script requires this header format: @@ -363,12 +382,16 @@ def detect_change(self): self.working_dir = self.get_working_dir(self.name) out_path = os.path.join(self.working_dir, 'r_output.csv') self.run_if_needed(out_path) - + except NameError as e: print(f"Columns of {self.csv_name} are not as expected") print(f"You may have to specify the column names using the numberator_variable and/or denominator_variable") sys.stdout.flush() - + + except ValueError as e: + print( e ) + sys.stdout.flush() + def clear(self): os.system( 'cls' ) diff --git a/examples/examples.py b/examples/examples.py index 65eb8e7f..96a15876 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -17,12 +17,19 @@ # overwrite=True, # csv_name='csv_test_file.csv') + # csv_test = chg.ChangeDetection('csv_test', + # verbose=True, + # numerator_variable="indicator_numerator", + # denominator_variable="indicator_denominator", + # overwrite=True, + # csv_name='csv_test_file_column-fix-required.csv') + csv_test = chg.ChangeDetection('csv_test', verbose=True, numerator_variable="indicator_numerator", denominator_variable="indicator_denominator", overwrite=True, - csv_name='csv_test_file_column-fix-required.csv') + csv_name='csv_test_file_column-fix-required+date-format.csv') csv_test.run() From 245d12d0a65b21c165c114971a3df44952822332 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:54:38 +0100 Subject: [PATCH 06/22] feat: 
incoming date format can be specified --- change_detection/functions.py | 7 +++++-- examples/examples.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 14f26e54..f215d836 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -50,6 +50,7 @@ def __init__(self, numerator_variable = 'numerator', denominator_variable = 'denominator', date_variable = 'month', + date_format = "%Y-%m-%d", direction='both', use_cache=True, csv_name='bq_cache.csv', @@ -68,6 +69,7 @@ def __init__(self, self.numerator_variable = numerator_variable self.denominator_variable = denominator_variable self.date_variable = date_variable + self.date_format = date_format self.expected_columns = {"code": self.code_variable, "month": self.date_variable, "numerator": self.numerator_variable, @@ -221,10 +223,10 @@ def shape_dataframe(self): ### Check the format of the date try: pd.to_datetime(input_df['month'], - format="%Y-%m-%d", + format=self.date_format, errors='raise') except ValueError as e: - raise ValueError( f"Field '{self.date_variable}' should be of the format YYYY-MM-DD" ) + raise ValueError( f"Field '{self.date_variable}' is not of the required format '{self.date_format}'" ) input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) @@ -390,6 +392,7 @@ def detect_change(self): except ValueError as e: print( e ) + print("Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") sys.stdout.flush() def clear(self): diff --git a/examples/examples.py b/examples/examples.py index 96a15876..0eb86528 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -28,6 +28,7 @@ verbose=True, numerator_variable="indicator_numerator", denominator_variable="indicator_denominator", + date_format="%d.%m.%Y", overwrite=True, csv_name='csv_test_file_column-fix-required+date-format.csv') From 
266c81f2fc9406e4ae734aa5ffedefd5fd36373b Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:55:14 +0100 Subject: [PATCH 07/22] bump to last commit --- examples/examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/examples.py b/examples/examples.py index 0eb86528..cec0a026 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -28,7 +28,7 @@ verbose=True, numerator_variable="indicator_numerator", denominator_variable="indicator_denominator", - date_format="%d.%m.%Y", + date_format="%d.%m.%y", overwrite=True, csv_name='csv_test_file_column-fix-required+date-format.csv') From 90882b738185bee8b4f8ca72a1874d8b26bd4e12 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:56:39 +0100 Subject: [PATCH 08/22] tidy: editing comments --- change_detection/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index f215d836..d852fb80 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -220,7 +220,8 @@ def shape_dataframe(self): if ( len( column_check_message ) > 0 ): raise NameError( '\n'.join(column_check_message) ) - ### Check the format of the date + ### Check the format of the date - reformat to requested + ### date format try: pd.to_datetime(input_df['month'], format=self.date_format, From 6e47899d157fb6e470c8dda1bb35ef5789cf4a82 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:57:16 +0100 Subject: [PATCH 09/22] bump to last commit --- change_detection/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index d852fb80..1d9e81b8 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -221,7 +221,7 @@ 
def shape_dataframe(self): raise NameError( '\n'.join(column_check_message) ) ### Check the format of the date - reformat to requested - ### date format + ### date format. try: pd.to_datetime(input_df['month'], format=self.date_format, From ee679f8207d350b4aa8460a817b72f7e8f9f796b Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:09:10 +0100 Subject: [PATCH 10/22] chore: improved info/error messages --- change_detection/functions.py | 17 +++++++++-------- examples/examples.py | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 1d9e81b8..c92052ae 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -173,7 +173,7 @@ def amend_column_names(self,df): for expected_name, actual_name in self.expected_columns.items(): if (expected_name != actual_name): if (self.verbose): - print(f"Replacing column '{actual_name}' with expected column '{expected_name}'") + print(f"[INFO] Replacing column '{actual_name}' with expected column '{expected_name}'") df[expected_name] = df[actual_name] return df @@ -185,8 +185,8 @@ def check_column_names(self,df): df.columns).tolist() for c in columns_missing: - check_message.append(f"!!! 
Expected column '{c}' is missing") - + check_message.append(f"[ERROR] Expected column '{c}' is missing.") + return check_message def shape_dataframe(self): @@ -227,7 +227,7 @@ def shape_dataframe(self): format=self.date_format, errors='raise') except ValueError as e: - raise ValueError( f"Field '{self.date_variable}' is not of the required format '{self.date_format}'" ) + raise ValueError( f"[ERROR] Field '{self.date_variable}' is not of the required format '{self.date_format}'" ) input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) @@ -387,13 +387,14 @@ def detect_change(self): self.run_if_needed(out_path) except NameError as e: - print(f"Columns of {self.csv_name} are not as expected") - print(f"You may have to specify the column names using the numberator_variable and/or denominator_variable") + print( e ) + print(f"[ERROR] Columns of {self.csv_name} are not as expected") + print(f"[ERROR] Specify the column names using the numberator_variable and/or denominator_variable") sys.stdout.flush() except ValueError as e: print( e ) - print("Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") + print("[ERROR] Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") sys.stdout.flush() def clear(self): @@ -412,7 +413,7 @@ def run(self): p2.start() if self.verbose == True: - print("*** Process initiated: ", end='') + print("\n[INFO] Process initiated: ", end='') print( p2 ) sys.stdout.flush() diff --git a/examples/examples.py b/examples/examples.py index cec0a026..0eb86528 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -28,7 +28,7 @@ verbose=True, numerator_variable="indicator_numerator", denominator_variable="indicator_denominator", - date_format="%d.%m.%y", + date_format="%d.%m.%Y", overwrite=True, csv_name='csv_test_file_column-fix-required+date-format.csv') From 669221c32a7d0f342450d42637542295bb3a6f6f Mon Sep 17 
00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:14:49 +0100 Subject: [PATCH 11/22] fix: accommodating integer 'code' values --- change_detection/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index c92052ae..1208b6ae 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -232,7 +232,7 @@ def shape_dataframe(self): input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) ## R script requires this header format: - input_df['code'] = 'ratio_quantity.' + input_df['code'] + input_df['code'] = 'ratio_quantity.' + input_df['code'].apply(str) input_df = input_df.set_index(['month', 'code']) ## drop small numbers From 87ccce18314462babea1536321ed8528ba8eed37 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:36:45 +0100 Subject: [PATCH 12/22] fix: capturing reformatted date --- change_detection/functions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 1208b6ae..b4c0bee0 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -223,9 +223,10 @@ def shape_dataframe(self): ### Check the format of the date - reformat to requested ### date format. 
try: - pd.to_datetime(input_df['month'], - format=self.date_format, - errors='raise') + date_tmp = pd.to_datetime(input_df['month'], + format=self.date_format, + errors='raise') + input_df['month'] = pd.to_datetime( date_tmp, format="%Y-%m-%d" ) except ValueError as e: raise ValueError( f"[ERROR] Field '{self.date_variable}' is not of the required format '{self.date_format}'" ) From 56fbd7d948258eb8eef34d93a21f7c47d01d8729 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:37:27 +0100 Subject: [PATCH 13/22] fix: removing additional data --- change_detection/functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/change_detection/functions.py b/change_detection/functions.py index b4c0bee0..800cd973 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -230,6 +230,9 @@ def shape_dataframe(self): except ValueError as e: raise ValueError( f"[ERROR] Field '{self.date_variable}' is not of the required format '{self.date_format}'" ) + ### Retain only those columns that we're expecting. 
+ input_df = input_df[list(self.expected_columns.keys())] + input_df = input_df.sort_values(['code', 'month']) input_df['ratio'] = input_df['numerator']/(input_df['denominator']) ## R script requires this header format: From 3137e96f8152016b004ee205ca7baa3eec61c086 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:44:06 +0100 Subject: [PATCH 14/22] chore: improved info/error messages --- change_detection/functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 800cd973..ace5e5ff 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -184,8 +184,8 @@ def check_column_names(self,df): list(self.expected_columns.values()), df.columns).tolist() - for c in columns_missing: - check_message.append(f"[ERROR] Expected column '{c}' is missing.") + if ( len( columns_missing ) > 0 ): + check_message.append(f"[ERROR] Expected columns missing: {', '.join( columns_missing )}") return check_message @@ -392,13 +392,13 @@ def detect_change(self): except NameError as e: print( e ) - print(f"[ERROR] Columns of {self.csv_name} are not as expected") - print(f"[ERROR] Specify the column names using the numberator_variable and/or denominator_variable") + print(f" Columns of {self.csv_name} are not as expected") + print(f" Specify the column names using the [code/date/numerator/denominator]_variable parameter(s)") sys.stdout.flush() except ValueError as e: print( e ) - print("[ERROR] Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") + print(" Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") sys.stdout.flush() def clear(self): From 4ed669e6ccba77cdc1c12872dab5f81d3d1f1057 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:45:26 +0100 Subject: [PATCH 
15/22] feat: adding another example --- examples/examples.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/examples/examples.py b/examples/examples.py index 0eb86528..08d00d18 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -24,13 +24,25 @@ # overwrite=True, # csv_name='csv_test_file_column-fix-required.csv') + # csv_test = chg.ChangeDetection('csv_test', + # verbose=True, + # numerator_variable="indicator_numerator", + # denominator_variable="indicator_denominator", + # date_format="%d.%m.%y", + # overwrite=True, + # csv_name='csv_test_file_column-fix-required+date-format.csv') + csv_test = chg.ChangeDetection('csv_test', verbose=True, - numerator_variable="indicator_numerator", - denominator_variable="indicator_denominator", - date_format="%d.%m.%Y", + code_variable="practice", + numerator_variable="indicator_a_numerator", + denominator_variable="indicator_a_denominator", + date_variable="date", + #ßdate_format="%Y-%m-%d", overwrite=True, - csv_name='csv_test_file_column-fix-required+date-format.csv') + csv_name='measure_indicator_a_rate.csv') + + csv_test.run() From 099abde9d2fb586f9a55127556d0c9d6f4cd8f3d Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Wed, 25 Aug 2021 15:45:59 +0100 Subject: [PATCH 16/22] bump to previous commit --- examples/examples.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/examples.py b/examples/examples.py index 08d00d18..fb33b497 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -38,12 +38,10 @@ numerator_variable="indicator_a_numerator", denominator_variable="indicator_a_denominator", date_variable="date", - #ßdate_format="%Y-%m-%d", + date_format="%Y-%m-%d", overwrite=True, csv_name='measure_indicator_a_rate.csv') - - csv_test.run() # # %% [markdown] From 436ddb040b0ee1dae0cd9a9108b16692a5391b2c Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft 
<54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 13:59:43 +0100 Subject: [PATCH 17/22] feat: param for directory + file existance check --- change_detection/functions.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index ace5e5ff..80fdb1d6 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -31,6 +31,7 @@ def run_r_script(path): command = 'Rscript' path2script = os.path.join(os.getcwd(), path) cmd = [command, path2script] + return subprocess.call(cmd) @@ -51,6 +52,8 @@ def __init__(self, denominator_variable = 'denominator', date_variable = 'month', date_format = "%Y-%m-%d", + base_dir = os.getcwd(), + data_subdir = 'data', direction='both', use_cache=True, csv_name='bq_cache.csv', @@ -74,6 +77,8 @@ def __init__(self, "month": self.date_variable, "numerator": self.numerator_variable, "denominator": self.denominator_variable} + self.base_dir = base_dir + self.data_subdir = data_subdir self.direction = direction self.use_cache = use_cache self.csv_name = csv_name @@ -83,7 +88,7 @@ def __init__(self, def get_working_dir(self, folder): folder_name = folder.replace('%', '') - return os.path.join(os.getcwd(), 'data', folder_name) + return os.path.join(self.base_dir, self.data_subdir, folder_name) def create_dir(self, dir_path): os.makedirs(dir_path, exist_ok=True) @@ -109,11 +114,12 @@ def get_measure_list(self): csv_path=csv_path, use_cache=self.use_cache ) + return measure_list['table_id'] def get_custom_measure_list(self): return [entry.name.split('.',1)[0] for entry - in os.scandir('data/measure_sql/{name}'.format(name=self.name)) + in os.scandir('{subdir}/measure_sql/{name}'.format(name=self.name,subdir=self.data_subdir)) if entry.name.endswith('.sql')] def get_measure_query(self, measure_name): @@ -162,6 +168,7 @@ def get_data(self): use_cache=self.use_cache) else: get_data_dir = 
self.get_working_dir(self.name) + self.create_dir(get_data_dir) query = self.get_custom_query() csv_path = os.path.join(get_data_dir, self.csv_name) @@ -279,7 +286,7 @@ def shape_dataframe(self): def run_r_script(self, i, script_name, input_name, output_name, *args): ''' - have reduced outputs (a bit faster that way) - - for debugging purposes use `verbose` argument" + - for debugging purposes use `verbose` argument" ''' ## Define R command command = 'Rscript' @@ -322,7 +329,7 @@ def r_detect(self): df = pd.DataFrame(item) df.to_csv(os.path.join(self.working_dir, input_name)) - + process = self.run_r_script(i, script_name, input_name, @@ -379,6 +386,12 @@ def run_if_needed(self, out_path): def detect_change(self): try: + if self.csv_name != 'bq_cache.csv': + self.working_dir = self.get_working_dir(self.name) + csv_path = os.path.join(self.working_dir, self.csv_name) + if not os.path.isfile(csv_path): + raise FileNotFoundError(f"[ERROR] File {csv_path} does not exist") + if self.measure: for measure_name in self.measure_list: folder_name = os.path.join(self.name, measure_name) @@ -401,6 +414,11 @@ def detect_change(self): print(" Specify the date format using the date_format parameter (by default this is '%Y-%m-%d')") sys.stdout.flush() + except FileNotFoundError as e: + print( e ) + print(" Check the name and location of the input .csv file") + sys.stdout.flush() + def clear(self): os.system( 'cls' ) From b2184526d377de75cdfcaad329dad2b23cb54ce8 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 14:03:48 +0100 Subject: [PATCH 18/22] feat: reporting of R script command (when verbose) --- change_detection/functions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/change_detection/functions.py b/change_detection/functions.py index 80fdb1d6..d3f90542 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -299,6 +299,10 @@ def run_r_script(self, i, script_name, 
input_name, output_name, *args): for arg in args: arguments.append(arg) + if ( self.verbose ): + print( f"[INFO/R command] [{' '.join(cmd)} {' '.join(arguments)}]") + sys.stdout.flush() + ## run the command if i == 0: if self.verbose: From bbdf57b36998f20b097d2a89ab35eb6ba2638457 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 14:05:48 +0100 Subject: [PATCH 19/22] bug: adding directory creation for CSV files --- change_detection/functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/change_detection/functions.py b/change_detection/functions.py index d3f90542..677a0bc0 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -435,6 +435,9 @@ def run(self): if self.csv_name == 'bq_cache.csv': p1 = Process(target = self.get_data) p1.start() + else: + get_data_dir = self.get_working_dir(self.name) + self.create_dir(get_data_dir) p2 = Process(target = self.detect_change) p2.start() From 916c91e2fddb268b0d1d83ea981cddc471cb7893 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 14:30:56 +0100 Subject: [PATCH 20/22] feat: handling invalid draw_figures values --- change_detection/functions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/change_detection/functions.py b/change_detection/functions.py index 677a0bc0..73ab03d1 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -83,9 +83,14 @@ def __init__(self, self.use_cache = use_cache self.csv_name = csv_name self.overwrite = overwrite - self.draw_figures = draw_figures + self.draw_figures = self._draw_figures_check(draw_figures) self.bq_folder = bq_folder + def _draw_figures_check(self, v): + if not (v in ['yes','no'] ): + raise Exception("The 'draw_figures' parameter should be 'yes' or 'no'") + return(v) + def get_working_dir(self, folder): folder_name = folder.replace('%', '') return 
os.path.join(self.base_dir, self.data_subdir, folder_name) From aff14969bcfba1b9b4014b027a435c3b08f320a2 Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 16:19:15 +0100 Subject: [PATCH 21/22] feat: message when figure dir is created --- change_detection/functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/change_detection/functions.py b/change_detection/functions.py index 73ab03d1..c27bef11 100644 --- a/change_detection/functions.py +++ b/change_detection/functions.py @@ -97,6 +97,9 @@ def get_working_dir(self, folder): def create_dir(self, dir_path): os.makedirs(dir_path, exist_ok=True) + if ( self.verbose == True ): + print( f"[INFO] Creating 'figures' directory in {dir_path}" ) + sys.stdout.flush() os.makedirs(os.path.join(dir_path, 'figures'), exist_ok=True) def get_measure_list(self): From b7b3c6c74793ff5fb384524861f553387f64fbbe Mon Sep 17 00:00:00 2001 From: Lisa Hopcroft <54442530+LisaHopcroft@users.noreply.github.com> Date: Thu, 26 Aug 2021 16:20:34 +0100 Subject: [PATCH 22/22] docs: updated examples to demo new features --- examples/examples.py | 167 +++++++++++++++++++++++++------------------ 1 file changed, 98 insertions(+), 69 deletions(-) diff --git a/examples/examples.py b/examples/examples.py index fb33b497..b1fc511e 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -3,8 +3,8 @@ import multiprocessing import sys +### Importing the local change_detection functions sys.path.append('../') - from change_detection import functions as chg if __name__ == '__main__': @@ -12,27 +12,60 @@ logger = multiprocessing.get_logger() logger.setLevel(logging.INFO) - # csv_test = chg.ChangeDetection('csv_test', - # verbose=True, - # overwrite=True, - # csv_name='csv_test_file.csv') - - # csv_test = chg.ChangeDetection('csv_test', - # verbose=True, - # numerator_variable="indicator_numerator", - # denominator_variable="indicator_denominator", - # overwrite=True, - # 
csv_name='csv_test_file_column-fix-required.csv') - - # csv_test = chg.ChangeDetection('csv_test', - # verbose=True, - # numerator_variable="indicator_numerator", - # denominator_variable="indicator_denominator", - # date_format="%d.%m.%y", - # overwrite=True, - # csv_name='csv_test_file_column-fix-required+date-format.csv') - - csv_test = chg.ChangeDetection('csv_test', + ################################################################# + ################################################################# + ### Examples of analyses where .csv files are provided ########## + ################################################################# + ################################################################# + + ### A ChangeDetection analysis informed by data provided in the file + ### csv_test/csv_test_file.csv. To draw figures from the analysis, + ### uncomment the draw_figures line; figures will be saved to a + ### figures directory in the csv_test/figures directory. + csv_test_1 = chg.ChangeDetection('csv_test', + # draw_figures='yes', + csv_name='csv_test_file.csv') + csv_test_1.run() + + ### A ChangeDetection analysis informed by data provided in the file + ### csv_test/csv_test_file_column-fix-required.csv. The column names + ### in the file are not as expected so the numerator_variable and + ### denominator_variable are provided so as to identify the columns + ### of interest. Analyses are carried out, even if the output files + ### already exist (overwrite=True) and additional information is printed + ### to stdout (verbose=True). + csv_test_2 = chg.ChangeDetection('csv_test', + verbose=True, + numerator_variable="indicator_numerator", + denominator_variable="indicator_denominator", + overwrite=True, + csv_name='csv_test_file_column-fix-required.csv') + csv_test_2.run() + + ### A ChangeDetection analysis informed by data provided in the file + ### csv_test/csv_test_file_column-fix-required+date-format.csv. 
The column + ### names in the file are provided as shown in the previous example. In + ### addition, the date format in the csv file varies from what is expected; + ### to accommodate that, the existing format is provided, which allows + ### conversion. Other parameters (verbose=True and overwrite=True) are + ### as in the previous example. + csv_test_3 = chg.ChangeDetection('csv_test', + verbose=True, + numerator_variable="indicator_numerator", + denominator_variable="indicator_denominator", + date_format="%d.%m.%y", + overwrite=True, + csv_name='csv_test_file_column-fix-required+date-format.csv') + csv_test_3.run() + + ### A ChangeDetection analysis informed by data provided in the file + ### pincer-test/measure_indicator_a_rate.csv. Other parameters explained + ### in previous examples. This file contains more columns than are required + ### but the ChangeDetection functions are able to handle that. If the inputs + ### and outputs are in a separate place, these can be specified by the + ### base_dir and data_subdir parameters (note that the base_dir needs to be + ### provided as an absolute reference, not a relative reference). 
+ measure_indicator = chg.ChangeDetection('pincer-test', verbose=True, code_variable="practice", numerator_variable="indicator_a_numerator", @@ -40,53 +73,49 @@ date_variable="date", date_format="%Y-%m-%d", overwrite=True, + draw_figures='yes', + #base_dir = '/place/to/directory', + #data_subdir='dirname', csv_name='measure_indicator_a_rate.csv') - - csv_test.run() - -# # %% [markdown] -# # ### Test single SQL query - -# # %% -# bq_test = chg.ChangeDetection('bq_test') -# bq_test.run() - -# # %% [markdown] -# # ### Single measure - -# # %% -# lp = chg.ChangeDetection('practice_data_lpfentanylir', -# measure=True) -# lp.run() - -# # %% [markdown] -# # ### Measures - low-priority CCG level - -# # %% -# from change_detection import * -# lp = chg.ChangeDetection('ccg_data_lp%', -# measure=True) -# lp.run() - -# # %% [markdown] -# # ### Measures - low-priority practice level - -# # %% -# lp = chg.ChangeDetection('practice_data_lp%', -# measure=True) -# lp.run() - -# # %% [markdown] -# # ### Measures - opioids practice level - -# # %% -# from change_detection import * -# opioids = chg.ChangeDetection('practice_data_opi%', -# measure=True) -# opioids.run() - - -# # %% -# opioids.concatenate_outputs() + measure_indicator.run() + + ################################################################# + ################################################################# + ### Examples of analyses where data are obtained via queries #### + ################################################################# + ################################################################# + + ### A ChangeDetection analysis informed by data obtained via + ### an SQL query to ebmdatalab (access is managed via the + ### ebmdatalab module). The items of interest are defined using + ### the name parameter, with wildcards as used in SQL (e.g. below, + ### all objects that match ccg_data_lpa.* will be analysed). 
+ ccg_data = chg.ChangeDetection(name='ccg_data_lpa%', + measure=True, + overwrite=True, + verbose=True) + ccg_data.run() + + ### Where multiple objects are analysed, the data can be + ### concatenated, but only after the processes have completed. + ### The command for this is provided below. + # ccg_data_ALL = ccg_data.concatenate_outputs() + + ### A ChangeDetection analysis informed by data obtained via + ### an SQL query to ebmdatalab (access is managed via the + ### ebmdatalab module). This query will look at a specific + ### object - practice_data_lpfentanylir - as no wildcards are + ### provided. + lpfentanylir_data = chg.ChangeDetection('practice_data_lpfentanylir', + measure=True, + verbose=True) + lpfentanylir_data.run() + + ### Here is another example, but it takes a very long time + ### to run! + # opioids = chg.ChangeDetection('practice_data_opi%', + # measure=True, + # verbose=True) + # opioids.run()