I am running into an issue. The ReportDate field is something that comes with the csv pull but that is not what I want used on the X-axis. I want DataDate = ReportDate - 1
I am getting the following error:
I am getting the following error:
Error:Traceback (most recent call last):
at block 1, line 107
at block 1, line 91, in plot_county_7DAvg(df, title, size)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/_decorators.pyline 46, in inner_f(*args, **kwargs)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/relational.pyline 692, in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/relational.pyline 367, in __init__(self, data, variables, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/_core.pyline 605, in __init__(self, data, variables)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/_core.pyline 668, in assign_variables(self, data, variables)
at /opt/python/envs/default/lib/python3.8/site-packages/seaborn/_core.pyline 903, in _assign_variables_longform(self, data, **kwargs)
ValueError: Could not interpret value `DataDate` for parameter `x`
The full code is here: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# !/usr/bin/env python author = "Michael Brown" license = "Based off of sript by Sreenivas Bhattiprolu of Python for Microscopists" import pandas as pd import datetime as dt from matplotlib import pyplot as plt from datetime import timedelta from warnings import simplefilter simplefilter(action = "ignore" , category = pd.errors.PerformanceWarning) # =================================================================== # Example of Calcuation for Case Rate # =================================================================== # CVD['Daily100k'] = (CVD['DailyCases'] / statePopulation) * 100000 # CVD['100k7D'] = CVD['Daily100k'].rolling(window=7).mean() #Creating new data frame AACounty = pd.DataFrame() AACounty[ 'ReportDate' ] = CVD[ 'ReportDate' ] AACounty[ 'DataDate' ] = CVD[ 'ReportDate' ] #converting Report Date to string AACounty[ 'ReportDate' ] = AACounty[ 'ReportDate' ].astype( 'str' ) AACounty[ 'DataDate' ] = AACounty[ 'DataDate' ].astype( 'str' ) #getting the date portion of the string AACounty[ 'ReportDate' ] = AACounty.ReportDate. str . slice ( 0 , 10 ) AACounty[ 'DataDate' ] = AACounty.DataDate. str . slice ( 0 , 10 ) #converting string to date format AACounty[ 'ReportDate' ] = [dt.datetime.strptime(x, '%m/%d/%Y' ) for x in AACounty[ 'ReportDate' ]] AACounty[ 'DataDate' ] = [dt.datetime.strptime(x, '%m/%d/%Y' ) for x in AACounty[ 'DataDate' ]] #adding new column for data date vs reported date - data date is 1 day prior to report date AACounty[ 'DataDate' ] = AACounty[ 'DataDate' ] - timedelta(days = 1 ) AnneArundelPopulationDecennial2020 = 588261 AnneArundelPOpulationACS2020 = 575421 AnneArundelPopulationACS2019 = 571275 AnneArundelPopulationPEP2019 = 579234 AnneArundelPopulationCounty = 579630 #Anne Arundel AACounty[ 'AnneArundel' ] = CVD[ 'ANNE' ] AACounty[ 'AADailyCases' ] = AACounty[ 'AnneArundel' ].diff() AACounty[ 'AA7Day' ] = AACounty[ 'AADailyCases' ].rolling(window = 7 ).mean() AACounty[ 'AADaily100k2020Census' ] = (AACounty[ 'AADailyCases' ] / AnneArundelPopulationDecennial2020) * 100000 AACounty[ 'AA100k7D-2020Census' ] = AACounty[ 'AADaily100k2020Census' ].rolling(window = 7 ).mean() AACounty[ 'AACounty100k2020ACS' ] = (AACounty[ 'AADailyCases' ] / AnneArundelPOpulationACS2020) * 100000 AACounty[ 'AA100k7D-2020ACS' ] = AACounty[ 'AACounty100k2020ACS' ].rolling(window = 7 ).mean() AACounty[ 'AADaily100k2019ACS' ] = (AACounty[ 'AADailyCases' ] / AnneArundelPopulationACS2019) * 100000 AACounty[ 'AA100k7D-2019ACS' ] = AACounty[ 'AADaily100k2019ACS' ].rolling(window = 7 ).mean() AACounty[ 'AADaily100k2019PEP' ] = (AACounty[ 'AADailyCases' ] / AnneArundelPopulationPEP2019) * 100000 AACounty[ 'AA100k7D-2019PEP' ] = AACounty[ 'AADaily100k2019PEP' ].rolling(window = 7 ).mean() AACounty[ 'AADaily100kCounty' ] = (AACounty[ 'AADailyCases' ] / AnneArundelPopulationCounty) * 100000 AACounty[ 'AA100k7D-County' ] = AACounty[ 'AADaily100kCounty' ].rolling(window = 7 ).mean() # Saving Data to CSV print (AACounty.dtypes) AACounty.to_csv( 'AnneArundel.csv' ) #================================================================================= # 7-Day Running Average - Daily Cases #================================================================================= def plot_county_7DAvg(df, title = '7-Day Case Count' , size = 1 ): #================================================================================= # 7-Day Running Average - Daily Cases - All Counties #================================================================================= f, ax = plt.subplots( 1 , 1 , figsize = ( 4 * size, 2 * size)) g = sns.lineplot(x = "DataDate" , y = "AA7Day" , data = df, color = 'red' , label = "Anne Arundel" ) plt.legend(loc = 'upper left' ) plt.xlabel( 'Date' ) plt.ylabel( ' 7-Day Average ' ) plt.xticks(rotation = 90 ) plt.title( f ' {title} ' ) ax.grid(color = 'black' , linestyle = 'dotted' , linewidth = 0.75 ) plt.savefig( f '{title}.png' ) plt.show() cvd_case_rate_aggregate = AACounty.groupby([ 'ReportDate' ]). sum ().reset_index() sevenDayAverage60day = cvd_case_rate_aggregate.iloc[ - 60 :] sevenDayAverage30day = cvd_case_rate_aggregate.iloc[ - 30 :] sevenDayAverage7day = cvd_case_rate_aggregate.iloc[ - 7 :] plot_county_7DAvg(cvd_case_rate_aggregate, 'County Aggregate 7-Day Running Average' , size = 4 ) plot_county_7DAvg(sevenDayAverage60day, 'County Past 60 Days 7-Day Running Average' , size = 4 ) plot_county_7DAvg(sevenDayAverage30day, 'County Past 30 Days 7-Day Running Average' , size = 4 ) plot_county_7DAvg(sevenDayAverage7day, 'County Past 7 Days 7-Day Running Average' , size = 4 ) def plot_aacounty_7D100kAvg(df, title = 'Aggregate Anne Arundel County Case Rate' , size = 1 ): #================================================================================= # 7-Day Running Average - Case Rate - Anne Arundel County #================================================================================= f, ax = plt.subplots( 1 , 1 , figsize = ( 5 * size, 3 * size)) g = sns.lineplot(x = "ReportDate" , y = "AA100k7D-2020Census" , data = df, color = 'black' , linewidth = 3.0 , label = "Anne Arundel Decennial 2020" ) g = sns.lineplot(x = "ReportDate" , y = "AA100k7D-2020ACS" , data = df, color = 'darkgreen' , linewidth = 2.0 , label = "Anne Arundel ACS 2020" ) g = sns.lineplot(x = "ReportDate" , y = "AA100k7D-2019PEP" , data = df, color = 'darkblue' , linewidth = 3.0 , label = "Anne Arundel PEP 2019 Estimate" ) g = sns.lineplot(x = "ReportDate" , y = "AA100k7D-2019ACS" , data = df, color = 'darkmagenta' , linewidth = 2.0 , label = "Anne Arundel ACS 2019 Estimate" ) g = sns.lineplot(x = "ReportDate" , y = "AA100k7D-County" , data = df, color = 'red' , linewidth = 2.0 , label = "Anne Arundel Estimate" ) #g = sns.lineplot(x="ReportDate", y="AA100k7D", data=df, color='green', label="Anne Arundel") plt.rcParams[ "figure.figsize" ] = ( 200 , 100 ) plt.xlabel( 'Date' ) plt.ylabel( 'Cases per 100k' ) plt.xticks(rotation = 90 ) plt.title( f ' {title} ' ) ax.grid(color = 'black' , linestyle = 'dotted' , linewidth = 0.75 ) plt.savefig( f '{title}.png' ) plt.show() aasevenday100kcvd_case_rate_aggregate = AACounty.groupby([ 'ReportDate' ]). sum ().reset_index() aasevenday100ksevenDayAverage60day = aasevenday100kcvd_case_rate_aggregate.iloc[ - 60 :] aasevenday100ksevenDayAverage30day = aasevenday100kcvd_case_rate_aggregate.iloc[ - 30 :] aasevenday100ksevenDayAverage7day = aasevenday100kcvd_case_rate_aggregate.iloc[ - 7 :] plot_aacounty_7D100kAvg(aasevenday100kcvd_case_rate_aggregate, 'County Case Rate Aggregate 7-Day Running Average' , size = 4 ) plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage60day, 'County Case Rate Past 60 Days 7-Day Running Average' , size = 4 ) plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage30day, 'County Case Rate Past 30 Days 7-Day Running Average' , size = 4 ) plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage7day, 'County Case Rate Past 7 Days 7-Day Running Average' , size = 4 ) |