May-30-2021, 11:29 PM
I have the following code. It is grahing the aggregate case rate but I also want to do only the past 60 days. It produces 2 graphs. They are the same. I am confused on what I a doing wrong. Any suggestions would be helpful
#!/usr/bin/env python __author__ = "Michael Brown" import pandas as pd import datetime as dt from matplotlib import pyplot as plt import seaborn as sns statePopulation=6045680 CVD = pd.read_csv('https://opendata.arcgis.com/datasets/18582de727934249b92c52542395a3bf_0.csv') #Adding additional columns CVD['ndate'] = CVD['DATE']+'00' CVD['DailyCases'] = CVD['Count_'].diff() CVD['7Day'] = CVD['DailyCases'].rolling(window=7).mean() CVD['Daily100k'] = (CVD['DailyCases'] / statePopulation) * 100000 CVD['100k7D'] = CVD['Daily100k'].rolling(window=7).mean() #print(CVD.head()) #print(CVD.dtypes) #Convert string value of date to datetime format CVD['ndate'] = [dt.datetime.strptime(x,'%Y/%m/%d %H:%M:%S%z') for x in CVD['ndate']] #print (CVD.dtypes) #checking for missing data print(CVD.isnull().sum()) #Change Column titles to something more appropriate CVD.columns = ['ObjectID', 'OldDate', 'Count', 'Date', 'DailyCases', '7Day', 'Daily100k', '100k7D'] #Creating new data frame for past 60 days CVDpast60days = CVD.iloc[-60:] #Group them by date CVD = pd.DataFrame(CVD.groupby(['Date'])['DailyCases', '7Day', 'Daily100k','100k7D'].sum()).reset_index() #Group them by date CVDpast60days = pd.DataFrame(CVD.groupby(['Date'])['DailyCases', '7Day', 'Daily100k','100k7D'].sum()).reset_index() #print(CVD) #================================================================================= # Aggregate Case Rate #================================================================================= def plot_state_case_rate (df, title='Maryland State Case Rate', size = 1): f, ax = plt.subplots(1,1, figsize=(4*size,2*size)) g = sns.lineplot(x="Date", y="100k7D", data=df, color='blue', label="State Case Rate") plt.xlabel('Date') plt.ylabel(' Cases per 100k ') plt.xticks(rotation=90) plt.title(f' {title} ') ax.grid(color='black', linestyle='dotted', linewidth=0.75) plt.show() cvd_case_rate_aggregate = CVD.groupby(['Date']).sum().reset_index() #print (cvd_case_rate_aggregate) plot_state_case_rate(cvd_case_rate_aggregate, 'State Case Rate', size=4) #================================================================================= # Case Rate past 60 days #================================================================================= def plot_state_case_rate60days (df, title='Maryland State Case Rate', size = 1): f, az = plt.subplots(1,1, figsize=(4*size,2*size)) g = sns.lineplot(x="Date", y="100k7D", data=df, color='blue', label="State Case Rate") plt.xlabel('Date') plt.ylabel(' Cases per 100k ') plt.xticks(rotation=90) plt.title(f' {title} ') az.grid(color='black', linestyle='dotted', linewidth=0.75) plt.show() cvd_case_rate_aggregate60days = CVDpast60days.groupby(['Date']).sum().reset_index() #print (cvd_case_rate_aggregate60days) plot_state_case_rate60days(cvd_case_rate_aggregate60days, 'State Case Rate', size=4)