conditionals based on data frame

mbrown009 · (This post was last modified: Aug-12-2022, 01:41 AM by mbrown009.)

I want to do an if/else statement when it comes to a value in the data frame via panadas. The issue is that when I do this the variables within are local only accessible from within the if statement itself.

the current code is

# !/usr/bin/env python
author = "Michael Brown"
license = "Based off of sript by Sreenivas Bhattiprolu of Python for Microscopists"

import pandas as pd
import datetime as dt
from matplotlib import pyplot as plt
from datetime import timedelta
import seaborn as sns


from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# ===================================================================
# Example of Calcuation for Case Rate
# ===================================================================
# CVD['Daily100k'] = (CVD['DailyCases'] / statePopulation) * 100000
# CVD['100k7D'] = CVD['Daily100k'].rolling(window=7).mean()



CVD = pd.read_csv('https://opendata.maryland.gov/api/views/mgd3-qk8t/rows.csv?accessType=DOWNLOAD')

#Creating new data frame 
AACounty = pd.DataFrame()
AACounty['ReportDate'] = CVD['ReportDate']
AACounty['DataDate'] = CVD['ReportDate']

#converting Report Date to string
AACounty['ReportDate'] = AACounty['ReportDate'].astype('str')
AACounty['DataDate'] = AACounty['DataDate'].astype('str')


#getting the date portion of the string
AACounty['ReportDate'] = AACounty.ReportDate.str.slice(0,10)
AACounty['DataDate'] = AACounty.DataDate.str.slice(0,10)
AACounty['Year'] = AACounty.DataDate.str.slice(6,10)
#AACounty['Year'] = pd.to_numeric(AACounty['Year'],errors = 'coerce')
AACounty['Year'] = AACounty['Year'].astype(float, errors = 'raise')




#converting string to date format
AACounty['ReportDate'] = [dt.datetime.strptime(x, '%m/%d/%Y')
                for x in AACounty['ReportDate']]

AACounty['DataDate'] = [dt.datetime.strptime(x, '%m/%d/%Y')
                for x in AACounty['DataDate']]

#adding new column for data date vs reported date - data date is 1 day prior to report date
AACounty['DataDate'] = AACounty['DataDate'] - timedelta(days=1)  


AnneArundelPopulationDecennial2020 =588261
AnneArundelPOpulationACS2020 = 575421
AnneArundelPopulationACS2019 = 571275
AnneArundelPopulationPEP2019 = 579234
AnneArundelPopulationCounty = 579630


if AACounty['Year'] == "2020":
    AACountyPopulation = 5





#Anne Arundel
AACounty['AnneArundel'] = CVD['ANNE']
AACounty['AADailyCases'] = AACounty['AnneArundel'].diff()
AACounty['AA7Day'] = AACounty['AADailyCases'].rolling(window=7).mean()

AACounty['AADaily100k2020Census'] = (AACounty['AADailyCases'] / AnneArundelPopulationDecennial2020) * 100000
AACounty['AA100k7D-2020Census'] = AACounty['AADaily100k2020Census'].rolling(window=7).mean()

AACounty['AACounty100k2020ACS'] = (AACounty['AADailyCases'] / AnneArundelPOpulationACS2020) * 100000
AACounty['AA100k7D-2020ACS'] = AACounty['AACounty100k2020ACS'].rolling(window=7).mean()


AACounty['AADaily100k2019ACS'] = (AACounty['AADailyCases'] / AnneArundelPopulationACS2019) * 100000
AACounty['AA100k7D-2019ACS'] = AACounty['AADaily100k2019ACS'].rolling(window=7).mean()

AACounty['AADaily100k2019PEP'] = (AACounty['AADailyCases'] / AnneArundelPopulationPEP2019) * 100000
AACounty['AA100k7D-2019PEP'] = AACounty['AADaily100k2019PEP'].rolling(window=7).mean()

AACounty['AADaily100kCounty'] = (AACounty['AADailyCases'] / AnneArundelPopulationCounty) * 100000
AACounty['AA100k7D-County'] = AACounty['AADaily100kCounty'].rolling(window=7).mean()

# Saving Data to CSV
print(AACounty.dtypes)
AACounty.to_csv('AnneArundel.csv')

#=================================================================================
# 7-Day Running Average - Daily Cases
#=================================================================================

def plot_county_7DAvg(df, title='7-Day Case Count', size=1):
    #=================================================================================
    # 7-Day Running Average - Daily Cases - All Counties
    #=================================================================================
    f, ax = plt.subplots(1, 1, figsize=(4 * size, 2 * size))
    g = sns.lineplot(x="DataDate", y="AA7Day", data=df, color='red', label="Anne Arundel")
    plt.legend(loc='upper left')
    plt.xlabel('Date')
    plt.ylabel(' 7-Day Average ')
    plt.xticks(rotation=90)
    plt.title(f' {title} ')
    ax.grid(color='black', linestyle='dotted', linewidth=0.75)
    plt.savefig(f'{title}.png')
    plt.show()



cvd_case_rate_aggregate = AACounty.groupby(['DataDate']).sum().reset_index()
sevenDayAverage60day = cvd_case_rate_aggregate.iloc[-60:]
sevenDayAverage30day = cvd_case_rate_aggregate.iloc[-30:]
sevenDayAverage7day = cvd_case_rate_aggregate.iloc[-7:]
plot_county_7DAvg(cvd_case_rate_aggregate, 'County Aggregate 7-Day Running Average', size=4)
plot_county_7DAvg(sevenDayAverage60day, 'County Past 60 Days 7-Day Running Average', size=4)
plot_county_7DAvg(sevenDayAverage30day, 'County Past 30 Days 7-Day Running Average', size=4)
plot_county_7DAvg(sevenDayAverage7day, 'County Past 7 Days 7-Day Running Average', size=4)


def plot_aacounty_7D100kAvg(df, title='Aggregate Anne Arundel County Case Rate', size=1):

    #=================================================================================
    # 7-Day Running Average - Case Rate - Anne Arundel County
    #=================================================================================
    f, ax = plt.subplots(1, 1, figsize=(5 * size, 3 * size))
    g = sns.lineplot(x="DataDate", y="AA100k7D-2020Census", data=df, color='black', linewidth=3.0, label="Anne Arundel Decennial 2020")
    g = sns.lineplot(x="DataDate", y="AA100k7D-2020ACS", data=df, color = 'darkgreen', linewidth=2.0, label="Anne Arundel ACS 2020")
    g = sns.lineplot(x="DataDate", y="AA100k7D-2019PEP", data=df, color='darkblue', linewidth=3.0, label="Anne Arundel PEP 2019 Estimate")
    g = sns.lineplot(x="DataDate", y="AA100k7D-2019ACS", data=df, color='darkmagenta', linewidth=2.0, label="Anne Arundel ACS 2019 Estimate")
    g = sns.lineplot(x="DataDate", y="AA100k7D-County", data=df, color='red', linewidth=2.0, label="Anne Arundel Estimate")



    #g = sns.lineplot(x="ReportDate", y="AA100k7D", data=df, color='green', label="Anne Arundel")
    plt.rcParams["figure.figsize"] = (200,100)    
    plt.xlabel('Date')
    plt.ylabel('Cases per 100k')
    plt.xticks(rotation=90)
    plt.title(f' {title} ')
    ax.grid(color='black', linestyle='dotted', linewidth=0.75)
    plt.savefig(f'{title}.png')
    plt.show()

aasevenday100kcvd_case_rate_aggregate = AACounty.groupby(['DataDate']).sum().reset_index()
aasevenday100ksevenDayAverage60day = aasevenday100kcvd_case_rate_aggregate.iloc[-60:]
aasevenday100ksevenDayAverage30day = aasevenday100kcvd_case_rate_aggregate.iloc[-30:]
aasevenday100ksevenDayAverage7day = aasevenday100kcvd_case_rate_aggregate.iloc[-7:]
plot_aacounty_7D100kAvg(aasevenday100kcvd_case_rate_aggregate, 'County Case Rate Aggregate 7-Day Running Average', size=4)
plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage60day, 'County Case Rate  Past 60 Days 7-Day Running Average', size=4)
plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage30day, 'County Case Rate  Past 30 Days 7-Day Running Average', size=4)
plot_aacounty_7D100kAvg(aasevenday100ksevenDayAverage7day, 'County Case Rate  Past 7 Days 7-Day Running Average', size=4)

I am getting the following error:

Error:Traceback (most recent call last):
  at block 1, line 63
  at /opt/python/envs/default/lib/python3.8/site-packages/pandas/core/generic.pyline 1537, in __nonzero__(self)
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

It may be that my eyes are tired but I am out of opinions. Any help would be great

What I am trying to do is to say that

if df['field'] is equal to 2019 then set AACPop = 555555

Then I would want to use that AACPop to then calculate the other items you see.

Any other suggestions would be helpful

**Larz60+** · Aug-12-2022, 08:18 AM

on line 40 AACounty['Year'] = AACounty['Year'].astype(float, errors = 'raise')
you convert AACounty['Year'] to a float

then on line 63 if AACounty['Year'] == "2020":
you try to compare to a string

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	how do you style data frame that has empty rows.	gsaray101	0	522	Sep-08-2023, 05:20 PM Last Post: gsaray101
	Doubt about conditionals in Python.	Carmazum	6	1,592	Apr-01-2023, 12:01 AM Last Post: Carmazum
	Grouping Data based on 30% bracket	purnima1	4	1,188	Mar-10-2023, 07:38 PM Last Post: deanhystad
	googletrans library to translate text language for using data frame is not running	gcozba2023	0	1,218	Mar-06-2023, 09:50 AM Last Post: gcozba2023
	Load multiple Jason data in one Data Frame	vijays3	6	1,535	Aug-12-2022, 05:17 PM Last Post: vijays3
	I have written a program that outputs data based on GPS signal	kalle	1	1,161	Jul-22-2022, 12:10 AM Last Post: mcmxl22
	Change elements of array based on position of input data	Cola_Reb	6	2,110	May-13-2022, 12:57 PM Last Post: Cola_Reb
	Efficiency with regard to nested conditionals or and statements	Mark17	13	3,154	May-06-2022, 05:16 PM Last Post: Mark17
	Merging two Data Frame on a special case	piku9290dgp	0	1,081	Mar-02-2022, 10:43 AM Last Post: piku9290dgp
	Nested conditionals vs conditionals connected by operators	dboxall123	8	3,040	Feb-18-2022, 09:34 PM Last Post: dboxall123

conditionals based on data frame

User Panel Messages

Announcements