Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Pandas DataFrame Code Query
#1
Hi there,

I have the following Python Code in Pandas, which I run in Jupyter Notebook :-

import pandas as pd
import requests
from bs4 import BeautifulSoup
import xlrd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

res1 = requests.get("http://web.archive.org/web/20180814155132/http://www.air-shows.org.uk/2018/05/airshow-news-battle-of-britain-memorial-flight-display-dates-2018/", verify=False)     
soup1 = BeautifulSoup(res1.content,'lxml')

table1 = soup1.find_all('div',{'class' : 'post-content entry-content'})
lis1,lis2=[],[]
for table in table1:
    lis1 = table.find_all('ul')
    lis2 = table.find_all('h3')
    
new_lis1=[]
for l in lis1:
    temp=l.find_all('li')
    temp_x=[]
    for t in temp:
        temp_x.append(t.text)
    new_lis1.append(temp_x)
    
    new_lis2=[]
for l in lis2:
    new_lis2.append(l.text)
    
all_new_list = []
for lis in new_lis1:
    temp_x=[]
    for item in lis:
        item=item.replace('\xa0'," ")
        item=item.replace('\xa0'," ")
        item=item.replace('Flypast – ',"")
        item=item.replace('Flypasts – ',"")
        item=item.replace('Lancaster – ',"")
        temp_x.append(item.split(" – "))
    all_new_list.append(temp_x)
    
temp_df = pd.DataFrame(columns=['LOCATION','DATE','MONTH','LANCASTER','SPITFIRE','HURRICANE','DAKOTA'])

j=0
date,loc,month=[],[],[]
for i in range(len(new_lis2)):
    for item in all_new_list[i]:
        loc.append(item[1])
        date.append(item[0])
        month.append(new_lis2[i])
        
temp_df['LOCATION']=loc
temp_df['DATE'] = date
temp_df['MONTH'] = month

for i,row in temp_df.iterrows():
    loc = row[0]
    loc = loc.replace("Spitfire x 2",'S x 2')
    loc = loc.replace("Spifire x 2",'S x 2')
    loc = loc.replace("Spitfire x 4",'S x 4')
    loc = loc.replace("Hurricane x 2",'H x 2')
    temp_df.iat[i,0] = loc
    

for i,row in temp_df.iterrows():
    text = row[0]
    date = row[1]
    month = row[2]
    if 'Lancaster' in text:
        temp_df.iat[i,3]='L'
    else:
        temp_df.iat[i,3]=None
    if 'Spitfire' in text:
        temp_df.iat[i,4]='S'

    elif 'S x 2' in text:
        temp_df.iat[i,4]='SS'


    elif 'S x 4' in text:
        temp_df.iat[i,4]='SSSS'
    else:
        temp_df.iat[i,4]=None
    if 'Hurricane' in text:
        temp_df.iat[i,5]='H'

    elif 'H x 2' in text:
        temp_df.iat[i,5]='HH'
    else:
        temp_df.iat[i,5]=None
    if 'Dakota' in text:
        temp_df.iat[i,6]='D'
    else:
        temp_df.iat[i,6]=None

    if '16' in date and 'Jun' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='S'
        temp_df.iat[i,5]='H'
        
    #elif '17' in date and 'Jun' in month:
        #temp_df.iat[i,3]='--'
        #temp_df.iat[i,4]='S'
        #temp_df.iat[i,5]='H'
    
    if '13' and '15' in date and 'Royal' in text and 'Jul' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='SS'
        temp_df.iat[i,5]='--'
        temp_df.iat[i,6]='D'
        
    #elif '14' in date and 'Jul' in month:
        #temp_df.iat[i,3]='L'
        #temp_df.iat[i,4]='SSS'
        #temp_df.iat[i,5]='HH'
        #temp_df.iat[i,6]='D'

    if '13' in date and 'Sep' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='S'
        temp_df.iat[i,5]='H'
        
for i,row in temp_df.iterrows():
    text = row[1]
    text=text.replace('th','')
    text=text.replace('st','')
    text=text.replace('nd','')
    text=text.replace('rd','')
    temp_df.iat[i,1] = text
    

for i,row in temp_df.iterrows():
    date = row[1]
    month = row[2]
    location = row[0]
    if '-' in date:
        if month == 'May' and 'June' not in date:
            date_list = date.split("-")
            for j in range(int(date_list[0]),int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'May' and 'June' in date:
            date=date.replace(" June","")
            date_list = date.split("-")
            for j in range(int(date_list[0]),32):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            for j in range(1,int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'June',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        
        if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' not in row[0]:
            date_list = date.split("-")
            for j in range(int(date_list[0]),int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' in row[0]:
            date_list = date.split("-")
            dic = {"LOCATION":row[0],"DATE":str(date_list[0]),"MONTH":row[2],"LANCASTER":'L',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'}
            temp_df=temp_df.append(dic, ignore_index = True)
            dic = {"LOCATION":row[0],"DATE":str(date_list[1]),"MONTH":row[2],"LANCASTER":'--',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'}
            temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'June' and 'July' in date:
            date=date.replace(" July","")
            date_list = date.split("-")
            for j in range(int(date_list[0]),31):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            for j in range(1,int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'July',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)

        if month == 'July' and 'August' not in date:
            date_list = date.split("-")
            for j in range(int(date_list[0]),int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'July' and 'August' in date:
            date=date.replace(" August","")
            date_list = date.split("-")
            for j in range(int(date_list[0]),32):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            for j in range(1,int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'August',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
            
            
        if month == 'August' and 'September' not in date:
            date_list = date.split("-")
            for j in range(int(date_list[0]),int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'August' and 'September' in date:
            date=date.replace(" September","")
            date_list = date.split("-")
            for j in range(int(date_list[0]),32):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            for j in range(1,int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'September',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)

        if month == 'September' and 'October' not in date:
            date_list = date.split("-")
            for j in range(int(date_list[0]),int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
        if month == 'September' and 'October' in date:
            date=date.replace(" October","")
            date_list = date.split("-")
            for j in range(int(date_list[0]),31):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            for j in range(1,int(date_list[1])+1):
                dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'October',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]}
                temp_df=temp_df.append(dic, ignore_index = True)
            temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index)
            

temp_df.reset_index(drop=True, inplace=True)

for i,row in temp_df.iterrows():
    date = row[1]
    date=date.replace(' (30 Reserve)',"")
    temp_df.iat[i,1]=date
    

for i,row in temp_df.iterrows():
    month = row[2]
    day = row[1]
    if month == 'May':
        date = '2018-'+str(5)+"-"+str(day)
    elif month == 'June':
        date = '2018-'+str(6)+"-"+str(day)
    elif month == 'July':
        date = '2018-'+str(7)+"-"+str(day)
        
    elif month == 'August':
        date = '2018-'+str(8)+"-"+str(day)
    elif month == 'September':
        date = '2018-'+str(9)+"-"+str(day)
    temp_df.iat[i,1] = date
temp_df = temp_df.drop('MONTH',axis=1)

for i,row in temp_df.iterrows():
    loc = row[0]
    if 'All available BBMF Aircraft' in loc:
        temp_df.iat[i,2] = 'L'
        temp_df.iat[i,3] = 'SSSSS'
        temp_df.iat[i,4] = 'HH'
        temp_df.iat[i,5] = 'D'
        

for i,row in temp_df.iterrows():
    loc = row[0]
    loc = loc.replace("Lancaster","")
    loc = loc.replace("Spitfire","")
    loc = loc.replace("Hurricane","")
    loc = loc.replace("Dakota","")
    loc = loc.replace("S x 2","")
    loc = loc.replace("S x 4","")
    loc = loc.replace("H x 2","")
    loc = loc.replace(" (, , , '","")
    loc = loc.replace(" (, , , )","")
    loc = loc.replace("  ()","")
    loc = loc.replace(" (, , )","")
    loc = loc.replace(" ()","")
    loc = loc.replace("(, , , '","")
    loc = loc.replace(" (, , , '","")
    loc = loc.replace(" (, )","")
    loc = loc.replace(" (SAT ONLY, , )","")
    
    loc = loc.replace(" (, , , ","")
    loc = loc.replace(" ()","")
    loc = loc.replace("All available BBMF Aircraft","")
    temp_df.iat[i,0]=loc
    
new_df = temp_df  
#temp_df.head()

new_df.loc[36,'LANCASTER']='--'

display = new_df[(new_df['LOCATION'].str.contains('[a-zA-Z]')) & (new_df['LANCASTER'].str.contains('L')) & (new_df['SPITFIRE'].str.contains('S', na=True)) & (new_df['DAKOTA'].str.contains('D', na=True))]    

#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6-|-8-')) & (df3['LANCASTER'].str.contains('L')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['DAKOTA'].str.contains('D', na=True))]    

#Use the above Line of Code when filtering DataFrame by Month
                                   
#Months = May Jun Jul Aug Sep                #('-6-')) For Multiple Months use ('-6-|-8-'))
#Months = -5- -6- -7- -8- -9-
                                  
#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6$|-8$')) & (df3['LANCASTER'].str.contains('L')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['DAKOTA'].str.contains('D', na=True))]    

#df3['DATE'].str.contains('-6$')) or ('-6$|-8$')) For more than one Day. Use minus sign in front of the number when filtering the DataFrame by Days of Month.

                          #('-6$'))                 #('-6$|-8$'))

#print(display)
   
#display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')
#display=display.sort_values(by=['DATE'])
##added two lines above to convert date format
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')    

display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')

#display.drop([36], inplace=True) 
#display.rename(index={16: '314', 17: '315'}, inplace=True)
display=display.rename_axis('MyIdx')
display=display.sort_values(by=['DATE','MyIdx'])
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')
display.drop_duplicates(subset=['LOCATION', 'DATE'], keep='last', inplace=True)

#display.drop('DAKOTA', axis=1, inplace=True)
#display.drop('Date', axis=1, inplace=True)
#display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')
display=display[['LOCATION','DATE','LANCASTER','HURRICANE','SPITFIRE','DAKOTA']]
display=display.fillna('--')

df = (display[display['LOCATION'] == 'Hastings, East Sussex'])
display = pd.concat([display.iloc[:25], df, display.iloc[25:]], sort=False, ignore_index=True)

display['DATE'] = pd.to_datetime(display['DATE'], format='%d-%b-%Y').astype(str)
display.loc[25,'DATE'] = display.loc[25,'DATE'].replace('22', '21')

display = display.sort_values(by=['DATE','LOCATION'])
display.reset_index(drop=True, inplace=True)

display.loc[20] = ['Royal International Air Tattoo, RAF Fairford, ...','14-Jul-2018','L','HH','SSS','D']

#display.loc[[8,10],'LOCATION']='Flywheel Festival, Bicester, Oxfordshire'

display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')
display.to_csv("BBMF DISPLAY 2018.csv")
display
And I get the DataFrame output I want,

when I use the following parts of this Code :-

new_df.loc[36,'LANCASTER']='--'
And :-

display.loc[20] = ['Royal International Air Tattoo, RAF Fairford, ...','14-Jul-2018','L','HH','SSS','D']
The Row at index position 7 i.e. Row number 8 shows correctly, and the Row at display.loc[20] shows as I want it to.

However if I hash those two lines of Code out, and use the following two lines of Code instead, I don't get the correct DataFrame output :-

elif '17' in date and 'Jun' in month:
        temp_df.iat[i,3]='--'
        temp_df.iat[i,4]='S'
        temp_df.iat[i,5]='H'
And :-

elif '14' in date and 'Jul' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='SSS'
        temp_df.iat[i,5]='HH'
        temp_df.iat[i,6]='D'
Or :-

elif '14' in date and 'Royal' in text and 'Jul' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='SSS'
        temp_df.iat[i,5]='HH'
        temp_df.iat[i,6]='D'
In the First line of Code, the Haven Great Yarmouth Row for the 17th Jun, I wouldn't expect to show, and in the second line of Code, no changes happen, to the row in question in the DataFrame Output, when I use that line of Code, and I am not sure why ?

Yet when I use these two if Lines of Code, the corresponding Rows display correctly in the DataFrame Output :-

i.e. if use :-

if '13' and '15' in date and 'Royal' in text and 'Jul' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='SS'
        temp_df.iat[i,5]='--'
        temp_df.iat[i,6]='D'
The Rows where 'Royal' is in the Location and the correct corresponding Dates, the Rows are output correctly, i.e. those changes are made to the correct Rows.

And if I use :-

if '13' in date and 'Sep' in month:
        temp_df.iat[i,3]='L'
        temp_df.iat[i,4]='S'
        temp_df.iat[i,5]='H'
The Rows where Jersey and Guernsey are the Location, show correctly in the DataFrame output. Why do
those work and others don't ?

I think when I use :-

new_df.loc[36,'LANCASTER']='--'
that works i.e. the 17th June Row doesn't show, I think because that is a direct change to the DataFrame, and the filtering of Rows means no Lancaster is considered, so that Row doesn't show ?

Could anyone suggest where I have gone wrong, and suggest changes I could make to my Code, i.e. with the elif lines of Code in question ?

Does an 'elif' statement, always require a 'else:' statement ending in Python ? And if so, what should I type ?

Any help would be much appreciated

Best Regards

Eddie Winch Smile
Reply


Messages In This Thread
Pandas DataFrame Code Query - by eddywinch82 - Feb-05-2021, 03:04 PM
RE: Pandas DataFrame Code Query - by eddywinch82 - Feb-05-2021, 05:07 PM
RE: Pandas DataFrame Code Query - by nilamo - Feb-05-2021, 08:25 PM
RE: Pandas DataFrame Code Query - by eddywinch82 - Feb-08-2021, 06:54 PM
RE: Pandas DataFrame Code Query - by eddywinch82 - Feb-11-2021, 07:12 PM
RE: Pandas DataFrame Code Query - by nilamo - Feb-11-2021, 09:15 PM
RE: Pandas DataFrame Code Query - by eddywinch82 - Feb-12-2021, 09:55 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Python Alteryx QS-Passing pandas dataframe column inside SQL query where condition sanky1990 0 728 Dec-04-2023, 09:48 PM
Last Post: sanky1990
  Question on pandas.dataframe merging two colums shomikc 4 817 Jun-29-2023, 11:30 AM
Last Post: snippsat
  Pandas AttributeError: 'DataFrame' object has no attribute 'concat' Sameer33 5 5,587 Feb-17-2023, 06:01 PM
Last Post: Sameer33
  help how to get size of pandas dataframe into MB\GB mg24 1 2,344 Jan-28-2023, 01:23 PM
Last Post: snippsat
  pandas dataframe into csv .... exponent issue mg24 10 1,764 Jan-20-2023, 08:15 PM
Last Post: deanhystad
  How to assign a value to pandas dataframe column rows based on a condition klllmmm 0 828 Sep-08-2022, 06:32 AM
Last Post: klllmmm
  How to retrieve records in a DataFrame (Python/Pandas) that contains leading or trail mmunozjr 3 1,745 Sep-05-2022, 11:56 AM
Last Post: Pedroski55
  "Vlookup" in pandas dataframe doug2019 3 1,855 May-09-2022, 01:35 PM
Last Post: snippsat
  Regex Expression With Code Query In Pandas eddywinch82 8 2,332 Apr-13-2022, 09:12 AM
Last Post: snippsat
  Increase the speed of a python loop over a pandas dataframe mcva 0 1,313 Jan-21-2022, 06:24 PM
Last Post: mcva

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020