Pandas DataFrame Code Query - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: Pandas DataFrame Code Query (/thread-32320.html) |
Pandas DataFrame Code Query - eddywinch82 - Feb-05-2021 Hi there, I have the following Python Code in Pandas, which I run in Jupyter Notebook :- import pandas as pd import requests from bs4 import BeautifulSoup import xlrd pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) res1 = requests.get("http://web.archive.org/web/20180814155132/http://www.air-shows.org.uk/2018/05/airshow-news-battle-of-britain-memorial-flight-display-dates-2018/", verify=False) soup1 = BeautifulSoup(res1.content,'lxml') table1 = soup1.find_all('div',{'class' : 'post-content entry-content'}) lis1,lis2=[],[] for table in table1: lis1 = table.find_all('ul') lis2 = table.find_all('h3') new_lis1=[] for l in lis1: temp=l.find_all('li') temp_x=[] for t in temp: temp_x.append(t.text) new_lis1.append(temp_x) new_lis2=[] for l in lis2: new_lis2.append(l.text) all_new_list = [] for lis in new_lis1: temp_x=[] for item in lis: item=item.replace('\xa0'," ") item=item.replace('\xa0'," ") item=item.replace('Flypast – ',"") item=item.replace('Flypasts – ',"") item=item.replace('Lancaster – ',"") temp_x.append(item.split(" – ")) all_new_list.append(temp_x) temp_df = pd.DataFrame(columns=['LOCATION','DATE','MONTH','LANCASTER','SPITFIRE','HURRICANE','DAKOTA']) j=0 date,loc,month=[],[],[] for i in range(len(new_lis2)): for item in all_new_list[i]: loc.append(item[1]) date.append(item[0]) month.append(new_lis2[i]) temp_df['LOCATION']=loc temp_df['DATE'] = date temp_df['MONTH'] = month for i,row in temp_df.iterrows(): loc = row[0] loc = loc.replace("Spitfire x 2",'S x 2') loc = loc.replace("Spifire x 2",'S x 2') loc = loc.replace("Spitfire x 4",'S x 4') loc = loc.replace("Hurricane x 2",'H x 2') temp_df.iat[i,0] = loc for i,row in temp_df.iterrows(): text = row[0] date = row[1] month = row[2] if 'Lancaster' in text: temp_df.iat[i,3]='L' else: temp_df.iat[i,3]=None if 'Spitfire' in text: temp_df.iat[i,4]='S' elif 'S x 2' in text: temp_df.iat[i,4]='SS' elif 'S x 4' in text: temp_df.iat[i,4]='SSSS' else: temp_df.iat[i,4]=None if 'Hurricane' in text: temp_df.iat[i,5]='H' elif 'H x 2' in text: temp_df.iat[i,5]='HH' else: temp_df.iat[i,5]=None if 'Dakota' in text: temp_df.iat[i,6]='D' else: temp_df.iat[i,6]=None if '16' in date and 'Jun' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H' #elif '17' in date and 'Jun' in month: #temp_df.iat[i,3]='--' #temp_df.iat[i,4]='S' #temp_df.iat[i,5]='H' if '13' and '15' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SS' temp_df.iat[i,5]='--' temp_df.iat[i,6]='D' #elif '14' in date and 'Jul' in month: #temp_df.iat[i,3]='L' #temp_df.iat[i,4]='SSS' #temp_df.iat[i,5]='HH' #temp_df.iat[i,6]='D' if '13' in date and 'Sep' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H' for i,row in temp_df.iterrows(): text = row[1] text=text.replace('th','') text=text.replace('st','') text=text.replace('nd','') text=text.replace('rd','') temp_df.iat[i,1] = text for i,row in temp_df.iterrows(): date = row[1] month = row[2] location = row[0] if '-' in date: if month == 'May' and 'June' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'May' and 'June' in date: date=date.replace(" June","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'June',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' not in row[0]: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' in row[0]: date_list = date.split("-") dic = {"LOCATION":row[0],"DATE":str(date_list[0]),"MONTH":row[2],"LANCASTER":'L',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'} temp_df=temp_df.append(dic, ignore_index = True) dic = {"LOCATION":row[0],"DATE":str(date_list[1]),"MONTH":row[2],"LANCASTER":'--',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' in date: date=date.replace(" July","") date_list = date.split("-") for j in range(int(date_list[0]),31): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'July',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'July' and 'August' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'July' and 'August' in date: date=date.replace(" August","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'August',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'August' and 'September' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'August' and 'September' in date: date=date.replace(" September","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'September',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'September' and 'October' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'September' and 'October' in date: date=date.replace(" October","") date_list = date.split("-") for j in range(int(date_list[0]),31): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'October',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) temp_df.reset_index(drop=True, inplace=True) for i,row in temp_df.iterrows(): date = row[1] date=date.replace(' (30 Reserve)',"") temp_df.iat[i,1]=date for i,row in temp_df.iterrows(): month = row[2] day = row[1] if month == 'May': date = '2018-'+str(5)+"-"+str(day) elif month == 'June': date = '2018-'+str(6)+"-"+str(day) elif month == 'July': date = '2018-'+str(7)+"-"+str(day) elif month == 'August': date = '2018-'+str(8)+"-"+str(day) elif month == 'September': date = '2018-'+str(9)+"-"+str(day) temp_df.iat[i,1] = date temp_df = temp_df.drop('MONTH',axis=1) for i,row in temp_df.iterrows(): loc = row[0] if 'All available BBMF Aircraft' in loc: temp_df.iat[i,2] = 'L' temp_df.iat[i,3] = 'SSSSS' temp_df.iat[i,4] = 'HH' temp_df.iat[i,5] = 'D' for i,row in temp_df.iterrows(): loc = row[0] loc = loc.replace("Lancaster","") loc = loc.replace("Spitfire","") loc = loc.replace("Hurricane","") loc = loc.replace("Dakota","") loc = loc.replace("S x 2","") loc = loc.replace("S x 4","") loc = loc.replace("H x 2","") loc = loc.replace(" (, , , '","") loc = loc.replace(" (, , , )","") loc = loc.replace(" ()","") loc = loc.replace(" (, , )","") loc = loc.replace(" ()","") loc = loc.replace("(, , , '","") loc = loc.replace(" (, , , '","") loc = loc.replace(" (, )","") loc = loc.replace(" (SAT ONLY, , )","") loc = loc.replace(" (, , , ","") loc = loc.replace(" ()","") loc = loc.replace("All available BBMF Aircraft","") temp_df.iat[i,0]=loc new_df = temp_df #temp_df.head() new_df.loc[36,'LANCASTER']='--' display = new_df[(new_df['LOCATION'].str.contains('[a-zA-Z]')) & (new_df['LANCASTER'].str.contains('L')) & (new_df['SPITFIRE'].str.contains('S', na=True)) & (new_df['DAKOTA'].str.contains('D', na=True))] #display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6-|-8-')) & (df3['LANCASTER'].str.contains('L')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['DAKOTA'].str.contains('D', na=True))] #Use the above Line of Code when filtering DataFrame by Month #Months = May Jun Jul Aug Sep #('-6-')) For Multiple Months use ('-6-|-8-')) #Months = -5- -6- -7- -8- -9- #display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6$|-8$')) & (df3['LANCASTER'].str.contains('L')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['DAKOTA'].str.contains('D', na=True))] #df3['DATE'].str.contains('-6$')) or ('-6$|-8$')) For more than one Day. Use minus sign in front of the number when filtering the DataFrame by Days of Month. #('-6$')) #('-6$|-8$')) #print(display) #display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d') #display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y') #display=display.sort_values(by=['DATE']) ##added two lines above to convert date format #display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y') display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d') #display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y') #display.drop([36], inplace=True) #display.rename(index={16: '314', 17: '315'}, inplace=True) display=display.rename_axis('MyIdx') display=display.sort_values(by=['DATE','MyIdx']) #display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y') display.drop_duplicates(subset=['LOCATION', 'DATE'], keep='last', inplace=True) #display.drop('DAKOTA', axis=1, inplace=True) #display.drop('Date', axis=1, inplace=True) #display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all') display=display[['LOCATION','DATE','LANCASTER','HURRICANE','SPITFIRE','DAKOTA']] display=display.fillna('--') df = (display[display['LOCATION'] == 'Hastings, East Sussex']) display = pd.concat([display.iloc[:25], df, display.iloc[25:]], sort=False, ignore_index=True) display['DATE'] = pd.to_datetime(display['DATE'], format='%d-%b-%Y').astype(str) display.loc[25,'DATE'] = display.loc[25,'DATE'].replace('22', '21') display = display.sort_values(by=['DATE','LOCATION']) display.reset_index(drop=True, inplace=True) display.loc[20] = ['Royal International Air Tattoo, RAF Fairford, ...','14-Jul-2018','L','HH','SSS','D'] #display.loc[[8,10],'LOCATION']='Flywheel Festival, Bicester, Oxfordshire' display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y') display.to_csv("BBMF DISPLAY 2018.csv") displayAnd I get the DataFrame output I want, when I use the following parts of this Code :- new_df.loc[36,'LANCASTER']='--'And :- display.loc[20] = ['Royal International Air Tattoo, RAF Fairford, ...','14-Jul-2018','L','HH','SSS','D']The Row at index position 7 i.e. Row number 8 shows correctly, and the Row at display.loc[20] shows as I want it to. However if I hash those two lines of Code out, and use the following two lines of Code instead, I don't get the correct DataFrame output :- elif '17' in date and 'Jun' in month: temp_df.iat[i,3]='--' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H'And :- elif '14' in date and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SSS' temp_df.iat[i,5]='HH' temp_df.iat[i,6]='D'Or :- elif '14' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SSS' temp_df.iat[i,5]='HH' temp_df.iat[i,6]='D'In the First line of Code, the Haven Great Yarmouth Row for the 17th Jun, I wouldn't expect to show, and in the second line of Code, no changes happen, to the row in question in the DataFrame Output, when I use that line of Code, and I am not sure why ? Yet when I use these two if Lines of Code, the corresponding Rows display correctly in the DataFrame Output :- i.e. if use :- if '13' and '15' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SS' temp_df.iat[i,5]='--' temp_df.iat[i,6]='D'The Rows where 'Royal' is in the Location and the correct corresponding Dates, the Rows are output correctly, i.e. those changes are made to the correct Rows. And if I use :- if '13' in date and 'Sep' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H'The Rows where Jersey and Guernsey are the Location, show correctly in the DataFrame output. Why do those work and others don't ? I think when I use :- new_df.loc[36,'LANCASTER']='--'that works i.e. the 17th June Row doesn't show, I think because that is a direct change to the DataFrame, and the filtering of Rows means no Lancaster is considered, so that Row doesn't show ? Could anyone suggest where I have gone wrong, and suggest changes I could make to my Code, i.e. with the elif lines of Code in question ? Does an 'elif' statement, always require a 'else:' statement ending in Python ? And if so, what should I type ? Any help would be much appreciated Best Regards Eddie Winch RE: Pandas DataFrame Code Query - eddywinch82 - Feb-05-2021 Can anyone help me ? Eddie Winch RE: Pandas DataFrame Code Query - nilamo - Feb-05-2021 Quote:if '13' and '15' in date and 'Royal' in text and 'Jul' in month: This will always be True. Any elifs after this line will never match because of it. You probably wanted if '13' in date and '15' in date and 'Royal' in text and 'Jul' in month:
RE: Pandas DataFrame Code Query - eddywinch82 - Feb-08-2021 Hi nilamo, Many thanks for your very helpful reply, I made the change, to the line of Code you suggested I should alter, but the same DataFrame Output occurs, when I run the Code again. Any ideas of any other changes I should make ? Eddie Winch RE: Pandas DataFrame Code Query - eddywinch82 - Feb-11-2021 Can you help me again nilamo ? Or anyone else ? Any additional help, would be much appreciated. Regards Eddie Winch RE: Pandas DataFrame Code Query - nilamo - Feb-11-2021 Could you trim it down so it's easier to see the issue? 350 lines is a lot to look through. RE: Pandas DataFrame Code Query - eddywinch82 - Feb-12-2021 Hi nilamo, Here I have posted the Lines of Code, where I think the issue arises :- for i,row in temp_df.iterrows(): text = row[0] date = row[1] month = row[2] if 'Lancaster' in text: temp_df.iat[i,3]='L' else: temp_df.iat[i,3]=None if 'Spitfire' in text: temp_df.iat[i,4]='S' elif 'S x 2' in text: temp_df.iat[i,4]='SS' elif 'S x 4' in text: temp_df.iat[i,4]='SSSS' else: temp_df.iat[i,4]=None if 'Hurricane' in text: temp_df.iat[i,5]='H' elif 'H x 2' in text: temp_df.iat[i,5]='HH' else: temp_df.iat[i,5]=None if 'Dakota' in text: temp_df.iat[i,6]='D' else: temp_df.iat[i,6]=None if '16' in date and 'Jun' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H' elif '17' in date and 'Jun' in month: temp_df.iat[i,3]='--' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H' if '13' in date and '15' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SS' temp_df.iat[i,5]='--' temp_df.iat[i,6]='D' elif '14' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SSS' temp_df.iat[i,5]='HH' temp_df.iat[i,6]='D' if '13' in date and 'Sep' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='S' temp_df.iat[i,5]='H' for i,row in temp_df.iterrows(): text = row[1] text=text.replace('th','') text=text.replace('st','') text=text.replace('nd','') text=text.replace('rd','') temp_df.iat[i,1] = text for i,row in temp_df.iterrows(): date = row[1] month = row[2] location = row[0] if '-' in date: if month == 'May' and 'June' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'May' and 'June' in date: date=date.replace(" June","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'June',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' not in row[0]: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' not in date and 'Lancaster - SAT ONLY' in row[0]: date_list = date.split("-") dic = {"LOCATION":row[0],"DATE":str(date_list[0]),"MONTH":row[2],"LANCASTER":'L',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'} temp_df=temp_df.append(dic, ignore_index = True) dic = {"LOCATION":row[0],"DATE":str(date_list[1]),"MONTH":row[2],"LANCASTER":'--',"SPITFIRE":'S','HURRICANE':'H','DAKOTA':'--'} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'June' and 'July' in date: date=date.replace(" July","") date_list = date.split("-") for j in range(int(date_list[0]),31): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'July',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'July' and 'August' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'July' and 'August' in date: date=date.replace(" August","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'August',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'August' and 'September' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'August' and 'September' in date: date=date.replace(" September","") date_list = date.split("-") for j in range(int(date_list[0]),32): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'September',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'September' and 'October' not in date: date_list = date.split("-") for j in range(int(date_list[0]),int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) if month == 'September' and 'October' in date: date=date.replace(" October","") date_list = date.split("-") for j in range(int(date_list[0]),31): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":row[2],"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) for j in range(1,int(date_list[1])+1): dic = {"LOCATION":row[0],"DATE":str(j),"MONTH":'October',"LANCASTER":row[3],"SPITFIRE":row[4],'HURRICANE':row[5],'DAKOTA':row[6]} temp_df=temp_df.append(dic, ignore_index = True) temp_df=temp_df.drop(temp_df[ temp_df['DATE'] == row[1] ].index) temp_df.reset_index(drop=True, inplace=True) for i,row in temp_df.iterrows(): date = row[1] date=date.replace(' (30 Reserve)',"") temp_df.iat[i,1]=date for i,row in temp_df.iterrows(): month = row[2] day = row[1] if month == 'May': date = '2018-'+str(5)+"-"+str(day) elif month == 'June': date = '2018-'+str(6)+"-"+str(day) elif month == 'July': date = '2018-'+str(7)+"-"+str(day) elif month == 'August': date = '2018-'+str(8)+"-"+str(day) elif month == 'September': date = '2018-'+str(9)+"-"+str(day) temp_df.iat[i,1] = date temp_df = temp_df.drop('MONTH',axis=1) for i,row in temp_df.iterrows(): loc = row[0] if 'All available BBMF Aircraft' in loc: temp_df.iat[i,2] = 'L' temp_df.iat[i,3] = 'SSSSS' temp_df.iat[i,4] = 'HH' temp_df.iat[i,5] = 'D'Interestingly, with the following two lines of Code :- if '13' in date and '15' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SS' temp_df.iat[i,5]='--' temp_df.iat[i,6]='D' elif '14' in date and 'Royal' in text and 'Jul' in month: temp_df.iat[i,3]='L' temp_df.iat[i,4]='SSS' temp_df.iat[i,5]='HH' temp_df.iat[i,6]='D'For the 14th July Date, the same Aircraft are assigned, as for the 13th and 15th Date 'Royal' Rows, in the DataFrame output ? Regards Eddie Winch |