Jan-25-2020, 04:22 PM
(This post was last modified: Jan-25-2020, 04:24 PM by eddywinch82.)
Hi there,
This is the latest Python Code I have, which I run in Jupyter Notebook :-
Any help would be appreciated
Regards
Eddie Winch
This is the latest Python Code I have, which I run in Jupyter Notebook :-
import pandas as pd import requests from bs4 import BeautifulSoup res = requests.get("http://web.archive.org/web/20041020000138/http://www.raf.mod.uk/bbmf/displaydates.html") soup = BeautifulSoup(res.content,'lxml') table = soup.find_all('table', align="CENTER")[0] df = pd.read_html(str(table)) df = df[0] ################## ################## ################## pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) #make df[0] to list list=[] for i in df[0]: list.append(i) #reverse the list to make split to sublist easier list.reverse() #split list to sublist using condition len(val)> 2 size = len(list) idx_list = [idx + 1 for idx, val in enumerate(list) if len(val) > 2] res = [list[i: j] for i, j in zip([0] + idx_list, idx_list + ([size] if idx_list[-1] != size else []))] #make monthname to numbers and print for i in res: for j in range(len(i)): if i[j].upper()=='JUNE': i[j]='6' elif i[j].upper() =='MAY': i[j]='5' elif i[j].upper() == 'APRIL': i[j]='4' elif i[j].upper() =='JANUARY': i[j]='1' elif i[j].upper() == 'FEBRUARY': i[j]='2' elif i[j].upper() =='MARCH': i[j]='3' elif i[j].upper() == 'JULY': i[j]='7' elif i[j].upper() =='AUGUST': i[j]='8' elif i[j].upper() == 'SEPTEMBER': i[j]='9' elif i[j].upper() =='OCTOBER': i[j]='10' elif i[j].upper() == 'NOVEMBER': i[j]='11' elif i[j].upper() =='DECEMBER': i[j]='12' #append string and append to new list finallist=[] for i in res: for j in range(len(i)): if j < len(i) - 1: #print(f'2004-{i[-1]}-{i[j]}') finallist.append(f'2004-{i[-1]}-{i[j]}') #print(finallist) finallist.reverse() #print("\n=== ORIGINAL DF ===\n") #print(df) #convert dataframe to list listtemp1=df.values.tolist() #replace found below values with 0000_removable removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL'] for i in listtemp1: for j in range(len(i)): for place in removelist: if str(i[j]).upper()==place: i[j]='0000_removable' else: pass #remove sublists with the replaced values we redirected dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable'] res = [i for i in listtemp1 if i != dellist] #assign back to dataframe DF3 df3=pd.DataFrame() df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA']) #print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n") #print(df3) #now assign that sorted date list to dataframe DF3 idx = 0 #df3.insert(loc=idx, column='EDITED_DATE_FORMAT', value=finallist) df3.insert(loc=idx, column='DATE', value=finallist) pd.options.display.max_rows = 500 #print("\n=== FINAL DF3 after joining the edited date format column list ===\n") #print(df3) #validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column #df3['ED1']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day #df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1) #convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt #df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d') ################## ################## ################## #df3 = df3.rename(columns=df.iloc[0]) #df3 = df.iloc[2:] #df3.head(15) pd.options.display.max_rows = 1000 display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['LANCASTER'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['DAKOTA'].str.contains('X', na=True))] #print(display) #display.drop('DAKOTA', axis=1, inplace=True) display.drop('Date', axis=1, inplace=True) display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all') #print(display)It does what I want it to do, but I would like, the Dates shown in the DATE Column, to be in the Format Day-Month-Year i.e. 8-9-2004 . What do I need to change in that Code, to achieve that ?
Any help would be appreciated
Regards
Eddie Winch