Jan-27-2020, 12:12 PM
(This post was last modified: Jan-27-2020, 12:12 PM by sandeep_ganga.)
Try this, converting object to datetime64 type
Sandeep
GANGA SANDEEP KUMAR
display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d') display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%m-%Y')
import pandas as pd import requests from bs4 import BeautifulSoup res = requests.get("http://web.archive.org/web/20041020000138/http://www.raf.mod.uk/bbmf/displaydates.html") soup = BeautifulSoup(res.content,'lxml') table = soup.find_all('table', align="CENTER")[0] df = pd.read_html(str(table)) df = df[0] ################## ################## ################## pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) #make df[0] to list list=[] for i in df[0]: list.append(i) #reverse the list to make split to sublist easier list.reverse() #split list to sublist using condition len(val)> 2 size = len(list) idx_list = [idx + 1 for idx, val in enumerate(list) if len(val) > 2] res = [list[i: j] for i, j in zip([0] + idx_list, idx_list + ([size] if idx_list[-1] != size else []))] #make monthname to numbers and print for i in res: for j in range(len(i)): if i[j].upper()=='JUNE': i[j]='6' elif i[j].upper() =='MAY': i[j]='5' elif i[j].upper() == 'APRIL': i[j]='4' elif i[j].upper() =='JANUARY': i[j]='1' elif i[j].upper() == 'FEBRUARY': i[j]='2' elif i[j].upper() =='MARCH': i[j]='3' elif i[j].upper() == 'JULY': i[j]='7' elif i[j].upper() =='AUGUST': i[j]='8' elif i[j].upper() == 'SEPTEMBER': i[j]='9' elif i[j].upper() =='OCTOBER': i[j]='10' elif i[j].upper() == 'NOVEMBER': i[j]='11' elif i[j].upper() =='DECEMBER': i[j]='12' #append string and append to new list finallist=[] for i in res: for j in range(len(i)): if j < len(i) - 1: #print(f'2004-{i[-1]}-{i[j]}') finallist.append(f'2004-{i[-1]}-{i[j]}') #print(finallist) finallist.reverse() #print("\n=== ORIGINAL DF ===\n") #print(df) #convert dataframe to list listtemp1=df.values.tolist() #replace found below values with 0000_removable removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL'] for i in listtemp1: for j in range(len(i)): for place in removelist: if str(i[j]).upper()==place: i[j]='0000_removable' else: pass #remove sublists with the replaced values we redirected dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable'] res = [i for i in listtemp1 if i != dellist] #assign back to dataframe DF3 df3=pd.DataFrame() df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA']) #print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n") #print(df3) #now assign that sorted date list to dataframe DF3 idx = 0 #df3.insert(loc=idx, column='EDITED_DATE_FORMAT', value=finallist) df3.insert(loc=idx, column='DATE', value=finallist) pd.options.display.max_rows = 500 #print("\n=== FINAL DF3 after joining the edited date format column list ===\n") #print(df3) #validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column #df3['ED1']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day #df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1) #convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt #df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d') #df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT']).dt.strftime('%d-%m-%Y') #df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d') ################## ################## ################## #df3 = df3.rename(columns=df.iloc[0]) #df3 = df.iloc[2:] #df3.head(15) pd.options.display.max_rows = 1000 display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['LANCASTER'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['DAKOTA'].str.contains('X', na=True))] #print(display) display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d') display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%m-%Y') ##added two lines above to convert date format #display.drop('DAKOTA', axis=1, inplace=True) display.drop('Date', axis=1, inplace=True) display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all') #print(display)Best Regards,
Sandeep
GANGA SANDEEP KUMAR