Python Forum
Obtaining Correct Date In Pandas DataFrame
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Obtaining Correct Date In Pandas DataFrame
#10
Try this: convert the DATE column from object dtype to datetime64, then format it as day-month-year.

# Parse the DATE strings as YYYY-MM-DD, then render them back out as DD-MM-YYYY text.
display['DATE'] = pd.to_datetime(display['DATE'], format='%Y-%m-%d').dt.strftime('%d-%m-%Y')
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
    
      
# Download the archived display-dates page. The timeout keeps the script from
# hanging on an unresponsive server, and raise_for_status() stops on an HTTP
# error instead of trying to parse an error page.
res = requests.get(
    "http://web.archive.org/web/20041020000138/http://www.raf.mod.uk/bbmf/displaydates.html",
    timeout=30,
)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

# Use the first <table align="CENTER"> found on the page.
table = soup.find_all('table', align="CENTER")[0]

# read_html returns a list of DataFrames; keep the first one. The markup is
# wrapped in StringIO because passing a literal HTML string to read_html is
# deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]
   
   
##################
##################
##################
   
   
# Raise pandas' display limits (row count, column count, line width) used when printing.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)
  
  
# Copy the first column of df into a plain Python list. The name deliberately
# avoids `list`, which would shadow the builtin for the rest of the script.
first_column = df[0].tolist()

# Reverse the cells so that each long cell ends the run of short cells that
# followed it in the original table order.
first_column.reverse()

# Split into sublists: every cell longer than two characters closes a group
# (presumably a month name or header text, while the short cells are day
# numbers -- confirm against the scraped table).
size = len(first_column)
idx_list = [idx + 1 for idx, val in enumerate(first_column) if len(val) > 2]
starts = [0] + idx_list
# Add a final stop only when the last long cell is not already the last cell.
stops = idx_list + ([size] if idx_list[-1] != size else [])
res = [first_column[i:j] for i, j in zip(starts, stops)]
   
# Replace month names (matched case-insensitively) with their month number as
# a string, editing the sublists of res in place. Other cells are left alone.
MONTH_NUMBERS = {
    'JANUARY': '1',
    'FEBRUARY': '2',
    'MARCH': '3',
    'APRIL': '4',
    'MAY': '5',
    'JUNE': '6',
    'JULY': '7',
    'AUGUST': '8',
    'SEPTEMBER': '9',
    'OCTOBER': '10',
    'NOVEMBER': '11',
    'DECEMBER': '12',
}
for group in res:
    for position, cell in enumerate(group):
        month_number = MONTH_NUMBERS.get(cell.upper())
        if month_number is not None:
            group[position] = month_number
   
   
# Build one '2004-<month>-<day>' string per day cell. The last element of each
# group supplies the month part, and every element before it supplies a day.
finallist = [
    f'2004-{group[-1]}-{day}'
    for group in res
    for day in group[:-1]
]
# The groups were built from the reversed column, so flip the result back.
finallist.reverse()
   
   
   
   
#print("\n=== ORIGINAL DF ===\n")
#print(df)
   
   
   
# Column headers and month names that should not survive as data cells.
header_words = {
    'LOCATION', 'LANCASTER', 'SPITFIRE', 'HURRICANE', 'DAKOTA', 'DATE',
    'JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE', 'JULY',
    'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER',
}
placeholder = '0000_removable'

# Turn the DataFrame into a list of row lists, swapping every cell whose
# upper-cased text is a header word or month name for the placeholder.
listtemp1 = [
    [placeholder if str(cell).upper() in header_words else cell for cell in row]
    for row in df.values.tolist()
]

# Drop the rows that consist of exactly six placeholders.
dellist = [placeholder] * 6
res = [row for row in listtemp1 if row != dellist]

# Rebuild the remaining rows as DataFrame df3 with explicit column names.
df3 = pd.DataFrame(res, columns=['Date', 'LOCATION', 'LANCASTER', 'SPITFIRE', 'HURRICANE', 'DAKOTA'])
#print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n")
#print(df3)
   
   
# Put the finallist date strings into df3 as a new first column named 'DATE'.
df3.insert(0, 'DATE', finallist)
pd.set_option('display.max_rows', 500)
   
#print("\n=== FINAL DF3 after joining the edited date format column list ===\n")
#print(df3)
   
   
#optional validation: compare the day parsed from the newly joined "EDITED_DATE_FORMAT" column with the already existing "Date" column
#df3['ED1']=  pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day
#df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1)
   
   
#convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 format
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d')
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT']).dt.strftime('%d-%m-%Y')
#df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d')
   
   
   
##################
##################
##################
   
   
  
  
   
  
#df3 = df3.rename(columns=df.iloc[0])
#df3 = df.iloc[2:]
#df3.head(15)
pd.options.display.max_rows = 1000

# Keep rows whose LOCATION contains a letter, whose LANCASTER cell contains
# 'X', and whose SPITFIRE and DAKOTA cells either contain 'X' or are missing.
# A missing LOCATION or LANCASTER cell counts as "no match" (na=False) so the
# mask stays purely boolean. .copy() makes `display` an independent frame, so
# the column assignment below does not write to a slice of df3.
display = df3[
    df3['LOCATION'].str.contains('[a-zA-Z]', na=False)
    & df3['LANCASTER'].str.contains('X', na=False)
    & df3['SPITFIRE'].str.contains('X', na=True)
    & df3['DAKOTA'].str.contains('X', na=True)
].copy()

# Parse the YYYY-MM-DD strings once, then render them as DD-MM-YYYY text.
display['DATE'] = pd.to_datetime(display['DATE'], format='%Y-%m-%d').dt.strftime('%d-%m-%Y')

# The original 'Date' column is superseded by 'DATE'.
display = display.drop(columns='Date')

# dropna returns a new frame, so the result must be assigned back. Rows where
# both SPITFIRE and HURRICANE are missing are removed.
display = display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')

#print(display)
Best Regards,
Sandeep

GANGA SANDEEP KUMAR
Reply


Messages In This Thread
RE: Obtaining Correct Date In Pandas DataFrame - by sandeep_ganga - Jan-27-2020, 12:12 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Add NER output to pandas dataframe dg3000 0 181 Apr-22-2024, 08:14 PM
Last Post: dg3000
  HTML Decoder pandas dataframe column mbrown009 3 1,096 Sep-29-2023, 05:56 PM
Last Post: deanhystad
  Pandas read csv file in 'date/time' chunks MorganSamage 4 1,748 Feb-13-2023, 11:24 AM
Last Post: MorganSamage
  Use pandas to obtain cartesian product between a dataframe of int and equations? haihal 0 1,140 Jan-06-2023, 10:53 PM
Last Post: haihal
  Pandas Dataframe Filtering based on rows mvdlm 0 1,470 Apr-02-2022, 06:39 PM
Last Post: mvdlm
  Pandas dataframe: calculate metrics by year mcva 1 2,357 Mar-02-2022, 08:22 AM
Last Post: mcva
  Pandas dataframe comparing anto5 0 1,289 Jan-30-2022, 10:21 AM
Last Post: anto5
  PANDAS: DataFrame | Replace and others questions moduki1 2 1,832 Jan-10-2022, 07:19 PM
Last Post: moduki1
  PANDAS: DataFrame | Saving the wrong value moduki1 0 1,580 Jan-10-2022, 04:42 PM
Last Post: moduki1
  update values in one dataframe based on another dataframe - Pandas iliasb 2 9,384 Aug-14-2021, 12:38 PM
Last Post: jefsummers

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020