Python Forum
No Output In Pandas DataFrame Query
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
No Output In Pandas DataFrame Query
#1
Hi there,

I have the following Python Code, which I run in Jupyter Notebook :-

import pandas as pd
import requests
from bs4 import BeautifulSoup
   
res1 = requests.get("http://web.archive.org/web/20020602133812/http://www.raf.mod.uk/bbmf/calendar.html")     
res2 = requests.get("http://web.archive.org/web/20020803081304/http://www.raf.mod.uk/bbmf/calendar.html")
soup1 = BeautifulSoup(res1.content,'lxml')
soup2 = BeautifulSoup(res2.content,'lxml')
table1 = soup1.find_all('table', align="CENTER")[0]
table2 = soup2.find_all('table', align="CENTER")[0]
df1 = pd.read_html(str(table1))
df2 = pd.read_html(str(table2))


   
df1 = df1[0]
df2 = df2[0]
df = pd.concat([df2, df1], axis=0)
  
  
##################
##################
##################
  
  
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
 
 
#make df[0] to list
list=[]
for i in df[0]:
    list.append(i)
  
#reverse the list to make split to sublist easier
list.reverse()
  
#split list to sublist using condition len(val)> 2 
size = len(list) 
idx_list = [idx + 1 for idx, val in
            enumerate(list) if len(val) > 2] 
res = [list[i: j] for i, j in
        zip([0] + idx_list, idx_list + 
        ([size] if idx_list[-1] != size else []))] 
  
#make monthname to numbers and print
for i in res:
    for j in range(len(i)):
        if i[j].upper()=='JUNE':
            i[j]='6'
        elif i[j].upper() =='MAY':
            i[j]='5'
        elif i[j].upper() == 'APRIL':
            i[j]='4'
        elif i[j].upper() =='JANUARY':
            i[j]='1'
        elif i[j].upper() == 'FEBRUARY':
            i[j]='2'
        elif i[j].upper() =='MARCH':
            i[j]='3'
        elif i[j].upper() == 'JULY':
            i[j]='7'        
        elif i[j].upper() =='AUGUST':
            i[j]='8'
        elif i[j].upper() == 'SEPTEMBER':
            i[j]='9'
        elif i[j].upper() =='OCTOBER':
            i[j]='10'
        elif i[j].upper() == 'NOVEMBER':
            i[j]='11'
        elif i[j].upper() =='DECEMBER':
            i[j]='12'       
  
  
#append string and append to new list
finallist=[]
for i in res:
    for j in range(len(i)):
        if j < len(i) - 1:
            #print(f'2004-{i[-1]}-{i[j]}')
            finallist.append(f'2002-{i[-1]}-{i[j]}')
#print(finallist)
finallist.reverse()
  
  
  
  
#print("\n=== ORIGINAL DF ===\n")
#print(df)
  
  
  
#convert dataframe to list
listtemp1=df.values.tolist()
  
#replace found below values with 0000_removable
removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL']
for i in listtemp1:
    for j in range(len(i)):
        for place in removelist:
            if str(i[j]).upper()==place:
                i[j]='0000_removable'
            else:
                pass
  
                  
#remove sublists with the replaced values we redirected
dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable']
res = [i for i in listtemp1 if i != dellist]
  
#assign back to dataframe DF3
df3=pd.DataFrame()
df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA'])
#print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n")
#print(df3)
  
  
#now assign that sorted date list to dataframe DF3
idx = 0
df3.insert(loc=idx, column='DATE', value=finallist)
pd.options.display.max_rows = 500
  
#print("\n=== FINAL DF3 after joining the edited date format column list ===\n")
#print(df3)
  
  
#validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column
#df3['ED1']=  pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day
#df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1)
  
  
#convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d')
  
  
  
##################
##################
##################
  
  
 
 
  
 
#df3 = df3.rename(columns=df.iloc[0])
#df3 = df.iloc[2:]
#df3.head(15)
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

df3['LANCASTER'] = df3['LANCASTER'].replace({'X': 'L'})
df3['HURRICANE'] = df3['HURRICANE'].replace({'X': 'H'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X': 'S'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X x 2': 'SS'})
df3['DAKOTA'] = df3['DAKOTA'].replace({'X': 'D'})

#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['LANCASTER'] != 'X')]    
#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('Jun')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#Months = May Jun Jul Aug Sep
#Months = -05- -06- -07- -08- -09-

#print(display) 

display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')
display=display.sort_values(by=['DATE'])
##added two lines above to convert date format
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')

#display.drop('LANCASTER', axis=1, inplace=True)
#display.drop('Date', axis=1, inplace=True)
display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')
display=display[['LOCATION','DATE','DAKOTA','HURRICANE','SPITFIRE']]
display=display.fillna('--')
display.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2002-Code (Dakota With Fighters).csv')
display

#display[display['DATE'].str.contains('Jun')]

#print(display)
However when I run the Code, I don't get the desired Output, i.e. only the Columns are shown.

When I run the Code, using the Line above the display = line of Code I am using, i.e :-

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]
Then run the following Code, in the next Cell i.e. :-

display[display['DATE'].str.contains('Jun')]
I get the correct Output.

What do I need to change in the following Line of Code :-

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('Jun')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')
So that I do get the correct Output, running that line of Code, instead of the first one ?

i.e. with this bit of that line of Code ? :-

(df3['DATE'].str.contains('Jun'))
Any help and info, anyone could give me, would be much appreciated.

Best Regards

Eddie Winch Smile
Reply
#2
Hi there,

I have sorted out the issue I was having, The Jun was being seen, as a number 6 rather than Jun, I tried

changing this bit of Code :-

(df3['DATE'].str.contains('Jun'))
to the following :-

(df3['DATE'].str.contains('-6-'))
And now I do get the output, I wan't from that line, when I run the Code.

Also what line of Code do I need to type, to change index Position Value numbers ? I want to change 6 to 303 and 20 to 36 ? i.e 6 is at index position 21 and 20 at position 23. So what do you refer to values 6 and 20 as ? is that position or something else ? Sorry about the incorrect DataFrame layout, I wasn't sure how to get it displaying right.

Here is the DataFrame Output :-

Output:
LOCATION DATE DAKOTA HURRICANE SPITFIRE 179 Stafford 27-Apr-2002 D H S 187 Jersey 09-May-2002 D -- S 212 Mablethorpe 01-Jun-2002 D H S 213 Bury, Lancashire 01-Jun-2002 D H S 216 Portrush 02-Jun-2002 D H S 231 Bangor 04-Jun-2002 D H S 232 Willoughby 04-Jun-2002 D H S 253 Elvington 12-Jun-2002 D -- S 254 Bracebridge 15-Jun-2002 D H S 255 Grantham 15-Jun-2002 D H S 256 Loughborough 15-Jun-2002 D H S 257 Ripley 15-Jun-2002 D H S 258 River Mersey 15-Jun-2002 D H S 267 River Mersey 16-Jun-2002 D H S 268 Brinsley 16-Jun-2002 D H S 269 Lincoln 16-Jun-2002 D H S 293 Upperthong 30-Jun-2002 D H S 294 Skipton 30-Jun-2002 D H S 295 Elvington 30-Jun-2002 D H S 301 Gunthorpe 06-Jul-2002 D H -- 6 Jurby, Isle of Man 06-Jul-2002 D H S 302 Bridgnorth 06-Jul-2002 D H -- 20 Jurby, Isle of Man 07-Jul-2002 D H S 39 Inkersall 13-Jul-2002 D H S 40 Wirral 13-Jul-2002 D H S 37 Overseal 13-Jul-2002 D H S 38 Crich 13-Jul-2002 D H S 45 Wirral 14-Jul-2002 D H S 51 Machynlleth 20-Jul-2002 D -- S 76 Kielder Forest 03-Aug-2002 D H S 75 Scunthorpe 03-Aug-2002 D H S 77 Windermere 03-Aug-2002 D H S 82 Windermere 04-Aug-2002 D H S 99 Sleap Airfield, Shropshire 18-Aug-2002 D -- S 123 Shoreham 31-Aug-2002 D H S 122 Hendon 31-Aug-2002 D H S 126 Shoreham 01-Sep-2002 D H S 134 Coltishall 06-Sep-2002 D H S 135 Duxford 07-Sep-2002 D H S 142 Duxford 08-Sep-2002 D H S 143 Somerby 08-Sep-2002 D H S 144 Gedling 08-Sep-2002 D H S ​
Any help would be very much appreciated.

Best Regards

Eddie Winch Smile

Update :-

I have been working on this for hours, looking at various Forum Posts on the Internet and other info, and have solved the issue finally, the DataFrame Output is all correct now.

Here is the finished Code :-

import pandas as pd
import requests
from bs4 import BeautifulSoup
from contextlib import suppress  

res1 = requests.get("http://web.archive.org/web/20020602133812/http://www.raf.mod.uk/bbmf/calendar.html")     
res2 = requests.get("http://web.archive.org/web/20020803081304/http://www.raf.mod.uk/bbmf/calendar.html")
soup1 = BeautifulSoup(res1.content,'lxml')
soup2 = BeautifulSoup(res2.content,'lxml')
table1 = soup1.find_all('table', align="CENTER")[0]
table2 = soup2.find_all('table', align="CENTER")[0]
df1 = pd.read_html(str(table1))
df2 = pd.read_html(str(table2))


   
df1 = df1[0]
df2 = df2[0]
df = pd.concat([df2, df1], axis=0)
  
  
##################
##################
##################
  
  
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
 
 
#make df[0] to list
list=[]
for i in df[0]:
    list.append(i)
  
#reverse the list to make split to sublist easier
list.reverse()
  
#split list to sublist using condition len(val)> 2 
size = len(list) 
idx_list = [idx + 1 for idx, val in
            enumerate(list) if len(val) > 2] 
res = [list[i: j] for i, j in
        zip([0] + idx_list, idx_list + 
        ([size] if idx_list[-1] != size else []))] 
  
#make monthname to numbers and print
for i in res:
    for j in range(len(i)):
        if i[j].upper()=='JUNE':
            i[j]='6'
        elif i[j].upper() =='MAY':
            i[j]='5'
        elif i[j].upper() == 'APRIL':
            i[j]='4'
        elif i[j].upper() =='JANUARY':
            i[j]='1'
        elif i[j].upper() == 'FEBRUARY':
            i[j]='2'
        elif i[j].upper() =='MARCH':
            i[j]='3'
        elif i[j].upper() == 'JULY':
            i[j]='7'        
        elif i[j].upper() =='AUGUST':
            i[j]='8'
        elif i[j].upper() == 'SEPTEMBER':
            i[j]='9'
        elif i[j].upper() =='OCTOBER':
            i[j]='10'
        elif i[j].upper() == 'NOVEMBER':
            i[j]='11'
        elif i[j].upper() =='DECEMBER':
            i[j]='12'       
  
  
#append string and append to new list
finallist=[]
for i in res:
    for j in range(len(i)):
        if j < len(i) - 1:
            #print(f'2004-{i[-1]}-{i[j]}')
            finallist.append(f'2002-{i[-1]}-{i[j]}')
#print(finallist)
finallist.reverse()
  
  
  
  
#print("\n=== ORIGINAL DF ===\n")
#print(df)
  
  
  
#convert dataframe to list
listtemp1=df.values.tolist()
  
#replace found below values with 0000_removable
removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL']
for i in listtemp1:
    for j in range(len(i)):
        for place in removelist:
            if str(i[j]).upper()==place:
                i[j]='0000_removable'
            else:
                pass
  
                  
#remove sublists with the replaced values we redirected
dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable']
res = [i for i in listtemp1 if i != dellist]
  
#assign back to dataframe DF3
df3=pd.DataFrame()
df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA'])
#print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n")
#print(df3)
  
  
#now assign that sorted date list to dataframe DF3
idx = 0
df3.insert(loc=idx, column='DATE', value=finallist)
pd.options.display.max_rows = 500
  
#print("\n=== FINAL DF3 after joining the edited date format column list ===\n")
#print(df3)
  
  
#validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column
#df3['ED1']=  pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day
#df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1)
  
  
#convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d')
  
  
  
##################
##################
##################
  
  
 
 
  
 
#df3 = df3.rename(columns=df.iloc[0])
#df3 = df.iloc[2:]
#df3.head(15)
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

df3['LANCASTER'] = df3['LANCASTER'].replace({'X': 'L'})
df3['HURRICANE'] = df3['HURRICANE'].replace({'X': 'H'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X': 'S'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X x 2': 'SS'})
df3['DAKOTA'] = df3['DAKOTA'].replace({'X': 'D'})


#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['LANCASTER'] != 'X')]    
display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6-')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#Months = May Jun Jul Aug Sep
#Months = -5- -6- -7- -8- -9-

#print(display) 

#display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')
#display=display.sort_values(by=['DATE'])
##added two lines above to convert date format
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')

display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')

with suppress(Exception):
 display.drop([303], inplace=True) 
display.rename(index={6: '303', 20: '36'}, inplace=True)
#display.rename(index={6: '19'}, inplace=True)
display=display.rename_axis('MyIdx')
display=display.sort_values(by=['DATE','MyIdx'])
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')
display.drop_duplicates(subset=['LOCATION', 'DATE'], keep='last', inplace=True)

#display.drop('LANCASTER', axis=1, inplace=True)
#display.drop('Date', axis=1, inplace=True)
display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')
display=display[['LOCATION','DATE','DAKOTA','HURRICANE','SPITFIRE']]
display=display.fillna('--')
display.reset_index(drop=True, inplace=True)
display.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2002-Code (Dakota With Fighters).csv')

display

#print(display)
Smile Smile
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  HTML Decoder pandas dataframe column mbrown009 3 1,028 Sep-29-2023, 05:56 PM
Last Post: deanhystad
  Use pandas to obtain cartesian product between a dataframe of int and equations? haihal 0 1,118 Jan-06-2023, 10:53 PM
Last Post: haihal
  Pandas Dataframe Filtering based on rows mvdlm 0 1,430 Apr-02-2022, 06:39 PM
Last Post: mvdlm
  Pandas dataframe: calculate metrics by year mcva 1 2,311 Mar-02-2022, 08:22 AM
Last Post: mcva
  Pandas dataframe comparing anto5 0 1,261 Jan-30-2022, 10:21 AM
Last Post: anto5
  PANDAS: DataFrame | Replace and others questions moduki1 2 1,797 Jan-10-2022, 07:19 PM
Last Post: moduki1
  PANDAS: DataFrame | Saving the wrong value moduki1 0 1,551 Jan-10-2022, 04:42 PM
Last Post: moduki1
  update values in one dataframe based on another dataframe - Pandas iliasb 2 9,259 Aug-14-2021, 12:38 PM
Last Post: jefsummers
  empty row in pandas dataframe rwahdan 3 2,446 Jun-22-2021, 07:57 PM
Last Post: snippsat
Question Pandas - Creating additional column in dataframe from another column Azureaus 2 2,963 Jan-11-2021, 09:53 PM
Last Post: Azureaus

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020