No Output In Pandas DataFrame Query

eddywinch82 · Aug-15-2020, 05:47 PM

Hi there,

I have the following Python Code, which I run in Jupyter Notebook :-

import pandas as pd
import requests
from bs4 import BeautifulSoup
   
res1 = requests.get("http://web.archive.org/web/20020602133812/http://www.raf.mod.uk/bbmf/calendar.html")     
res2 = requests.get("http://web.archive.org/web/20020803081304/http://www.raf.mod.uk/bbmf/calendar.html")
soup1 = BeautifulSoup(res1.content,'lxml')
soup2 = BeautifulSoup(res2.content,'lxml')
table1 = soup1.find_all('table', align="CENTER")[0]
table2 = soup2.find_all('table', align="CENTER")[0]
df1 = pd.read_html(str(table1))
df2 = pd.read_html(str(table2))


   
df1 = df1[0]
df2 = df2[0]
df = pd.concat([df2, df1], axis=0)
  
  
##################
##################
##################
  
  
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
 
 
#make df[0] to list
list=[]
for i in df[0]:
    list.append(i)
  
#reverse the list to make split to sublist easier
list.reverse()
  
#split list to sublist using condition len(val)> 2 
size = len(list) 
idx_list = [idx + 1 for idx, val in
            enumerate(list) if len(val) > 2] 
res = [list[i: j] for i, j in
        zip([0] + idx_list, idx_list + 
        ([size] if idx_list[-1] != size else []))] 
  
#make monthname to numbers and print
for i in res:
    for j in range(len(i)):
        if i[j].upper()=='JUNE':
            i[j]='6'
        elif i[j].upper() =='MAY':
            i[j]='5'
        elif i[j].upper() == 'APRIL':
            i[j]='4'
        elif i[j].upper() =='JANUARY':
            i[j]='1'
        elif i[j].upper() == 'FEBRUARY':
            i[j]='2'
        elif i[j].upper() =='MARCH':
            i[j]='3'
        elif i[j].upper() == 'JULY':
            i[j]='7'        
        elif i[j].upper() =='AUGUST':
            i[j]='8'
        elif i[j].upper() == 'SEPTEMBER':
            i[j]='9'
        elif i[j].upper() =='OCTOBER':
            i[j]='10'
        elif i[j].upper() == 'NOVEMBER':
            i[j]='11'
        elif i[j].upper() =='DECEMBER':
            i[j]='12'       
  
  
#append string and append to new list
finallist=[]
for i in res:
    for j in range(len(i)):
        if j < len(i) - 1:
            #print(f'2004-{i[-1]}-{i[j]}')
            finallist.append(f'2002-{i[-1]}-{i[j]}')
#print(finallist)
finallist.reverse()
  
  
  
  
#print("\n=== ORIGINAL DF ===\n")
#print(df)
  
  
  
#convert dataframe to list
listtemp1=df.values.tolist()
  
#replace found below values with 0000_removable
removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL']
for i in listtemp1:
    for j in range(len(i)):
        for place in removelist:
            if str(i[j]).upper()==place:
                i[j]='0000_removable'
            else:
                pass
  
                  
#remove sublists with the replaced values we redirected
dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable']
res = [i for i in listtemp1 if i != dellist]
  
#assign back to dataframe DF3
df3=pd.DataFrame()
df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA'])
#print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n")
#print(df3)
  
  
#now assign that sorted date list to dataframe DF3
idx = 0
df3.insert(loc=idx, column='DATE', value=finallist)
pd.options.display.max_rows = 500
  
#print("\n=== FINAL DF3 after joining the edited date format column list ===\n")
#print(df3)
  
  
#validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column
#df3['ED1']=  pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day
#df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1)
  
  
#convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d')
  
  
  
##################
##################
##################
  
  
 
 
  
 
#df3 = df3.rename(columns=df.iloc[0])
#df3 = df.iloc[2:]
#df3.head(15)
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

df3['LANCASTER'] = df3['LANCASTER'].replace({'X': 'L'})
df3['HURRICANE'] = df3['HURRICANE'].replace({'X': 'H'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X': 'S'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X x 2': 'SS'})
df3['DAKOTA'] = df3['DAKOTA'].replace({'X': 'D'})

#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['LANCASTER'] != 'X')]    
#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('Jun')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#Months = May Jun Jul Aug Sep
#Months = -05- -06- -07- -08- -09-

#print(display) 

display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')
display=display.sort_values(by=['DATE'])
##added two lines above to convert date format
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')

#display.drop('LANCASTER', axis=1, inplace=True)
#display.drop('Date', axis=1, inplace=True)
display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')
display=display[['LOCATION','DATE','DAKOTA','HURRICANE','SPITFIRE']]
display=display.fillna('--')
display.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2002-Code (Dakota With Fighters).csv')
display

#display[display['DATE'].str.contains('Jun')]

#print(display)

However when I run the Code, I don't get the desired Output, i.e. only the Columns are shown.

When I run the Code, using the Line above the display = line of Code I am using, i.e :-

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]

Then run the following Code, in the next Cell i.e. :-

display[display['DATE'].str.contains('Jun')]

I get the correct Output.

What do I need to change in the following Line of Code :-

display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('Jun')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')

So that I do get the correct Output, running that line of Code, instead of the first one ?

i.e. with this bit of that line of Code ? :-

(df3['DATE'].str.contains('Jun'))

Any help and info, anyone could give me, would be much appreciated.

Best Regards

Eddie Winch Smile

eddywinch82 · (This post was last modified: Aug-17-2020, 09:25 PM by eddywinch82.)

Hi there,

I have sorted out the issue I was having, The Jun was being seen, as a number 6 rather than Jun, I tried

changing this bit of Code :-

(df3['DATE'].str.contains('Jun'))

to the following :-

(df3['DATE'].str.contains('-6-'))

And now I do get the output, I wan't from that line, when I run the Code.

Also what line of Code do I need to type, to change index Position Value numbers ? I want to change 6 to 303 and 20 to 36 ? i.e 6 is at index position 21 and 20 at position 23. So what do you refer to values 6 and 20 as ? is that position or something else ? Sorry about the incorrect DataFrame layout, I wasn't sure how to get it displaying right.

Here is the DataFrame Output :-

Output:LOCATION	DATE	DAKOTA	HURRICANE	SPITFIRE
179	   Stafford	27-Apr-2002	D	H	S
187	   Jersey	09-May-2002	D	--	S
212	   Mablethorpe	01-Jun-2002	D	H	S
213    Bury, Lancashire	01-Jun-2002	D	H	S
216	   Portrush	02-Jun-2002	D	H	S
231	   Bangor	04-Jun-2002	D	H	S
232	   Willoughby	04-Jun-2002	D	H	S
253	   Elvington	12-Jun-2002	D	--	S
254	   Bracebridge	15-Jun-2002	D	H	S
255	   Grantham	15-Jun-2002	D	H	S
256	   Loughborough	15-Jun-2002	D	H	S
257	   Ripley	15-Jun-2002	D	H	S
258	   River Mersey	15-Jun-2002	D	H	S
267	   River Mersey	16-Jun-2002	D	H	S
268	   Brinsley	16-Jun-2002	D	H	S
269	   Lincoln	16-Jun-2002	D	H	S
293	   Upperthong	30-Jun-2002	D	H	S
294	   Skipton	30-Jun-2002	D	H	S
295	   Elvington	30-Jun-2002	D	H	S
301	   Gunthorpe	06-Jul-2002	D	H	--
6	   Jurby, Isle of Man	06-Jul-2002	D	H	S
302	   Bridgnorth	06-Jul-2002	D	H	--
20	   Jurby, Isle of Man	07-Jul-2002	D	H	S
39	   Inkersall	13-Jul-2002	D	H	S
40	   Wirral	13-Jul-2002	D	H	S
37	   Overseal	13-Jul-2002	D	H	S
38	   Crich	13-Jul-2002	D	H	S
45	   Wirral	14-Jul-2002	D	H	S
51	   Machynlleth	20-Jul-2002	D	--	S
76	   Kielder Forest	03-Aug-2002	D	H	S
75	   Scunthorpe	03-Aug-2002	D	H	S
77	   Windermere	03-Aug-2002	D	H	S
82	   Windermere	04-Aug-2002	D	H	S
99	   Sleap Airfield, Shropshire	18-Aug-2002	D	--	S
123	   Shoreham	31-Aug-2002	D	H	S
122	   Hendon	31-Aug-2002	D	H	S
126	   Shoreham	01-Sep-2002	D	H	S
134	   Coltishall	06-Sep-2002	D	H	S
135	   Duxford	07-Sep-2002	D	H	S
142	   Duxford	08-Sep-2002	D	H	S
143	   Somerby	08-Sep-2002	D	H	S
144	   Gedling	08-Sep-2002	D	H	S

Any help would be very much appreciated.

Best Regards

Eddie Winch Smile

Update :-

I have been working on this for hours, looking at various Forum Posts on the Internet and other info, and have solved the issue finally, the DataFrame Output is all correct now.

Here is the finished Code :-

import pandas as pd
import requests
from bs4 import BeautifulSoup
from contextlib import suppress  

res1 = requests.get("http://web.archive.org/web/20020602133812/http://www.raf.mod.uk/bbmf/calendar.html")     
res2 = requests.get("http://web.archive.org/web/20020803081304/http://www.raf.mod.uk/bbmf/calendar.html")
soup1 = BeautifulSoup(res1.content,'lxml')
soup2 = BeautifulSoup(res2.content,'lxml')
table1 = soup1.find_all('table', align="CENTER")[0]
table2 = soup2.find_all('table', align="CENTER")[0]
df1 = pd.read_html(str(table1))
df2 = pd.read_html(str(table2))


   
df1 = df1[0]
df2 = df2[0]
df = pd.concat([df2, df1], axis=0)
  
  
##################
##################
##################
  
  
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
 
 
#make df[0] to list
list=[]
for i in df[0]:
    list.append(i)
  
#reverse the list to make split to sublist easier
list.reverse()
  
#split list to sublist using condition len(val)> 2 
size = len(list) 
idx_list = [idx + 1 for idx, val in
            enumerate(list) if len(val) > 2] 
res = [list[i: j] for i, j in
        zip([0] + idx_list, idx_list + 
        ([size] if idx_list[-1] != size else []))] 
  
#make monthname to numbers and print
for i in res:
    for j in range(len(i)):
        if i[j].upper()=='JUNE':
            i[j]='6'
        elif i[j].upper() =='MAY':
            i[j]='5'
        elif i[j].upper() == 'APRIL':
            i[j]='4'
        elif i[j].upper() =='JANUARY':
            i[j]='1'
        elif i[j].upper() == 'FEBRUARY':
            i[j]='2'
        elif i[j].upper() =='MARCH':
            i[j]='3'
        elif i[j].upper() == 'JULY':
            i[j]='7'        
        elif i[j].upper() =='AUGUST':
            i[j]='8'
        elif i[j].upper() == 'SEPTEMBER':
            i[j]='9'
        elif i[j].upper() =='OCTOBER':
            i[j]='10'
        elif i[j].upper() == 'NOVEMBER':
            i[j]='11'
        elif i[j].upper() =='DECEMBER':
            i[j]='12'       
  
  
#append string and append to new list
finallist=[]
for i in res:
    for j in range(len(i)):
        if j < len(i) - 1:
            #print(f'2004-{i[-1]}-{i[j]}')
            finallist.append(f'2002-{i[-1]}-{i[j]}')
#print(finallist)
finallist.reverse()
  
  
  
  
#print("\n=== ORIGINAL DF ===\n")
#print(df)
  
  
  
#convert dataframe to list
listtemp1=df.values.tolist()
  
#replace found below values with 0000_removable
removelist=['LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA','DATE','JUNE','JANUARY','FEBRUARY','MARCH','MAY','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER','APRIL']
for i in listtemp1:
    for j in range(len(i)):
        for place in removelist:
            if str(i[j]).upper()==place:
                i[j]='0000_removable'
            else:
                pass
  
                  
#remove sublists with the replaced values we redirected
dellist=['0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable', '0000_removable']
res = [i for i in listtemp1 if i != dellist]
  
#assign back to dataframe DF3
df3=pd.DataFrame()
df3=pd.DataFrame(res, columns=['Date','LOCATION','LANCASTER','SPITFIRE','HURRICANE','DAKOTA'])
#print("\n=== AFTER REMOVE month and column names from DF, assigned to new as DF3 ===\n")
#print(df3)
  
  
#now assign that sorted date list to dataframe DF3
idx = 0
df3.insert(loc=idx, column='DATE', value=finallist)
pd.options.display.max_rows = 500
  
#print("\n=== FINAL DF3 after joining the edited date format column list ===\n")
#print(df3)
  
  
#validation logic if needed compare processed date from new joined "edited_Date_format" column with already existing "Date" column
#df3['ED1']=  pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d').dt.day
#df3['validation of date'] = df3.apply(lambda x: str(x['ED1']) == x['Date'], axis=1)
  
  
#convert df3['EDITED_DATE_FORMAT'] column from object to datetime64 foramt
#df3['EDITED_DATE_FORMAT']= pd.to_datetime(df3['EDITED_DATE_FORMAT'],format='%Y-%m-%d')
  
  
  
##################
##################
##################
  
  
 
 
  
 
#df3 = df3.rename(columns=df.iloc[0])
#df3 = df.iloc[2:]
#df3.head(15)
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

df3['LANCASTER'] = df3['LANCASTER'].replace({'X': 'L'})
df3['HURRICANE'] = df3['HURRICANE'].replace({'X': 'H'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X': 'S'})
df3['SPITFIRE'] = df3['SPITFIRE'].replace({'X x 2': 'SS'})
df3['DAKOTA'] = df3['DAKOTA'].replace({'X': 'D'})


#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('X')) & (df3['SPITFIRE'].str.contains('X', na=True)) & (df3['LANCASTER'] != 'X')]    
display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#display = df3[(df3['LOCATION'].str.contains('[a-zA-Z]')) & (df3['DATE'].str.contains('-6-')) & (df3['DAKOTA'].str.contains('D')) & (df3['SPITFIRE'].str.contains('S', na=True)) & (df3['LANCASTER'] != 'L')]    

#Months = May Jun Jul Aug Sep
#Months = -5- -6- -7- -8- -9-

#print(display) 

#display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')
#display=display.sort_values(by=['DATE'])
##added two lines above to convert date format
#display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')

display['DATE']= pd.to_datetime(display['DATE'],format='%Y-%m-%d')
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%m-%d-%Y')

with suppress(Exception):
 display.drop([303], inplace=True) 
display.rename(index={6: '303', 20: '36'}, inplace=True)
#display.rename(index={6: '19'}, inplace=True)
display=display.rename_axis('MyIdx')
display=display.sort_values(by=['DATE','MyIdx'])
display['DATE']= pd.to_datetime(display['DATE']).dt.strftime('%d-%b-%Y')
display.drop_duplicates(subset=['LOCATION', 'DATE'], keep='last', inplace=True)

#display.drop('LANCASTER', axis=1, inplace=True)
#display.drop('Date', axis=1, inplace=True)
display=display.dropna(subset=['SPITFIRE', 'HURRICANE'], how='all')
display=display[['LOCATION','DATE','DAKOTA','HURRICANE','SPITFIRE']]
display=display.fillna('--')
display.reset_index(drop=True, inplace=True)
display.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2002-Code (Dakota With Fighters).csv')

display

#print(display)

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	[Solved] Formatting cells of a pandas dataframe into an OpenDocument ods spreadsheet	Calab	1	992	Mar-01-2025, 04:51 AM Last Post: Calab
	Find duplicates in a pandas dataframe list column on other rows	Calab	2	2,563	Sep-18-2024, 07:38 PM Last Post: Calab
	Find strings by index from a list of indexes in a different Pandas dataframe column	Calab	3	1,837	Aug-26-2024, 04:52 PM Last Post: Calab
	Add NER output to pandas dataframe	dg3000	0	1,265	Apr-22-2024, 08:14 PM Last Post: dg3000
	HTML Decoder pandas dataframe column	mbrown009	3	2,901	Sep-29-2023, 05:56 PM Last Post: deanhystad
	Use pandas to obtain cartesian product between a dataframe of int and equations?	haihal	0	2,128	Jan-06-2023, 10:53 PM Last Post: haihal
	Pandas Dataframe Filtering based on rows	mvdlm	0	2,168	Apr-02-2022, 06:39 PM Last Post: mvdlm
	Pandas dataframe: calculate metrics by year	mcva	1	3,587	Mar-02-2022, 08:22 AM Last Post: mcva
	Pandas dataframe comparing	anto5	0	2,005	Jan-30-2022, 10:21 AM Last Post: anto5
	PANDAS: DataFrame \| Replace and others questions	moduki1	2	2,801	Jan-10-2022, 07:19 PM Last Post: moduki1

No Output In Pandas DataFrame Query

User Panel Messages

Announcements