Python Forum

Full Version: Filtering Excel Document Data Based On Numerical Values
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Pages: 1 2 3 4
Hi GodMaster,

The TypeError issue I had, has been fixed, the FreeLance Python Programmer, updated the Code.
He reconvereted the Original .xls File to.csv format, and told me to save the converted File, to my Desktop. Here is the updated Python Code :-

import pandas as pd
         
data = pd.read_csv(r'C:\Users\Edward\Desktop\Sheet1-Table 1.csv')      
pd.options.display.max_rows = 1000   
pd.options.display.max_columns = 1000
 
df = pd.DataFrame(data, columns= ['Venue','A/C','DISPLAY/','Date','BID','Durn'])
#df[(df['Venue'].str.contains('[a-zA-Z]') & (df['DISPLAY/'].str.contains('DISPLAY') & df['A/C'].str.contains("DHS|DAK|HS|SPIT")) & (df['A/C'] != 'LHS') & (df['A/C'] != 'LANC'))] 
#df["Date"].fillna("No Date", inplace = True)   
df["Date"].fillna(method='ffill', inplace = True)
df['A/C'].unique().tolist()  
rename_map = {
    'DAK': 'D',
    'SPIT': 'S',
    'LANC': 'L',
    'HURRI': 'H',
    'PARA': 'P'
}
df['A/C std'] = df['A/C'].replace(rename_map)
print(df['A/C std'].unique().tolist())
print("=======")  
selected = df.loc[df['DISPLAY/'] == 'DISPLAY']
#selected = df.loc[df['DISPLAY/'] == 'FLYPAST']
#selected = df.loc[df['DISPLAY/'].isin(['DISPLAY', 'FLYPAST'])]
#selected = df.loc[df['A/C'].isin(['DS', 'DH', 'DHS', 'SD', 'HSD'])]
 
df1=df
groupby_venue_date = selected.groupby(['Venue', 'BID', 'Date', 'DISPLAY/']) 
aircraft = groupby_venue_date['A/C std'].apply(''.join).rename('Aircraft-combined')
def sorted_string(s):
    return ''.join(sorted(s))   
def remove_duplicate_chars(s):
    return ''.join(set(s))
    
    
#####################
#####################
#####################
#####################
    
    
df=pd.DataFrame(aircraft)
#print(df) #original df
      
     
indexNames = df[~df['Aircraft-combined'].str.contains('D',na=False)].index
df.drop(indexNames , inplace=True)
#print(df)#prints all rows having D and removes remaining rows
    
df['Aircraft-combined']=df['Aircraft-combined'].replace(to_replace='SD', value='DS')
df['Aircraft-combined']=df['Aircraft-combined'].replace(to_replace=['HSD','HDS', 'SHSD'], value='DHS')
#print(df)#replaces the SD with DS and HSD,HDS and SHSD with DHS
      
   
     
df = df.drop(df[df['Aircraft-combined'] == 'D'].index)#remove value that match letter D
df = df.drop(df[df['Aircraft-combined'] == 'DD'].index)
df = df.drop(df[df['Aircraft-combined'] == 'DL'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LD'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LDS'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LSSD'].index)
#df = df.drop(df[df['Aircraft-combined'] != 'D'].index)#if need the rows that match only letter D in selected dataframe column
   
   
   
#column names before alignment
#print("column names: before change  ", list(df.columns.values.tolist()))
#print("=======")
#print(df)
#print("=======")  
    
   
#to fix column name alignment 
df.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
df = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','BID','Date','DISPLAY/','Aircraft-combined'], header=0 )
   
   
#column names before alignment
#print("column names:  after change ",list(df.columns.values.tolist()))
#print("=======")
#print(df)
#print("=======")
 
 
###My code Starts here
 
x,y,z=df['Venue'],df['BID'],df['Date']
x=list(x)
y=list(y)
z=list(z)
import numpy as np
 
h=np.array(y)
np.unique(h)
 
yy=[]
for jj in y:
    yy.append(str(jj))
 
t= df1['Venue'].isin(x)
df2=df1[t]
df2
 
g=df2['Date'].isin(z)
df3=df2[g]
df3=pd.DataFrame(df3)
 
df3
 
g=df3['BID'].isin(yy)
df4=df3[g]
df4=pd.DataFrame(df4)
 
 
df4
 
column_names = ['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined']
dff = pd.DataFrame(columns = column_names)
dff.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
dff = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined'], header=0 )
df4
df4.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
df4 = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined'], header=0 )
for x in range(0,len(df4)):
    y=x+1
    try:
        if ((((df4.iloc[x][6]))=='DSH') or (((df4.iloc[x][6]))=='SD') or (((df4.iloc[x][6]))=='DS') or (((df4.iloc[x][6]))=='HSD') or (((df4.iloc[x][6]))=='HDS') or (((df4.iloc[x][6]))=='SHSD') or (((df4.iloc[x][6]))=='DHS')):
            dff3 ={'Venue':df4.iloc[x][0],'A/C':df4.iloc[x][1],'DISPLAY/':df4.iloc[x][2],'Date':df4.iloc[x][3],'BID':df4.iloc[x][4],'Durn':df4.iloc[x][5],'Aircraft-combined':df4.iloc[x][6]}
            dff = dff.append(dff3,ignore_index=True)
             
             
        elif ((df4.iloc[x][3]==df4.iloc[y][3]) and (df4.iloc[x][4]==df4.iloc[y][4])):
            sidhu=df4.iloc[x][6]+df4.iloc[y][6]
            dff1 = {'Venue':df4.iloc[x][0],'A/C':df4.iloc[x][1],'DISPLAY/':df4.iloc[x][2],'Date':df4.iloc[x][3],'BID':df4.iloc[x][4],'Durn':df4.iloc[x][5],'Aircraft-combined':sidhu}
            dff2 = {'Venue':df4.iloc[y][0],'A/C':df4.iloc[y][1],'DISPLAY/':df4.iloc[y][2],'Date':df4.iloc[y][3],'BID':df4.iloc[y][4],'Durn':df4.iloc[y][5],'Aircraft-combined':sidhu}
            dff = dff.append(dff1,ignore_index=True)
            dff = dff.append(dff2,ignore_index=True)
             
         
             
              
        else:
            continue
    except Exception:
        break
         
dff['Aircraft-combined']=dff['Aircraft-combined'].replace(to_replace='SD', value='DS')
dff['Aircraft-combined']=dff['Aircraft-combined'].replace(to_replace=['HSD','HDS', 'SHSD'], value='DHS')
dff['Dakota And Fighters In The Same Display Slot']='Yes'
 
 
for x in range(0,len(dff)):
    y=x+1
    try:
        if ((dff.iloc[x][3]==dff.iloc[y][3]) and (dff.iloc[x][4]==dff.iloc[y][4])):
            if dff.iloc[x][5]==dff.iloc[y][5]:
                dff['Dakota And Fighters In The Same Display Slot'][x]='Yes'
                dff['Dakota And Fighters In The Same Display Slot'][y]='Yes'
                 
            else:
                dff['Dakota And Fighters In The Same Display Slot'][x]='No'
                dff['Dakota And Fighters In The Same Display Slot'][y]='No'
                 
                 
             
 
             
         
             
              
        else:
            continue
    except Exception:
        break
dff=dff.drop('A/C',axis=1)
dff=dff.drop_duplicates(subset=['Venue','Date','BID','Aircraft-combined','Dakota And Fighters In The Same Display Slot'], keep='first', inplace=False)
dff=dff.drop_duplicates(subset=None, keep='first', inplace=False)
 
dff['Durn']= pd.to_datetime(dff['Durn'],format='%M').apply(pd.Timestamp)
dff['Durn'] = pd.to_datetime(dff['Durn'], format='%H:%M:%S').dt.time
 
dff.at[14,'Aircraft-combined']='DHS'
dff.at[18,'Aircraft-combined']='DHS'
dff.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2009-Code.csv')
dff
So when you run the Code GodMaster, save the .csv File, I have attached to this email, to your Desktop. Just alter the relevant File Paths, to suit, but make sure the path is to your Desktop, and don't rename the .csv File. And the DataFrame Output should work, when you run the Code in Jupyter Notebook, it does for me now.

Regards

Eddie Winch
Pages: 1 2 3 4