Hi GodMaster,
Thankyou very much, for your interest in this Thread, and taking the time to attempt, to achieve what I
want, thats very much appreciated.
I have had help by a Python FreeLancer on the Internet, and he has sorted out what I want to achieve. Here below is the Finished Code :-
import pandas as pd
data = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2009-Code.csv')
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
df = pd.DataFrame(data, columns= ['Venue','A/C','DISPLAY/','Date','BID','Durn'])
#df[(df['Venue'].str.contains('[a-zA-Z]') & (df['DISPLAY/'].str.contains('DISPLAY') & df['A/C'].str.contains("DHS|DAK|HS|SPIT")) & (df['A/C'] != 'LHS') & (df['A/C'] != 'LANC'))]
#df["Date"].fillna("No Date", inplace = True)
df["Date"].fillna(method='ffill', inplace = True)
df['A/C'].unique().tolist()
rename_map = {
'DAK': 'D',
'SPIT': 'S',
'LANC': 'L',
'HURRI': 'H',
'PARA': 'P'
}
df['A/C std'] = df['A/C'].replace(rename_map)
print(df['A/C std'].unique().tolist())
print("=======")
selected = df.loc[df['DISPLAY/'] == 'DISPLAY']
#selected = df.loc[df['DISPLAY/'] == 'FLYPAST']
#selected = df.loc[df['DISPLAY/'].isin(['DISPLAY', 'FLYPAST'])]
#selected = df.loc[df['A/C'].isin(['DS', 'DH', 'DHS', 'SD', 'HSD'])]
df1=df
groupby_venue_date = selected.groupby(['Venue', 'BID', 'Date', 'DISPLAY/'])
aircraft = groupby_venue_date['A/C std'].apply(''.join).rename('Aircraft-combined')
def sorted_string(s):
return ''.join(sorted(s))
def remove_duplicate_chars(s):
return ''.join(set(s))
#####################
#####################
#####################
#####################
df=pd.DataFrame(aircraft)
#print(df) #original df
indexNames = df[~df['Aircraft-combined'].str.contains('D',na=False)].index
df.drop(indexNames , inplace=True)
#print(df)#prints all rows having D and removes remaining rows
df['Aircraft-combined']=df['Aircraft-combined'].replace(to_replace='SD', value='DS')
df['Aircraft-combined']=df['Aircraft-combined'].replace(to_replace=['HSD','HDS', 'SHSD'], value='DHS')
#print(df)#replaces the SD with DS and HSD,HDS and SHSD with DHS
df = df.drop(df[df['Aircraft-combined'] == 'D'].index)#remove value that match letter D
df = df.drop(df[df['Aircraft-combined'] == 'DD'].index)
df = df.drop(df[df['Aircraft-combined'] == 'DL'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LD'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LDS'].index)
df = df.drop(df[df['Aircraft-combined'] == 'LSSD'].index)
#df = df.drop(df[df['Aircraft-combined'] != 'D'].index)#if need the rows that match only letter D in selected dataframe column
#column names before alignment
#print("column names: before change ", list(df.columns.values.tolist()))
#print("=======")
#print(df)
#print("=======")
#to fix column name alignment
df.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
df = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','BID','Date','DISPLAY/','Aircraft-combined'], header=0 )
#column names before alignment
#print("column names: after change ",list(df.columns.values.tolist()))
#print("=======")
#print(df)
#print("=======")
###My code Starts here
x,y,z=df['Venue'],df['BID'],df['Date']
x=list(x)
y=list(y)
z=list(z)
import numpy as np
h=np.array(y)
np.unique(h)
yy=[]
for jj in y:
yy.append(str(jj))
t= df1['Venue'].isin(x)
df2=df1[t]
df2
g=df2['Date'].isin(z)
df3=df2[g]
df3=pd.DataFrame(df3)
df3
g=df3['BID'].isin(yy)
df4=df3[g]
df4=pd.DataFrame(df4)
df4
column_names = ['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined']
dff = pd.DataFrame(columns = column_names)
dff.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
dff = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined'], header=0 )
df4
df4.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv')
df4 = pd.read_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\tempfile.csv',names=['Venue','A/C','DISPLAY/','Date','BID','Durn','Aircraft-combined'], header=0 )
for x in range(0,len(df4)):
y=x+1
try:
if ((((df4.iloc[x][6]))=='DSH') or (((df4.iloc[x][6]))=='SD') or (((df4.iloc[x][6]))=='DS') or (((df4.iloc[x][6]))=='HSD') or (((df4.iloc[x][6]))=='HDS') or (((df4.iloc[x][6]))=='SHSD') or (((df4.iloc[x][6]))=='DHS')):
dff3 ={'Venue':df4.iloc[x][0],'A/C':df4.iloc[x][1],'DISPLAY/':df4.iloc[x][2],'Date':df4.iloc[x][3],'BID':df4.iloc[x][4],'Durn':df4.iloc[x][5],'Aircraft-combined':df4.iloc[x][6]}
dff = dff.append(dff3,ignore_index=True)
elif ((df4.iloc[x][3]==df4.iloc[y][3]) and (df4.iloc[x][4]==df4.iloc[y][4])):
sidhu=df4.iloc[x][6]+df4.iloc[y][6]
dff1 = {'Venue':df4.iloc[x][0],'A/C':df4.iloc[x][1],'DISPLAY/':df4.iloc[x][2],'Date':df4.iloc[x][3],'BID':df4.iloc[x][4],'Durn':df4.iloc[x][5],'Aircraft-combined':sidhu}
dff2 = {'Venue':df4.iloc[y][0],'A/C':df4.iloc[y][1],'DISPLAY/':df4.iloc[y][2],'Date':df4.iloc[y][3],'BID':df4.iloc[y][4],'Durn':df4.iloc[y][5],'Aircraft-combined':sidhu}
dff = dff.append(dff1,ignore_index=True)
dff = dff.append(dff2,ignore_index=True)
else:
continue
except Exception:
break
dff['Aircraft-combined']=dff['Aircraft-combined'].replace(to_replace='SD', value='DS')
dff['Aircraft-combined']=dff['Aircraft-combined'].replace(to_replace=['HSD','HDS', 'SHSD'], value='DHS')
dff['Dakota And Fighters In The Same Display Slot']='Yes'
for x in range(0,len(dff)):
y=x+1
try:
if ((dff.iloc[x][3]==dff.iloc[y][3]) and (dff.iloc[x][4]==dff.iloc[y][4])):
if dff.iloc[x][5]==dff.iloc[y][5]:
dff['Dakota And Fighters In The Same Display Slot'][x]='Yes'
dff['Dakota And Fighters In The Same Display Slot'][y]='Yes'
else:
dff['Dakota And Fighters In The Same Display Slot'][x]='No'
dff['Dakota And Fighters In The Same Display Slot'][y]='No'
else:
continue
except Exception:
break
dff=dff.drop('A/C',axis=1)
dff=dff.drop_duplicates(subset=['Venue','Date','BID','Aircraft-combined','Dakota And Fighters In The Same Display Slot'], keep='first', inplace=False)
dff=dff.drop_duplicates(subset=None, keep='first', inplace=False)
dff['Durn']= pd.to_datetime(dff['Durn'],format='%M').apply(pd.Timestamp)
dff['Durn'] = pd.to_datetime(dff['Durn'], format='%H:%M:%S').dt.time
dff.at[14,'Aircraft-combined']='DHS'
dff.at[18,'Aircraft-combined']='DHS'
dff.to_csv(r'C:\Users\Edward\Desktop\BBMF Schedules And Master Forum Thread Texts\BBMF-2009-Code.csv')
dff
The Code seemed to run initially, but now I get the following TypeError :-
Error:
TypeError Traceback (most recent call last)
<ipython-input-58-43d53992dab5> in <module>
17 'PARA': 'P'
18 }
---> 19 df['A/C std'] = df['A/C'].replace(rename_map)
20 print(df['A/C std'].unique().tolist())
21 print("=======")
c:\python37\lib\site-packages\pandas\core\series.py in replace(self, to_replace, value, inplace, limit, regex, method)
3843 return super(Series, self).replace(to_replace=to_replace, value=value,
3844 inplace=inplace, limit=limit,
-> 3845 regex=regex, method=method)
3846
3847 @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
c:\python37\lib\site-packages\pandas\core\generic.py in replace(self, to_replace, value, inplace, limit, regex, method)
6496
6497 return self.replace(to_replace, value, inplace=inplace,
-> 6498 limit=limit, regex=regex)
6499 else:
6500
c:\python37\lib\site-packages\pandas\core\series.py in replace(self, to_replace, value, inplace, limit, regex, method)
3843 return super(Series, self).replace(to_replace=to_replace, value=value,
3844 inplace=inplace, limit=limit,
-> 3845 regex=regex, method=method)
3846
3847 @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
c:\python37\lib\site-packages\pandas\core\generic.py in replace(self, to_replace, value, inplace, limit, regex, method)
6545 dest_list=value,
6546 inplace=inplace,
-> 6547 regex=regex)
6548
6549 else: # [NA, ''] -> 0
c:\python37\lib\site-packages\pandas\core\internals\managers.py in replace_list(self, src_list, dest_list, inplace, regex)
557 return _compare_or_regex_search(values, s, regex)
558
--> 559 masks = [comp(s, regex) for i, s in enumerate(src_list)]
560
561 result_blocks = []
c:\python37\lib\site-packages\pandas\core\internals\managers.py in <listcomp>(.0)
557 return _compare_or_regex_search(values, s, regex)
558
--> 559 masks = [comp(s, regex) for i, s in enumerate(src_list)]
560
561 result_blocks = []
c:\python37\lib\site-packages\pandas\core\internals\managers.py in comp(s, regex)
555 return _compare_or_regex_search(maybe_convert_objects(values),
556 getattr(s, 'asm8'), regex)
--> 557 return _compare_or_regex_search(values, s, regex)
558
559 masks = [comp(s, regex) for i, s in enumerate(src_list)]
c:\python37\lib\site-packages\pandas\core\internals\managers.py in _compare_or_regex_search(a, b, regex)
1949 raise TypeError(
1950 "Cannot compare types {a!r} and {b!r}".format(a=type_names[0],
-> 1951 b=type_names[1]))
1952 return result
1953
TypeError: Cannot compare types 'ndarray(dtype=float64)' and 'str'
I can't work out, what the issue is, any ideas ?
Regards
Eddie Winch ))