Hi, i tried this way even but received the error
import pandas as pd file1 = 'Book1.csv' file2 = 'Book2.csv' file3 = 'update.csv' cols_to_show = ['XID', 'TCO', 'Payment Plan','Livable Area','Brochure', 'Banks'] old = pd.read_csv(file1) new = pd.read_csv(file2) def report_diff(x): return x[0] if x[1] == x[0] else '{0} --> {1}'.format(*x) old['version'] = 'old' new['version'] = 'new' full_set = pd.concat([old, new], ignore_index=True) changes = full_set.drop_duplicates(subset=cols_to_show, keep='last') dupe_names = changes.set_index('XID').index.get_duplicates() dupes = changes[changes['XID'].isin(dupe_names)] change_new = dupes[(dupes['version'] == 'new')] change_old = dupes[(dupes['version'] == 'old')] change_new = change_new.drop(['version'], axis=1) change_old = change_old.drop(['version'], axis=1) change_new.set_index('XID', inplace=True) change_old.set_index('XID', inplace=True) diff_panel = pd.Panel(dict(df1=change_old, df2=change_new)) diff_output = diff_panel.apply(report_diff, axis=0) changes['duplicate'] = changes['XID'].isin(dupe_names) removed_names = changes[(changes['duplicate'] == False) & (changes['version'] == 'old')] removed_names.set_index('XID', inplace=True) new_name_set = full_set.drop_duplicates(subset=cols_to_show) new_name_set['duplicate'] = new_name_set['XID'].isin(dupe_names) added_names = new_name_set[(new_name_set['duplicate'] == False) & (new_name_set['version'] == 'new')] added_names.set_index('XID', inplace=True) print(added_names) df = pd.concat([diff_output, removed_names, added_names], keys=('changed', 'removed', 'added')) print(df) df[cols_to_show].to_csv(file3)and the error is ..
Error:KeyError: "['XID'] not in index"
can anyone help me on this error?