I'm sure the experts here can do this much more elegantly, but this works.
After doing PHP all morning, Python is a pleasant relief!! (I can't handle PHP)
import csv
from collections import Counter
path2csv = '/home/pedro/myPython/csv/randomdups.csv'
# Read the whole file into data, a list of lists (one inner list per CSV row).
# A csv.reader can only be iterated once, so keeping the rows in a list lets
# the code below loop over them as many times as it needs.
# The with-block closes the file as soon as the rows are read, and
# newline='' is what the csv module asks for so that it can handle line
# endings (including newlines inside quoted fields) itself.
with open(path2csv, newline='') as infile:
    answers = csv.reader(infile)
    data = list(answers)
# for info
for d in data:
    print(d)
"""
['col1', 'col2', 'col3']
['eggs', '25', '28']
['bananas', '3', '46']
['diamonds', '54', '63']
['apples', '15', '12']
['pears', '55', '11']
['pumpkins', '2', '22']
['eggs', '9', '8']
['bananas', '99', '101']
['apples', '14', '33']
['pears', '61', '17']
['pumpkins', '87', '45']
['rust', '13', '87']
['eggs', '88', '46']
['bananas', '89', '47']
['apples', '90', '48']
['pears', '91', '49']
['pumpkins', '92', '50']
"""
# Get rid of the column headers; the guard keeps an empty file from raising
# IndexError here.
if data:
    del data[0]

# Count the number of occurrences of each item (column 0) in one pass over
# the rows. csv.reader returns an empty list for a blank line, so those rows
# are skipped instead of failing on row[0].
item_num_dict = Counter(row[0] for row in data if row)

# The distinct item names; there can be no duplicates in a set.
unique_items = set(item_num_dict)
# just for info
for u in unique_items:
    print(u)

# Directory prefix for the result files written further down.
savepath = '/home/pedro/myPython/csv/'
# Filled in by getFirstExample(): item name -> (column 2, column 3) taken
# from the first row of an item that occurs more than once.
first_example_data = {}
# a function to get the first example of a duplicate key
def getFirstExample(key):
    """Record the first data row for ``key`` if ``key`` occurs more than once.

    Reads the module-level ``data`` (list of CSV rows) and ``item_num_dict``
    (item name -> occurrence count). When the count for ``key`` is greater
    than 1, the second and third columns of the first row whose first column
    equals ``key`` are stored as a tuple in the module-level
    ``first_example_data`` under ``key``.

    Returns None in every case; the result is communicated through
    ``first_example_data``.
    """
    # The count does not depend on the row being looked at, so test it once
    # up front and skip the scan entirely for items that are not duplicated.
    if item_num_dict.get(key, 0) <= 1:
        return
    for d in data:
        # csv.reader yields [] for blank lines; skip those rather than fail
        # on d[0].
        if d and d[0] == key:
            # get the data as a tuple
            first_example_data[key] = (d[1], d[2])
            # bail out after first example
            return
# get the first example of duplicate items and save to a dictionary: first_example_data
for key in item_num_dict:
    getFirstExample(key)

# Header row shared by every output file, so build it once.
fieldnames = ['item', 'data1', 'data2']
# Write one small CSV per duplicated item: the header row plus that item's
# first example row.
for key, (data1, data2) in first_example_data.items():
    savename = savepath + key + '_first_example.csv'
    # newline='' leaves line endings to the csv writer; without it, platforms
    # that translate '\n' on write (Windows) end up with '\r\r\n' row endings.
    with open(savename, mode='w', newline='') as f:
        f_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        f_writer.writerow(fieldnames)
        f_writer.writerow([key, data1, data2])
    print('First duplicate values saved to', savename)
print('Makes a change from that complicated PHP shit ... ')
And if you're offering me diamonds and rust
I've already paid