Python script merging some columns to one column with new name

jbveenstra · (This post was last modified: Feb-13-2020, 09:57 AM by jbveenstra.)

Hello Gribouillis,
Thanks for your reply, this helped a lot already. Im getting the idea of the replacement plan, exactly what I meant.
I edited my code with yours, and it now reads the headers of my input file and puts them in the plan.
I can't get the merging part to work. De merged columns should be written to the outputfile
(Yes, I'm a python beginner :) maybe to difficult things to start with, but I learn the most of the practice)
On rule 70: shouldn't that be a for loop, to run through all the rows?
On rule 83: I think that the merging should be done here, but how.
Code after this is unchanged

import csv
import sys

#===NEW========================
from more_itertools import unique_everseen
 
rules = [
    ('_A-REFERENTIE_', ['_01-REFERENTIEAANDUIDING_', '_02-REFERENTIE1_', '_03-REFERENTIE2_']),
    ('_B-ORGANISATIE_', ['_04-ORGANISATIE_']),
    ('_C-AFDELING1_', ['_05-AFDELING1_']),
    ('_D-AFDELING2_', ['_06-AFDELING2_']),
    ('_E-COMPLEETNAAM_', ['_07-AANHEF_', '_08-TITELVOOR_', '_09-VOORLETTERS_', '_10-VOORNAAM_', '_11-TUSSENVOEGSEL_', '_12-ACHTERNAAM1_', '_13-ACHTERNAAM2_', '_15-COMPLEETNAAM_', '_14-TITELNA_']),
    ('_F-COMPLEETADRES_', ['_19-COMPLEETADRES1_', '_16-STRAAT_', '_17-HUISNR_', '_18-HUISNRTV_']),
    ('_G-COMPLEETADRES2_', ['_20-COMPLEETADRES2_']),
    ('_H-COMPLEETPOSTPLAATS_', ['_21-POSTCODE_', '_22-POSTPLAATS_', '_23-POSTCODE2_', '_25-COMPLEETPOSTPLAATS_', '_24-LANDREGIO_']),
    ('_I-LAND_', ['_26-LAND_']),
    ('_J-KIX_', ['_28-KIX_']),
    ('_K-AANTAL_', ['_29-AANTAL_']),
    ('_L-VOLGNR_', ['_30-VOLGNR_']),
    ('_M-BUNDEL_', ['_31-BUNDEL_']),
    ('_N-BUNDELNR_', ['_32-BUNDELNR_']),
    ('_O-PALLET_', ['_33-PALLET_']),
    ('_P-PALLNR_', ['_34-PALLNR_']),
    ('_Q-SANDDCODE_', ['_35-SANDDCODE_']),
]

inverse_rules = { old: new for new, olds in rules for old in olds}
drules = dict(rules)

def merging_plan(headers):
    headers = list(headers)
    news = list(unique_everseen(inverse_rules.get(h, h) for h in headers))
    s = set(headers)
    plan = []
    for new in news:
        plan.append((new, [old for old in drules.get(new, [new]) if old in s]))
    return plan
 
def merge(plan, row):
    return {k: ' '.join(row[x] for x in v) for k, v in plan}
 
#===NEW_END========================


#inFile = sys.argv[1]
inFile = 'AdressenTest.csv'
#outFile = sys.argv[2]
outFile = 'AdressenTestOut.csv'

### open csv file
csvfile = open(inFile, "r" )
reader = csv.DictReader(csvfile, delimiter=';')

#===NEW========================
def main():
    ### compute the merging plan for a given sequence of input headers
    headers = reader.fieldnames
    # headers = ['header2', 'header5', 'header3', 'spam', 'header10', 'header13']
    # headers = ['_04-ORGANISATIE_', '_15-COMPLEETNAAM_', '_19-COMPLEETADRES1_', '_17-HUISNR_', '_21-POSTCODE_', '_22-POSTPLAATS_', '_36-NVT_', '_30-VOLGNR_', '_35-SANDDCODE_', '_31-BUNDEL_', '_32-BUNDELNR_', '_33-PALLET_', '_34-PALLNR_']
    plan = merging_plan(headers)
    print(plan)
     
    ### compute the merged row corresponding to an input row
    '''r = {'header2': 'v2', 'header5': 'v5',
         'header3': 'v3', 'spam': 'vspam',
         'header10': 'v10', 'header13': 'v13',}
    print(merge(plan, r))'''
 
if __name__ == '__main__':
    main()
#===NEW_END========================

### open output file
outfile = open(outFile, "w" )
# get a csv writer --  concat(nameFirst, nameLast) as full_name 

fieldnames=reader.fieldnames
# fieldnames = ["SD_VOLGNR","voorletters","achternaam","NewHeaderA","SD_SNDCODE"]


#writer = csv.DictWriter(outfile, delimiter=';', fieldnames=reader.fieldnames)
writer = csv.DictWriter(outfile, delimiter=';', fieldnames=fieldnames, extrasaction='ignore')
headers = {} 
for n in writer.fieldnames:
    headers[n] = n
writer.writerow(headers)
for row in reader:
    writer.writerow(row)

csvfile.close()
outfile.close()

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Transform 3 Columns into Single Column	DaveG	9	3,605	Mar-19-2025, 03:46 AM Last Post: robbert23
	Converting column of values into muliple columns of counts	highland44	0	913	Feb-01-2024, 12:48 AM Last Post: highland44
	Is there a .bat DOS batch script to .py Python Script converter?	pstein	3	8,221	Jun-29-2023, 11:57 AM Last Post: gologica
	J2534 Python Can Bus merging	natezoom	0	1,605	May-01-2023, 10:37 PM Last Post: natezoom
	Reshaping a single column in to multiple column using Python	sahar	7	3,608	Jun-20-2022, 12:35 PM Last Post: deanhystad
	df column aggregate and group by multiple columns	SriRajesh	0	1,663	May-06-2022, 02:26 PM Last Post: SriRajesh
	Split single column to multiple columns	SriRajesh	1	1,930	Jan-07-2022, 06:43 PM Last Post: jefsummers
	How to remove a column or two columns in a correlation heatmap?	lulu43366	3	7,686	Sep-30-2021, 03:47 PM Last Post: lulu43366
	Merging spreadsheets with the same columns and extracting rows with matching entries	johnbernard	3	14,222	Aug-19-2021, 03:08 PM Last Post: johnbernard
	Index error - columns vs non-column	Vinny	3	6,539	Aug-09-2021, 04:46 PM Last Post: snippsat

Python script merging some columns to one column with new name

User Panel Messages

Announcements