Python Forum

Full Version: Complex word search multiple files
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hello,

This is probably going to be the hardest thing I've had to explain haha. This is what I need to be able to do:



Start with word inside ws1 ['A1][28:] (example word 'A115' this word always changes as the script loops through each file.)

Open ws4 search row B (no header) for ws1['A1'][28:] (A115)

Once word (A115) is located in ws4 row B locate word that is 9 cells below it (example 'N90111.90' word always changes as script loops) then take that word and

open ws5 and find that word (N90111.90) under column A (no header) once that word is found go horizontally from that word (N90111.90) to column L and copy that value (20.22) and convert it from Miles to Ft. (106761.6) and then paste that value inside
ws3 column O row 2 (106761.6).




import openpyxl as xl 
import os
import pandas as pd
import xlsxwriter
import xlrd


input_dir = 'C:\\work\\comparison\\NNM'
Summary = 'C:\\work\\comparison\\Summary.xlsx'
TLL = 'C:\\work\\comparison\\TLL.xlsx'
NDE = 'C:\\work\\comparison\\NDE.xlsx'
newFile = 'Comparison.xlsx'




files = [file for file in os.listdir(input_dir)
         if os.path.isfile(file) and file.endswith(".xlsx")]

i=0
ii=0

wb3=xlsxwriter.Workbook(newFile)
ws3=wb3.add_worksheet('Comparison')


format = wb3.add_format()
format.set_align('center')
format.set_align('vcenter')
format.set_bold(True)
format.set_font_size(14)
format.set_bg_color('#C9C9C9')
format.set_border(style=1)


ws3.write('F2:', '=C2-E2')

ws3.write_row("A1:Q1", ['T','Segment','Chainage(ft)','Segment','Chainage(ft)','Start Chainage Difference(ft)','Segment','Chainage(ft)','Segment','Chainage(ft)','End Chainage Difference(ft)','Stops','Stops','Distance','Distance','Distance Difference (ft)','Comments'], format)
ws3.set_column(1, 17, 35)
wb3.close()

wb3 = xl.load_workbook(newFile) 
ws3 = wb3.worksheets[0]

wb2 = xl.load_workbook(Summary) 
ws2 = wb2.worksheets[1]

wb4 = xl.load_workbook(TLL) 
ws4 = wb4.worksheets[0]

wb5 = xl.load_workbook(NDE) 
ws5 = wb5.worksheets[0]


for file in files: 
   input_file =  os.path.join(input_dir, file)
   wb1=xl.load_workbook(input_file)
   ws1=wb1.worksheets[0]
   

   series = pd.read_excel(file,usecols = "F",squeeze = True)
   counts = series.value_counts()
   z = counts[0]/2


   ws3[f'A{i+2}']=ws1['A1'].value[28:]
   ws3[f'D{i+2}']=ws1['B4'].value
   ws3[f'E{i+2}']=ws1['D4'].value
   ws3[f'I{i+2}']=ws1['B'][-1].value
   ws3[f'J{i+2}']=ws1['D'][-1].value
   ws3[f'O{i+2}']=ws1['E'][-1].value 
   ws3[f'N{i+2}']=ws2[f'I{ii+6}'].value   
   ws3[f'M{i+2}']=z
   
   i += 1
   ii +=1          
   


   wb3.save(newFile)