Jul-21-2020, 04:11 PM
Hello,
Down where my code says Stop2 (close to the bottom) I'm trying to count how many times a string occurs inside a file. It's almost working correctly but my only problem is, is that it counts a similar string for instance the string is A118, and there's another string very similar to that inside the file called A118X. I don't want it to count A118X I just want it to count A118. I want it to count A118X on it's own. I have 1000's of strings to go through, each need to be counted individually.
Down where my code says Stop2 (close to the bottom) I'm trying to count how many times a string occurs inside a file. It's almost working correctly but my only problem is, is that it counts a similar string for instance the string is A118, and there's another string very similar to that inside the file called A118X. I don't want it to count A118X I just want it to count A118. I want it to count A118X on it's own. I have 1000's of strings to go through, each need to be counted individually.
import openpyxl as xl import os import pandas as pd import xlsxwriter import xlrd from xlsxwriter.utility import xl_rowcol_to_cell input_dir = 'C:\\work\\comparison\\NNM' Summary = 'C:\\work\\comparison\\Summary.xlsx' Dwell = 'C:\\work\\comparison\\Dwell.xlsx' newFile = 'C:\\work\\comparison\\Comparison.xlsx' files = [file for file in os.listdir(input_dir) if os.path.isfile(file) and file.endswith(".xlsx")] i=0 wb3=xlsxwriter.Workbook(newFile) ws3=wb3.add_worksheet('Comparison') format = wb3.add_format() format.set_align('center') format.set_align('vcenter') format.set_bold(True) format.set_font_size(14) format.set_bg_color('#C9C9C9') format.set_border(style=1) for row_num in range(1,30): ws3.write_formula(row_num - 1, 5, '=ABS(C%d - E%d)' % (row_num, row_num)) for row_num in range(1,30): ws3.write_formula(row_num - 1, 10, '=ABS(H%d - J%d)' % (row_num, row_num)) for row_num in range(1,30): ws3.write_formula(row_num - 1, 15, '=ABS(N%d - O%d)' % (row_num, row_num)) ws3.write_row("A1:Q1", ['TNAME','Start Segment','Start Chainage(ft)','Start Segment','Start Chainage(ft)','Start Chainage Difference(ft)','End Segment','End Chainage(ft)','End Segment','End Chainage(ft)','End Chainage Difference(ft)','Stops','Stops','Distance','Distance','Distance Difference (ft)','Comments'], format) ws3.set_column(1, 17, 35) wb3.close() wb3 = xl.load_workbook(newFile) ws3 = wb3.worksheets[0] wb2 = xl.load_workbook(Summary) ws2 = wb2.worksheets[1] for file in files: input_file = os.path.join(input_dir, file) wb1=xl.load_workbook(input_file) ws1=wb1.worksheets[0] Stop1 = pd.read_excel(file,usecols = "F",squeeze = True) counts1 = Stop1.value_counts() z1 = counts1[0]/2 Stop2 = pd.read_excel(Dwell,usecols = "A",squeeze = True) Detail=pd.read_excel(input_file, usecols="A",nrows=1,header=None,squeeze=True).str.slice(start=28,stop=-2).to_string(index=False).strip() counts2 = Stop2.str.match(Detail).sum() ws3[f'A{i+2}']=ws1['A1'].value[28:] ws3[f'D{i+2}']=ws1['B4'].value ws3[f'E{i+2}']=ws1['D4'].value ws3[f'I{i+2}']=ws1['B'][-1].value ws3[f'J{i+2}']=ws1['D'][-1].value ws3[f'O{i+2}']=ws1['E'][-1].value ws3[f'N{i+2}']=ws2[f'I{i+6}'].value ws3[f'M{i+2}']=z1 ws3[f'L{i+2}']=counts2 i += 1 wb3.save(newFile)