Im using two .py files to accomplish the task of converting pdfs to tables then saving them into an excel sheet.
Everything works as it should when running on pycharm, but when I use pyinstaller to create the .exe, I get get error after error, some I could fix by installing the missing module in the CMD terminal (1st error example) I basically had to do it for every module, so I assume this is some rooky mistake.
Runs fine in Pycharm but as soon as I run Pyinstaller on it and run the EXE it breaks.
Advice please!
I hope its the last problem anyway.......
Everything works as it should when running on pycharm, but when I use pyinstaller to create the .exe, I get get error after error, some I could fix by installing the missing module in the CMD terminal (1st error example) I basically had to do it for every module, so I assume this is some rooky mistake.
Error:C:\Users\Justi\PycharmProjects\Camelot>C:\Users\Justi\PycharmProjects\Camelot\dist\TheWorks\TheWorks.exe
Folder Path: C:/Project/Part Lists
Output File: C:/Project/Part Lists/buffbilly.xlsx
Starting Extraction and Clean Up
Traceback (most recent call last):
File "TheWorks.py", line 1, in <module>
from TidyUp import extract_and_format_tables
File "PyInstaller\loader\pyimod02_importers.py", line 499, in exec_module
File "TidyUp.py", line 108, in <module>
extract_and_format_tables(folder_path, output_file)
File "TidyUp.py", line 49, in extract_and_format_tables
excel_writer = pd.ExcelWriter(output_file, engine='xlsxwriter')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pandas\io\excel\_xlsxwriter.py", line 198, in __init__
ModuleNotFoundError: No module named 'xlsxwriter'
[24524] Failed to execute script 'TheWorks' due to unhandled exception!
this one I can't seem to fix; the module is installed everywhere and the suggestion is I use a different module - I don't really want to blow up my code and change to a different module though....Error:C:\Users\Justi\PycharmProjects\Camelot>C:\Users\Justi\PycharmProjects\Camelot\dist\TheWorks\TheWorks.exe
Folder Path: C:/Project/Part Lists
Output File: C:/Project/Part Lists/combined_tables.xlsx
Starting Extraction and Clean Up
0%| | 0/9 [00:00<?, ?it/s]
Traceback (most recent call last):
File "TheWorks.py", line 1, in <module>
from TidyUp import extract_and_format_tables
File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
File "PyInstaller\loader\pyimod02_importers.py", line 499, in exec_module
File "TidyUp.py", line 108, in <module>
extract_and_format_tables(folder_path, output_file)
File "TidyUp.py", line 56, in extract_and_format_tables
tables = camelot.read_pdf(os.path.join(folder_path, pdf_file), pages='all', flavor='stream')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "camelot\io.py", line 111, in read_pdf
File "camelot\handlers.py", line 50, in __init__
File "camelot\handlers.py", line 75, in _get_pages
File "PyPDF2\_reader.py", line 1974, in __init__
File "PyPDF2\_utils.py", line 369, in deprecation_with_replacement
File "PyPDF2\_utils.py", line 351, in deprecation
PyPDF2.errors.DeprecationError: PdfFileReader is deprecated and was removed in PyPDF2 3.0.0. Use PdfReader instead.
[15280] Failed to execute script 'TheWorks' due to unhandled exception!
The code for TidyUp.pyimport camelot import pandas as pd import os from tqdm import tqdm from PyPDF2 import PdfReader import warnings import PySimpleGUI as sg # Create a layout with a folder selection button, a text input field, and a "Save As" button layout = [ [sg.Text("Folder Path"), sg.InputText(), sg.FolderBrowse()], [sg.Text("Output File"), sg.InputText(), sg.SaveAs(file_types=(("Excel Files", "*.xlsx"),))], [sg.Submit()] ] # Create the window window = sg.Window("Input Folder and Output File", layout) # Loop to get the user input while True: event, values = window.read() if event == sg.WIN_CLOSED or event == "Submit": break # Get the user input folder_path = values[0] output_file = values[1] # Do something with the input print(f"Folder Path: {folder_path}") print(f"Output File: {output_file}") # Close the window window.close() # suppress the FutureWarning warnings.simplefilter("ignore", category=FutureWarning) # suppress the UserWarning warnings.simplefilter("ignore", category=UserWarning) def extract_and_format_tables(folder_path, output_file): print("Starting Extraction and Clean Up") # Create an ExcelWriter object to write the data to excel_writer = pd.ExcelWriter(output_file, engine='xlsxwriter') # Iterate over the PDF files in the folder for pdf_file in tqdm(os.listdir(folder_path)): # Check if the file is a PDF if pdf_file.endswith('.pdf'): # Use Camelot to extract the tables from the PDF tables = camelot.read_pdf(os.path.join(folder_path, pdf_file), pages='all', flavor='stream') # Convert the tables to a list of pandas dataframes dfs = [table.df for table in tables] # Combine all of the dataframes into a single dataframe df_all = pd.concat(dfs) # Convert the values in column 4 to numeric values, and fill any NaN values with an empty string df_all[4] = pd.to_numeric(df_all[4], errors='coerce') # Define a function to replace cells containing "JOB" with an empty string def replace_qty_job(cell): if "Job" in str(cell) or "Description" in str(cell) or "Consolidation" in str(cell) or "Width" in str(cell): return None else: return cell # Apply the function to the dataframe using applymap df_all = df_all.applymap(lambda x: replace_qty_job(x)) def replace_empty_strings(cell): if cell == '': return None else: return cell # Apply the function to the dataframe using applymap df_all = df_all.applymap(lambda x: replace_empty_strings(x)) # Drop rows that have all null values in columns 0, 1, 2, 3, 4 df_all.dropna(how='all', subset=[0, 1, 2, 3, 4], inplace=True) # Set the width of the first four columns to 25 characters df_all.style.set_properties(**{'text-align': 'left'}).set_table_styles( [{'selector': 'th', 'props': [('text-align', 'left'), ('max-width', '50ch')]}, {'selector': 'td', 'props': [('text-align', 'left'), ('max-width', '50ch')]}]) df_all.to_excel(excel_writer, sheet_name=pdf_file, index=False, header=True) # Save the Excel file and close the ExcelWriter object excel_writer.save() excel_writer.close() print('Finished Extraction and Clean UP, Moving on to AutoFormatting') # Set the file path to the folder containing the PDF files #folder_path = 'C:/Project/Part Lists' # Set the file path for the output Excel file #output_file = 'C:/Project/combined_tables.xlsx' # Extract and format the tables from the PDF files in the specified folder, and write the results to the output Excel file extract_and_format_tables(folder_path, output_file)the code for TheWorks.py wich runs it all
from TidyUp import extract_and_format_tables import openpyxl import warnings from TidyUp import output_file import openpyxl ################################################################ # suppress the FutureWarning warnings.simplefilter("ignore", category=FutureWarning) # suppress the UserWarning warnings.simplefilter("ignore", category=UserWarning) ################################################################# #name the folder to find the PDF files in, #name the Excel sheet to output the converted PDFs to #folder_path = "c:/Project/Part Lists" #output_file = 'C:/Project/combined_tables.xlsx' # Open the workbook wb = openpyxl.load_workbook(output_file) # Iterate over the sheets in the workbook for sheet in wb: # Iterate over the rows in the sheet for row in sheet.iter_rows(): # Get the value in the first cell of the row cell_value = row[0].value # Check if the cell value contains "PSI" or "Total Parts Per" if "PSI" in str(cell_value) or "Total Parts Per" in str(cell_value): # If the cell value contains "PSI" or "Total Parts Per", apply formatting to the row or cell if "PSI" in str(cell_value): # If the cell value contains "PSI", apply formatting to the entire row for cell in row: cell.fill = openpyxl.styles.PatternFill(start_color='FFFF00', end_color='FFFF00', fill_type='solid') else: # If the cell value contains "Total Parts Per", apply formatting to the cell row[0].font = openpyxl.styles.Font(bold=True, size=16) # Adjust the width of column 0 to fit the data max_length = 0 for cell in sheet['A']: if cell.value: if len(str(cell.value)) > max_length: max_length = len(str(cell.value)) adjusted_width = (max_length + 2) * 1 sheet.column_dimensions['A'].width = adjusted_width # Save the workbook wb.save(output_file) print('All done, have a nice rest of your day! :)')I'm almost there, I just cant figure this last problem out.
Runs fine in Pycharm but as soon as I run Pyinstaller on it and run the EXE it breaks.
Advice please!
I hope its the last problem anyway.......
