May-22-2024, 05:21 PM
In this python code I am attempting to extract files from a FTP folder and then search through them for an invoice#. I have the command window open when running and keep getting stuck on the part where the code is trying to strip the date out of the file name, I get the error "Invalid Date Format" and it states it is skipping that file. The folder contains a ton of files and I really only need to look at the last 30 days worth of files. We have another python code that is doing something similar but with a different file name layout and I used that code to try to replicate it in this instance. This is what the files look like on the ftp folder, attached. This is the beginning of the code that I am getting stuck on will see about the search part once I figure this out;
import os import datetime from ftplib import FTP import csv from tkinter import * from tkinter import messagebox import tkinter as tk # FTP and path configurations outputPath = r'\\xxxxxx\xxxxxxxxxx\SanMar Invoice' cacheDir = 'C:/temp/SanMarRoi/cache' cacheSize = 60 ftp_site = "xxxxxxxxx" ftp_username = "xxxxxxxx" ftp_password = "xxxxxxxx" # Ensure directories exist if not os.path.exists(cacheDir): os.makedirs(cacheDir) if not os.path.exists(outputPath): os.makedirs(outputPath) rows = [] invoice = set() ponumber = "" # Function to fetch files from FTP and sync cache def sync_cache(): ftp = FTP(ftp_site) ftp.login(user=ftp_username, passwd=ftp_password) ftp.cwd("Outbound") filenames = [] ftp.retrlines("LIST", lambda line: filenames.append(line.split()[-1])) print("Files retrieved from FTP server:") for file in filenames: print(file) # Get current date now = datetime.datetime.now() valid_filenames = [] for file in filenames: try: # Extract the date from the filename file_date_str = file.split('-')[-1].split('.')[0] # Get the last part of the filename and remove extension file_date = datetime.datetime.strptime(file_date_str, "%m-%d-%y") valid_filenames.append((file, file_date)) except (ValueError, IndexError): # If parsing fails, skip the file print(f"Skipping file {file}: Invalid date format") # Filter to include only files from the last 30 days recent_files = [file for file in valid_filenames if (now - file[1]).days <= 30] print("Recent files from the last 30 days:") for file in recent_files: print(file[0]) recent_files.sort(key=lambda filename: filename[1], reverse=True) print("Syncing cache. Please wait...") for i, (filename, _) in enumerate(recent_files): if i == cacheSize: break local_path = os.path.join(cacheDir, filename) if os.path.exists(local_path): print(f"File already in cache: {filename}") continue with open(local_path, "w") as cacheFile: ftp.retrbinary(f"RETR {filename}", lambda data: cacheFile.write(data.decode("utf-8"))) print(f"Downloaded and cached file: {filename}") print("Cache sync complete") return [file[0] for file in recent_files]