Mar-22-2021, 10:10 AM
The problem is that when I scraping data from multiple .xlsm file,in ("DataSource": fn) it is taking the whole path of the file location "C:\Users\ShantanuGupta\Desktop\Test Python\202009 - September - Diamond Plod Day & Night MKY025.xlsm". But I wanted only the last portion "202009 - September - Diamond Plod Day & Night MKY025.xlsm" not the whole file location. Also attach picture.I have mentioned in the code with comment where the problem.
Any help????
![[Image: view?usp=sharing]](https://drive.google.com/file/d/1eRpqlHeJagq-_2ss6xNsDQ3lAxCZNY62/view?usp=sharing)
Any help????
from pathlib import Path import time import parser import argparse import pandas as pd import numpy as np import warnings warnings.filterwarnings("ignore") parser = argparse.ArgumentParser(description="Process some integers.") parser.add_argument("path", help="define the directory to folder/file") parser.add_argument("--verbose", help="display processing information") start = time.time() def main(path_xlsm, verbose): if (".xlsm" in str(path_xlsm).lower()) and path_xlsm.is_file(): xlsm_files = [Path(path_xlsm)] else: xlsm_files = list(Path(path_xlsm).glob("*.xlsm")) df = pd.DataFrame() for fn in xlsm_files: all_dfs = pd.read_excel(fn, sheet_name=None, header=None, engine="openpyxl") list_data = all_dfs.keys() all_dfs.pop("Date", None) all_dfs.pop("Ops Report", None) all_dfs.pop("Fuel Report", None) all_dfs.pop("Bit Report", None) all_dfs.pop("Plod Example", None) all_dfs.pop("Plod Definitions", None) all_dfs.pop("Consumables", None) all_dfs.pop("Tables", None) for ws in list_data: # Looping for excel sheet df1 = all_dfs[ws] df2 = pd.DataFrame() if df1.iloc[41, 1] == "Drillers Comments": row = 42 elif df1.iloc[44, 1] == "Drillers Comments": row = 45 new_row = { "PlodDate": df1.iloc[4, 3], "PlodShift": df1.iloc[5, 3], "RigNo": df1.iloc[2, 9], "Location": df1.iloc[3, 3], "DrillersComments": df1.iloc[row, 1], "GeologistComments": df1.iloc[row, 14], "MaintenanceComments": df1.iloc[row, 26], "TravelInName": df1.iloc[2, 36], "TravelInHours": df1.iloc[2, 45], "TravelOutName": df1.iloc[3, 36], "TravelOutHours": df1.iloc[3, 45], "DataSource": fn, #Problem Problem Problem } df2 = df2.append(new_row, ignore_index=True) df2 = df2[ [ "PlodDate", "PlodShift", "RigNo", "Location", "DrillersComments", "GeologistComments", "MaintenanceComments", "TravelInName", "TravelInHours", "TravelOutName", "TravelOutHours", "DataSource", ] ] cols = [ "Location", "DrillersComments", "GeologistComments", "MaintenanceComments", "TravelInName", "TravelInHours", "TravelOutName", "TravelOutHours", ] df2[cols].replace("", np.nan, inplace=True) df2 = df2.dropna(subset=cols, how="all") df2 = df2.replace(",", ";", regex=True) df2 = df2.replace("\n", " ", regex=True) df2 = df2.replace("\r", " ", regex=True) df2 = df2.replace("\t", " ", regex=True) df = df.append(df2) df.to_csv("McKayPlod-1.csv", index=False) if __name__ == "__main__": start = time.time() args = parser.parse_args() path = Path(args.path) verbose = args.verbose main(path, verbose) #Calling Main Function print("Processed time:", time.time() - start) #Total TimeImage