Get the current file name in Python

shantanu97 · Mar-22-2021, 10:10 AM

The problem is that when I scraping data from multiple .xlsm file,in ("DataSource": fn) it is taking the whole path of the file location "C:\Users\ShantanuGupta\Desktop\Test Python\202009 - September - Diamond Plod Day & Night MKY025.xlsm". But I wanted only the last portion "202009 - September - Diamond Plod Day & Night MKY025.xlsm" not the whole file location. Also attach picture.I have mentioned in the code with comment where the problem.

Any help????

[Image: view?usp=sharing]

        
          
          
              
              from pathlib import Path
import time
import parser
import argparse
import pandas as pd
import numpy as np
import warnings
 
warnings.filterwarnings("ignore")
 
parser = argparse.ArgumentParser(description="Process some integers.")
 
parser.add_argument("path", help="define the directory to folder/file")
parser.add_argument("--verbose", help="display processing information")
 
start = time.time()
 
 
def main(path_xlsm, verbose):
    if (".xlsm" in str(path_xlsm).lower()) and path_xlsm.is_file():
        xlsm_files = [Path(path_xlsm)]
    else:
        xlsm_files = list(Path(path_xlsm).glob("*.xlsm"))
 
    df = pd.DataFrame()
    for fn in xlsm_files:
        all_dfs = pd.read_excel(fn, sheet_name=None, header=None, engine="openpyxl")
        list_data = all_dfs.keys()
        all_dfs.pop("Date", None)
        all_dfs.pop("Ops Report", None)
        all_dfs.pop("Fuel Report", None)
        all_dfs.pop("Bit Report", None)
        all_dfs.pop("Plod Example", None)
        all_dfs.pop("Plod Definitions", None)
        all_dfs.pop("Consumables", None)
        all_dfs.pop("Tables", None)
        for ws in list_data:  # Looping for excel sheet
            df1 = all_dfs[ws]
            df2 = pd.DataFrame()
            if df1.iloc[41, 1] == "Drillers Comments":
                row = 42
            elif df1.iloc[44, 1] == "Drillers Comments":
                row = 45
            new_row = {
                "PlodDate": df1.iloc[4, 3],
                "PlodShift": df1.iloc[5, 3],
                "RigNo": df1.iloc[2, 9],
                "Location": df1.iloc[3, 3],
                "DrillersComments": df1.iloc[row, 1],
                "GeologistComments": df1.iloc[row, 14],
                "MaintenanceComments": df1.iloc[row, 26],
                "TravelInName": df1.iloc[2, 36],
                "TravelInHours": df1.iloc[2, 45],
                "TravelOutName": df1.iloc[3, 36],
                "TravelOutHours": df1.iloc[3, 45],
                "DataSource": fn, #Problem Problem Problem
            }
            df2 = df2.append(new_row, ignore_index=True)
            df2 = df2[
                [
                    "PlodDate",
                    "PlodShift",
                    "RigNo",
                    "Location",
                    "DrillersComments",
                    "GeologistComments",
                    "MaintenanceComments",
                    "TravelInName",
                    "TravelInHours",
                    "TravelOutName",
                    "TravelOutHours",
                    "DataSource",
                ]
            ]
            cols = [
                "Location",
                "DrillersComments",
                "GeologistComments",
                "MaintenanceComments",
                "TravelInName",
                "TravelInHours",
                "TravelOutName",
                "TravelOutHours",
            ]
            df2[cols].replace("", np.nan, inplace=True)
            df2 = df2.dropna(subset=cols, how="all")
            df2 = df2.replace(",", ";", regex=True)
            df2 = df2.replace("\n", " ", regex=True)
            df2 = df2.replace("\r", " ", regex=True)
            df2 = df2.replace("\t", " ", regex=True)
            df = df.append(df2)
    df.to_csv("McKayPlod-1.csv", index=False)
 
 
if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)  #Calling Main Function
    print("Processed time:", time.time() - start)  #Total Time

            

        
      

Image

**Larz60+** · Mar-22-2021, 04:32 PM

use fn.name
also, get your directory contents like line 11 below.

        
              from pathlib import Path
import os
 
# For this example, Set cwd to script path
os.chdir(os.path.abspath(os.path.dirname(__file__)))
 
# get list of all files ending with .py
path = '.' # [b]Set path to your path[/b]
homepath = Path(path)
 
xlsm_files = [filename for filename in homepath.iterdir() if filename.is_file and filename.suffix == '.xlsm']
 
for fn in xlsm_files:
    print(f"\nRelative path: {fn}")
    print(f"filename: {fn.name}")
    print(f"fullpath: {fn.resolve()}")
    print(f"fullfile parts: {fn.resolve().parts}")
    print(f"File prefix (stem): {fn.stem}")
    print(f"File suffix: {fn.suffix}")

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	How to find out from outside Python (in Windows) the current version of Python?	pstein	5	2,394	Jun-28-2024, 07:02 AM Last Post: Samuel34
	Compare current date on calendar with date format file name	Fioravanti	1	2,131	Mar-26-2024, 08:23 AM Last Post: Pedroski55
	How to get all the data for the current month in ms Access using python?	aeo03	1	2,854	Nov-07-2018, 08:21 PM Last Post: micseydel
	Problem with file not saving current output	jameseroni	2	3,269	Oct-28-2018, 02:02 PM Last Post: jameseroni
	Do I need to uninstall my current Python distribution (3.7) to install Anaconda?	Tim	2	24,391	Jul-10-2018, 03:03 PM Last Post: Tim
	Python code to check SQL table for current date	PYTHONDUDE	3	3,896	May-16-2018, 02:27 PM Last Post: buran
	getting current filename of a text editor file for printing file name?	hsunteik	3	6,131	Dec-24-2016, 07:58 PM Last Post: Blue Dog

Get the current file name in Python

User Panel Messages

Announcements