Python Forum
Get the current file name in Python
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Get the current file name in Python
#1
The problem is that when I scraping data from multiple .xlsm file,in ("DataSource": fn) it is taking the whole path of the file location "C:\Users\ShantanuGupta\Desktop\Test Python\202009 - September - Diamond Plod Day & Night MKY025.xlsm". But I wanted only the last portion "202009 - September - Diamond Plod Day & Night MKY025.xlsm" not the whole file location. Also attach picture.I have mentioned in the code with comment where the problem.

Any help????

[Image: view?usp=sharing]


from pathlib import Path
import time
import parser
import argparse
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description="Process some integers.")

parser.add_argument("path", help="define the directory to folder/file")
parser.add_argument("--verbose", help="display processing information")

start = time.time()


def main(path_xlsm, verbose):
    if (".xlsm" in str(path_xlsm).lower()) and path_xlsm.is_file():
        xlsm_files = [Path(path_xlsm)]
    else:
        xlsm_files = list(Path(path_xlsm).glob("*.xlsm"))

    df = pd.DataFrame()
    for fn in xlsm_files:
        all_dfs = pd.read_excel(fn, sheet_name=None, header=None, engine="openpyxl")
        list_data = all_dfs.keys()
        all_dfs.pop("Date", None)
        all_dfs.pop("Ops Report", None)
        all_dfs.pop("Fuel Report", None)
        all_dfs.pop("Bit Report", None)
        all_dfs.pop("Plod Example", None)
        all_dfs.pop("Plod Definitions", None)
        all_dfs.pop("Consumables", None)
        all_dfs.pop("Tables", None)
        for ws in list_data:  # Looping for excel sheet
            df1 = all_dfs[ws]
            df2 = pd.DataFrame()
            if df1.iloc[41, 1] == "Drillers Comments":
                row = 42
            elif df1.iloc[44, 1] == "Drillers Comments":
                row = 45
            new_row = {
                "PlodDate": df1.iloc[4, 3],
                "PlodShift": df1.iloc[5, 3],
                "RigNo": df1.iloc[2, 9],
                "Location": df1.iloc[3, 3],
                "DrillersComments": df1.iloc[row, 1],
                "GeologistComments": df1.iloc[row, 14],
                "MaintenanceComments": df1.iloc[row, 26],
                "TravelInName": df1.iloc[2, 36],
                "TravelInHours": df1.iloc[2, 45],
                "TravelOutName": df1.iloc[3, 36],
                "TravelOutHours": df1.iloc[3, 45],
                "DataSource": fn, #Problem Problem Problem
            }
            df2 = df2.append(new_row, ignore_index=True)
            df2 = df2[
                [
                    "PlodDate",
                    "PlodShift",
                    "RigNo",
                    "Location",
                    "DrillersComments",
                    "GeologistComments",
                    "MaintenanceComments",
                    "TravelInName",
                    "TravelInHours",
                    "TravelOutName",
                    "TravelOutHours",
                    "DataSource",
                ]
            ]
            cols = [
                "Location",
                "DrillersComments",
                "GeologistComments",
                "MaintenanceComments",
                "TravelInName",
                "TravelInHours",
                "TravelOutName",
                "TravelOutHours",
            ]
            df2[cols].replace("", np.nan, inplace=True)
            df2 = df2.dropna(subset=cols, how="all")
            df2 = df2.replace(",", ";", regex=True)
            df2 = df2.replace("\n", " ", regex=True)
            df2 = df2.replace("\r", " ", regex=True)
            df2 = df2.replace("\t", " ", regex=True)
            df = df.append(df2)
    df.to_csv("McKayPlod-1.csv", index=False)


if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)  #Calling Main Function
    print("Processed time:", time.time() - start)  #Total Time
Image
Reply
#2
use fn.name
also, get your directory contents like line 11 below.
from pathlib import Path
import os

# For this example, Set cwd to script path
os.chdir(os.path.abspath(os.path.dirname(__file__)))

# get list of all files ending with .py
path = '.' # [b]Set path to your path[/b]
homepath = Path(path)

xlsm_files = [filename for filename in homepath.iterdir() if filename.is_file and filename.suffix == '.xlsm']

for fn in xlsm_files:
    print(f"\nRelative path: {fn}")
    print(f"filename: {fn.name}")
    print(f"fullpath: {fn.resolve()}")
    print(f"fullfile parts: {fn.resolve().parts}")
    print(f"File prefix (stem): {fn.stem}")
    print(f"File suffix: {fn.suffix}")
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Compare current date on calendar with date format file name Fioravanti 1 207 Mar-26-2024, 08:23 AM
Last Post: Pedroski55
  How to find out from outside Python (in Windows) the current version of Python? pstein 4 728 Oct-04-2023, 10:01 AM
Last Post: snippsat
  How to get all the data for the current month in ms Access using python? aeo03 1 2,304 Nov-07-2018, 08:21 PM
Last Post: micseydel
  Problem with file not saving current output jameseroni 2 2,388 Oct-28-2018, 02:02 PM
Last Post: jameseroni
  Do I need to uninstall my current Python distribution (3.7) to install Anaconda? Tim 2 23,168 Jul-10-2018, 03:03 PM
Last Post: Tim
  Python code to check SQL table for current date PYTHONDUDE 3 2,939 May-16-2018, 02:27 PM
Last Post: buran
  getting current filename of a text editor file for printing file name? hsunteik 3 5,035 Dec-24-2016, 07:58 PM
Last Post: Blue Dog

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020