Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Can I minimize the code???
#1
Can i minimize the line of the code? or can I automate some of the steps in my python code???
from pathlib import Path
import time
import parser
import argparse
import pandas as pd
import os
import warnings

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description="Process some integers.")

parser.add_argument("path", help="define the directory to folder/file")
parser.add_argument("--verbose", help="display processing information")


start = time.time()


def main(path_csv, verbose):
    if (".csv" in str(path_csv).lower()) and path_csv.is_file():
        csv_files = [Path(path_csv)]
    else:
        csv_files = list(Path(path_csv).glob("*.csv"))

    df = pd.DataFrame()
    for fn in csv_files:
        all_dfs = pd.read_csv(fn, header=None, index_col=None, encoding="ISO-8859-1")

        df1 = pd.DataFrame()
        df2 = all_dfs.iloc[32 : len(all_dfs), [1, 2]]
        df3 = all_dfs.iloc[32 : len(all_dfs), [1, 3]]
        df4 = all_dfs.iloc[32 : len(all_dfs), [1, 4]]
        df5 = all_dfs.iloc[32 : len(all_dfs), [1, 6]]
        df6 = all_dfs.iloc[32 : len(all_dfs), [1, 7]]

        df2.columns = ["Sampno", "Result"]
        df3.columns = ["Sampno", "Result"]
        df4.columns = ["Sampno", "Result"]
        df5.columns = ["Sampno", "Result"]
        df6.columns = ["Sampno", "Result"]

        df2 = df2.assign(Method=all_dfs.iloc[28, 2])
        df2 = df2.assign(UOM=all_dfs.iloc[31, 2])
        df2 = df2.assign(L_dl=all_dfs.iloc[30, 2])
        df2 = df2.assign(Analyte=all_dfs.iloc[29, 2])
        df2 = df2[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df3 = df3.assign(Method=all_dfs.iloc[28, 3])
        df3 = df3.assign(UOM=all_dfs.iloc[31, 3])
        df3 = df3.assign(L_dl=all_dfs.iloc[30, 3])
        df3 = df3.assign(Analyte=all_dfs.iloc[29, 3])
        df3 = df3[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df4 = df4.assign(Method=all_dfs.iloc[28, 4])
        df4 = df4.assign(UOM=all_dfs.iloc[31, 4])
        df4 = df4.assign(L_dl=all_dfs.iloc[30, 4])
        df4 = df4.assign(Analyte=all_dfs.iloc[29, 4])
        df4 = df4[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df5 = df5.assign(Method=all_dfs.iloc[28, 6])
        df5 = df5.assign(UOM=all_dfs.iloc[31, 6])
        df5 = df5.assign(L_dl=all_dfs.iloc[30, 6])
        df5 = df5.assign(Analyte=all_dfs.iloc[29, 6])
        df5 = df5[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df6 = df6.assign(Method=all_dfs.iloc[28, 7])
        df6 = df6.assign(UOM=all_dfs.iloc[31, 7])
        df6 = df6.assign(L_dl=all_dfs.iloc[30, 7])
        df6 = df6.assign(Analyte=all_dfs.iloc[29, 7])
        df6 = df6[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df1 = pd.concat([df2, df3, df4, df5, df6], axis=0)
        df1 = df1.assign(Laboratory="Gekko")
        df1 = df1.assign(DataSource=fn)
        df1 = df1[
            [
                "Method",
                "UOM",
                "L_dl",
                "Analyte",
                "Laboratory",
                "Sampno",
                "Result",
                "DataSource",
            ]
        ]
        df1 = df1.reset_index()
        del df1["index"]
        df1 = df1.dropna()
        df1.drop(df1.loc[df1["Result"] == "-"].index, inplace=True)
        df = df.append(df1, ignore_index=True)
        path = r"Output"
    df.to_csv(os.path.join(path, r"GekkoResults.csv"), index=False)


if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)
    print("Processed time", time.time() - start)
Reply
#2
At first blush, yes - instead of having df2 df3 df4 etc you should use a list of dataframes. Then you can reference as dfl[counter] in a loop and simplify many of your code blocks.
Reply
#3
@jefsummers Can you just give me brief example of how to do with my code?
Reply
#4
(Mar-22-2021, 12:05 PM)jefsummers Wrote: At first blush, yes - instead of having df2 df3 df4 etc you should use a list of dataframes. Then you can reference as dfl[counter] in a loop and simplify many of your code blocks.

Can you just give me brief example of how to do with my code?
Reply
#5
Short segment
        df_array = []
    for last in range(2,8) :
        df_array.append(all_dfs.iloc[32 : len(all_dfs), [1, last]])
        df_array[last-2].columns = ["Sampno", "Result"]
And then do similar for the blocks that set up df2 (which is now df_array[0]) etc.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  How to edit Tkinter Minimize, Maximize, and Close Buttons kucingkembar 7 182 Yesterday, 11:36 AM
Last Post: kucingkembar
  Minimize function with SciPy PierreLCV 3 336 Apr-05-2024, 07:51 AM
Last Post: paul18fr
  How to Minimize ADB window OomKoos 0 395 Dec-29-2023, 12:41 PM
Last Post: OomKoos

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020