Can I minimize the code???

shantanu97 · Mar-22-2021, 11:48 AM

Can i minimize the line of the code? or can I automate some of the steps in my python code???

from pathlib import Path
import time
import parser
import argparse
import pandas as pd
import os
import warnings

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description="Process some integers.")

parser.add_argument("path", help="define the directory to folder/file")
parser.add_argument("--verbose", help="display processing information")


start = time.time()


def main(path_csv, verbose):
    if (".csv" in str(path_csv).lower()) and path_csv.is_file():
        csv_files = [Path(path_csv)]
    else:
        csv_files = list(Path(path_csv).glob("*.csv"))

    df = pd.DataFrame()
    for fn in csv_files:
        all_dfs = pd.read_csv(fn, header=None, index_col=None, encoding="ISO-8859-1")

        df1 = pd.DataFrame()
        df2 = all_dfs.iloc[32 : len(all_dfs), [1, 2]]
        df3 = all_dfs.iloc[32 : len(all_dfs), [1, 3]]
        df4 = all_dfs.iloc[32 : len(all_dfs), [1, 4]]
        df5 = all_dfs.iloc[32 : len(all_dfs), [1, 6]]
        df6 = all_dfs.iloc[32 : len(all_dfs), [1, 7]]

        df2.columns = ["Sampno", "Result"]
        df3.columns = ["Sampno", "Result"]
        df4.columns = ["Sampno", "Result"]
        df5.columns = ["Sampno", "Result"]
        df6.columns = ["Sampno", "Result"]

        df2 = df2.assign(Method=all_dfs.iloc[28, 2])
        df2 = df2.assign(UOM=all_dfs.iloc[31, 2])
        df2 = df2.assign(L_dl=all_dfs.iloc[30, 2])
        df2 = df2.assign(Analyte=all_dfs.iloc[29, 2])
        df2 = df2[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df3 = df3.assign(Method=all_dfs.iloc[28, 3])
        df3 = df3.assign(UOM=all_dfs.iloc[31, 3])
        df3 = df3.assign(L_dl=all_dfs.iloc[30, 3])
        df3 = df3.assign(Analyte=all_dfs.iloc[29, 3])
        df3 = df3[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df4 = df4.assign(Method=all_dfs.iloc[28, 4])
        df4 = df4.assign(UOM=all_dfs.iloc[31, 4])
        df4 = df4.assign(L_dl=all_dfs.iloc[30, 4])
        df4 = df4.assign(Analyte=all_dfs.iloc[29, 4])
        df4 = df4[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df5 = df5.assign(Method=all_dfs.iloc[28, 6])
        df5 = df5.assign(UOM=all_dfs.iloc[31, 6])
        df5 = df5.assign(L_dl=all_dfs.iloc[30, 6])
        df5 = df5.assign(Analyte=all_dfs.iloc[29, 6])
        df5 = df5[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df6 = df6.assign(Method=all_dfs.iloc[28, 7])
        df6 = df6.assign(UOM=all_dfs.iloc[31, 7])
        df6 = df6.assign(L_dl=all_dfs.iloc[30, 7])
        df6 = df6.assign(Analyte=all_dfs.iloc[29, 7])
        df6 = df6[["Method", "UOM", "L_dl", "Analyte", "Sampno", "Result"]]

        df1 = pd.concat([df2, df3, df4, df5, df6], axis=0)
        df1 = df1.assign(Laboratory="Gekko")
        df1 = df1.assign(DataSource=fn)
        df1 = df1[
            [
                "Method",
                "UOM",
                "L_dl",
                "Analyte",
                "Laboratory",
                "Sampno",
                "Result",
                "DataSource",
            ]
        ]
        df1 = df1.reset_index()
        del df1["index"]
        df1 = df1.dropna()
        df1.drop(df1.loc[df1["Result"] == "-"].index, inplace=True)
        df = df.append(df1, ignore_index=True)
        path = r"Output"
    df.to_csv(os.path.join(path, r"GekkoResults.csv"), index=False)


if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)
    print("Processed time", time.time() - start)

jefsummers · Mar-22-2021, 12:05 PM

At first blush, yes - instead of having df2 df3 df4 etc you should use a list of dataframes. Then you can reference as dfl[counter] in a loop and simplify many of your code blocks.

shantanu97 · (This post was last modified: Mar-23-2021, 02:21 AM by shantanu97.)

@jefsummers Can you just give me brief example of how to do with my code?

shantanu97 · Mar-23-2021, 02:22 AM

(Mar-22-2021, 12:05 PM)jefsummers Wrote: At first blush, yes - instead of having df2 df3 df4 etc you should use a list of dataframes. Then you can reference as dfl[counter] in a loop and simplify many of your code blocks.

Can you just give me brief example of how to do with my code?

jefsummers · Mar-23-2021, 05:26 PM

Short segment

        df_array = []
    for last in range(2,8) :
        df_array.append(all_dfs.iloc[32 : len(all_dfs), [1, last]])
        df_array[last-2].columns = ["Sampno", "Result"]

And then do similar for the blocks that set up df2 (which is now df_array[0]) etc.

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	How to edit Tkinter Minimize, Maximize, and Close Buttons	kucingkembar	7	8,078	Apr-26-2024, 11:36 AM Last Post: kucingkembar
	Minimize function with SciPy	PierreLCV	3	3,329	Apr-05-2024, 07:51 AM Last Post: paul18fr
	How to Minimize ADB window	OomKoos	0	976	Dec-29-2023, 12:41 PM Last Post: OomKoos

Can I minimize the code???

User Panel Messages

Announcements