Mar-22-2021, 11:48 AM
Can I reduce the number of lines in this code, or automate some of the repeated steps in my Python code?
# Flatten Gekko laboratory CSV exports into a single long-format results file.
#
# Each raw export is read without a header row.  Four fixed rows hold the
# per-column metadata (Method, Analyte, L_dl, UOM) and every row from
# FIRST_DATA_ROW onward holds one sample.  For each analyte column the script
# emits one output row per sample and writes everything to
# Output/GekkoResults.csv.
#
# NOTE: the former `import parser` was dropped on purpose.  The stdlib
# `parser` module was removed in Python 3.10, and the name was immediately
# rebound to the ArgumentParser below, so the import only caused failures.
import argparse
import os
import time
import warnings
from pathlib import Path

import pandas as pd

# NOTE(review): blanket suppression kept from the original script so console
# output stays unchanged; consider narrowing it to specific warning classes.
warnings.filterwarnings("ignore")

# Zero-based row positions of the metadata inside a raw export.
METHOD_ROW = 28
ANALYTE_ROW = 29
L_DL_ROW = 30
UOM_ROW = 31
# First row that holds sample data; everything from here to the end is read.
FIRST_DATA_ROW = 32
# Zero-based column holding the sample number.
SAMPLE_COL = 1
# Zero-based columns holding results (column 5 is skipped, as in the
# original script).
ANALYTE_COLS = (2, 3, 4, 6, 7)

OUTPUT_COLUMNS = [
    "Method",
    "UOM",
    "L_dl",
    "Analyte",
    "Laboratory",
    "Sampno",
    "Result",
    "DataSource",
]
OUTPUT_DIR = r"Output"
OUTPUT_FILE = r"GekkoResults.csv"

parser = argparse.ArgumentParser(
    description="Flatten Gekko CSV exports into Output/GekkoResults.csv."
)
parser.add_argument("path", help="define the directory to folder/file")
# nargs="?" keeps `--verbose VALUE` working (as before) and additionally
# allows a bare trailing `--verbose` flag.
parser.add_argument(
    "--verbose",
    nargs="?",
    const=True,
    default=None,
    help="display processing information",
)


def _extract_results(fn) -> pd.DataFrame:
    """Return the long-format result rows contained in one raw export.

    Args:
        fn: Path of the CSV export to read (ISO-8859-1 encoded, no header).

    Returns:
        A DataFrame with the columns in ``OUTPUT_COLUMNS``.  Rows containing
        any missing value, and rows whose ``Result`` is the literal ``"-"``,
        are left out.

    Raises:
        IndexError: If the file has too few rows or columns for the fixed
            metadata positions.
    """
    raw = pd.read_csv(fn, header=None, index_col=None, encoding="ISO-8859-1")

    per_analyte = []
    for col in ANALYTE_COLS:
        # Copy the slice so renaming its columns never touches `raw`.
        block = raw.iloc[FIRST_DATA_ROW:, [SAMPLE_COL, col]].copy()
        block.columns = ["Sampno", "Result"]
        per_analyte.append(
            block.assign(
                Method=raw.iloc[METHOD_ROW, col],
                UOM=raw.iloc[UOM_ROW, col],
                L_dl=raw.iloc[L_DL_ROW, col],
                Analyte=raw.iloc[ANALYTE_ROW, col],
            )
        )

    combined = pd.concat(per_analyte, axis=0, ignore_index=True)
    combined = combined.assign(Laboratory="Gekko", DataSource=fn)
    combined = combined[OUTPUT_COLUMNS].dropna()
    return combined[combined["Result"] != "-"]


def main(path_csv, verbose) -> None:
    """Convert one CSV export, or every ``*.csv`` in a folder, to long format.

    Args:
        path_csv: A single ``.csv`` file or a directory that is searched
            (non-recursively) for ``*.csv`` files.  ``str`` and ``Path`` are
            both accepted.
        verbose: When truthy, print each file name as it is processed.

    The combined rows are written to ``Output/GekkoResults.csv`` relative to
    the current working directory; the ``Output`` folder is created when it
    does not exist.  If no input file is found, a header-only CSV is written.
    """
    path_csv = Path(path_csv)
    if path_csv.is_file() and path_csv.suffix.lower() == ".csv":
        csv_files = [path_csv]
    else:
        # Sorted so the row order of the output is reproducible.
        csv_files = sorted(path_csv.glob("*.csv"))

    frames = []
    for fn in csv_files:
        if verbose:
            print("Processing", fn)
        frames.append(_extract_results(fn))

    # DataFrame.append no longer exists in pandas 2.x; collect the pieces and
    # concatenate once instead.
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=OUTPUT_COLUMNS)

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    df.to_csv(os.path.join(OUTPUT_DIR, OUTPUT_FILE), index=False)


if __name__ == "__main__":
    start = time.time()
    args = parser.parse_args()
    path = Path(args.path)
    verbose = args.verbose
    main(path, verbose)
    print("Processed time", time.time() - start)