Hi everyone,
I am new to Python, and just completed some codes as below that take forever to run. It seemed working if I only run the first 10 rows in the main DataFrame, I hope someone can assist me in improving the codes, so that it can run much faster. Many thanks
I am new to Python, and just completed some codes as below that take forever to run. It seemed working if I only run the first 10 rows in the main DataFrame, I hope someone can assist me in improving the codes, so that it can run much faster. Many thanks
from datetime import timedelta import numpy as np import pandas as pd # Get data from the given excel file filepath = r'C:\Users\User\Desktop\Power BI\All.xlsx' df_CoC = pd.read_excel(filepath, usecols=['ID, 'Is there time loss?', 'MC Capacity Date From', 'MC Capacity Date To'], sheet_name='CoC') #18000 rows df_WRS = pd.read_excel(filepath, usecols=['ID', 'Date From', 'Date To'], sheet_name='WRS') #22000 rows df_Open = pd.read_excel(filepath, sheet_name='Open') #2500 rows Count_Open = df_Open['ID'].count() df_Open['CoC_MissingWRS'] = None df_Open['WRS_MissingCoC'] = None ####### Fill new 'CoC_MissingWRS' column in open query ######## for i in range(0, Count_Open): if i > 90: break imn = df_Open.loc[i, 'Injury Management: Injury Management Number'] #Fill WRS date array arrWRSDates = np.array([]) filtered_df = df_WRS[df_WRS['ID'] == imn] if not filtered_df.empty: Count_WRS = filtered_df['ID'].count()-1 for w in range(0, Count_WRS): #Date_From is always available Date_From = filtered_df.iloc[w]['Date From'].date() if filtered_df.iloc[w]['Date To'] is None: Date_To = Date_From else: Date_To = filtered_df.iloc[w]['Date To'].date() if Date_To < Date_From: Date_To = Date_From adate = Date_From while adate <= Date_To: if adate not in arrWRSDates: arrWRSDates = np.append(arrWRSDates, adate) adate += timedelta(days=1) # Fill Timeloss & all CoC date array arrTLCoCDates = np.array([]) arrAllCoCDates = np.array([]) filtered_df = df_CoC[df_CoC['ID'] == imn] if not filtered_df.empty: Count_CoC= filtered_df['ID'].count() - 1 for c in range(0, Count_CoC): Date_From = filtered_df.iloc[c]['MC Capacity Date From'].date() if filtered_df.iloc[c]['MC Capacity Date To'] is None: Date_To = Date_From else: Date_To = filtered_df.iloc[c]['MC Capacity Date To'].date() if Date_To < Date_From or Date_To is None: Date_To = Date_From adate = Date_From while adate <= Date_To: if df_CoC.iloc[c]['Is there time loss?'] == 'Yes': if adate not in arrTLCoCDates: arrTLCoCDates = np.append(arrTLCoCDates, adate) else: if adate not in arrAllCoCDates: arrAllCoCDatesCoCDates = np.append(arrAllCoCDates, adate) adate += timedelta(days=1) ##### Check if CoC missing WRS ##### bMissingWRS = 'No' if len(arrTLCoCDates) > 0: adate = min(arrTLCoCDates) while adate <= max(arrTLCoCDates) and bMissingWRS == 'No': if adate not in arrWRSDates and adate.weekday() < 5: bMissingWRS = 'Yes' df_Open.loc[i, 'CoC_MissingWRS'] = bMissingWRS ##### Check if WRS missing CoC ##### bMissingCoC = 'No' if len(arrWRSDates) > 0: adate = min(arrWRSDates) while adate <= max(arrWRSDates) and bMissingCoC == 'No': if adate not in arrAllCoCDates and adate.weekday() < 5: bMissingCoC = 'Yes' df_Open.loc[i, 'WRS_MissingCoC'] = bMissingCoC