I played with your code. It is not exactly what you are expecting, but here you may find some tricks to help you.
If you can estimate the size of the initial array, then you can drastically speed up the code by preallocating it instead of appending or concatenating.
import numpy as np import re, time # Way 1: if you cannot estimate the size of the data array def GetData(Data, Line): Line = Line.replace('D', 'E') Variables = re.split(r"\s+", Line) # time = Variables[3] # delta_t = Variables[4] # mass = Variables[5] # radius = Variables[6] # lum_core = Variables[7] # lum_tot = Variables[8] # flux = Variables[9] # ratio = Variables[10] Array = np.empty(8) Array=[Variables[i] for i in range(3, 11)] Data = np.vstack((Data, Array)) return Data # a single line is used here instead of a complete text file Extract = "hydro output: 1 1.05200D+09 1.05200D+09 9.94376D+31 3.66754D+10 7.52265D+31 7.52265D+31 4.99722D-235 0.0499938" t0=time.time() # Data array is initialized Data=np.empty(8, dtype=float) # A n lines text file is simulated using a loop n=10_000 for i in range(n): if "HYDRO" in Extract.upper(): Data=GetData(Data, Extract) t1=time.time() # now the first first empty line is removed Data=np.delete(Data, 0, axis=0) # the array is composed of string so far, it's converted into float in a single step (faster than converting numbers one by one) Data=Data.astype(float) # remember : # time = column 0 # delta_t = column 1 # mass = column 2 # radius = column 3 # lum_core = column 4 # lum_tot = column 5 # flux = column 6 # ratio = column 7 # if you want all radius data for example: Radius=Data[:, 3] t2=time.time() print(f"Duration reading lines={t1-t0}") print(f"Duration converting data={t2-t1}") ## way 2: if you can estimate the size of the data array (can be the max number of lines?) n=10_000 Data2 = np.empty((n,8)) for i in range(n): if "HYDRO" in Extract.upper(): Extract = Extract.replace('D', 'E') Variables = re.split(r"\s+", Extract) Data2[i, :]=[Variables[j] for j in range(3, 11)] Data2=Data2.astype(float) flux=Data[:, 6] t3=time.time() print(f"Duration way2={t3-t2}") MaxDifference=np.max(np.absolute(Data-Data2)) print(f"Max difference={MaxDifference}")