you are closing the dat file after you write to it. If you open it another program like Notepad while trying to write to it I expect it to error. If you open it in Notepad++, you can write to it, but it will not refresh unless you switch to different tab and then back again to original tab. As far as I know it depends how the program opens the file.
Otherwise this will resolve the overwriting
import pandas as pd
from datetime import datetime
import time
def read_data(dat_file, last_update):
df = pd.read_table(dat_file, delimiter=',', parse_dates=[1,], low_memory=False, skiprows=1)
df.drop("RECNBR", axis=1, inplace=True)
df["TMSTAMP"] = pd.to_datetime(df["TMSTAMP"])
cols = df.columns.drop(labels=["TMSTAMP"])
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
return df[df['TMSTAMP'] > last_update]
def write_xlsx(xlsx_file, df):
df.to_excel(xlsx_file, index=False)
dat_file = 'foo.dat'
xlsx_file = 'foo.xlsx'
try:
df = pd.read_excel(xlsx_file, engine='openpyxl', parse_dates=[1,])
last_update = df.TMSTAMP.iat[-1]
except FileNotFoundError:
df = pd.DataFrame(columns=["TMSTAMP", "WS_kph_S_WVT","WindDir_D1_WVT","WindDir_SD1_WVT","WS_kph_Max",
"AirTC_Avg","AirTC_Max","AirTC_Min","RH_Avg","RH_Max","RH_Min","BP_mbar_Avg","BP_mbar_Max",
"BP_mbar_Min","Rain_mm_Tot","Rain_mm_Intensity_1_min","Rain_mm_Max_Intensity_1_min",
"Rain_mm_Min_Intensity_1_min","Tot_Rain_mm_12_sec_Max","Tot_Rain_mm_12_sec_Avg",
"Lufft_R2S_Mode","Mode(1)","Mode(2)","Mode(3)","Mode(4)","Mode(5)","Mode(6)","SlrW_Avg",
"SlrW_Max","TdC_Avg","TdC_Max","TdC_Min","TwC_Avg","TwC_Max","TwC_Min","HI_C_Avg","HI_C_Max",
"SVPWPa_Avg","SVPWPa_Max","SVPWPa_Min","SunHrs_Tot","PrecipitationHrs_Tot","PotSlrW_Avg",
"PotSlrW_Max","PotSlrW_Min","WC_C_Avg","WC_C_Min"])
last_update = datetime.strptime('2020-01-01 21:11:00', '%Y-%m-%d %H:%M:%S') # change to some date in the past
while True:
new_data = read_data(dat_file=dat_file, last_update=last_update)
num_rows = new_data.shape[0] # get number of rows in dataframe
if num_rows:
print(f'Appending {num_rows} records to {xlsx_file}')
df = df.append(new_data, ignore_index=True)
print(df)
write_xlsx(xlsx_file=xlsx_file, df=df)
last_update = new_data.TMSTAMP.iat[-1]
else:
print('No new data id dat file.')
time.sleep(60) # wait 1 minute
Of course if it runs long enough it will hit Memory limit