I would then continue with my initial idea, which is roughly as follows:
- find whether splits are needed (the row spans more than one calendar day)
- if split needed:
- calculate midnights
- based on midnights calculate first and last day duration
- construct first and last day rows
- if split is larger than 1 (more than two days)
- the duration of all remaining days (i.e. neither the first nor the last) is 24 hours;
- iterate pairwise over midnights and construct rows
- insert each of these rows before the last row
- if split is not needed then return initial row
The following implementation of the idea above assumes that the input data is correct (the duration is not validated; all data fields are separated by a comma followed by a space, etc.). While writing this I noticed that the desired output from the OP contains an error.
This code should be refactored, but in this form it expresses my idea well enough.
from datetime import datetime, timedelta
from itertools import tee
# Sample rows: start timestamp, end timestamp, duration, then the remaining fields.
data = [
    '2020-11-02 16:53:14, 2020-11-03 08:41:47, 15:48:33, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A',
    '2020-11-04 17:53:14, 2020-11-06 09:41:47, 39:48:33, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A',
]
def parse_row(row):
    """Split a time-span row into one row per calendar day it touches.

    ``row`` is a string of the form ``'<start>, <end>, <duration>, <rest>'``
    whose fields are separated by ``', '`` and whose two timestamps use the
    ``%Y-%m-%d %H:%M:%S`` format. The incoming duration field is ignored and
    not validated.

    Returns a list of row strings:

    * if start and end fall on the same date, the list contains the original
      row unchanged;
    * otherwise the span is cut at every midnight between start and end: the
      first row runs from start to the first midnight, every full day in
      between gets a ``24:00:00`` row, and the last row runs from the final
      midnight to end. An end that is exactly midnight still produces a
      final row with duration ``00:00:00``.

    Raises ``ValueError`` if the row has fewer than four fields or a
    timestamp does not match the expected format.
    """
    start, end, _duration, rest = row.split(', ', maxsplit=3)
    formatter = '%Y-%m-%d %H:%M:%S'
    dt_start = datetime.strptime(start, formatter)
    dt_end = datetime.strptime(end, formatter)
    splits = (dt_end.date() - dt_start.date()).days
    if not splits:
        return [row]

    def format_duration(delta):
        # str(timedelta) renders a full day as '1 day, 0:00:00'; build
        # HH:MM:SS by hand so a start at exactly midnight gives '24:00:00'.
        hours, remainder = divmod(int(delta.total_seconds()), 3600)
        minutes, seconds = divmod(remainder, 60)
        return f'{hours:02}:{minutes:02}:{seconds:02}'

    # Use timedelta arithmetic rather than replace(day=...) so that spans
    # crossing a month or year boundary do not produce an invalid day number.
    day_start = dt_start.replace(hour=0, minute=0, second=0, microsecond=0)
    midnights = [day_start + timedelta(days=i + 1) for i in range(splits)]

    first_midnight = midnights[0]
    rows = [', '.join([start, str(first_midnight),
                       format_duration(first_midnight - dt_start), rest])]
    # Every pair of consecutive midnights is one complete day.
    rows.extend(
        ', '.join([str(day_begin), str(day_end), '24:00:00', rest])
        for day_begin, day_end in zip(midnights, midnights[1:])
    )
    rows.append(', '.join([str(midnights[-1]), end, str(dt_end.time()), rest]))
    return rows
# Usage: print every resulting row of every input row on its own line.
for record in data:
    print('\n'.join(parse_row(record)))
2020-11-02 16:53:14, 2020-11-03 00:00:00, 07:06:46, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A
2020-11-03 00:00:00, 2020-11-03 08:41:47, 08:41:47, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A
2020-11-04 17:53:14, 2020-11-05 00:00:00, 06:06:46, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A
2020-11-05 00:00:00, 2020-11-06 00:00:00, 24:00:00, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A
2020-11-06 00:00:00, 2020-11-06 09:41:47, 09:41:47, JP04TNNV0015EN, Bell_13, TT_Dev, TG LK A