I've simplified the code some more, hoping to make it more clear. This time I profiled both running over 20.000 values. Most of the time is spent in the simple_moving_average method of the dict version. Maybe a lot of dict look-ups are slow somehow, but why would it get exponentially slower? What can I do to narrow it down some more?
#!/usr/bin/env python3 class DataFrame: def __init__(self): self.data = {} def add_value(self, label, value): try: self.data[label].append(value) except KeyError: self.data[label] = [value] def simple_moving_average(self, src, dest, window): try: cumsum = self.data[dest+'_cumsum'][-1] + self.data[src][-1] except KeyError: cumsum = self.data[src][0] self.add_value(dest+'_cumsum', cumsum) len_cumsum = len(self.data[dest+'_cumsum'][:]) if len_cumsum <= window: sma = self.data[dest+'_cumsum'][-1] / len_cumsum else: sma = (self.data[dest+'_cumsum'][-1] \ -self.data[dest+'_cumsum'][-window - 1]) / window self.add_value(dest, sma) def read_line(fname): with open(fname) as f: for data in f: yield float(data) df = DataFrame() fname = 'data2.txt' for value in read_line(fname): df.add_value('data', value) df.simple_moving_average('data' , 'sma250', 250) df.simple_moving_average('sma250', 'sma500', 500)
$ python3 -m cProfile with_dict.py 300053 function calls in 14.282 seconds Ordered by: standard name ncalls tottime percall cumtime percall filename:lineno(function) 1 0.000 0.000 0.000 0.000 _bootlocale.py:23(getpreferredencoding) 1 0.000 0.000 0.000 0.000 codecs.py:259(__init__) 1 0.000 0.000 0.000 0.000 codecs.py:308(__init__) 23 0.000 0.000 0.000 0.000 codecs.py:318(decode) 40000 13.913 0.000 14.067 0.000 with_dict.py:13(simple_moving_average) 20001 0.108 0.000 0.108 0.000 with_dict.py:29(read_line) 1 0.091 0.091 14.282 14.282 with_dict.py:3(<module>) 1 0.000 0.000 0.000 0.000 with_dict.py:3(DataFrame) 1 0.000 0.000 0.000 0.000 with_dict.py:4(__init__) 100000 0.123 0.000 0.142 0.000 with_dict.py:7(add_value) 23 0.000 0.000 0.000 0.000 {built-in method _codecs.utf_8_decode} 1 0.000 0.000 0.000 0.000 {built-in method _locale.nl_langinfo} 1 0.000 0.000 0.000 0.000 {built-in method builtins.__build_class__} 1 0.000 0.000 14.282 14.282 {built-in method builtins.exec} 40000 0.028 0.000 0.028 0.000 {built-in method builtins.len} 1 0.000 0.000 0.000 0.000 {built-in method io.open} 99995 0.020 0.000 0.020 0.000 {method 'append' of 'list' objects} 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
#!/usr/bin/env python3 class DataFrame: def add_value(self, label, value): try: attr = getattr(self, label) attr.append(value) except AttributeError: setattr(self, label, [value]) def simple_moving_average(self, src, dest, window): try: src_attr = getattr(self, src) dest_attr = getattr(self, dest+'_cumsum') cumsum = src_attr[-1] + dest_attr[-1] except AttributeError: cumsum = src_attr[0] self.add_value(dest+'_cumsum', cumsum) dest_attr = getattr(self, dest+'_cumsum') len_cumsum = len(dest_attr) if len_cumsum <= window: sma = dest_attr[-1] / len_cumsum else: sma = (dest_attr[-1] - dest_attr[-window - 1]) / window self.add_value(dest, sma) def read_line(fname): with open(fname) as f: for data in f: yield float(data) df = DataFrame() fname = 'data2.txt' for value in read_line(fname): df.add_value('data', value) df.simple_moving_average('data' , 'sma250', 250) df.simple_moving_average('sma250', 'sma500', 500)
$ python3 -m cProfile with_attr.py 520057 function calls in 0.330 seconds Ordered by: standard name ncalls tottime percall cumtime percall filename:lineno(function) 1 0.000 0.000 0.000 0.000 _bootlocale.py:23(getpreferredencoding) 1 0.000 0.000 0.000 0.000 codecs.py:259(__init__) 1 0.000 0.000 0.000 0.000 codecs.py:308(__init__) 23 0.000 0.000 0.000 0.000 codecs.py:318(decode) 40000 0.126 0.000 0.257 0.000 with_attr.py:11(simple_moving_average) 20001 0.017 0.000 0.017 0.000 with_attr.py:29(read_line) 1 0.036 0.036 0.330 0.330 with_attr.py:3(<module>) 1 0.000 0.000 0.000 0.000 with_attr.py:3(DataFrame) 100000 0.063 0.000 0.104 0.000 with_attr.py:4(add_value) 23 0.000 0.000 0.000 0.000 {built-in method _codecs.utf_8_decode} 1 0.000 0.000 0.000 0.000 {built-in method _locale.nl_langinfo} 1 0.000 0.000 0.000 0.000 {built-in method builtins.__build_class__} 1 0.000 0.000 0.330 0.330 {built-in method builtins.exec} 220000 0.067 0.000 0.067 0.000 {built-in method builtins.getattr} 40000 0.007 0.000 0.007 0.000 {built-in method builtins.len} 5 0.000 0.000 0.000 0.000 {built-in method builtins.setattr} 1 0.000 0.000 0.000 0.000 {built-in method io.open} 99995 0.013 0.000 0.013 0.000 {method 'append' of 'list' objects} 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}