The example below should clarify what happens when we try to calculate the mean value
of data drawn from a lognormal distribution:
# Demo: how the mean of lognormally distributed data relates to the mean of
# its logarithm and to the parameters returned by scipy's lognormal fit.
#
# Only `np` was used from the former `from pylab import *`, so numpy is
# imported explicitly instead of star-importing matplotlib's pylab namespace.
import numpy as np
from scipy import stats as st

# Sample data as a flat 1-D float array.
x = np.array([
    19.8815, 19.0141, 18.1857, 17.3943, 16.6382, 15.9158, 15.2254,
    14.5657, 13.9352, 13.3325, 12.7564, 12.2056, 11.679, 11.1755,
    10.6941, 10.2338, 9.79353, 9.37249, 8.96979, 8.58462, 8.21619,
    7.86376, 7.52662, 7.20409, 6.89552, 6.6003,
    6.31784, 6.04757, 5.78897, 5.54151, 5.30472, 5.07812, 4.86127,
    4.65375, 4.45514, 4.26506, 4.08314, 3.90903, 3.74238,
    3.58288, 3.4302, 3.28407, 3.14419, 3.01029, 2.88212, 2.75943,
    2.64198, 2.52955, 2.42192, 2.31889, 2.22026, 2.12583,
    2.03543, 1.94889, 1.86604, 1.78671, 1.71077, 1.63807, 1.56845,
    1.50181, 1.43801, 1.37691, 1.31842, 1.26242, 1.2088,
    1.15746, 1.10832, 1.06126, 1.01619,
])

# Testing for lognormality:
# the data is lognormal if np.log(data) is normal.
# shapiro() returns (statistic, p-value); only the p-value is needed.
pvalx = st.shapiro(np.log(x))[-1]
print("p-value for `accepting` lognormality of x-data = ", pvalx)
print("Ok: the array come from lognormal distribution" if pvalx > 0.01
      else "Hm... the array isn't lognormal")

print('Raw mean value of x-data is: ', np.mean(x))

# Note: median and mean values could significantly differ in case of
# lognormal distribution.
log_mean = np.mean(np.log(x))   # mean of the related normal distribution
mapped_mean = np.exp(log_mean)  # that mean mapped back to the data scale
print('Mean value of related normal distribution: ', log_mean)
print('Mapped mean value: ', mapped_mean)

# floc=0 pins the location parameter, giving the usual two-parameter
# lognormal; in scipy's parametrization scale == exp(mu) and s == sigma.
s, loc, scale = st.lognorm.fit(x, floc=0)
estimated_mu = np.log(scale)
print("Estimated mu is almost equal to mapped mean value (above): ",
      abs(np.exp(estimated_mu) - mapped_mean))