Python Forum
Scipy question - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Data Science (https://python-forum.io/forum-44.html)
+--- Thread: Scipy question (/thread-2966.html)



Scipy question - vstradag - Apr-21-2017

Hi there!
I have a problem with the following code. I try to take some data, calculate the parameters of de beta distribution and compare the theoretical distribution to that one which comes from the actual data (running a kolmogorov sminov test). the values that I obtain are: KstestResult(statistic=0.5, pvalue=0.0) but that has no sense. Someone who could help me. I will appreciate it a lot.
from scipy.optimize import fmin
from scipy.stats import beta
from scipy.special import gamma as gammaf
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from matplotlib import pyplot
import numpy
from scipy.stats import kstest #------------------KSTEST-----------#
from scipy.stats import norm

def betaNLL(param,*args):
    '''Negative log likelihood function for beta
        <param>: list for parameters to be fitted.
        <args>: 1-element array containing the sample data.
        
        Return <nll>: negative log-likelihood to be minimized.
        '''

    a,b=param
    data=args[0]
    pdf=beta.pdf(data,a,b,loc=0,scale=1)
    
    
    lg=numpy.log(pdf)
    #-----Replace -inf with 0s------
    
    mask = numpy.isfinite(lg)
    nll = -lg[mask].sum()
    return nll

numpy.seterr(divide='ignore', invalid='ignore')
#-------------------DATA FROM-------------------

data=numpy.loadtxt("D0035.txt")

#----------------NORMALIZE [0,1]----------------
data=(data-numpy.min(data))/(numpy.max(data)-numpy.min(data))

#----------------FITTING USING REAL DATA----------------
mean=numpy.mean(data)
var=numpy.var(data,ddof=1)
alpha1=mean**2*(1-mean)/var-mean
beta1=alpha1*(1-mean)/mean

#----------------FITTING USING beta.fit----------------
alpha3,beta3,xx,yy=beta.fit(data,loc=0,scale=1)
numpy.seterr(divide='ignore', invalid='ignore')
kolmogorov = kstest(data,'norm')
#-------------------------------------------------------#
print 'mean:',mean
print 'variance:' ,var
print 'kolmogorov smirnoff:',kolmogorov
print '\n# alpha,beta from moments:',alpha1,beta1
print '# alpha,beta from beta.fit:',alpha3,beta3

#-----------------------Plot-----------------------
plt.hist(data,bins=30,normed=True)
pyplot.yscale('log')
fitted=lambda x,a,b:gammaf(a+b)/gammaf(a)/gammaf(b)*x**(a-1)*(1-x)**(b-1) 
xx=numpy.linspace(0,max(data),len(data))
plt.plot(xx,fitted(xx,alpha1,beta1),'r')
plt.plot(xx,fitted(xx,alpha3,beta3),'y')

plt.show()



RE: Scipy question - sparkz_alot - Apr-22-2017

What output were you expecting?