Python Forum

Hello Python world! Just a heads-up: I'm new to Python. Also, most of the labels and names are in Spanish, as it's the language we speak and the one in which we have to deliver the data.

I'm doing a task for my college subject "Probability And Statistics"

I have to work with a double Weibull distribution, plot a few things, and also do some calculations.

So I have to generate samples of size n = 100, 1000, 10000 and 100000 from the double Weibull distribution, using the parameters c = 3, loc = 25 and scale = 24.

I have to make a boxplot of the 4 samples and analyse whether there are any rare values (outliers). This part worked great: I get the boxplot in the output, but I can't get it to plot the rest of the things I ask for. Any ideas why it is not working?

Thanks in advance

import numpy as np
import matplotlib.pyplot as plot
from scipy.stats import weibull_min

# Parámetros para la distribución Weibull
c = 3
loc = 25
scale = 24

# Función para calcular la esperanza teórica 
def esperanza(c, loc, scale):
    """Return the theoretical mean of a Weibull(c, loc, scale) distribution.

    This is the distribution produced by ``np.random.weibull(c) * scale + loc``:
    a standard Weibull variable with shape ``c``, stretched by ``scale`` and
    shifted by ``loc``. Its mean is ``loc + scale * Gamma(1 + 1/c)``.

    Args:
        c: Shape parameter (must be positive).
        loc: Location (shift) parameter.
        scale: Scale parameter.

    Returns:
        The theoretical expected value as a float.
    """
    from math import gamma  # local import keeps this function self-contained

    return loc + scale * gamma(1 + 1 / c)

# Función para calcular la varianza teórica 
def varianza(c, loc, scale):
    """Return the theoretical variance of a Weibull(c, loc, scale) distribution.

    For a standard Weibull variable with shape ``c``, stretched by ``scale``
    and shifted by ``loc``, the variance is
    ``scale**2 * (Gamma(1 + 2/c) - Gamma(1 + 1/c)**2)``.

    Args:
        c: Shape parameter (must be positive).
        loc: Location (shift) parameter. A shift does not change the variance;
            the parameter is kept so the signature mirrors ``esperanza``.
        scale: Scale parameter.

    Returns:
        The theoretical variance as a float.
    """
    from math import gamma  # local import keeps this function self-contained

    return scale ** 2 * (gamma(1 + 2 / c) - gamma(1 + 1 / c) ** 2)


# Draw one shifted and scaled Weibull sample per sample size, keyed by that size.
muestras = {
    n: loc + scale * np.random.weibull(c, size=n)
    for n in (100, 1000, 10000, 100000)
}

# Empirical mean and variance of every sample, keyed by sample size.
# np.var is called with its default ddof=0.
valores_empiricos = {
    n: {'Esperanza': np.mean(muestra), 'Varianza': np.var(muestra)}
    for n, muestra in muestras.items()
}

# Theoretical mean and variance for the chosen parameters.
esperanza_teorica, varianza_teorica = esperanza(c, loc, scale), varianza(c, loc, scale)

# Print the comparison table: a four-line header, then one row per sample size.
print(
    "Valores Empíricos vs. Valores Teóricos:",
    "……………………………………………………………",
    "Tamaño de la muestra | Esperanza (empírica) | Varianza (empírica) | Esperanza (teórica) | Varianza (teórica)",
    "……………………………………………………………",
    sep="\n",
)
for n, valores in valores_empiricos.items():
    print(f"{n:<20} | {valores['Esperanza']:<20.4f} | {valores['Varianza']:<20.4f} | {esperanza_teorica:<20.4f} | {varianza_teorica:<20.4f}")

# Box plot comparing the four samples side by side.
plot.figure(figsize=(10, 6))
plot.boxplot(
    [muestras[n] for n in (100, 1000, 10000, 100000)],
    tick_labels=['n=100', 'n=1000', 'n=10000', 'n=100000'],
)
plot.title('Diagrama de cajas Distribución Weibull Doble')
plot.xlabel('Tamaño de la muestra')
plot.ylabel('Valores')
plot.grid(True)
# In a regular (non-interactive) run, show() blocks until the figure window
# is closed; the statements after it only execute once that happens.
plot.show()

# Outliers by Tukey's rule: values further than 1.5 * IQR from the quartiles.
for n in [100, 1000, 10000, 100000]:
    q1, q3 = np.percentile(muestras[n], [25, 75])
    iqr = q3 - q1  # interquartile range
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Count with a vectorised boolean mask instead of a Python-level loop
    # over every element of the array.
    n_atipicos = np.count_nonzero(
        (muestras[n] < lower_bound) | (muestras[n] > upper_bound)
    )
    if n_atipicos:
        print(f"Para n={n}, hay {n_atipicos} datos atípicos.")
    else:
        print(f"Para n={n}, no hay datos atípicos.")

# Plot the histogram and the theoretical density for n=100 (left panel).
plot.figure(figsize=(12, 5))
plot.subplot(1, 2, 1)
plot.hist(muestras[100], bins=30, density=True, alpha=0.6, color='blue', label='Histograma de Muestra (n=100)')
x = np.linspace(min(muestras[100]), max(muestras[100]), 1000)
# NOTE(review): `densidad` is not defined anywhere in this script, so this call
# raises NameError; a density function must be defined before this point.
plot.plot(x, densidad(x, c, loc, scale), 'r--', label='Densidad Teórica')
plot.title('Histograma vs Densidad Teórica (n=100)')
plot.xlabel('Valores')
plot.ylabel('Densidad')
plot.legend()

# Plot the histogram and the theoretical density for n=100000 (right panel).
plot.subplot(1, 2, 2)
plot.hist(muestras[100000], bins=30, density=True, alpha=0.6, color='green', label='Histograma de Muestra (n=100000)')
x = np.linspace(min(muestras[100000]), max(muestras[100000]), 1000)
plot.plot(x, densidad(x, c, loc, scale), 'r--', label='Densidad Teórica')
plot.title('Histograma y Densidad Teórica (n=100000)')
plot.xlabel('Valores')
plot.ylabel('Densidad')
plot.legend()
plot.tight_layout()
plot.show()

Is the problem that it draws this plot:

# Trazar el boxplot
plot.figure(figsize=(10, 6))
plot.boxplot([muestras[100], muestras[1000], muestras[10000], muestras[100000]], tick_labels=['n=100', 'n=1000', 'n=10000', 'n=100000'])
plot.title('Diagrama de cajas Distribución Weibull Doble')
plot.xlabel('Tamaño de la muestra')
plot.ylabel('Valores')
plot.grid(True)
plot.show()

But it doesn't show these plots?

# Trazar histograma y densidad teórica para n=100 
plot.figure(figsize=(12, 5)) 
plot.subplot(1, 2, 1) 
plot.hist(muestras[100], bins=30, density=True, alpha=0.6, color='blue', label='Histograma de Muestra (n=100)') 
x = np.linspace(min(muestras[100]), max(muestras[100]), 1000) 
plot.plot(x, densidad(x, c, loc, scale), 'r--', label='Densidad Teórica') 
plot.title('Histograma vs Densidad Teórica (n=100)') 
plot.xlabel('Valores') 
plot.ylabel('Densidad') 
plot.legend() 
 
# Trazar histograma y densidad teórica para n=100000 
plot.subplot(1, 2, 2) 
plot.hist(muestras[100000], bins=30, density=True, alpha=0.6, color='green', label='Histograma de Muestra (n=100000)') 
x = np.linspace(min(muestras[100000]), max(muestras[100000]), 1000) 
plot.plot(x, densidad(x, c, loc, scale), 'r--', label='Densidad Teórica')
plot.title('Histograma y Densidad Teórica (n=100000)') 
plot.xlabel('Valores') 
plot.ylabel('Densidad') 
plot.legend() 
plot.tight_layout() 
plot.show()

plot.show() is blocking. Your program is waiting for you to close the first plot() window before continuing.

Or is the problem that you get an error trying to plot the second plot? Running your code I get this error.

Error:Exception has occurred: NameError
name 'densidad' is not defined
  File "...test.py", line 77, in <module>
    plot.plot(x, densidad(x, c, loc, scale), "r--", label="Densidad Teórica")
NameError: name 'densidad' is not defined

It plots the first part (the boxplot). How can I make it deliver all the plots as separate images?

So, if I understand correctly, my problem is not that it isn't plotting; it just isn't showing the other plots because I didn't close the first one? If I close it, will it keep running and plot the others?

I will check the densidad name; I think I called it something else and then changed it.

I have defined the densidad function now, but I still need to understand how to close the output to see the next one. It looks like an image, but I can't figure out how to close it and keep the output window open.

import numpy as np
import matplotlib.pyplot as plot
from scipy.stats import weibull_min

# Parámetros para la distribución Weibull
c = 3
loc = 25
scale = 24

# Función para calcular la esperanza teórica
def esperanza(c, loc, scale):
    """Return the theoretical mean of a Weibull(c, loc, scale) distribution.

    This is the distribution produced by ``np.random.weibull(c) * scale + loc``:
    a standard Weibull variable with shape ``c``, stretched by ``scale`` and
    shifted by ``loc``. Its mean is ``loc + scale * Gamma(1 + 1/c)``.

    Args:
        c: Shape parameter (must be positive).
        loc: Location (shift) parameter.
        scale: Scale parameter.

    Returns:
        The theoretical expected value as a float.
    """
    from math import gamma  # local import keeps this function self-contained

    return loc + scale * gamma(1 + 1 / c)

# Función para calcular la varianza teórica
def varianza(c, loc, scale):
    """Return the theoretical variance of a Weibull(c, loc, scale) distribution.

    For a standard Weibull variable with shape ``c``, stretched by ``scale``
    and shifted by ``loc``, the variance is
    ``scale**2 * (Gamma(1 + 2/c) - Gamma(1 + 1/c)**2)``.

    Args:
        c: Shape parameter (must be positive).
        loc: Location (shift) parameter. A shift does not change the variance;
            the parameter is kept so the signature mirrors ``esperanza``.
        scale: Scale parameter.

    Returns:
        The theoretical variance as a float.
    """
    from math import gamma  # local import keeps this function self-contained

    return scale ** 2 * (gamma(1 + 2 / c) - gamma(1 + 1 / c) ** 2)

# Funcion Densidad
def densidad(x, c, loc, scale):
    """Return the Weibull(c, loc, scale) probability density evaluated at x.

    With ``z = (x - loc) / scale`` the density is
    ``(c / scale) * z**(c - 1) * exp(-z**c)`` for ``z >= 0`` and ``0`` for
    ``z < 0`` (the distribution has no mass to the left of ``loc``).

    Args:
        x: Point or array of points at which to evaluate the density.
        c: Shape parameter (must be positive).
        loc: Location (shift) parameter.
        scale: Scale parameter (must be positive).

    Returns:
        A NumPy array of density values with the same shape as ``x``.
    """
    z = (np.asarray(x, dtype=float) - loc) / scale
    # Evaluate the formula on max(z, 0) so points left of `loc` never raise a
    # negative base to a power; those points are set to zero below.
    z_pos = np.maximum(z, 0.0)
    pdf = (c / scale) * z_pos ** (c - 1) * np.exp(-z_pos ** c)
    return np.where(z >= 0, pdf, 0.0)

# Draw one sample per sample size: a standard Weibull draw with shape c,
# stretched by scale and shifted by loc. (Loop body indentation restored.)
muestras = {}
for n in [100, 1000, 10000, 100000]:
    muestras[n] = np.random.weibull(c, size=n) * scale + loc

# Empirical mean and variance of every sample, keyed by sample size.
# np.var is called with its default ddof=0. (Loop body indentation restored.)
valores_empiricos = {}
for n, muestra in muestras.items():
    esperanza_empirica = np.mean(muestra)
    varianza_empirica = np.var(muestra)
    valores_empiricos[n] = {'Esperanza': esperanza_empirica, 'Varianza': varianza_empirica}

# Theoretical mean and variance for the chosen parameters.
esperanza_teorica = esperanza(c, loc, scale)
varianza_teorica = varianza(c, loc, scale)

# Print the comparison table: a header, then one row per sample size.
# (Loop body indentation restored.)
print("Valores Empíricos vs. Valores Teóricos:")
print("……………………………………………………………")
print("Tamaño de la muestra | Esperanza (empírica) | Varianza (empírica) | Esperanza (teórica) | Varianza (teórica)")
print("……………………………………………………………")
for n, valores in valores_empiricos.items():
    print(f"{n:<20} | {valores['Esperanza']:<20.4f} | {valores['Varianza']:<20.4f} | {esperanza_teorica:<20.4f} | {varianza_teorica:<20.4f}")

# Box plot comparing the four samples side by side.
plot.figure(figsize=(10, 6))
plot.boxplot(
    [muestras[n] for n in (100, 1000, 10000, 100000)],
    tick_labels=['n=100', 'n=1000', 'n=10000', 'n=100000'],
)
plot.title('Diagrama de cajas Distribución Weibull Doble')
plot.xlabel('Tamaño de la muestra')
plot.ylabel('Valores')
plot.grid(True)
# In a regular (non-interactive) run, show() blocks until the figure window
# is closed; the statements after it only execute once that happens.
plot.show()

# Outliers by Tukey's rule: values further than 1.5 * IQR from the quartiles.
# (Block indentation restored.)
for n in [100, 1000, 10000, 100000]:
    q1, q3 = np.percentile(muestras[n], [25, 75])
    iqr = q3 - q1  # interquartile range
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Count with a vectorised boolean mask instead of a Python-level loop
    # over every element of the array.
    n_atipicos = np.count_nonzero(
        (muestras[n] < lower_bound) | (muestras[n] > upper_bound)
    )
    if n_atipicos:
        print(f"Para n={n}, hay {n_atipicos} datos atípicos.")
    else:
        print(f"Para n={n}, no hay datos atípicos.")

# Histogram of the sample with the theoretical density overlaid, for n=100
# (left panel) and n=100000 (right panel) of a single figure.
plot.figure(figsize=(12, 5))
paneles = [
    (1, 100, 'blue', 'Histograma de Muestra (n=100)', 'Histograma vs Densidad Teórica (n=100)'),
    (2, 100000, 'green', 'Histograma de Muestra (n=100000)', 'Histograma y Densidad Teórica (n=100000)'),
]
for posicion, n, color, etiqueta, titulo in paneles:
    plot.subplot(1, 2, posicion)
    plot.hist(muestras[n], bins=30, density=True, alpha=0.6, color=color, label=etiqueta)
    # Evaluate the density on a grid spanning the observed range of the sample.
    x = np.linspace(muestras[n].min(), muestras[n].max(), 1000)
    plot.plot(x, densidad(x, c, loc, scale), 'r--', label='Densidad Teórica')
    plot.title(titulo)
    plot.xlabel('Valores')
    plot.ylabel('Densidad')
    plot.legend()
plot.tight_layout()
plot.show()

Just an update: I worked everything out. Thank you very much for your help; your pointers worked great!

vermor

deanhystad

vermor

vermor

vermor