Python Forum

Full Version: Portfolio Optimization code error i want to cry
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
I have been coding this bad boy for a long fucking time, started python recently more finance related stuff but i cannot get this MF to work like i spent so much time and i just do not even know anymore

the file is attached but imma write it here anyway, first time doingthis also i dunno just to be safe :
it runs fine until the plotting part where it says:
KeyError Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3653, in Index.get_loc(self, key)
3652 try:
-> 3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:

File ~\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:147, in pandas._libs.index.IndexEngine.get_loc()

File ~\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:155, in pandas._libs.index.IndexEngine.get_loc()

File pandas\_libs\index_class_helper.pxi:70, in pandas._libs.index.Int64Engine._check_type()

KeyError: 'Volatility'

The above exception was the direct cause of the following exception:

KeyError Traceback (most recent call last)
Cell In[44], line 2
1 portfolios = pd.DataFrame(pfolio_returns, pfolio_volatility)
----> 2 portfolios.plot(x="Volatility", y="Returns", kind="scatter", figsize=(10,6))

File ~\anaconda3\Lib\site-packages\pandas\plotting\_core.py:920, in PlotAccessor.__call__(self, *args, **kwargs)
918 if kind in self._dataframe_kinds:
919 if isinstance(data, ABCDataFrame):
--> 920 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
921 else:
922 raise ValueError(f"plot kind {kind} can only be used for data frames")

File ~\anaconda3\Lib\site-packages\pandas\plotting\_matplotlib\__init__.py:71, in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result

File ~\anaconda3\Lib\site-packages\pandas\plotting\_matplotlib\core.py:448, in MPLPlot.generate(self)
446 self._compute_plot_data()
447 self._setup_subplots()
--> 448 self._make_plot()
449 self._add_table()
450 self._make_legend()

File ~\anaconda3\Lib\site-packages\pandas\plotting\_matplotlib\core.py:1259, in ScatterPlot._make_plot(self)
1256 else:
1257 label = None
1258 scatter = ax.scatter(
-> 1259 data[x].values,
1260 data[y].values,
1261 c=c_values,
1262 label=label,
1263 cmap=cmap,
1264 norm=norm,
1265 **self.kwds,
1266 )
1267 if cb:
1268 cbar_label = c if c_is_column else ""

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:3761, in DataFrame.__getitem__(self, key)
3759 if self.columns.nlevels > 1:
3760 return self._getitem_multilevel(key)
-> 3761 indexer = self.columns.get_loc(key)
3762 if is_integer(indexer):
3763 indexer = [indexer]

File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3655, in Index.get_loc(self, key)
3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:
-> 3655 raise KeyError(key) from err
3656 except TypeError:
3657 # If we have a listlike key, _check_indexing_error will raise
3658 # InvalidIndexError. Otherwise we fall through and re-raise
3659 # the TypeError.
3660 self._check_indexing_error(key)

KeyError: 'Volatility'


import numpy as np
import pandas as pd
from pandas_datareader import data as wb
import yfinance as yf
import matplotlib.pyplot as plt

yf.pdr_override()

# Download and calculate market data
Market_data = wb.get_data_yahoo("ITOT", start="2012-1-1")["Adj Close"]
Market_return = np.log(Market_data / Market_data.shift(1))
Market_var = Market_return.var() * 250

# Define asset lists
ESIT = ["ESIT.L", "ITOT"]
BRK_B = ["BRK-B", "ITOT"]
CIBR = ["CIBR", "ITOT"]
NEER = ["NEE", "ITOT"]
COLOP = ["CBHD.F", "ITOT"]

# Download asset data in separate DataFrames
data_ESIT = pd.DataFrame()
for a in ESIT:
    data_ESIT[a] = wb.get_data_yahoo(a, start="2012-1-1")["Adj Close"]

data_BRK_B = pd.DataFrame()
for t in BRK_B:
    data_BRK_B[t] = wb.get_data_yahoo(t, start="2012-1-1")["Adj Close"]

data_CIBR = pd.DataFrame()
for t in CIBR:
    data_CIBR[t] = wb.get_data_yahoo(t, start="2012-1-1")["Adj Close"]

data_NEER = pd.DataFrame()
for t in NEER:
    data_NEER[t] = wb.get_data_yahoo(t, start="2012-1-1")["Adj Close"]

data_COLOP = pd.DataFrame()
for t in COLOP:
    data_COLOP[t] = wb.get_data_yahoo(t, start="2012-1-1")["Adj Close"]

# Calculate past returns for each asset
ESIT_past_returns = np.log(data_ESIT / data_ESIT.shift(1))
BRK_B_past_returns = np.log(data_BRK_B / data_BRK_B.shift(1))
CIBR_past_returns = np.log(data_CIBR / data_CIBR.shift(1))
NEER_past_returns = np.log(data_NEER / data_NEER.shift(1))
COLOP_past_returns = np.log(data_COLOP / data_COLOP.shift(1))

# Calculate covariance matrices and betas
ESIT_cov_matrix_a = ESIT_past_returns.cov() * 250
ESIT_cov_with_market = ESIT_cov_matrix_a.iloc[0, 1]
ESIT_BETA = ESIT_cov_with_market / Market_var

BRK_B_cov_matrix_a = BRK_B_past_returns.cov() * 250
BRK_B_cov_with_market = BRK_B_cov_matrix_a.iloc[0, 1]
BRK_B_BETA = BRK_B_cov_with_market / Market_var

CIBR_cov_matrix_a = CIBR_past_returns.cov() * 250
CIBR_cov_with_market = CIBR_cov_matrix_a.iloc[0, 1]
CIBR_BETA = CIBR_cov_with_market / Market_var

NEER_cov_matrix_a = NEER_past_returns.cov() * 250
NEER_cov_with_market = NEER_cov_matrix_a.iloc[0, 1]
NEER_BETA = NEER_cov_with_market / Market_var

COLOP_cov_matrix_a = COLOP_past_returns.cov() * 250
COLOP_cov_with_market = COLOP_cov_matrix_a.iloc[0, 1]
COLOP_BETA = COLOP_cov_with_market / Market_var

# Calculate CAPMs
ESIT_CAPM = 0.0426 + ESIT_BETA * (0.0129)
BRK_B_CAPM = 0.0426 + BRK_B_BETA * (0.0460)
CIBR_CAPM = 0.0426 + CIBR_BETA * (0.0460)
NEER_CAPM = 0.0426 + NEER_BETA * (0.0460)
COLOP_CAPM = 0.0426 + COLOP_BETA * (0.0129)

# List CAPMs and Assets
CAPMs = [ESIT_CAPM, BRK_B_CAPM, CIBR_CAPM, NEER_CAPM, COLOP_CAPM]
Assets = ["ESIT.L", "BRK-B", "CIBR", "NEE", "CBHD.F"]
Assets_Returns = CAPMs

# Download data for correlation matrix
data_for_corr = pd.DataFrame()
for t in Assets:
    data_for_corr[t] = wb.get_data_yahoo(t, start="2012-1-1")["Adj Close"]

log_returns = np.log(data_for_corr / data_for_corr.shift(1))
print(log_returns.corr())  # Calculate and display correlation matrix

# Simulate efficient frontier portfolios
pfolio_returns = []
pfolio_volatility = []

for x in range(1000):
    weights = np.random.random(5)
    weights /= np.sum(weights)
    pfolio_returns.append(np.sum(weights * Assets_Returns))
    pfolio_volatility.append(np.sqrt(np.dot(weights.T, np.dot(weights * Assets_Returns, weights))))

pfolio_returns = np.array(pfolio_returns)
pfolio_volatility = np.array(pfolio_volatility)

# Create DataFrame and plot
portfolios = pd.DataFrame({"Returns": pfolio_returns, "Volatility": pfolio_volatility})
portfolios.plot(x="Volatility", y="Returns", kind="scatter", figsize=(10,6))
plt.show()  # Add plt.show() to display the plot
What should portfolios = pd.DataFrame(pfolio_ returns, pfolio_volatility) do? The second argument is wrong,
(Mar-10-2024, 06:32 PM)deanhystad Wrote: [ -> ]What should portfolios = pd.DataFrame(pfolio_ returns, pfolio_volatility) do? The second argument is wrong,
it should be: portfolios = pd.DataFrame({"Returns":pfolio_returns, "Volatility":pfolio_volatility})

now im getting:

ValueError Traceback (most recent call last)
Cell In[64], line 1
----> 1 portfolios = pd.DataFrame({"Returns":pfolio_returns, "Volatility":pfolio_volatility})
2 print(portfolios)
3 portfolios.plot(x="Volatility", y="Returns", kind="scatter", figsize=(10,6))

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:709, in DataFrame.__init__(self, data, index, columns, dtype, copy)
703 mgr = self._init_mgr(
704 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
705 )
707 elif isinstance(data, dict):
708 # GH#38939 de facto copy defaults to False only in non-dict cases
--> 709 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
710 elif isinstance(data, ma.MaskedArray):
711 from numpy.ma import mrecords

File ~\anaconda3\Lib\site-packages\pandas\core\internals\construction.py:481, in dict_to_mgr(data, index, columns, dtype, typ, copy)
477 else:
478 # dtype check to exclude e.g. range objects, scalars
479 arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]
--> 481 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)

File ~\anaconda3\Lib\site-packages\pandas\core\internals\construction.py:115, in arrays_to_mgr(arrays, columns, index, dtype, verify_integrity, typ, consolidate)
112 if verify_integrity:
113 # figure out the index, if necessary
114 if index is None:
--> 115 index = _extract_index(arrays)
116 else:
117 index = ensure_index(index)

File ~\anaconda3\Lib\site-packages\pandas\core\internals\construction.py:642, in _extract_index(data)
640 raw_lengths.append(len(val))
641 elif isinstance(val, np.ndarray) and val.ndim > 1:
--> 642 raise ValueError("Per-column arrays must each be 1-dimensional")
644 if not indexes and not raw_lengths:
645 raise ValueError("If using all scalar values, you must pass an index")

ValueError: Per-column arrays must each be 1-dimensional
This is the source of the error you are currently getting.
import numpy as np

Assets_Returns = [1] * 5
weights = np.random.random(5)
weights /= np.sum(weights)
print(np.sum(weights * Assets_Returns))  # np.sum(weights * Assets_Returns) is a float
print(np.sqrt(np.dot(weights.T, np.dot(weights * Assets_Returns, weights))))  # np.sqrt(np.dot(weights.T, np.dot(weights * Assets_Returns, weights))) is an array
Output:
1.0 [0.2383131 0.25652407 0.1334876 0.15391969 0.40296863]
pfolio_returns will be a 1D array and pfolio_volatility a 2D array. You cannot add use a 2D array as a column in a dataframe.

I don't understand this. Please explain.
pfolio_volatility.append(np.sqrt(np.dot(weights.T, np.dot(weights * Assets_Returns, weights))))