Hi all
I am just learning Python. I am trying to visualize the correlation matrix of the S&P 500 stock data, but I am getting a ValueError: could not convert string to float: '2000-01-03'.
Here is the code:
I am just learning Python. I am trying to visualize the correlation matrix of the S&P 500 stock data, but I am getting a ValueError: could not convert string to float: '2000-01-03'.
Here is the code:
import csv
import datetime as dt
import os
import pickle

import bs4 as bs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from matplotlib import style
from pandas_datareader import data as pdr


def save_sp500_tickers():
    """Scrape the S&P 500 tickers from Wikipedia and pickle them.

    The ticker is taken from the first cell of every row (after the header
    row) of the table whose id is "constituents". Dots in a ticker are
    replaced with dashes.

    Returns:
        The list of ticker strings, which is also written to
        "sp500tickers.pickle" and printed.

    Raises:
        requests.HTTPError: if the Wikipedia request does not succeed.
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status() surfaces an HTTP error instead of parsing an error page.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, "html.parser")
    table = soup.find("table", {"id": "constituents"})
    tickers = [
        row.find_all("td")[0].text.strip().replace('.', '-')
        for row in table.find_all("tr")[1:]  # [1:] skips the header row
    ]
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers


def get_data_from_yahoo(reload_sp500=False):
    """Download daily data for every ticker into "stock_dfs/<ticker>.csv".

    Args:
        reload_sp500: when true, re-scrape the ticker list with
            save_sp500_tickers(); otherwise load it from
            "sp500tickers.pickle".

    Tickers whose CSV file already exists are not downloaded again.
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    os.makedirs('stock_dfs', exist_ok=True)

    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2016, 12, 31)
    for ticker in tickers:
        print(ticker)
        path = f'stock_dfs/{ticker}.csv'
        if not os.path.exists(path):
            data = yf.download(ticker, start=start, end=end)
            data.to_csv(path)
        else:
            print(f'Already have {ticker}')


def compile_data():
    """Join every per-ticker CSV into "sp500_joined_close.csv".

    For each ticker in "sp500tickers.pickle" the matching file in
    "stock_dfs" is read, indexed by date, and reduced to two columns:
    "<ticker>" (the adjusted close) and "<ticker>_Volume". The per-ticker
    frames are outer-joined on the date index.
    """
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        # Same path that get_data_from_yahoo() writes to.
        with open(f'stock_dfs/{ticker}.csv', 'r', newline='') as file:
            reader = csv.reader(file)
            next(reader)  # Skip the header row
            df = pd.DataFrame(
                list(reader),
                columns=['Date', 'Open', 'High', 'Low', 'Close',
                         'Adj Close', 'Volume'],
            )

        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)
        # csv.reader yields strings, so the prices must be converted.
        df[price_columns] = df[price_columns].astype(float)
        df['Volume'] = df['Volume'].astype(object)
        df.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
        df.rename(
            columns={'Adj Close': ticker, 'Volume': f'{ticker}_Volume'},
            inplace=True,
        )

        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)

    print(main_df.head())
    main_df.to_csv('sp500_joined_close.csv')


def visualize_data():
    """Plot a heatmap of the correlation matrix of the joined data.

    The first CSV column (the dates written by compile_data()) is loaded as
    the index. Reading it as an ordinary column leaves date strings such as
    '2000-01-03' in the frame, and DataFrame.corr() then fails with
    "could not convert string to float".

    The correlation is computed over every column of the CSV, i.e. both the
    "<ticker>" and the "<ticker>_Volume" columns.
    """
    df = pd.read_csv('sp500_joined_close.csv', index_col=0, parse_dates=True)
    df_corr = df.corr()
    print(df_corr.head())

    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)

    # Put each tick in the middle of its cell.
    ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    column_labels = df_corr.columns
    row_labels = df_corr.index
    ax.set_xticklabels(column_labels, rotation=90)
    ax.set_yticklabels(row_labels)

    plt.tight_layout()
    plt.show()


save_sp500_tickers()
get_data_from_yahoo()
compile_data()
visualize_data()

# Thank you for your help!
Larz60+ write Dec-13-2023, 02:23 AM:
Please post all code, output and errors (in its entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future projects.
Please post all code, output and errors (in its entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future projects.