Python Forum
I am getting a valueError. And not sure why? My goal is to visualize the correlation
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
I am getting a valueError. And not sure why? My goal is to visualize the correlation
#1
Hi all

I am just learning python. I am trying to visualize the correlation matrix of the S&P 500 stock data but I am getting a ValueError: could not convert string to float: '2000-01-03'.

Here is the coding:

import os
import pandas as pd
import requests
import pickle
import yfinance as yf
from pandas_datareader import data as pdr
import bs4 as bs
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np

# Function to save the S&P 500 tickers from the Wikipedia page

def save_sp500_tickers():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    resp = requests.get(url)
    soup = bs.BeautifulSoup(resp.text, "html.parser")
    table = soup.find("table", {"id": "constituents"})
    tickers = []

    for row in table.find_all("tr")[1:]:
        ticker = row.find_all("td")[0].text.strip()
        ticker = str(ticker.replace('.', '-'))
        tickers.append(ticker)

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)

    print(tickers)
    return tickers

# Function to retrieve stock data from Yahoo Finance
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2016, 12, 31)

    for ticker in tickers:
        print(ticker)
        if not os.path.exists(f'stock_dfs/{ticker}.csv'):
            data = yf.download(ticker, start=start, end=end)
            data.to_csv(f'stock_dfs/{ticker}.csv')
        else:
            print(f'Already have {ticker}')

# Function to compile the stock data into a single DataFrame
import csv

def compile_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        with open(f'stock_dfs/{ticker.replace(".", "_")}.csv', 'r') as file:
            reader = csv.reader(file)
            next(reader)  # Skip the header row

            df = pd.DataFrame(reader, columns=['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
            df['Date'] = pd.to_datetime(df['Date'])
            df.set_index('Date', inplace=True)
            df[['Open', 'High', 'Low', 'Close', 'Adj Close']] = df[['Open', 'High', 'Low', 'Close', 'Adj Close']].astype(float)
            df['Volume'] = df['Volume'].astype(object)

            df.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
            df.rename(columns={'Adj Close': ticker, 'Volume': f'{ticker}_Volume'}, inplace=True)

            if main_df.empty:
                main_df = df
            else:
                main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)

    print(main_df.head())
    main_df.to_csv('sp500_joined_close.csv')

# Function to visualize the correlation matrix of the stock data
def visualize_data():
    df = pd.read_csv('sp500_joined_close.csv')
    df_corr = df.corr()
    print(df_corr.head())

    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)
    ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    column_labels = df_corr.columns
    row_labels = df_corr.index

    ax.set_xticklabels(column_labels, rotation=90)
    ax.set_yticklabels(row_labels)
    plt.tight_layout()
    plt.show()

save_sp500_tickers()
get_data_from_yahoo()
compile_data()
visualize_data()
Thank you for your helps!!!
Larz60+ write Dec-13-2023, 02:23 AM:
Please post all code, output and errors (it it's entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future projects.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Goal Seek tdutcher05 1 802 Nov-17-2023, 10:33 PM
Last Post: deanhystad
  I get an FileNotFouerror while try to open(file,"rt"). My goal is to replace str decoded 1 1,413 May-06-2022, 01:44 PM
Last Post: Larz60+
  Error in find pearson correlation function erneelgupta 1 1,875 Mar-01-2022, 03:41 PM
Last Post: stevendaprano
  How to increase the size of a png picture for the heatmap of the correlation? lulu43366 9 3,538 Oct-06-2021, 04:15 PM
Last Post: deanhystad
  How to remove a column or two columns in a correlation heatmap? lulu43366 3 5,243 Sep-30-2021, 03:47 PM
Last Post: lulu43366
  Python Networkx: Visualize an edge weight with a bubble/circle uvw 0 2,010 Sep-01-2021, 06:26 AM
Last Post: uvw
  Correlation thomaschu 0 1,552 Jan-29-2020, 05:45 PM
Last Post: thomaschu
  Computing correlation in audio files ryanblumenow 0 2,778 Jan-15-2020, 06:11 PM
Last Post: ryanblumenow
  Visualize Geo Map/Calculate distance zarize 1 1,900 Dec-05-2019, 08:36 PM
Last Post: Larz60+
  I am getting an Error, and not sure why? My goal is to print out rahulne22 7 3,078 Dec-01-2019, 03:53 PM
Last Post: snippsat

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020