Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
[split] Newbie needs help
#1
I have a similar problem. I am trying to fetch stock data using yfinance and pandas then create a matrix for several stock symbols and associated data.
The correct format I want should look like this:
Output:
Symbol Close High Low Open Volume MACD MACD Signal MACD Hist ADX RSI AAPL 243.85 249.1 241.82 248.93 55740700 -0.12 -0.1 -0.02 25.3 55.6 MSFT 418.58 426.07 414.85 425.53 16896500 -0.08 -0.07 -0.01 30.1 60.2 GOOGL 189.43 192 187.5 190.65 20370800 -0.05 -0.04 -0.01 28.7 58.9
However, the format (when data is saved to excel) created is the following with duplicated columns:
Output:
Symbol Close Close Close High High High Low Low Low Open Open Open Volume Volume Volume AAPL 232.6199951 235.2299957 228.1300049 228.1999969 53677600 GOOGL 85.3200073 186.9400024 184.2799988 185.0299988 21215300 MSFT 411.4400024 412.4899902 409.2999878 409.6400146 18122500
The Price data colums duplicate and with the populated data staggerd across the colums. I have posted s the python code below, any assistance would be gratefully appreciated!

--------------------------------
import talib
import pandas as pd
import yfinance as yf
from datetime import datetime
import os

# Function to fetch stock data
def fetch_stock_data(symbol, start_date, end_date):
    """
    Fetches historical stock data for the given symbol and date range.
    """
    try:
        data = yf.download(symbol, start=start_date, end=end_date)
        
        # Check if data is empty
        if data.empty:
            raise ValueError(f"No data found for symbol: {symbol}")
        
        # Add a column for the stock symbol
        data['Symbol'] = symbol
        
        return data
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

# Function to calculate indicators
def calculate_indicators(data):
    """
    Calculates technical indicators using TA-Lib.
    """
    try:
        # Convert Pandas Series to 1-dimensional NumPy arrays
        close_prices = data['Close'].values.flatten()  # Ensure 1D array
        high_prices = data['High'].values.flatten()    # Ensure 1D array
        low_prices = data['Low'].values.flatten()      # Ensure 1D array

        # Calculate RSI (Relative Strength Index) with a 7-day period
        data['RSI'] = talib.RSI(close_prices, timeperiod=7)

        # Calculate daily MACD (Moving Average Convergence Divergence)
        data['MACD'], data['MACD Signal'], data['MACD Hist'] = talib.MACD(
            close_prices, fastperiod=12, slowperiod=26, signalperiod=9
        )

        # Calculate ADX (Average Directional Index)
        data['ADX'] = talib.ADX(high_prices, low_prices, close_prices, timeperiod=14)

        return data
    except Exception as e:
        print(f"Error calculating indicators for {data['Symbol'].iloc[0]}: {e}")
        return None

[b]# Function to save data to Excel
def save_to_excel(data_dict, filename=r"C:\Users\pmsch\OneDrive\Desktop\stock_data.xlsx"):
    """
    Saves the stock data to an Excel file, with each date in a separate worksheet.
    """
    try:
        # Create the directory if it doesn't exist
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        
        # Save the Excel file
        with pd.ExcelWriter(filename, engine="openpyxl", mode="w") as writer:
            for date, data in data_dict.items():
                # Use the date as the worksheet title
                sheet_name = date.strftime("%Y-%m-%d")
                
                # Create a new DataFrame with the desired layout
                formatted_data = pd.DataFrame()

                # Add tickers and their data to the DataFrame
                for symbol, group in data.groupby('Symbol'):
                    # Extract the row for the current ticker
                    row = group.iloc[0]  # Assuming one row per ticker per date
                    
                    # Select only the required columns
                    row = row[['Symbol', 'Close', 'High', 'Low', 'Open', 'Volume', 'MACD', 'MACD Signal', 'MACD Hist', 'ADX', 'RSI']]
                    
                    # Append the row to the formatted DataFrame
                    formatted_data = pd.concat([formatted_data, row.to_frame().T], ignore_index=True)
                
                # Reset index to avoid MultiIndex columns
                formatted_data.reset_index(drop=True, inplace=True)
                
                # Drop any unnecessary levels
                if isinstance(formatted_data.columns, pd.MultiIndex):
                    formatted_data.columns = formatted_data.columns.get_level_values(0)
                
                # Save the formatted data to the worksheet
                formatted_data.to_excel(writer, sheet_name=sheet_name, index=False)
        print(f"Data saved to {filename}")
    except Exception as e:
        print(f"Error saving data to Excel: {e}")

# Main function
def main():
    # Define the stock symbols and date range
    symbols = ["AAPL", "MSFT", "GOOGL"]  # Example: Apple, Microsoft, Google
    start_date = "2025-01-01"  # Start date is January 1, 2025
    end_date = datetime.today().strftime("%Y-%m-%d")  # End date is today

    # Dictionary to store data for all symbols, grouped by date
    all_data_by_date = {}

    # Fetch and process data for each symbol
    for symbol in symbols:
        print(f"Fetching data for {symbol} from {start_date} to {end_date}...")
        stock_data = fetch_stock_data(symbol, start_date, end_date)
        
        if stock_data is not None:
            # Calculate indicators
            stock_data_with_indicators = calculate_indicators(stock_data)
            
            if stock_data_with_indicators is not None:
                # Group data by date and add to the dictionary
                for date, group in stock_data_with_indicators.groupby(stock_data_with_indicators.index.date):
                    if date not in all_data_by_date:
                        all_data_by_date[date] = []
                    all_data_by_date[date].append(group)
                print(f"Data for {symbol} processed successfully.")
            else:
                print(f"No indicators calculated for {symbol}.")
        else:
            print(f"No data fetched for {symbol}.")

    # Combine data for each date into a single DataFrame
    combined_data_by_date = {}
    for date, data_list in all_data_by_date.items():
        combined_data_by_date[date] = pd.concat(data_list)
        # Reset index to avoid MultiIndex columns
        combined_data_by_date[date].reset_index(drop=True, inplace=True)
        # Drop any unnecessary levels
        if isinstance(combined_data_by_date[date].columns, pd.MultiIndex):
            combined_data_by_date[date].columns = combined_data_by_date[date].columns.get_level_values(0)

    # Save all data to Excel
    if combined_data_by_date:
        save_to_excel(combined_data_by_date)
    else:
        print("No data to save.")

if __name__ == "__main__":
    main()
buran write Feb-13-2025, 03:42 AM:
Split from https://python-forum.io/thread-43923.html
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020