[split] Newbie needs help

Schoe1 · (This post was last modified: Feb-13-2025, 03:42 AM by buran.)

I have a similar problem. I am trying to fetch stock data using yfinance and pandas then create a matrix for several stock symbols and associated data.
The correct format I want should look like this:

Output:Symbol	    Close	High	     Low	     Open	Volume	    MACD	MACD Signal	MACD Hist	ADX	RSI
AAPL	        243.85	249.1	241.82	248.93	55740700	-0.12  	-0.1	            -0.02	        25.3	55.6
MSFT	    418.58	426.07	414.85	425.53	16896500	-0.08	    -0.07         	-0.01	        30.1	60.2
GOOGL	    189.43	192	    187.5	190.65	20370800	-0.05  	-0.04         	-0.01	        28.7	58.9

However, the format (when data is saved to excel) created is the following with duplicated columns:

Output:Symbol	Close	Close	Close	High	  High	High	  Low	  Low  	Low	  Open  	Open	Open	Volume  	Volume	    Volume
AAPL	   232.6199951			       235.2299957			  228.1300049		      228.1999969		        	53677600		
GOOGL			                85.3200073		    	186.9400024			     184.2799988			         185.0299988			                 21215300
MSFT		        411.4400024		        412.4899902			      409.2999878			    409.6400146		            	18122500

The Price data colums duplicate and with the populated data staggerd across the colums. I have posted s the python code below, any assistance would be gratefully appreciated!

--------------------------------

import talib
import pandas as pd
import yfinance as yf
from datetime import datetime
import os

# Function to fetch stock data
def fetch_stock_data(symbol, start_date, end_date):
    """
    Fetches historical stock data for the given symbol and date range.
    """
    try:
        data = yf.download(symbol, start=start_date, end=end_date)
        
        # Check if data is empty
        if data.empty:
            raise ValueError(f"No data found for symbol: {symbol}")
        
        # Add a column for the stock symbol
        data['Symbol'] = symbol
        
        return data
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

# Function to calculate indicators
def calculate_indicators(data):
    """
    Calculates technical indicators using TA-Lib.
    """
    try:
        # Convert Pandas Series to 1-dimensional NumPy arrays
        close_prices = data['Close'].values.flatten()  # Ensure 1D array
        high_prices = data['High'].values.flatten()    # Ensure 1D array
        low_prices = data['Low'].values.flatten()      # Ensure 1D array

        # Calculate RSI (Relative Strength Index) with a 7-day period
        data['RSI'] = talib.RSI(close_prices, timeperiod=7)

        # Calculate daily MACD (Moving Average Convergence Divergence)
        data['MACD'], data['MACD Signal'], data['MACD Hist'] = talib.MACD(
            close_prices, fastperiod=12, slowperiod=26, signalperiod=9
        )

        # Calculate ADX (Average Directional Index)
        data['ADX'] = talib.ADX(high_prices, low_prices, close_prices, timeperiod=14)

        return data
    except Exception as e:
        print(f"Error calculating indicators for {data['Symbol'].iloc[0]}: {e}")
        return None

[b]# Function to save data to Excel
def save_to_excel(data_dict, filename=r"C:\Users\pmsch\OneDrive\Desktop\stock_data.xlsx"):
    """
    Saves the stock data to an Excel file, with each date in a separate worksheet.
    """
    try:
        # Create the directory if it doesn't exist
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        
        # Save the Excel file
        with pd.ExcelWriter(filename, engine="openpyxl", mode="w") as writer:
            for date, data in data_dict.items():
                # Use the date as the worksheet title
                sheet_name = date.strftime("%Y-%m-%d")
                
                # Create a new DataFrame with the desired layout
                formatted_data = pd.DataFrame()

                # Add tickers and their data to the DataFrame
                for symbol, group in data.groupby('Symbol'):
                    # Extract the row for the current ticker
                    row = group.iloc[0]  # Assuming one row per ticker per date
                    
                    # Select only the required columns
                    row = row[['Symbol', 'Close', 'High', 'Low', 'Open', 'Volume', 'MACD', 'MACD Signal', 'MACD Hist', 'ADX', 'RSI']]
                    
                    # Append the row to the formatted DataFrame
                    formatted_data = pd.concat([formatted_data, row.to_frame().T], ignore_index=True)
                
                # Reset index to avoid MultiIndex columns
                formatted_data.reset_index(drop=True, inplace=True)
                
                # Drop any unnecessary levels
                if isinstance(formatted_data.columns, pd.MultiIndex):
                    formatted_data.columns = formatted_data.columns.get_level_values(0)
                
                # Save the formatted data to the worksheet
                formatted_data.to_excel(writer, sheet_name=sheet_name, index=False)
        print(f"Data saved to {filename}")
    except Exception as e:
        print(f"Error saving data to Excel: {e}")

# Main function
def main():
    # Define the stock symbols and date range
    symbols = ["AAPL", "MSFT", "GOOGL"]  # Example: Apple, Microsoft, Google
    start_date = "2025-01-01"  # Start date is January 1, 2025
    end_date = datetime.today().strftime("%Y-%m-%d")  # End date is today

    # Dictionary to store data for all symbols, grouped by date
    all_data_by_date = {}

    # Fetch and process data for each symbol
    for symbol in symbols:
        print(f"Fetching data for {symbol} from {start_date} to {end_date}...")
        stock_data = fetch_stock_data(symbol, start_date, end_date)
        
        if stock_data is not None:
            # Calculate indicators
            stock_data_with_indicators = calculate_indicators(stock_data)
            
            if stock_data_with_indicators is not None:
                # Group data by date and add to the dictionary
                for date, group in stock_data_with_indicators.groupby(stock_data_with_indicators.index.date):
                    if date not in all_data_by_date:
                        all_data_by_date[date] = []
                    all_data_by_date[date].append(group)
                print(f"Data for {symbol} processed successfully.")
            else:
                print(f"No indicators calculated for {symbol}.")
        else:
            print(f"No data fetched for {symbol}.")

    # Combine data for each date into a single DataFrame
    combined_data_by_date = {}
    for date, data_list in all_data_by_date.items():
        combined_data_by_date[date] = pd.concat(data_list)
        # Reset index to avoid MultiIndex columns
        combined_data_by_date[date].reset_index(drop=True, inplace=True)
        # Drop any unnecessary levels
        if isinstance(combined_data_by_date[date].columns, pd.MultiIndex):
            combined_data_by_date[date].columns = combined_data_by_date[date].columns.get_level_values(0)

    # Save all data to Excel
    if combined_data_by_date:
        save_to_excel(combined_data_by_date)
    else:
        print("No data to save.")

if __name__ == "__main__":
    main()

buran write Feb-13-2025, 03:42 AM:
Split from https://python-forum.io/thread-43923.html

[split] Newbie needs help

User Panel Messages

Announcements