I have a similar problem. I am trying to fetch stock data using yfinance and pandas then create a matrix for several stock symbols and associated data.
The correct format I want should look like this:
--------------------------------
The correct format I want should look like this:
Output:Symbol Close High Low Open Volume MACD MACD Signal MACD Hist ADX RSI
AAPL 243.85 249.1 241.82 248.93 55740700 -0.12 -0.1 -0.02 25.3 55.6
MSFT 418.58 426.07 414.85 425.53 16896500 -0.08 -0.07 -0.01 30.1 60.2
GOOGL 189.43 192 187.5 190.65 20370800 -0.05 -0.04 -0.01 28.7 58.9
However, the format (when data is saved to excel) created is the following with duplicated columns:Output:Symbol Close Close Close High High High Low Low Low Open Open Open Volume Volume Volume
AAPL 232.6199951 235.2299957 228.1300049 228.1999969 53677600
GOOGL 85.3200073 186.9400024 184.2799988 185.0299988 21215300
MSFT 411.4400024 412.4899902 409.2999878 409.6400146 18122500
The Price data colums duplicate and with the populated data staggerd across the colums. I have posted s the python code below, any assistance would be gratefully appreciated! --------------------------------
import talib import pandas as pd import yfinance as yf from datetime import datetime import os # Function to fetch stock data def fetch_stock_data(symbol, start_date, end_date): """ Fetches historical stock data for the given symbol and date range. """ try: data = yf.download(symbol, start=start_date, end=end_date) # Check if data is empty if data.empty: raise ValueError(f"No data found for symbol: {symbol}") # Add a column for the stock symbol data['Symbol'] = symbol return data except Exception as e: print(f"Error fetching data for {symbol}: {e}") return None # Function to calculate indicators def calculate_indicators(data): """ Calculates technical indicators using TA-Lib. """ try: # Convert Pandas Series to 1-dimensional NumPy arrays close_prices = data['Close'].values.flatten() # Ensure 1D array high_prices = data['High'].values.flatten() # Ensure 1D array low_prices = data['Low'].values.flatten() # Ensure 1D array # Calculate RSI (Relative Strength Index) with a 7-day period data['RSI'] = talib.RSI(close_prices, timeperiod=7) # Calculate daily MACD (Moving Average Convergence Divergence) data['MACD'], data['MACD Signal'], data['MACD Hist'] = talib.MACD( close_prices, fastperiod=12, slowperiod=26, signalperiod=9 ) # Calculate ADX (Average Directional Index) data['ADX'] = talib.ADX(high_prices, low_prices, close_prices, timeperiod=14) return data except Exception as e: print(f"Error calculating indicators for {data['Symbol'].iloc[0]}: {e}") return None [b]# Function to save data to Excel def save_to_excel(data_dict, filename=r"C:\Users\pmsch\OneDrive\Desktop\stock_data.xlsx"): """ Saves the stock data to an Excel file, with each date in a separate worksheet. """ try: # Create the directory if it doesn't exist os.makedirs(os.path.dirname(filename), exist_ok=True) # Save the Excel file with pd.ExcelWriter(filename, engine="openpyxl", mode="w") as writer: for date, data in data_dict.items(): # Use the date as the worksheet title sheet_name = date.strftime("%Y-%m-%d") # Create a new DataFrame with the desired layout formatted_data = pd.DataFrame() # Add tickers and their data to the DataFrame for symbol, group in data.groupby('Symbol'): # Extract the row for the current ticker row = group.iloc[0] # Assuming one row per ticker per date # Select only the required columns row = row[['Symbol', 'Close', 'High', 'Low', 'Open', 'Volume', 'MACD', 'MACD Signal', 'MACD Hist', 'ADX', 'RSI']] # Append the row to the formatted DataFrame formatted_data = pd.concat([formatted_data, row.to_frame().T], ignore_index=True) # Reset index to avoid MultiIndex columns formatted_data.reset_index(drop=True, inplace=True) # Drop any unnecessary levels if isinstance(formatted_data.columns, pd.MultiIndex): formatted_data.columns = formatted_data.columns.get_level_values(0) # Save the formatted data to the worksheet formatted_data.to_excel(writer, sheet_name=sheet_name, index=False) print(f"Data saved to {filename}") except Exception as e: print(f"Error saving data to Excel: {e}") # Main function def main(): # Define the stock symbols and date range symbols = ["AAPL", "MSFT", "GOOGL"] # Example: Apple, Microsoft, Google start_date = "2025-01-01" # Start date is January 1, 2025 end_date = datetime.today().strftime("%Y-%m-%d") # End date is today # Dictionary to store data for all symbols, grouped by date all_data_by_date = {} # Fetch and process data for each symbol for symbol in symbols: print(f"Fetching data for {symbol} from {start_date} to {end_date}...") stock_data = fetch_stock_data(symbol, start_date, end_date) if stock_data is not None: # Calculate indicators stock_data_with_indicators = calculate_indicators(stock_data) if stock_data_with_indicators is not None: # Group data by date and add to the dictionary for date, group in stock_data_with_indicators.groupby(stock_data_with_indicators.index.date): if date not in all_data_by_date: all_data_by_date[date] = [] all_data_by_date[date].append(group) print(f"Data for {symbol} processed successfully.") else: print(f"No indicators calculated for {symbol}.") else: print(f"No data fetched for {symbol}.") # Combine data for each date into a single DataFrame combined_data_by_date = {} for date, data_list in all_data_by_date.items(): combined_data_by_date[date] = pd.concat(data_list) # Reset index to avoid MultiIndex columns combined_data_by_date[date].reset_index(drop=True, inplace=True) # Drop any unnecessary levels if isinstance(combined_data_by_date[date].columns, pd.MultiIndex): combined_data_by_date[date].columns = combined_data_by_date[date].columns.get_level_values(0) # Save all data to Excel if combined_data_by_date: save_to_excel(combined_data_by_date) else: print("No data to save.") if __name__ == "__main__": main()