(Jun-18-2018, 08:24 PM) ljmetzger wrote: You can get more data, but it seems you have to limit each request to chunks of about 60 days. The following example (January 3, 2018 thru June 12, 2018) is the equivalent of the manual URL: https://finance.yahoo.com/quote/AAPL/history?period1=1514955600&period2=1528776000&interval=1d&filter=history&frequency=1d Significant points: a. The package monthdelta is required (you probably do not have it installed). To install it from the Windows cmd.exe command line (or the Linux equivalent): pip install monthdelta
# b. The code is similar to the code that I previously posted. The epoch
#    (seconds) calculation now happens inside get_ticker() (via to_epoch()).
# c. date_windows() iterates through the dates (a maximum of two months at a
#    time) and main() concatenates the data frames returned by get_ticker()
#    into one large dataframe.
import datetime
import time
import urllib.request

import pandas as pd

COLUMNS = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
SECONDS_PER_DAY = 86400


def to_epoch(day):
    """Return midnight of *day* as whole seconds since the UNIX epoch.

    ``time.mktime`` interprets the time tuple as *local* time, so the result
    depends on the time zone of the machine running the script.
    """
    return int(time.mktime(day.timetuple()))


def rows_to_frame(rows):
    """Turn scraped table rows into a DataFrame indexed by 'Date'.

    Args:
        rows: list of rows, each a list of cell strings in COLUMNS order.
            Rows that do not have exactly one cell per column (header,
            footer, dividend/split notices) are ignored.

    Returns:
        DataFrame with the rows in reversed order and with every row that
        contains a missing or non-numeric value removed.
    """
    complete = [row for row in rows if len(row) == len(COLUMNS)]
    frame = pd.DataFrame(complete, columns=COLUMNS).set_index(COLUMNS[0])

    # DataFrame.convert_objects() no longer exists in current pandas; convert
    # column by column instead. Thousands separators ("16,911,100") are
    # removed first so that such cells become numbers as well.
    for name in frame.columns:
        cleaned = frame[name].str.replace(',', '', regex=False)
        numeric = pd.to_numeric(cleaned, errors='coerce')
        # Like convert_objects(), leave a column alone when nothing in it
        # could be converted.
        if numeric.notna().any():
            frame[name] = numeric

    return frame.iloc[::-1].dropna()


def get_ticker(ticker, date1, date2):
    """Download the Yahoo Finance history table for *ticker*.

    Args:
        ticker: stock symbol, e.g. 'AAPL'.
        date1: first ``datetime.date`` of the requested range.
        date2: last ``datetime.date`` of the requested range.

    Returns:
        DataFrame as produced by rows_to_frame().

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # Imported here so that the helpers above can be imported and tested
    # without the scraping packages installed.
    import bs4 as bs

    # One day (86400 second) adjustment required to get dates printed to
    # match web site manual output
    period1 = to_epoch(date1) - SECONDS_PER_DAY
    print("")
    print(date1, period1, " + 86,400 = ", str(period1 + SECONDS_PER_DAY))
    period2 = to_epoch(date2)
    print(date2, period2)

    url = ('https://finance.yahoo.com/quote/' + ticker
           + '/history?period1=' + str(period1)
           + '&period2=' + str(period2)
           + '&interval=1d&filter=history&frequency=1d')
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        source = response.read()

    soup = bs.BeautifulSoup(source, 'lxml')
    rows = [[cell.text for cell in tr.find_all('td')]
            for tr in soup.find_all('tr')]
    # As in the original post: skip the first table row and the last two.
    return rows_to_frame(rows[1:-2])


def date_windows(start, end):
    """Yield (date1, date2) pairs covering *start* thru *end*.

    Each window ends on the last day of a month (or on *end*) and spans at
    most two calendar months. Nothing is yielded when *start* is after *end*.
    """
    # Requires the third-party 'monthdelta' package (pip install monthdelta).
    import monthdelta

    date1 = start
    while date1 <= end:
        # Start two months from 'date1' and change the 'day of the month' to
        # the 1st day of the month ...
        later = date1 + monthdelta.monthdelta(2)
        next_date1 = datetime.date(later.year, later.month, 1)
        # ... then subtract 'one day' to change the 1st day of the month into
        # the last day of the previous month. Do not allow 'date2' to go
        # beyond the 'End Date'.
        date2 = min(next_date1 - datetime.timedelta(days=1), end)
        yield date1, date2
        date1 = next_date1


def main():
    """Fetch AAPL history in chunks and print the combined table."""
    # January 3, 2018 = 1514955600 (seconds since UNIX epoch in 1970)
    # June 12, 2018 = 1528776000
    # https://finance.yahoo.com/quote/AAPL/history?period1=1514955600&period2=1528776000&interval=1d&filter=history&frequency=1d
    print("")
    print("")
    start_date = datetime.date(2018, 1, 3)
    end_date = datetime.date(2018, 6, 12)
    today = datetime.date.today()

    # Output all 'original' dates (for debugging purposes only)
    print('Today :', today)
    print('Start Date:', start_date, 'Start Date Epoch:', to_epoch(start_date))
    print('End Date:', end_date, 'End Date Epoch:', to_epoch(end_date))

    # Do not allow the 'End Date' to be AFTER today
    if today < end_date:
        end_date = today

    stock_symbol = 'AAPL'
    frames = []
    for date1, date2 in date_windows(start_date, end_date):
        print("Processing {} thru {}.".format(date1, date2))
        frames.append(get_ticker(stock_symbol, date1, date2))

    # Concatenate once at the end; fall back to an empty table when the date
    # range produced no windows.
    dfall = pd.concat(frames) if frames else rows_to_frame([])
    print(dfall)
    print("len of dfall = {}".format(len(dfall)))
    return dfall


if __name__ == "__main__":
    main()

# Lewis
That's awesome! Thanks so much for spending time on this. The only thing I changed for now is that I omitted the monthdelta library. I'm using Anaconda and I tried to conda install it, but that didn't work. Instead, I used datetime.timedelta, which seems to do the trick. Below is the full code with my changes, FYI.
import datetime
import time
import urllib.request

import pandas as pd

COLUMNS = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
SECONDS_PER_DAY = 86400
WINDOW_DAYS = 60


def to_epoch(day):
    """Return midnight of *day* as whole seconds since the UNIX epoch.

    ``time.mktime`` interprets the time tuple as *local* time, so the result
    depends on the time zone of the machine running the script.
    """
    return int(time.mktime(day.timetuple()))


def rows_to_frame(rows):
    """Turn scraped table rows into a DataFrame indexed by 'Date'.

    Args:
        rows: list of rows, each a list of cell strings in COLUMNS order.
            Rows that do not have exactly one cell per column (header,
            footer, dividend/split notices) are ignored.

    Returns:
        DataFrame with the rows in reversed order and with every row that
        contains a missing or non-numeric value removed.
    """
    complete = [row for row in rows if len(row) == len(COLUMNS)]
    frame = pd.DataFrame(complete, columns=COLUMNS).set_index(COLUMNS[0])

    # DataFrame.convert_objects() no longer exists in current pandas; convert
    # column by column instead. Thousands separators ("16,911,100") are
    # removed first so that such cells become numbers as well.
    for name in frame.columns:
        cleaned = frame[name].str.replace(',', '', regex=False)
        numeric = pd.to_numeric(cleaned, errors='coerce')
        # Like convert_objects(), leave a column alone when nothing in it
        # could be converted.
        if numeric.notna().any():
            frame[name] = numeric

    return frame.iloc[::-1].dropna()


def get_ticker(ticker, date1, date2):
    """Download the Yahoo Finance history table for *ticker*.

    Args:
        ticker: stock symbol, e.g. 'AAPL'.
        date1: first ``datetime.date`` of the requested range.
        date2: last ``datetime.date`` of the requested range.

    Returns:
        DataFrame as produced by rows_to_frame().

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # Imported here so that the helpers above can be imported and tested
    # without the scraping packages installed.
    import bs4 as bs

    # One day (86400 second) adjustment required to get dates printed to
    # match web site manual output
    period1 = to_epoch(date1) - SECONDS_PER_DAY
    print("")
    print(date1, period1, " + 86,400 = ", str(period1 + SECONDS_PER_DAY))
    period2 = to_epoch(date2)
    print(date2, period2)

    url = ('https://finance.yahoo.com/quote/' + ticker
           + '/history?period1=' + str(period1)
           + '&period2=' + str(period2)
           + '&interval=1d&filter=history&frequency=1d')
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        source = response.read()

    soup = bs.BeautifulSoup(source, 'lxml')
    rows = [[cell.text for cell in tr.find_all('td')]
            for tr in soup.find_all('tr')]
    # As in the original post: skip the first table row and the last two.
    return rows_to_frame(rows[1:-2])


def date_windows(start, end):
    """Yield (date1, date2) pairs covering *start* thru *end* without gaps.

    The next window always starts on the 1st of the month that lies
    WINDOW_DAYS after the current 'date1'; the current window ends the day
    before that, or on *end* if that comes first. Nothing is yielded when
    *start* is after *end*.
    """
    date1 = start
    while date1 <= end:
        later = date1 + datetime.timedelta(days=WINDOW_DAYS)
        next_date1 = datetime.date(later.year, later.month, 1)
        # Last day of the previous month, but never beyond the 'End Date'.
        date2 = min(next_date1 - datetime.timedelta(days=1), end)
        yield date1, date2
        date1 = next_date1


def main():
    """Fetch AAPL history in chunks and print the combined table."""
    # January 3, 2018 = 1514955600 (seconds since UNIX epoch in 1970)
    # June 12, 2018 = 1528776000
    # https://finance.yahoo.com/quote/AAPL/history?period1=1514955600&period2=1528776000&interval=1d&filter=history&frequency=1d
    print("")
    print("")
    start_date = datetime.date(2005, 1, 3)
    end_date = datetime.date(2018, 6, 12)
    today = datetime.date.today()

    # Output all 'original' dates (for debugging purposes only)
    print('Today :', today)
    print('Start Date:', start_date, 'Start Date Epoch:', to_epoch(start_date))
    print('End Date:', end_date, 'End Date Epoch:', to_epoch(end_date))

    # Do not allow the 'End Date' to be AFTER today
    if today < end_date:
        end_date = today

    stock_symbol = 'AAPL'
    frames = []
    for date1, date2 in date_windows(start_date, end_date):
        print("Processing {} thru {}.".format(date1, date2))
        frames.append(get_ticker(stock_symbol, date1, date2))

    # Concatenate once at the end; fall back to an empty table when the date
    # range produced no windows.
    dfall = pd.concat(frames) if frames else rows_to_frame([])
    print(dfall)
    print("len of dfall = {}".format(len(dfall)))
    return dfall


if __name__ == "__main__":
    main()