Hi,
Beautiful Soup won't be the best fit for this task: as soon as you hit the URL, the CSV file is downloaded to the browser's default download location, and there is no page content left for a parser to capture.
My approach needs to call the URL with the required parameters and capture the downloaded CSV file's content into a DataFrame.
Thanks for sharing the two web-crawling links.
Any alternate approach would be better.
Thanks
# --- Python script ---
import requests
import pandas as pd
from datetime import date, timedelta
import urllib as u
import wget
# Widen pandas console output so wide frames print without truncation.
pd.options.display.width = 1500
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 75
# NSE JSON endpoint listing the securities in the F&O segment.
url = "https://www.nseindia.com/api/equity-stockIndices?index=SECURITIES%20IN%20F%26O"
# Browser-like request headers; NSE rejects requests without a realistic user-agent.
headers = { "user-agent" : "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36" , "accept-encoding" : "gzip, deflate", "accept-language" : "en-US,en;q=0.9"}
# NOTE(review): hard-coded Akamai `bm_sv` session cookie — presumably captured
# from a browser session; these expire quickly, so this value will likely need
# refreshing for requests to succeed. Verify before relying on it.
cookie_dict = { 'bm_sv' : "93ECE40F315004D8086198FE4F2FAAFF~hVZZVk7LTIm/q1Wmp/HrMN12nAXZSE0FQZm7ForAw2DCWqAc5GZWhYxDtHCMq3S2X6HmPBhbmqHEUMA348kdDtLmFV4lizcCvxdC+xcKgBJ6B5AhDKN9UOlU2/kL0xbFNicj1pv6n9ezYv1PPSoOEL35C/FD6R7rFRz1qHWlRVc"}
def prevMonth_Fist_Last_Date():
    """Return the first and last day of the previous calendar month.

    Returns
    -------
    tuple[str, str]
        ``(first_day, last_day)``, both formatted as ``"%d-%m-%Y"``
        (e.g. ``"01-03-2024"``), the format the NSE historical API expects.
    """
    # The day before the 1st of the current month is, by definition,
    # the last day of the previous month.
    last_day_of_prev_month = date.today().replace(day=1) - timedelta(days=1)
    # Step back by the previous month's length to land on its 1st.
    start_day_of_prev_month = date.today().replace(day=1) - timedelta(
        days=last_day_of_prev_month.day
    )
    # The original line was truncated mid-strftime and never returned —
    # the caller unpacks two values, so return (first, last).
    return (
        start_day_of_prev_month.strftime("%d-%m-%Y"),
        last_day_of_prev_month.strftime("%d-%m-%Y"),
    )
def downloadHistoricalData(ind):
    """Fetch last month's daily EQ-series price history for an NSE symbol.

    Parameters
    ----------
    ind : str
        NSE trading symbol, e.g. ``"INFY"``.

    Returns
    -------
    str
        The raw CSV payload returned by the API. (The original version only
        printed the bytes; returning the text lets the caller load it into a
        DataFrame, e.g. ``pd.read_csv(io.StringIO(...))``.)
    """
    fdate, ldate = prevMonth_Fist_Last_Date()
    # Send everything through one session so the module-level cookies AND
    # browser-like headers actually travel with the request — the original
    # built the session but then used a bare requests.get(), which sent
    # neither, and fired the request a second time for no reason.
    session = requests.session()
    session.headers.update(headers)
    for name, value in cookie_dict.items():
        session.cookies.set(name, value)
    url = (
        "https://www.nseindia.com/api/historical/cm/equity?symbol=" + ind
        + "&series=[%22EQ%22]&from=" + fdate + "&to=" + ldate + "&csv=true"
    )
    # Single GET; the `data=` body the original passed is meaningless on a GET.
    r = session.get(url, allow_redirects=True)
    print(url)
    print(r)
    return r.text
if __name__ == "__main__":
    # The original line was missing the closing parenthesis (SyntaxError);
    # guard the driver call so importing this module has no side effects.
    print(downloadHistoricalData('Infy'))