Aug-06-2018, 09:01 AM
i am scraping a website who has multiple pages and using javascript like this one
<td><a href="javascript:__doPostBack('gv_AgentList1','Page$2')">2</a></td><td><a href="javascript:__doPostBack('gv_AgentList1','Page$3')">3</a></td>here they change pages and this effect reflect in the __EVENTARGUMENT in the DOM like this
__EVENTARGUMENT:Page$2I tried to loop over it but received same first page results multiple times. Can anyone help me.Below are my code:
from bs4 import BeautifulSoup import requests import csv import sqlite3 url = "https://rera.cgstate.gov.in/" final_data = [] def getdatabyget(url,values): res = requests.get(url,values) text = res.text return text def readheaders(): global url, final_data for i in range(1, 4): argument = "Page$"+ str(i+1) htmldata = getdatabyget(url, {}) soup = BeautifulSoup(htmldata, "html.parser") EVENTVALIDATION = soup.select("#__EVENTVALIDATION")[0]['value'] VIEWSTATE = soup.select("#__VIEWSTATE")[0]['value'] headers= {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Content-Type':'application/x-www-form-urlencoded', 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0'} formfields = {"__ASYNCPOST":"true", "__EVENTARGUMENT":argument, "__EVENTTARGET":"gv_AgentList1", "__EVENTVALIDATION":EVENTVALIDATION, "__LASTFOCUS":"", "__VIEWSTATE":VIEWSTATE, "ApplicantType":"0", "Button1":"Search", "color_value":"0", "District_Name":"0", "DropDownList1":"0", "DropDownList2":"0", "DropDownList4":"0", "DropDownList5":"0", "group1":"on", "hdnSelectedOption":"0", "hdnSelectedOptionForContractor":"0", "language_value":"0", "Mobile":"", "Tehsil_Name":"0", "TextBox1":"", "TextBox2":"", "TextBox3":"", "TextBox4":"", "TextBox5":"", "TextBox6":"", "ToolkitScriptManager1":"appr1|Button1", "txt_otp":"", "txt_proj_name":"", "txtRefNo":"", "txtRefNoForContractor":""} s = requests.session() res = s.post(url, data=formfields, headers=headers).text soup = BeautifulSoup(res, "html.parser") data = soup.find_all("table")[0] gettr = data.find_all("tr")[1:-2] for i in gettr: add_list = [] blank = "" projectname = i.find_all("td")[0].text reranumber = i.find_all("td")[1].text.replace(" ","") Authorised = i.find_all("td")[2].text.replace("\n","") promoternme = i.find_all("td")[3].text.replace("\n","") projecttype = i.find_all("td")[4].text.replace("\n","") district = i.find_all("td")[5].text.replace("\n","") tehsil = i.find_all("td")[6].text.replace("\n","") approveddate = i.find_all("td")[7].text.replace("\n","") enddate = i.find_all("td")[8].text.replace("\n","") add_list.append(projectname) print(add_list) readheaders()The above is the code. How can i solve this matter . Please do enlight.