Oct-13-2019, 03:50 AM
Hello, I am writing a small web scraper for a dataset that is publicly available. My code works as long as I don't wrap the extraction in a loop, but when I do, BeautifulSoup raises a KeyError ending in "return self.attrs[key]". Can someone please tell me what I am doing wrong?
The working code:
import re import requests import pandas as pd from bs4 import BeautifulSoup as soup from urllib.request import urlopen as uReq my_url = 'https://www.espncricinfo.com/series/19372/scorecard/1190773/singapore-vs-malaysia-7th-match-icc-mens-t20-world-cup-asia-region-final-2019' uClient = uReq(my_url) page_html = uClient.read() uClient.close() page_soup = soup(page_html, 'html.parser') containerMain = page_soup.findAll("div", {"class": "layout-bc"}) containerMain = containerMain[0] containerColb = containerMain.findAll("div", {"class": "col-b"}) containerColb = containerColb[0] containerSC = containerColb.findAll("div", {"class": "scorecard-section batsmen"}) containerSC1 = containerSC[0] containerSC2 = containerSC[1] containerFlexi = containerSC1.findAll("div", {"class": "flex-row"}) lengthFlexi = len(containerFlexi) containerFlexl = containerSC2.findAll("div", {"class": "flex-row"}) lengthFlexl = len(containerFlexl) counter = 0 containerFlexi = containerFlexi[counter] containerWrap = containerFlexi.findAll("div", {"class": "wrap batsmen"}) containerWrap = containerWrap[counter] containerCr = containerWrap.findAll("div", {"class": "cell runs"}) containerRuns = containerCr[0] containerRuns = str(containerRuns) split_run1 = '">' + containerRuns.split('">', 1)[1] split_run2 = split_run1.split('<', 1)[0] + '<' run1 = split_run2.replace('">', "") run2 = run1.replace('<',"") runS = int(run2) containerSR = containerCr[5] containerSR = str(containerSR) split_Sr1 = '">' + containerSR.split('">', 1)[1] split_Sr2 = split_Sr1.split('<', 1)[0] + '<' Sr1 = split_Sr2.replace('">', "") Sr2 = Sr1.replace('<',"") SrS = float(Sr2) # length = len(containerRuns) print(runS) print(SrS)THE ERROR CODE AFTER I PUT A WHILE LOOP:
import re import requests import pandas as pd from bs4 import BeautifulSoup as soup from urllib.request import urlopen as uReq my_url = 'https://www.espncricinfo.com/series/19372/scorecard/1190773/singapore-vs-malaysia-7th-match-icc-mens-t20-world-cup-asia-region-final-2019' uClient = uReq(my_url) page_html = uClient.read() uClient.close() page_soup = soup(page_html, 'html.parser') containerMain = page_soup.findAll("div", {"class": "layout-bc"}) containerMain = containerMain[0] containerColb = containerMain.findAll("div", {"class": "col-b"}) containerColb = containerColb[0] containerSC = containerColb.findAll("div", {"class": "scorecard-section batsmen"}) containerSC1 = containerSC[0] containerSC2 = containerSC[1] containerFlexi = containerSC1.findAll("div", {"class": "flex-row"}) lengthFlexi = len(containerFlexi) containerFlexl = containerSC2.findAll("div", {"class": "flex-row"}) lengthFlexl = len(containerFlexl) counter = 0 while counter < lengthFlexi: containerFlexi = containerFlexi[counter] containerWrap = containerFlexi.findAll("div", {"class": "wrap batsmen"}) containerWrap = containerWrap[counter] containerCr = containerWrap.findAll("div", {"class": "cell runs"}) containerRuns = containerCr[0] containerRuns = str(containerRuns) split_run1 = '">' + containerRuns.split('">', 1)[1] split_run2 = split_run1.split('<', 1)[0] + '<' run1 = split_run2.replace('">', "") run2 = run1.replace('<',"") runS = int(run2) containerSR = containerCr[5] containerSR = str(containerSR) split_Sr1 = '">' + containerSR.split('">', 1)[1] split_Sr2 = split_Sr1.split('<', 1)[0] + '<' Sr1 = split_Sr2.replace('">', "") Sr2 = Sr1.replace('<',"") SrS = float(Sr2) counter +=1 # length = len(containerRuns) print(runS) print(SrS)How do I resolve this? Thanks!