Failure in web scraping by Beautiful Soup

<html> <head> <script> Challenge=305158; ChallengeId=395740057; GenericErrorMessageCookies="Cookies must be enabled in order to view this page."; </script> <script> function test(var1) { var var_str=""+Challenge; var var_arr=var_str.split(""); var LastDig=var_arr.reverse()[0]; var minDig=var_arr.sort()[0]; var subvar1 = (2 * (var_arr[2]))+(var_arr[1]*1); var subvar2 = (2 * var_arr[2])+var_arr[1]; var my_pow=Math.pow(((var_arr[0]*1)+2),var_arr[1]); var x=(var1*3+subvar1)*1; var y=Math.cos(Math.PI*subvar2); var answer=x*y; answer-=my_pow*1; answer+=(minDig*1)-(LastDig*1); answer=answer+subvar2; return answer; } </script> <script> client = null; if (window.XMLHttpRequest) { var client=new XMLHttpRequest(); } else { if (window.ActiveXObject) { client = new ActiveXObject('MSXML2.XMLHTTP.3.0'); }; } if (!((!!client)&&(!!Math.pow)&&(!!Math.cos)&&(!![].sort)&&(!![].reverse))) { document.write("Not all needed JavaScript methods are supported.<BR>"); } else { client.onreadystatechange = function() { if(client.readyState == 4) { var MyCookie=client.getResponseHeader("X-AA-Cookie-Value"); if ((MyCookie == null) || (MyCookie=="")) { document.write(client.responseText); return; } var cookieName = MyCookie.split('=')[0]; if (document.cookie.indexOf(cookieName)==-1) { document.write(GenericErrorMessageCookies); return; } window.location.reload(true); } }; y=test(Challenge); client.open("POST",window.location,true); client.setRequestHeader('X-AA-Challenge-ID', ChallengeId); client.setRequestHeader('X-AA-Challenge-Result',y); client.setRequestHeader('X-AA-Challenge',Challenge); client.setRequestHeader('Content-Type' , 'text/plain'); client.send(); } </script> </head> <body> <noscript>JavaScript must be enabled in order to view this page.</noscript> </body> </html>

from urllib.request import urlopen from bs4 import BeautifulSoup import requests import pandas as pd import xlsxwriter import re import os ## Scrapping all racing day on the site race_day_place = 'HV' race_day_url='https://racing.hkjc.com/racing/info/meeting/Results/English/Local/' race_day_url_content = requests.get(race_day_url) race_day_url_content.encoding = 'utf-8' race_day_url_html_content = race_day_url_content.text race_day_soup = BeautifulSoup(race_day_url_html_content, 'lxml') race_day_soup2 = race_day_soup.find('div', class_="rowDiv5") race_day = race_day_soup2.find('td', class_="tdAlignR") options = race_day.find_all("option", {'value':re.compile('^Local')} ) raceday = options[1:] jc_raceday_list = [] for each in raceday: value = each.text jc_raceday_list.append(value) ## Scrapping all racing day in my folder jay_raceday = os.listdir('C://AnyDirectory') jay_raceday2 = [] for eachfile in jay_raceday: os.path.splitext(eachfile)[0] jay_raceday2.append(eachfile[0:10]) jay_raceday3 = [d[8:10]+"/"+d[5:7]+"/"+d[:4] for d in jay_raceday2] ## Identify the difference above and append it in a list daydeviation = [] for day in jc_raceday_list: if day not in jay_raceday3: daydeviation.append(day) ## Convert into appropriate format for each_deviation in daydeviation: each_deviation = [d[6:10]+d[3:5]+d[0:2] for d in daydeviation] ## Looping all missing racing day for deviation in each_deviation: ## Scrapping entries data booklet_name = deviation[0:4]+'-'+deviation[4:6]+'-'+deviation[6:9] entries_race_place = 'HV' entries_url = 'http://racing.hkjc.com/racing/info/meeting/Entries/English/Local/'+deviation+'/'+entries_race_place entries_request = requests.get(entries_url) entries_request.encoding = 'utf-8' entries_request_html_content = entries_request.text entries_soup = BeautifulSoup(entries_request_html_content, 'lxml') entries_table = entries_soup.find('table', class_='col_12') if entries_table is None: entries_race_place = 'ST' entries_url = 'http://racing.hkjc.com/racing/info/meeting/Entries/English/Local/'+deviation+'/'+entries_race_place entries_request = requests.get(entries_url) entries_request.encoding = 'utf-8' entries_request_html_content = entries_request.text entries_soup = BeautifulSoup(entries_request_html_content, 'lxml') entries_table = entries_soup.find('table', class_='col_12') if entries_table: entries_trs = entries_table.find_all('tr') entries_content = [] for entries_tr in entries_trs[6:]: for entries_td2 in entries_tr.find_all('td', {'class': ['alignL2', 'alignL2-grey']}): entries_content.append(entries_td2.text.strip('\n\r\t": ')) writer = pd.ExcelWriter('C:\\AnyDirectory\\'+booklet_name+'.xlsx', engine='xlsxwriter') ## Scrapping all the result for page in range (1,13): result_race_place = 'HV' result_url = 'http://racing.hkjc.com/racing/info/meeting/Results/English/Local/'+deviation+'/'+result_race_place+'/'+str(page) result_request = requests.get(result_url) result_request.encoding = 'utf-8' result_html_content = result_request.text result_soup = BeautifulSoup(result_html_content, 'lxml') result_table = result_soup.find('table', class_='tableBorder trBgBlue tdAlignC number12 draggable') if result_table is None: result_race_place = 'ST' result_url = 'http://racing.hkjc.com/racing/info/meeting/Results/English/Local/'+deviation+'/'+result_race_place+'/'+str(page) result_request = requests.get(result_url) result_request.encoding = 'utf-8' result_html_content = result_request.text result_soup = BeautifulSoup(result_html_content, 'lxml') result_table = result_soup.find('table', class_='tableBorder trBgBlue tdAlignC number12 draggable') if result_table: hds = result_soup.find('thead') if hds: headers = [] for hds_td in hds.find_all('td'): headers.append(hds_td.text.strip('\n\r\t": ')) headers += ['Ace'] result_content = [] result_row = [] result_trs = result_table.find_all('tr', {'class': ['trBgGrey', 'trBgWhite']}) for result_tr in result_trs: result_tds = result_tr.find_all('td', {'nowrap': 'nowrap'}) for result_td in result_tds: result_row.append(result_td.text.strip('\n\r\t": ')) result_content.append(result_row) result_row = [] for each_result in result_content: new_result = each_result[2].split(sep='(')[0] for that in entries_content: if new_result in that and ('+' or '*' or '#') in that: answer = that.split(sep=new_result)[1][1] if answer.isdigit(): ace = '-' else: ace = answer each_result.append(ace) elif new_result in that and ('+' or '*' or '#') not in that: ace = '-' each_result.append(ace) if len(each_result) > 13: del each_result[-1] df = pd.DataFrame(result_content, columns=headers) df.to_excel(writer, sheet_name='Race'+str(page)) else: continue

from selenium import webdriver race_day_url='https://racing.hkjc.com/racing/info/meeting/Results/English/Local/' browser = webdriver.Firefox() browser.get(race_day_url) time.sleep(3) print(browser.page_source)

Hide/Show

Output:...

<div class="content"><a class="first" href="//www.hkjc.com/english/corporate/faq.asp" target="_blank">FAQ</a> | <a href="javascript:NewWindow('//www.hkjc.com/infomenu/en/contact/feedback.aspx','mypopup','770','550','yes','yes');">Contact Us</a> | <a href="javascript:NewWindow('//mcs.hkjc.com/','mypopup','770','550','yes','yes');">Media Communication System</a> | <a href="//www.hkjc.com/english/corporate/corp_sitemap.asp">Site Map</a> | <a href="javascript:NewWindow('//www.hkjc.com/english/betting/betting_rule.htm','mypopup','770','550','yes','yes');">Rules </a>| <a href="//www.hkjc.com/responsible-gambling/en/index.aspx" target="_blank">Responsible Gambling Policy</a> | <a href="//www.hkjc.com/english/corporate/corp_privacy.asp">Privacy</a> | <a href="//www.hkjc.com/english/corporate/corp_disclaimer.asp">Disclaimer</a> | <a href="//www.hkjc.com/home/english/corporate/security-tips.aspx">Security Tips</a><br></div>
<div class="copyright">Copyright © 2000-2019 The Hong Kong Jockey Club. All rights reserved.</div>
</div></div><script src="//common.hkjc.com/wa/wa_tracker.js" type="text/javascript"></script><script src="//common.hkjc.com/wa/wa_tracker_config.js" type="text/javascript"></script><script type="text/javascript">
		var waTaggingFired = false;

		var firedfunc = function() {
			if(waTaggingFired){
				return;
			}
						
			//Original WA tagging logic goes here
			
					try {
						WATracker.trackPageView('racing-content');
					} catch (e) {}
						
		   
			waTaggingFired = true;
		};

		if ( typeof(SSO) == 'undefined') {
		}
		else {
			SSO.OnUserProfileLoaded(firedfunc);
		}
	</script>

            
        </footer>
        
    

<iframe sandbox="allow-scripts allow-same-origin" title="Adobe ID Syncing iFrame" id="destination_publishing_iframe_hkjcweb_0" name="destination_publishing_iframe_hkjcweb_0_name" style="display: none; width: 0px; height: 0px;" src="https://hkjcweb.demdex.net/dest5.html?d_nsid=0#https%3A%2F%2Fracing.hkjc.com%2Fracing%2Finformation%2FEnglish%2Fracing%2FLocalResults.aspx" class="aamIframeLoaded"></iframe></body></html>