(Dec-21-2017, 09:10 PM)DevinGP Wrote: (Dec-21-2017, 07:33 PM)metulburr Wrote: Then it is probably using JavaScript, and you are only left with Selenium as an option.
I didn't know the results might be generated by JavaScript, though.
Do you mind telling me how I would implement Selenium into my current code or at least pointing me to a tutorial on someone using it to scrape the titles and summaries? Thank you!
import time

from bs4 import BeautifulSoup
from selenium import webdriver

# Filesystem path to the chromedriver executable handed to webdriver.Chrome.
DRIVERPATH = '/home/metulburr/chromedriver'


class Data:
    """Run a Google search in Chrome through Selenium and print the results.

    Constructing an instance opens Chrome, loads ``self.url``, submits
    ``search`` and prints the link text, link target and description of
    every result block found in the rendered page. The browser is left
    open on success; the caller is responsible for ``browser.quit()``.

    NOTE(review): the ``find_element_by_*`` helpers and the positional
    driver path used below belong to the Selenium 3 API; newer Selenium
    releases removed them -- confirm the installed version.
    """

    def __init__(self, search, hold_open_seconds=0):
        """Open the browser, perform the search and print the results.

        Args:
            search: Text typed into the search box.
            hold_open_seconds: Optional pause, in seconds, after the
                results are printed so the window can be inspected.
                Defaults to 0; the previous hard-coded pause of
                1010000000 seconds meant the constructor never returned
                and the caller's ``browser.quit()`` could never run.
        """
        self.url = 'https://www.google.com/'
        self.search = search
        self.setup_driver(self.url)
        try:
            self.handle_search()
            self.get_data()
            if hold_open_seconds > 0:
                time.sleep(hold_open_seconds)
        except BaseException:
            # The caller never receives the instance when construction
            # fails, so close the browser here before propagating.
            self.browser.quit()
            raise

    def get_data(self):
        """Parse the current page source and print each search result.

        For every ``<div class="g">`` three lines are printed: the text
        of its first anchor, that anchor's ``href`` and the text of its
        ``<span class="st">``. Blocks without an anchor are skipped, and
        a missing ``href`` or description prints as an empty line
        instead of raising.
        """
        soup = BeautifulSoup(self.browser.page_source, 'html.parser')
        for div in soup.find_all('div', {'class': 'g'}):
            link = div.a
            if link is None:
                # Nothing to report for a block that has no link at all.
                continue
            desc = div.find('span', {'class': 'st'})
            print(link.text)
            print(link.get('href', ''))
            print(desc.text if desc is not None else '')

    def handle_search(self):
        """Type ``self.search`` into the search box and submit it."""
        self.browser.find_element_by_xpath('//*[@id="lst-ib"]').click()
        self.browser.find_element_by_id("lst-ib").send_keys(self.search)
        # Fixed pauses give the page time to react between interactions.
        time.sleep(1)
        # Absolute XPath into the suggestion box; presumably the search
        # button -- verify against the live page markup.
        self.browser.find_element_by_xpath('//*[@id="sbtc"]/div[2]/div[2]/div[1]/div/ul/li[7]/div/span[1]/span/input').click()
        time.sleep(1)

    def setup_driver(self, url):
        """Start Chrome, move its window to (0, 0) and load ``url``.

        Args:
            url: Address to open. Previously this argument was ignored
                in favour of ``self.url``.
        """
        self.browser = webdriver.Chrome(DRIVERPATH)
        self.browser.set_window_position(0, 0)
        self.browser.get(url)


data = Data('python forum')
data.browser.quit()
Recommended Tutorials: