Dec-31-2018, 05:18 PM
I am trying to scrape a web page that's giving me a bit of trouble.
There is no issue loading first and second pages, but then the
second page has a search box which has a different XPath each time called.
I am able to find the search box by searching for the <input> tag on the page.
The problem lies in trying to enter text into the search box and getting result.
Here's an almost working (except for above issue) snippet the code:
There is no issue loading first and second pages, but then the
second page has a search box which has a different XPath each time called.
I am able to find the search box by searching for the <input> tag on the page.
The problem lies in trying to enter text into the search box and getting result.
Here's an almost working (except for above issue) snippet the code:
from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.by import By from selenium.webdriver.common.action_chains import ActionChains import time class GetOregonBusinessLists: def __init__(self): self.parse_BusinessLists(searchitem='Active Businesses LLC', savename='ActiveLLC.html') def parse_BusinessLists(self, searchitem, savename): mainurl = 'https://data.oregon.gov' caps = webdriver.DesiredCapabilities().FIREFOX caps["marionette"] = True browser = webdriver.Firefox(capabilities=caps) browser.get(mainurl) time.sleep(5) data_catalog = browser.find_element(By.XPATH, '/html/body/div[2]/div/div[5]/div/div[2]/div/div[1]/div[2]/div[2]/div/a/div/div/p[2]') hover = ActionChains(browser).move_to_element(data_catalog) hover.perform() data_catalog.click() time.sleep(5) # Fetching search box by -- tag name -- as xpath changes with each access inputElement = browser.find_element_by_tag_name('input') attrs = browser.execute_script('var items = {}; for (index = 0; index < arguments[0].attributes.length; ++index) { items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value }; return items;', inputElement) print(f'attrs: {attrs}') inputElement.send_keys(searchitem) inputElement.send_keys(Keys.RETURN) page = str(browser.page_source) time.sleep(5) browser.close() print(page) if __name__ == '__main__': GetOregonBusinessLists()
Error:attrs: {'autocomplete': 'off', 'class': 'autocomplete-input _-_-_-common-autocomplete-components-SearchBox-_search-box-module_search-box-static-mobile_ksx6z', 'id': 'autocomplete-search-input-18721', 'placeholder': 'Search', 'type': 'search', 'value': ''}
Traceback (most recent call last):
File "/media/larz60/Data-1TB/Projects/BusinessListings/src/Oregon/ForForum.py", line 41, in <module>
GetOregonBusinessLists()
File "/media/larz60/Data-1TB/Projects/BusinessListings/src/Oregon/ForForum.py", line 11, in __init__
self.parse_BusinessLists(searchitem='Active Businesses LLC', savename='ActiveLLC.html')
File "/media/larz60/Data-1TB/Projects/BusinessListings/src/Oregon/ForForum.py", line 31, in parse_BusinessLists
inputElement.send_keys(searchitem)
File "/media/larz60/Data-1TB/Projects/BusinessListings/business_venv/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 479, in send_keys
'value': keys_to_typing(value)})
File "/media/larz60/Data-1TB/Projects/BusinessListings/business_venv/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 633, in _execute
return self._parent.execute(command, params)
File "/media/larz60/Data-1TB/Projects/BusinessListings/business_venv/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py",line 321, in execute
self.error_handler.check_response(response)
File "/media/larz60/Data-1TB/Projects/BusinessListings/business_venv/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotInteractableException: Message: Element <input id="autocomplete-search-input-18721" class="autocomplete-input _-_-_-common-autocomplete-components-SearchBox-_search-box-module_search-box-static-mobile_ksx6z" type="search"> is not reachable bykeyboar