Mar-22-2024, 11:07 AM
Hello there,
First of all, I am not a Python expert; I learned it ad hoc. I have a script that scrapes the name, the available sizes, and the price for each size of a set of products. I create an Excel file with the product links; the script reads that file, opens each link, collects the data, and saves it to a new Excel file. The problem is that, when the script runs on my server, it cannot click the cookie popup that appears and accept the cookies; when I run it on my PC, the script works fine.
Here is the full script:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
from bs4 import BeautifulSoup
from xlwt import Workbook
import pandas as pd
from selenium.webdriver.chrome.options import Options

# Set up Chrome options for headless browsing
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")  # Add this line if running as root user

# Initialize WebDriver
driver = webdriver.Chrome(options=chrome_options)

# Reading the file where the links of products are saved
df = pd.read_excel('OttyDuvets_Links.xlsx')

# Create a single Workbook and Worksheet
wb = Workbook()
ws = wb.add_sheet('Sheet1')

# Defining the Headers in Excel Worksheet
ws.write(0, 0, 'Name')
ws.write(0, 1, 'Size')
ws.write(0, 2, 'Price')

row = 1

# Iterate through all product links
for i in range(len(df['Links'])):
    driver.get(df['Links'][i])
    time.sleep(5)

    # Wait for the "cc-btn-decision" class to appear and then click on it
    try:
        decision_btn = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div/div[2]/a[3]'))
        )
        decision_btn.click()
    except Exception as e:
        print("Error clicking on 'cc-btn-decision':", str(e))

    time.sleep(3)  # Adjust this delay as needed

    # Parsing the source code of the webpage
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Name of the Product
    name_element = soup.find('h1')
    name = name_element.text.strip() if name_element else "Name not found"
    print(name)

    # Find the button that triggers the dropdown
    button = driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]')
    button.click()

    # Add a delay to allow the dropdown to appear
    time.sleep(3)

    # Iterate through all size options using a for loop
    for j in range(len(driver.find_elements(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]/option'))):
        # Find the dropdown container and size options each time
        dropdown_container = driver.find_element(By.CLASS_NAME, 'select')
        size_options = dropdown_container.find_elements(By.TAG_NAME, 'option')
        option = size_options[j]

        # Get the size from the option
        selected_size = option.text.strip()
        print(selected_size)

        # Scroll into view using JavaScript
        driver.execute_script("arguments[0].scrollIntoView();", option)
        time.sleep(1)

        # Click on the size option to select it
        try:
            option.click()
        except StaleElementReferenceException:
            # If the element is stale, re-find the dropdown and the option
            dropdown_container = driver.find_element(By.CLASS_NAME, 'select')
            size_options = dropdown_container.find_elements(By.TAG_NAME, 'option')
            option = size_options[j]
            option.click()

        time.sleep(3)

        # Find all the price elements for each selected size
        price_elements = driver.find_elements(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/div[1]/div[1]/div[1]/span[2]')

        # Iterate through all price elements for the selected size
        for price_element in price_elements:
            price = price_element.text.strip()
            print(price)

            # Saving the name, size, and price of the product in the worksheet
            ws.write(row, 0, name)
            ws.write(row, 1, selected_size)
            ws.write(row, 2, price)
            row += 1

        # Click on the dropdown again to show the rest of the options
        driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]').click()
        time.sleep(3)

# Save the single Workbook with its name and type
wb.save('OttyDuvets_Details.xls')

# Close the browser
driver.quit()

This is the error log I get:
root@ip-xxx-xx-xx-xxx:/path/path/scrappingProjects/Otty# python3 scrappingOttyDuvets.py
Deluxe Microfibre Duvet
Traceback (most recent call last):
File "scrappingOttyDuvets.py", line 50, in <module>
button.click()
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/remote/webelement.py", line 94, in click
self._execute(Command.CLICK_ELEMENT)
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/remote/webelement.py", line 395, in _execute
return self._parent.execute(command, params)
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/remote/webdriver.py", line 347, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/remote/errorhandler.py", line 229, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementClickInterceptedException: Message: element click intercepted: Element <select id="Option-template--14439267893323__main-0" class="select__select" name="options[Size]" form="product-form-template--14439267893323__main" data-forced-variant="">...</select> is not clickable at point (600, 580). Other element would receive the click: <iframe referrerpolicy="origin" id="attentive_creative" title="Sign Up via Text for Offers" src="https://creatives.attn.tv/creatives-dynamic/multiPage/index.html" style="width: 100%; height: 100%; position: fixed; inset: 0px; opacity: 1; transition: opacity 500ms ease 0s; border: 0px; overflow: hidden; visibility: visible;"></iframe>
(Session info: chrome-headless-shell=122.0.6261.128)
Stacktrace:
#0 0x56443887bf33 <unknown>
#1 0x564438573ce6 <unknown>
#2 0x5644385c5a73 <unknown>
#3 0x5644385c398e <unknown>
#4 0x5644385c1367 <unknown>
#5 0x5644385c0732 <unknown>
#6 0x5644385b3d27 <unknown>
#7 0x5644385e0eb2 <unknown>
#8 0x5644385b36b8 <unknown>
#9 0x5644385e107e <unknown>
#10 0x5644385ff899 <unknown>
#11 0x5644385e0c53 <unknown>
#12 0x5644385b1db3 <unknown>
#13 0x5644385b277e <unknown>
#14 0x56443884186b <unknown>
#15 0x564438845885 <unknown>
#16 0x56443882f181 <unknown>
#17 0x564438846412 <unknown>
#18 0x56443881325f <unknown>
#19 0x56443886a528 <unknown>
#20 0x56443886a723 <unknown>
#21 0x56443887b0e4 <unknown>
#22 0x7f7d88b75609 start_thread
To me, it looks like it cannot click on the dropdown element because an overlay is covering it (the error mentions the 'attentive_creative' sign-up iframe, although I first suspected the cookie popup). Can someone help? I also have a few other scripts that work fine on my PC but not on the server. The URL I am trying to scrape is: https://otty.com/products/luxury-microfi...uble-duvet
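Would something along these lines be a reasonable way to get past the overlay before clicking the dropdown? This is only a sketch of what I am thinking of trying; I am assuming the overlay keeps the id 'attentive_creative' (taken from the error message) and that it is acceptable to remove it with JavaScript:

# Sketch only: assumes the intercepting iframe keeps the id "attentive_creative"
# and that it is safe to remove it before interacting with the page.
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'attentive_creative'))
    )
    # Remove the overlay iframe so it can no longer intercept clicks
    driver.execute_script(
        "var f = document.getElementById('attentive_creative'); if (f) { f.remove(); }"
    )
except Exception:
    pass  # overlay never appeared, nothing to dismiss

# Alternatively, click the <select> via JavaScript so an overlay cannot intercept it
driver.execute_script("arguments[0].click();", button)

I also wondered whether driving the <select> with selenium.webdriver.support.ui.Select would avoid clicking the options at all, but I am not sure if that is the right approach here.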