Python Forum

Output:DevTools listening on ws://127.0.0.1:56769/devtools/browser/34ec545d-5d6c-434d-86c6-fc77922dbfe6

DevTools listening on ws://127.0.0.1:56770/devtools/browser/a0b62188-71dd-4d82-865d-cbec7cf42dc5
Error loading jobs from LinkedIn: 'WebDriver' object has no attribute 'find_element_by_css_selector'
Error loading jobs from LinkedIn: 'WebDriver' object has no attribute 'find_element_by_css_selector'
Total duplicates dropped: 0
Total duplicates dropped: 0

-- main.py --

from selenium.webdriver.support.ui import WebDriverWait


import json
import os
import sys
import threading

import pandas as pd

from IndeedScraper import IndeedScraper
from LinkedInScraper import LinkedInScraper
from ScraperUtil import ScraperUtil



def run_search(json_file_name):
    """Collect job listings that match the parameters stored in a json config file.

    The config is read from ``./Search Configs/<json_file_name>``, Indeed and
    LinkedIn are scraped with those parameters, and the results are stored in
    an Excel file named after the config file (without its extension).

    Args:
        json_file_name: File name (not a full path) of a json file inside the
            "Search Configs" directory.

    Returns:
        None. When the config cannot be loaded the reason is printed and no
        search is made.
    """
    # load_json reports every failure as ValueError, so catch only that type.
    # A bare except would also hide programming errors without any message.
    try:
        search_keywords, location, ignore_keywords, experience = load_json(
            "./Search Configs/" + json_file_name)
    except ValueError as e:
        print("Skipping", json_file_name, "because its config could not be loaded:", e)
        return

    # Strip only the final extension, so a name with extra dots
    # (e.g. "new.york.json") keeps its whole stem for the Excel file name.
    file_name = os.path.splitext(json_file_name)[0]
    all_dataFrames = [scrape_indeed(search_keywords, location, ignore_keywords, experience),
                      scrape_linkedin(search_keywords, location, ignore_keywords, experience)]

    # Stores the search results within an Excel file.
    store_in_excel_file(file_name, all_dataFrames)


def load_json(json_file_name):
    """Load the search parameters from a json config file.

    Args:
        json_file_name: Path of the json config file to read.

    Returns:
        A tuple ``(search_keywords, location, ignore_keywords, experience)``.
        ``experience`` is the config value converted to a lower-case string;
        the other three values are returned exactly as stored in the file.

    Raises:
        ValueError: If the file cannot be opened or parsed, or if one of the
            required keys is missing. The original error is chained as the
            cause.
    """
    try:
        # JSON interchange files are UTF-8; do not depend on the platform default.
        with open(json_file_name, "r", encoding="utf-8") as jsonfile:
            config = json.load(jsonfile)

            # Save all search parameters as variables.
            search_keywords = config["search_keywords"]
            location = config["location"]
            ignore_keywords = config['ignore_keywords']
            # Convert to str *before* lower-casing so a non-string value
            # (e.g. a number) triggers the warning below instead of an error.
            experience = str(config['experience']).lower()

            # Warn the user if they haven't provided a valid experience parameter.
            if experience not in ["junior", "mid", "senior"]:
                print(
                    "Warning: Experience value in", json_file_name,
                    " is invalid. please choose either 'Junior', 'Mid', "
                    "or 'Senior'. Jobs of all experience levels will be included in this search.")

            # Print a summary of the search parameters.
            print("Read config successfully.")
            print("search_keywords=", search_keywords)
            print("location=", location)
            print("ignore_keywords=", ignore_keywords)
            print("experience=", experience)
            return search_keywords, location, ignore_keywords, experience
    except Exception as e:
        # Build one readable message (not a tuple of arguments) and keep the
        # original exception attached as the cause for debugging.
        raise ValueError(
            "Error, could not load {}: {}".format(json_file_name, e)) from e


def scrape_indeed(search_keywords, location, ignore_keywords, experience):
    """Run a search on Indeed and hand back the listings it collected.

    Creates an IndeedScraper, calls its scrape() method with the given search
    parameters and returns the ``data`` attribute stored on the scraper. If
    anything raises while scraping, the error is printed and the empty result
    of ScraperUtil.construct_dataframe([]) is returned instead.
    """
    scraper = IndeedScraper()
    try:
        scraper.scrape(search_keywords, location, ignore_keywords, experience)
        results = scraper.data
        job_count = results.shape[0]
        print(job_count, "jobs loaded from Indeed.")
    except Exception as error:
        failure_message = "Error loading jobs from Indeed: " + str(error)
        print(failure_message)
        # Fall back to an empty dataFrame so the caller can still concatenate.
        results = ScraperUtil.construct_dataframe([])
    return results


def scrape_linkedin(search_keywords, location, ignore_keywords, experience):
    """Run a search on LinkedIn and hand back the listings it collected.

    Creates a LinkedInScraper, calls its scrape() method with the given search
    parameters and returns the ``data`` attribute stored on the scraper. If
    anything raises while scraping, the error is printed and the empty result
    of ScraperUtil.construct_dataframe([]) is returned instead.
    """
    scraper = LinkedInScraper()
    try:
        scraper.scrape(search_keywords, location, ignore_keywords, experience)
        results = scraper.data
        job_count = results.shape[0]
        print(job_count, "jobs loaded from LinkedIn.")
    except Exception as error:
        failure_message = "Error loading jobs from LinkedIn: " + str(error)
        print(failure_message)
        # Fall back to an empty dataFrame so the caller can still concatenate.
        results = ScraperUtil.construct_dataframe([])
    return results


def store_in_excel_file(file_name, all_dataFrames):
    """Store all job listings in ``<file_name>.xlsx``.

    If the workbook already exists its rows are merged with the new listings;
    otherwise a new workbook is created. Rows that share the same 'Title',
    'Company', 'Source' and 'Date Posted' are treated as duplicates and only
    the last occurrence is kept.

    Args:
        file_name: Path of the workbook without the ``.xlsx`` extension.
        all_dataFrames: List of dataFrames holding the newly scraped listings.
            The list is not modified.

    Raises:
        Any error other than FileNotFoundError raised while reading an
        existing workbook propagates, so an unreadable workbook is never
        silently overwritten.
    """
    workbook_path = file_name + '.xlsx'

    # Only a missing file means "start fresh". Catching everything here would
    # report a corrupt or locked workbook as missing and then overwrite it.
    try:
        master_dataFrame = pd.read_excel(workbook_path)
    except FileNotFoundError:
        print(file_name + ".xlsx doesn't exist yet. Creating new file.")
        master_dataFrame = ScraperUtil.construct_dataframe([])

    # Build a new list rather than appending to the caller's list. The stored
    # rows go last, so keep='last' keeps them when a new row duplicates one.
    new_dataFrame = pd.concat(list(all_dataFrames) + [master_dataFrame])
    length_before = new_dataFrame.shape[0]
    new_dataFrame = new_dataFrame.drop_duplicates(
        keep='last', subset=['Title', 'Company', 'Source', 'Date Posted'])
    length_after = new_dataFrame.shape[0]
    total_duplicates = length_before - length_after
    print("Total duplicates dropped:", total_duplicates)
    new_dataFrame.to_excel(workbook_path, index=False)


if __name__ == "__main__":
    # Start one search thread per json config file found in "Search Configs",
    # then wait for all of them to finish.
    all_threads = []
    # The with-block closes the directory iterator once the listing is read.
    with os.scandir(path="./Search Configs") as entries:
        for entry in entries:
            # splitext copes with names that have no dot (no IndexError) and
            # with names that contain several dots; is_file() skips folders.
            extension = os.path.splitext(entry.name)[1].lower()
            if entry.is_file() and extension == '.json':
                all_threads.append(threading.Thread(target=run_search, args=(entry.name,)))

    if not all_threads:
        print("No json files found in 'Search Configs' directory. No search will be made.")
    else:
        for thread in all_threads:
            thread.start()

        for thread in all_threads:
            thread.join()

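You will need to import `By` (`from selenium.webdriver.common.by import By`) and then call `find_element(By.CSS_SELECTOR, ...)` instead of `find_element_by_css_selector(...)`; the `find_element_by_*` methods were removed in Selenium 4.3, which is why the 'WebDriver' object no longer has that attribute.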

(Sep-19-2022, 02:38 PM)Larz60+ Wrote: [ -> ]you will need to import 'By': from selenium.webdriver.common.by import By

Thanks, but
please be a bit clearer about how I can solve the error.
I have imported 'By', and I still get the same result.

Result from cmd:
DevTools listening on ws://127.0.0.1:52127/devtools/browser/c2cf40a2-864e-47dc-91b1-c1d235cd25f0
Error loading jobs from LinkedIn: 'WebDriver' object has no attribute 'find_element_by_css_selector'

I don't even see where you are using find_element_by_css_selector
The following is the way I have done it successfully:
element = self.browser.find_element(By.CSS_SELECTOR, "#txtCommonPageNo")

and it works fine on my system.
I'm not familiar with any of the packages you are using, so this is all I can offer at this point.

rickadams

Larz60+

rickadams

Larz60+