Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Selenium Python
#1
Hi, can anyone help me? I'm getting errors with my code. See the screenshot [Image: 5br98m.jpg] and my code below:

import csv
import os
from tkinter import filedialog, messagebox, Tk, Entry, Button, Label, W

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class LinkedInScraper:
    """Look up people on LinkedIn and export their current positions to CSV.

    Constructing an instance runs the whole job: it opens Firefox, asks the
    user for an input CSV and for LinkedIn credentials, visits one profile
    per input row, writes ``<input name>_out.csv`` next to the input file,
    and finally shuts the browser down.

    Input rows are read positionally: columns 0-2 are joined with spaces to
    form the search text, and column 3 is a profile URL (searched for when
    it is empty).
    """

    def __init__(self):
        self.driver = webdriver.Firefox()
        try:
            self.scrape()
        finally:
            # Always end the browser session, even when scraping fails, so
            # no orphaned Firefox/geckodriver process is left behind.
            self.driver.quit()

    ###################################
    ##  Driver
    ###################################

    def scrape(self, numRetry=1):
        """Run the full lookup and write the results next to the input file.

        Args:
            numRetry: Number of extra passes made over rows whose search
                returned no result on the first pass.
        """
        # pull data from csv
        inputFile = self.getInputFile()
        inputData = self.loadSearchData(inputFile)

        # login
        self.driver.get('https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin')
        self.getCredentials()

        outputData = []
        failed = []

        for i, row in enumerate(inputData):
            # Pad short rows so the URL column (index 3) always exists;
            # multiplying a list by a non-positive number adds nothing.
            row.extend([''] * (4 - len(row)))

            # get the url: use the supplied one, otherwise search for it
            url = row[3]
            if url == '':
                url = self.search(" ".join(row[:3]))
                row[3] = url

            if url == '':
                outputData.append(['Not found'])
                failed.append(i)
                continue

            self.driver.get(url)
            outputData.append(self.extractExperiences(True))

        # circle back for the failed lookups
        while numRetry > 0 and failed:
            stillFailed = []
            for i in failed:
                row = inputData[i]
                url = self.search(" ".join(row[:3]))

                if url == '':
                    stillFailed.append(i)
                    continue

                self.driver.get(url)
                row[3] = url
                # Replace the 'Not found' placeholder with a flat list, the
                # same shape the first pass produces for a successful row.
                outputData[i] = self.extractExperiences(True)

            # Only rows that are still unresolved are retried again.
            failed = stillFailed
            numRetry -= 1

        # write the data; splitext copes with any (or no) file extension
        outputFile = os.path.splitext(inputFile)[0] + "_out.csv"
        self.writeSearchData(outputFile, inputData, outputData)

    ###################################
    ##  Business Logic
    ###################################

    def getCredentials(self):
        """Show a small centred dialog for the LinkedIn login and submit it.

        Blocks until the dialog is closed. Pressing the button destroys the
        dialog and passes the entered values to ``login``.
        """
        master = Tk()
        master.title("LinkedIn Credentials")

        w = 350  # width for the Tk root
        h = 100  # height for the Tk root

        # get screen width and height
        ws = master.winfo_screenwidth()  # width of the screen
        hs = master.winfo_screenheight()  # height of the screen

        # calculate x and y coordinates for the Tk root window
        x = (ws / 2) - (w / 2)
        y = (hs / 2) - (h / 2)

        # set the dimensions of the screen
        # and where it is placed
        master.geometry('%dx%d+%d+%d' % (w, h, x, y))

        Label(master, text="Username").grid(row=0)
        Label(master, text="Password").grid(row=1)

        e1 = Entry(master)
        e2 = Entry(master, show="*")

        e1.grid(row=0, column=1)
        e2.grid(row=1, column=1)

        def callback():
            # Read the fields before destroying the window that owns them.
            username = e1.get()
            password = e2.get()
            master.destroy()
            self.login(username, password)

        Button(master, text='Execute Script', command=callback).grid(row=3, column=0, sticky=W, pady=4)
        master.mainloop()

    def login(self, username, password):
        """Fill in the login form on the current page and submit it.

        Args:
            username: Text typed into the ``session_key`` field.
            password: Text typed into the ``session_password`` field.
        """
        # find_element(By...) is used instead of the find_element_by_*
        # helpers, which newer Selenium releases no longer provide.
        userField = self.driver.find_element(By.NAME, 'session_key')
        userField.send_keys(username)

        passField = self.driver.find_element(By.NAME, 'session_password')
        passField.send_keys(password)

        passField.send_keys(Keys.RETURN)

    def search(self, key):
        """Search for ``key`` and return the first result link.

        Args:
            key: Text typed into the page's search box.

        Returns:
            The ``href`` of the first link inside the results list, or ``''``
            when the search box or the results list does not appear in time,
            or when the results list contains no links.
        """
        try:
            # The search box may not be present yet right after login or a
            # page change, so wait for it instead of failing immediately.
            box = WebDriverWait(self.driver, 10, poll_frequency=0.1).until(
                EC.presence_of_element_located((By.XPATH, "//input[@placeholder='Search']"))
            )
            box.clear()
            box.send_keys(key)
            box.send_keys(Keys.RETURN)

            results = WebDriverWait(self.driver, 2, poll_frequency=0.1).until(
                EC.presence_of_element_located((By.CLASS_NAME, "results-list"))
            )
        except TimeoutException:
            return ''

        links = results.find_elements(By.TAG_NAME, 'a')
        if not links:
            return ''
        return links[0].get_attribute('href')

    def extractExperiences(self, currentOnly=False):
        """Collect job entries from the profile page currently loaded.

        Args:
            currentOnly: When true, keep only entries whose date range text
                contains ``'Present'``.

        Returns:
            A flat list with three strings per kept entry (title, company,
            location). Empty when the experience section does not appear
            within ten seconds.
        """
        experiences = []
        try:
            section = WebDriverWait(self.driver, 10, poll_frequency=0.1).until(
                EC.presence_of_element_located((By.CLASS_NAME, "experience-section")))
        except TimeoutException:
            return experiences

        cards = section.find_elements(By.CLASS_NAME, 'pv-profile-section__card-item')
        for card in cards:
            meta = card.find_element(By.CLASS_NAME, 'pv-entity__summary-info')

            dateRange = meta.find_elements(By.CLASS_NAME, 'pv-entity__date-range')
            isCurrent = len(dateRange) > 0 and 'Present' in dateRange[0].text
            if currentOnly and not isCurrent:
                continue

            jobInfo = [meta.find_element(By.TAG_NAME, 'h3').text]

            company = meta.find_elements(By.CLASS_NAME, 'pv-entity__secondary-title')
            jobInfo.append(company[0].text if company else "UNKNOWN COMPANY")

            location = meta.find_elements(By.CLASS_NAME, 'pv-entity__location')
            if location:
                # Drops the first 9 characters of the element text --
                # presumably a "Location" label plus separator; TODO confirm.
                jobInfo.append(location[0].text[9:])
            else:
                jobInfo.append("UNKNOWN LOCATION")

            experiences.extend(jobInfo)

        return experiences

    ###################################
    ##  Input Loading
    ###################################

    def getInputFile(self):
        """Ask the user to pick the input file and return its path.

        The file dialog is shown again, after an error box, until a file is
        actually selected.
        """
        root = Tk()
        root.overrideredirect(1)
        root.withdraw()

        messagebox.showinfo("Select Search Information",
                            "In the next prompt, select your SEARCH INFORMATION.")
        while True:
            inputFile = filedialog.askopenfilename()
            if inputFile:
                break
            messagebox.showerror("Error: Select File",
                                 "You must select the search information!")

        root.destroy()
        return inputFile

    def loadSearchData(self, input):
        """Read the search rows from a CSV file, skipping its header line.

        Args:
            input: Path of the CSV file to read.

        Returns:
            A list of rows (each a list of strings). Blank lines are
            skipped; an empty or header-only file yields ``[]``.
        """
        # newline='' lets the csv module do its own newline handling, as
        # its documentation requires.
        with open(input, newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            # csv.reader yields [] for blank lines; those carry no data.
            return [row for row in reader if row]

    ###################################
    ##  Writing
    ###################################

    def writeSearchData(self, output, inputData, outputData):
        """Write each input row followed by its scraped values to a CSV file.

        Args:
            output: Path of the CSV file to create or overwrite.
            inputData: Input rows, one list of strings per person.
            outputData: Scraped values; ``outputData[i]`` belongs to
                ``inputData[i]``.
        """
        # newline='' prevents the extra blank line after every row that
        # text-mode newline translation otherwise produces on Windows.
        with open(output, "w", newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['First Name', 'Last Name', 'Auxiliary (Input)', 'URL', 'Search Info'])
            for i, row in enumerate(inputData):
                # Build a combined row rather than extending the caller's
                # list in place.
                writer.writerow(list(row) + list(outputData[i]))


def main():
    """Run the scraper; constructing LinkedInScraper performs the whole job."""
    LinkedInScraper()


if __name__ == "__main__":
    # Go through main() so the script has a single entry point.
    main()
Reply
#2
You could try a WebDriverWait for that input before actually searching for it. A missing wait is often the main cause of an unexpected "unable to locate element" error. But the first thing I would try is to watch the browser as the script runs (not in the background) and see whether a popup or extra page appears between the login and the main page. For example, when I logged in, I got a page asking for a phone number instead of the main page, which would break the code.

Also please post the traceback in BBcode tags "error", not a screenshot. It helps us be able to copy and paste it.
Recommended Tutorials:
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Error in Selenium: CRITICAL:root:Selenium module is not installed...Exiting program. AcszE 1 3,584 Nov-03-2017, 08:41 PM
Last Post: metulburr

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020