Mar-30-2018, 11:16 AM
Hi,
What is wrong with the script below? Specifically, this line:
sample_df.to_csv (Desktop / "+ str (title_csv) +" .csv ",
encoding = "utf-8")
What do I need to change so that this code works?

What is wrong with the script below? Specifically, this line:
sample_df.to_csv (Desktop / "+ str (title_csv) +" .csv ",
encoding = "utf-8")
What do I need to change so that this code works?
"""Scrape Indeed job postings for a user-supplied title across a list of cities.

Collects city, job title, company, location, summary, and salary into a
pandas DataFrame and writes it out as "<title>.csv" under Desktop/.
"""
import time

import requests
import pandas as pd
from bs4 import BeautifulSoup

title = input("enter title: ")
title_csv = title
url = "https://www.indeed.com/jobs?q=" + str(title) + "&l"
page = requests.get(url)
soup = BeautifulSoup(page.text, "html.parser")
print(soup.prettify())


def extract_job_title_from_result(soup):
    """Return the job titles found in a result-page soup."""
    jobs = []
    for div in soup.find_all(name="div", attrs={"class": "row"}):
        for a in div.find_all(name="a", attrs={"data-tn-element": "jobTitle"}):
            jobs.append(a["title"])
    return jobs


print(extract_job_title_from_result(soup))


def extract_company_from_result(soup):
    """Return the company names found in a result-page soup.

    Falls back to the "result-link-source" span when no "company" span exists.
    """
    companies = []
    for div in soup.find_all(name="div", attrs={"class": "row"}):
        company = div.find_all(name="span", attrs={"class": "company"})
        if len(company) > 0:
            for b in company:
                companies.append(b.text.strip())
        else:
            sec_try = div.find_all(name="span", attrs={"class": "result-link-source"})
            for span in sec_try:
                companies.append(span.text.strip())
    return companies


extract_company_from_result(soup)


def extract_location_from_result(soup):
    """Return the job locations found in a result-page soup."""
    locations = []
    # FIX: the class is "location" (singular) — "locations" matched nothing,
    # which is why this function always returned an empty list.
    spans = soup.find_all("span", attrs={"class": "location"})
    for span in spans:
        locations.append(span.text)
    return locations


extract_location_from_result(soup)


def extract_salary_from_result(soup):
    """Return the salary text for each result row, or "Nothing_Found"."""
    salaries = []
    for div in soup.find_all(name="div", attrs={"class": "row"}):
        try:
            salaries.append(div.find("nobr").text)
        except AttributeError:  # div.find returned None — no <nobr> present
            try:
                div_two = div.find(name="div", attrs={"class": "sjcl"})
                div_three = div_two.find("div")
                salaries.append(div_three.text.strip())
            except AttributeError:
                salaries.append("Nothing_Found")
    return salaries


extract_salary_from_result(soup)


def extract_summary_from_result(soup):
    """Return the job summaries found in a result-page soup."""
    summaries = []
    # FIX: the class is lowercase "summary" — "Summary" matched nothing
    # (HTML class matching here is case-sensitive).
    spans = soup.find_all("span", attrs={"class": "summary"})
    for span in spans:
        summaries.append(span.text.strip())
    return summaries


print(extract_summary_from_result(soup))

max_results_per_city = 5000
city_set = ['name1', 'name2', ]
columns = ["city", "job_title", "company_name", "location", "summary", "salary"]
sample_df = pd.DataFrame(columns=columns)

for city in city_set:
    for start in range(0, max_results_per_city, 40):
        page = requests.get(
            "http://www.indeed.com/jobs?q=" + str(title)
            + "&+%2420%2C000&l=" + str(city) + "&start=" + str(start)
        )
        time.sleep(1)  # be polite to the server between requests
        soup = BeautifulSoup(page.text, "lxml", from_encoding="utf-8")
        for div in soup.find_all(name="div", attrs={"class": "row"}):
            num = len(sample_df) + 1
            job_post = [city]
            for a in div.find_all(name="a", attrs={"data-tn-element": "jobTitle"}):
                job_post.append(a["title"])
            company = div.find_all(name="span", attrs={"class": "company"})
            if len(company) > 0:
                for b in company:
                    job_post.append(b.text.strip())
            else:
                sec_try = div.find_all(name="span", attrs={"class": "result-link-source"})
                for span in sec_try:
                    job_post.append(span.text)
            # FIX: this lookup was duplicated verbatim in the original; once is enough.
            for span in div.find_all("span", attrs={"class": "location"}):
                job_post.append(span.text)
            for span in div.find_all("span", attrs={"class": "summary"}):
                job_post.append(span.text.strip())
            try:
                job_post.append(div.find("nobr").text)
            except AttributeError:  # no <nobr> salary element in this row
                try:
                    div_two = div.find(name="div", attrs={"class": "sjcl"})
                    div_three = div_two.find("div")
                    job_post.append(div_three.text.strip())
                except AttributeError:
                    job_post.append("Nothing_found")
            # NOTE(review): .loc raises ValueError if job_post does not have
            # exactly len(columns) items — rows missing a field will fail here.
            sample_df.loc[num] = job_post

# FIX (the asked question): the path string was missing its opening quote,
# which made `Desktop` an undefined name. Quote the whole path.
sample_df.to_csv("Desktop/" + str(title_csv) + ".csv", encoding="utf-8")