Dec-17-2020, 12:28 AM
hi there - good day dear python-experts.
running a parser in VSCode - and write the results into a csv-file
i ve got a tiny error on a
running a parser in VSCode - and write the results into a csv-file
i ve got a tiny error on a
import requests from bs4 import BeautifulSoup import re import csv from tqdm import tqdm first = "https://path ?page={}" second = "https://path /{}_en" def catch(url): with requests.Session() as req: pages = [] print("Loading All IDS\n") for item in tqdm(range(0, 347)): r = req.get(url.format(item)) soup = BeautifulSoup(r.content, 'html.parser') numbers = [item.get("href").split("/")[-1].split("_")[0] for item in soup.findAll( "a", href=re.compile("^path/"), class_="btn btn-default")] pages.append(numbers) return numbers def parse(url): links = catch(first) with requests.Session() as req: with open("Data.csv", 'w', newline="", encoding="UTF-8") as f: writer = csv.writer(f) writer.writerow(["Name", "Address", "Site", "Phone", "Description", "Scope", "Rec", "Send", "PIC", "OID", "Topic"]) print("\nParsing Now... \n") for link in tqdm(links): r = req.get(url.format(link)) soup = BeautifulSoup(r.content, 'html.parser') task = soup.find("section", class_="col-sm-12").contents name = task[1].text add = task[3].find( "i", class_="fa fa-location-arrow fa-lg").parent.text.strip() try: site = task[3].find("a", class_="link-default").get("href") except: site = "N/A" try: phone = task[3].find( "i", class_="fa fa-phone").next_element.strip() except: phone = "N/A" desc = task[3].find( "h3", class_="eyp-project-heading underline").find_next("p").text scope = task[3].findAll("span", class_="pull-right")[1].text rec = task[3].select("tbody td")[1].text send = task[3].select("tbody td")[-1].text pic = task[3].select( "span.vertical-space")[0].text.split(" ")[1] oid = task[3].select( "span.vertical-space")[-1].text.split(" ")[1] topic = [item.next_element.strip() for item in task[3].select( "i.fa.fa-check.fa-lg")] writer.writerow([name, add, site, phone, desc, scope, rec, send, pic, oid, "".join(topic)]) parse(second)see the output -
python /home/martin/dev/vscode/euro.py martin@mx:~ $ python /home/martin/dev/vscode/euro.py Loading All IDS 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 347/347 [08:01<00:00, 1.39s/it] Traceback (most recent call last): File "/home/martin/dev/vscode/euro.py", line 65, in <module> parse(second) File "/home/martin/dev/vscode/euro.py", line 29, in parse with open("Data.csv", 'w', newline="", encoding="UTF-8") as f: TypeError: file() takes at most 3 arguments (4 given) martin@mx:~well i think that i have an error here
with open("Data.csv", 'w', newline="", encoding="UTF-8") as f:i guess i need to have a closer look at the arguments here