Jan-23-2019, 10:49 AM
(This post was last modified: Jan-23-2019, 10:49 AM by CaptainCsaba.)
from urllib.request import urlopen as uReq from bs4 import BeautifulSoup as soup filename = "products.csv" f= open(filename, "w") headers = "name, special_cond, sector, subsector, index, marketcond, isin\n" f.write(headers) while open('input.txt') as url_file: for my_url in url_file: uClient = uReq(my_url.stip()) page_html = uClient.read() uClient.close() page_soup = soup(page_html, "html.parser") name = page_soup.find('h1', attrs={'class','tesummary'}).text.replace("\n", "") spec = page_soup.find('div', attrs={'class':'commonTable table-responsive'}).find('tr', attrs={'class':'even'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text.replace("\r", "") sect = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text subsect = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text index = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text mainmarket = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text isin = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text f.write(name.replace("\r", " ") + "," + spec.replace("\n", "") + "," + sect + "," + subsect + "," + index.replace(",", "|") + "," + mainmarket + "," + isin + "\n" f.close()This is the error:
>>> while open('input.txt') as url_file: File "<stdin>", line 1 while open('input.txt') as url_file: ^ SyntaxError: invalid syntax >>> for my_url in url_file: ... File "<stdin>", line 2 ^ IndentationError: expected an indented blockAlso for some reason is gives me the same error in the end:
... f.close() File "<stdin>", line 3 f.close() ^ SyntaxError: invalid syntax