Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Looping with Beautifulsoup
#5
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

filename = "products.csv"
f= open(filename, "w")
headers = "name, special_cond, sector, subsector, index, marketcond, isin\n"
f.write(headers)

while open('input.txt') as url_file:
	for my_url in url_file:
		uClient = uReq(my_url.stip())
		page_html = uClient.read()
		uClient.close()
		page_soup = soup(page_html, "html.parser")

	
		name = page_soup.find('h1', attrs={'class','tesummary'}).text.replace("\n", "") 
		spec = page_soup.find('div', attrs={'class':'commonTable table-responsive'}).find('tr', attrs={'class':'even'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text.replace("\r", "")
		sect = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text
		subsect = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text
		index = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text
		mainmarket = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text
		isin = page_soup.find('div', attrs={'id':'pi-colonna2'}).find('div', attrs={'class':'table-responsive'}).find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').find_next('td').text


		f.write(name.replace("\r", " ") + "," + spec.replace("\n", "") + "," + sect + "," + subsect + "," + index.replace(",", "|") + "," + mainmarket + "," + isin + "\n"
	
f.close()

	
This is the error:

>>> while open('input.txt') as url_file:
  File "<stdin>", line 1
    while open('input.txt') as url_file:
                             ^
SyntaxError: invalid syntax
>>> for my_url in url_file:
...
  File "<stdin>", line 2

    ^
IndentationError: expected an indented block
Also for some reason is gives me the same error in the end:
... f.close()
  File "<stdin>", line 3
    f.close()
    ^
SyntaxError: invalid syntax
Reply


Messages In This Thread
Looping with Beautifulsoup - by CaptainCsaba - Jan-22-2019, 11:55 AM
RE: Looping with Beautifulsoup - by ichabod801 - Jan-22-2019, 04:20 PM
RE: Looping with Beautifulsoup - by CaptainCsaba - Jan-23-2019, 07:17 AM
RE: Looping with Beautifulsoup - by buran - Jan-23-2019, 09:41 AM
RE: Looping with Beautifulsoup - by CaptainCsaba - Jan-23-2019, 10:49 AM
RE: Looping with Beautifulsoup - by snippsat - Jan-23-2019, 11:18 AM
RE: Looping with Beautifulsoup - by buran - Jan-23-2019, 11:20 AM
RE: Looping with Beautifulsoup - by CaptainCsaba - Jan-23-2019, 11:30 AM
RE: Looping with Beautifulsoup - by buran - Jan-23-2019, 12:38 PM

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020