Hello everyone, I need your help
I would like to get out of this page
[url=https://www.apo-rot.de/details/bullrich-salz-tabletten/2535395.html?_menuid=7688429&_nav=verdauung_harnwege__organe.magen_darm__verdauung.sodbrennen.zur_schnellen_linderung.tabletten__kapseln[/url]
quantity and manufacturer scraping, unfortunately I can't do it.
my current code:
I would like to get out of this page
[url=https://www.apo-rot.de/details/bullrich-salz-tabletten/2535395.html?_menuid=7688429&_nav=verdauung_harnwege__organe.magen_darm__verdauung.sodbrennen.zur_schnellen_linderung.tabletten__kapseln[/url]
quantity and manufacturer scraping, unfortunately I can't do it.
my current code:
import requests from bs4 import BeautifulSoup import csv import pandas as pd from time import sleep def connect(): try: requests.get('http://google.com') #Python 3.x return True except: return False def gernate_file(): headers_text = [] headers_text.append('Article Number') headers_text.append('Title') headers_text.append('quantity') headers_text.append('Manufacturer') headers_text.append('Price') headers_text.append('Categorie') headers_text.append('Link') headers_text.append('Image Url') headers_text.append('Article Number') df = pd.DataFrame([], columns=headers_text) df.to_csv('Results.csv', index=False, encoding='utf-8-sig') def hasNumbers(inputString): return any(char.isdigit() for char in inputString) # url='https://produkte.migros.ch/milch-extra-12x100g' # url='https://produkte.migros.ch/les-adorables-marc-de-champagne' def profileScraper(url,index): while(not connect()): print('no internet') sleep(5) res= requests.get(url) soup=BeautifulSoup(res.content,features='html.parser') title=soup.find('span',class_='hidden-xs') if title is not None: title=title.text.strip() else: return 0 print(title) Manufacturer=soup.find_next('div',class_='col-xs-8') if Weight is not None: Manufacturer=Manufacturer.text.strip() else: Manufacturer='N/A' print(Manufacturer) Price=soup.find('span',class_='') if Price is not None: Price = Price.text.strip() else: Price='N/A' print(Price) Categories=soup.find_all('a',class_='overflow-hidden') if len(Categories) >0: # print(Categories[2].text.strip()) Categories = Categories[0].text.strip() else: # print(Categories[-2].text.strip()) Categories = Categories[-2].text.strip() print(url) quantity=soup.find('div',{'data-sourcefile':'details_info-standard-details.jsp'}).find_all('div',{'class':'col-xs-4 bold'}) for element in quantity: if "quantity:" in element.text: quantity = element.find_next('div',{'class':'col-xs-8'}).text.strip() print(f"uantity: {quantity}") img_tag1=soup.find('div',class_='details_image') if img_tag1 is not None: img_tag = img_tag1.find('img',class_='img-responsive') if img_tag is not None: img_url=img_tag['src'] img_url_text=img_url.replace('//','') print(img_url_text) article_no = soup.find('div',class_='col-xs-8') if article_no is not None: article_no=article_no.text.strip() print(article_no) return[index,title,'`'+quantity,Manufacturer,Price,Categories,url,img_url_text,article_no] #Migrolist.csv with open('Ikea.csv', 'r', encoding='utf-8') as readFile: reader = csv.reader(readFile) file_lines = list(reader) # print(file_lines[1][8]) gernate_file() for index, row in enumerate(file_lines[1:]): print(index) # print(row[7]) record=[] record = profileScraper(row[6],index+1) df = pd.DataFrame([record]) df.to_csv('Ikea1.csv', index=False, mode='a', encoding='utf-8-sig', header=False) # break print() print() # print(record)many thanks for your help