Python Forum
Python BeautifulSoup IndexError: list index out of range - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Web Scraping & Web Development (https://python-forum.io/forum-13.html)
+--- Thread: Python BeautifulSoup IndexError: list index out of range (/thread-33803.html)



Python BeautifulSoup IndexError: list index out of range - rhat398 - May-28-2021

import requests
from bs4 import BeautifulSoup
import csv
import time

class LightupScraper:

    results = []

    def fetch(self, url):
        print(f'HTTP GET request to URL: {url}', end='')
        res = requests.get(url)
        print(f' | Status Code: {res.status_code}')

        return res

    def save_response(self, res):
        with open('res.html', 'w') as html_file:
            html_file.write(res)

    def load_response(self):
        html = ''

        with open('res.html', 'r') as html_file:
            for line in html_file:
                html += line

            return html

    def parse(self, html):

        content = BeautifulSoup(html, 'lxml')
        titles = [title.text.strip() for title in content.find_all('h4', {'class': 'card-title ols-card-title'})]
        links = [link.find('a')['href'] for link in content.find_all('h4', {'class': 'card-title ols-card-title'})]
        skus = [sku.text for sku in content.find_all('span', {'class': 'productView-info-value ols-card-text--sku'})]
        mpn = [mpn.text.split(':')[-1].strip() for mpn in content.find_all('span', {'class': 'productView-info-name mpn-label ols-card-text--mpn'})]
        details = [ul.find_all('li') for ul in content.find_all('ul', {'class': 'ols-card-text__list'})]
        brand = [''.join([brand.text for brand in detail if 'Brand:' in brand.text]).split(':')[-1].strip() for detail in details]
        base = [''.join([base.text for base in detail if 'Base Type:' in base.text]).split(':')[-1].strip() for detail in details]
        life_hours = [''.join([life_hour.text for life_hour in detail if 'Life Hours:' in life_hour.text]).split(':')[-1].strip() for detail in details]
        lumens = [''.join([lumen.text for lumen in detail if 'Lumens:' in lumen.text]).split(':')[-1].strip() for detail in details]
        warrantys = [''.join([warranty.text for warranty in detail if 'Warranty:' in warranty.text]).split(':')[-1].strip() for detail in details]
        wattages = [''.join([wattage.text for wattage in detail if 'Wattage:' in wattage.text]).split(':')[-1].strip() for detail in details]
        features = [feature.text.split() for feature in content.find_all('span', {'class': 'ols-card-text__list--features'})]
        prices = [price.text for price in content.find_all('span', {'class': 'price price--withoutTax'})]
        #print(prices)



        for feature in features:
            feat = feature

        for item in range(0, len(titles)):
            self.results.append({
                'titles': titles[item],
                'skus': skus[item],
                'mpn': mpn[item],
                'brand': brand[item],
                'base': base[item],
                'life_hours': life_hours[item],
                'lumens': lumens[item],
                'warrantys': warrantys[item],
                'wattages': wattages[item],
                'feature': feat[item],
                'links': links[item],
                'price': prices[item]
            })

    def to_csv(self):
        with open('lightup.csv', 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=self.results[0].keys())
            writer.writeheader()

            for row in self.results:
                writer.writerow(row)

            print('Exported results to lightup.csv')

    def run(self):
        
        page_num = 3

        for page in range(1, page_num + 1):
            base_url = 'https://www.lightup.com/standard-household-lighting.html?p='
            base_url += str(page)
            res = self.fetch(base_url)
            self.parse(res.text)
            #time.sleep(30)

        self.to_csv()
        # html = self.load_response()
        # self.parse(html)
        #self.save_response(html.text)


if __name__ == '__main__':
    scraper = LightupScraper()
    scraper.run()
Error:
Error:
File "lightup_scraper.py", line 66, in parse 'price': prices[item] IndexError: list index out of range
I tried to scrape the prices but i am getting list index out of range error because the tag which is responsible for price is returning 14 elements and the other tags returning 16 this is because some price tags are different e.g for price per case tag is price price--withoutTax price-per--case and for single product price price--withoutTax.I tried try except block but no luck it gives me whole another list not individual prices i can't get my head around this problem may be someone can give me some pointers to actually make this work.


RE: Python BeautifulSoup IndexError: list index out of range - Daring_T - May-28-2021

There are several ways to fix this, here's a couple of ways I thought of:

Here's a oneliner for line 66
'price': None if item < titles else prices[item]
if you want a function:

def get(list, index, default=None):
	try:
		return list[index]
	except IndexError:
		return default