Sep-06-2022, 05:24 AM
from scrapy import Spider
from scrapy.http import Request


class TesterSpider(Spider):
    """Crawl books.toscrape.com and yield one item per book detail page."""

    name = 'tester'
    allowed_domains = ['books.toscrape.com']
    start_urls = ['http://books.toscrape.com/']

    def parse(self, response):
        """Schedule every book linked from a listing page, then the next page.

        Yields one ``Request`` per book link (handled by ``parse_book``) and,
        when a 'next' link is present, one ``Request`` for the next listing
        page (no explicit callback is passed for that request).
        """
        for book in response.xpath("//h3/a/@href").extract():
            yield Request(response.urljoin(book), callback=self.parse_book)

        # process next page
        next_page_url = response.xpath("//a[text()='next']/@href").extract_first()
        # When there is no 'next' link, extract_first() returns None; joining
        # None would just re-request the current page, so stop paginating.
        if next_page_url is not None:
            yield Request(response.urljoin(next_page_url))

    def parse_book(self, response):
        """Extract the fields of a single book page and yield them as a dict.

        Any field whose XPath matches nothing is emitted as ``None``.
        """
        title = response.xpath("//h1/text()").extract_first()
        price = response.xpath("//*[@class='price_color']/text()").extract_first()

        img_url = response.xpath("//img/@src").extract_first()
        if img_url is not None:
            # Turn the relative '../..' prefix into the site root.
            img_url = img_url.replace('../..', 'https://books.toscrape.com')

        rating = response.xpath(
            "//p[starts-with(@class,'star-rating')]/@class"
        ).extract_first()
        if rating is not None:
            # The class attribute looks like 'star-rating Three'; keep the word.
            rating = rating.replace('star-rating ', '')

        desc = response.xpath(
            "//div[(@id='product_description')]/following-sibling::p/text()"
        ).extract_first()

        # Product Description
        upc = product_desc(response, 'UPC')
        product_type = product_desc(response, 'Product Type')
        availability = product_desc(response, 'Availability')
        number_of_reviews = product_desc(response, 'Number of reviews')

        yield {
            'Title': title,
            'Price': price,
            'Location': img_url,
            'Rating': rating,
            'Description': desc,
            'UPC': upc,
            'Product Type': product_type,
            'Availability': availability,
            'Reviews': number_of_reviews,
        }


# NOTE(review): this helper must start at column 0 (module level). It is
# called as a bare name from parse_book; if it is indented into the class
# body, that bare name does not resolve and the IDE reports it as undefined.
# Presumably that was the cause of the reported error - confirm against the
# original indentation.
def product_desc(response, lookup):
    """Return the text of the table cell whose header cell reads *lookup*.

    Args:
        response: the response for a book page.
        lookup: exact text of the ``<th>`` to look for, e.g. ``'UPC'``.

    Returns:
        The text of the first ``<td>`` following the matching ``<th>``, or
        ``None`` when nothing matches.
    """
    # Pass the label as an XPath variable rather than concatenating it into
    # the expression, so labels containing quotes cannot break the query.
    return response.xpath(
        "//th[text()=$label]/following-sibling::td/text()", label=lookup
    ).extract_first()


# Original question:
# As you can see, the function 'product_desc' is defined at the very bottom,
# but where I call it, just above the yield block, my IDE (VS Code) reports
# that it is undefined. Can anyone spot what I am missing?
Thank you