Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Python scrapy scraped_items
#1
I was testing the following code to see the results, and while debugging I saw the scraped_items value and it was something like 4.777... that wasn't the result I wanted to get. Second, I wanted to scrape each def function into a different file, and finally to scrape all the functions, not only the first and second functions... :(

Thank you very much!!! :D

Here is my actual code:
# -*- coding: utf-8 -*-
import scrapy


class SccbotSpider(scrapy.Spider):
    """Spider that follows links through four chained callbacks.

    The callbacks run in the order ``parse`` -> ``parse_details`` ->
    ``parse_items`` -> ``parse_ims``. Each callback first yields one dict
    per matched element on the current page and then yields a request for
    every link it selected, handing the response to the next callback.
    """

    name = 'SccBot'
    start_urls = ['https://spurverbreiterung.de/index.php?cat=c182_Radbefestigungsteile.html']

    def parse(self, response):
        """Yield link data from the ``#tab1`` cells, then follow those links.

        Only cells inside the ``#tab1`` element are considered. Followed
        pages are handled by ``parse_details``.
        """
        cell_selector = 'tr > td[align="center"]'
        table = response.css('#tab1')

        for cell in table.css(cell_selector):
            anchor_texts = cell.css('a::text').extract()
            anchor_hrefs = cell.css('a::attr(href)').extract()
            yield {
                'TextBox': anchor_texts,
                'LinkBox': anchor_hrefs,
                'CurrentUrl': response.url,
            }

        # Requests are emitted only after every item of this page was yielded.
        for href in table.css(cell_selector + ' > a::attr(href)').extract():
            absolute_url = response.urljoin(href)
            yield scrapy.Request(url=absolute_url, callback=self.parse_details)

    def parse_details(self, response):
        """Yield link data from every centered table cell, then follow the links.

        Unlike ``parse`` the cells are selected from the whole response.
        Followed pages are handled by ``parse_items``.
        """
        cell_selector = 'tr > td[align="center"]'

        for cell in response.css(cell_selector):
            anchor_texts = cell.css('a::text').extract()
            anchor_hrefs = cell.css('a::attr(href)').extract()
            yield {
                'TextBox': anchor_texts,
                'LinkBox': anchor_hrefs,
                'CurrentUrl': response.url,
            }

        for href in response.css(cell_selector + ' > a::attr(href)').extract():
            absolute_url = response.urljoin(href)
            yield scrapy.Request(url=absolute_url, callback=self.parse_items)

    def parse_items(self, response):
        """Yield one dict per ``a.product_link`` entry, then follow each entry.

        ``Category`` and ``CategoryType`` are selected from the whole
        response; the remaining fields are selected relative to the entry.
        Followed pages are handled by ``parse_ims``.
        """
        entry_selector = 'div.inhalt > a.product_link'

        for entry in response.css(entry_selector):
            record = {}
            record['Category'] = response.css('#main_content > h1::text').extract()
            record['CategoryType'] = response.css(
                'div.content_boxes > div.rad_header::text').extract()
            record['ProductName'] = entry.css('div.prod-name::text').extract()
            record['ProductNumber'] = entry.css('div.art-nr > span::text').extract()
            # No ::text here: the whole matched element markup is extracted.
            record['Price'] = entry.css('div.preis').extract()
            record['AvaibilityIcon'] = entry.css('div.ampel > img::attr(src)').extract()
            record['ProductLink'] = entry.css('a.product_link::attr(href)').extract()
            record['CurrentURL'] = response.url
            yield record

        for href in response.css(entry_selector + '::attr(href)').extract():
            absolute_url = response.urljoin(href)
            yield scrapy.Request(url=absolute_url, callback=self.parse_ims)

    def parse_ims(self, response):
        """Yield one dict of product fields per ``div.wrapper`` element.

        This is the last callback of the chain; it yields no requests.
        """
        for wrapper in response.css('div.wrapper'):
            record = {}
            record['Title'] = wrapper.css('#product_info > h1::text').extract()
            record['Price'] = wrapper.css('div.productsinfo_price > span::text').extract()
            # The next fields extract full element markup rather than text nodes.
            record['ProductDetails'] = wrapper.css(
                'div.product_details.clear > table').extract()
            record['ProductInfo'] = wrapper.css('div.productsinfo_right').extract()
            record['ProductImg'] = wrapper.css(
                'div.productsinfo_img > ul > img::attr(src)').extract()
            record['MoreDetails'] = wrapper.css('div.textf_rechts').extract()
            record['CurrentURL'] = response.url
            yield record
Reply


Messages In This Thread
Python scrapy scraped_items - by Baggelhsk95 - Nov-12-2018, 12:39 PM
RE: Python scrapy scraped_items - by stranac - Nov-12-2018, 04:43 PM
RE: Python scrapy scraped_items - by Baggelhsk95 - Nov-13-2018, 08:30 AM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Python Scrapy Date Extraction Issue tr8585 1 3,331 Aug-05-2020, 04:32 AM
Last Post: tr8585
  Python Scrapy tr8585 2 2,390 Aug-04-2020, 04:11 AM
Last Post: tr8585
  Python - Scrapy Baggelhsk95 0 2,294 Apr-24-2019, 01:07 PM
Last Post: Baggelhsk95
  Python Scrapy ebay API Baggelhsk95 0 3,222 Nov-21-2018, 11:22 AM
Last Post: Baggelhsk95
  Python - Scrapy - CSS selector Baggelhsk95 1 5,568 Nov-07-2018, 04:45 PM
Last Post: stranac
  Python - Scrapy - Contains Baggelhsk95 3 4,539 Oct-27-2018, 03:42 PM
Last Post: stranac
  Python - Scrapy Login in Baggelhsk95 3 4,859 Oct-23-2018, 04:24 PM
Last Post: stranac
  Python - Scrapy Ebay Test Baggelhsk95 4 4,362 Oct-16-2018, 12:37 PM
Last Post: snippsat
  Python - Scrapy Login form Baggelhsk95 4 10,831 Oct-16-2018, 08:01 AM
Last Post: Baggelhsk95
  Python - Scrapy Javascript Pagination (next_page) Baggelhsk95 3 10,023 Oct-08-2018, 01:20 PM
Last Post: stranac

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020