missing append in a loop?
#1
hi guys,

I got stuck on the last step of my first basic scraping script...

The script runs in a loop, but it only collects data from one page instead of the ~50 that exist. Why is that? Am I missing an .append somewhere?

from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import urllib

headers = {
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://www.pararius.com/apartments/amsterdam',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Content-Type': 'text/plain',
}

data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}'


#for n in range(1, num_pages):
page = 'https://www.pararius.com/apartments/amsterdam/page-1'

r = requests.get(page, headers=headers, data=data)
content = (r.text)
soup = BeautifulSoup(content, 'html.parser')


#pagination- find max pages
page1 = soup.find('ul', {'class': 'pagination'})
pages = page1.find_all('li')
last_page = pages[-3]
num_pages = last_page.find('a').text

fulldata = []

for n in range(1, int(num_pages)+1):
    page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(n)
    print(page)


    for section in soup.find_all(class_='property-list-item-container'):
        dlink = section.find('a').get('href')
        type = section.find('span', {'class': 'type'}).text
        neighborhood = section.find('a').text.strip().split()[1]
        size = section.find('li', {'class': 'surface'}).text.strip().split()[0]
        bedrooms = section.find('li', {'class': 'surface'}).text.strip().split()[2]
        furniture = section.find('li', {'class': 'surface'}).text.strip().split()[4]
        if furniture == 'upholstered':
            furniture = "Unfurnished"
        elif furniture == 'furnished or upholstered':
            furniture = "Furnished & Unfurnished"
        #availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6]
        price = section.find('p', {'class': 'price '}).text.strip().split()[0]
        curr = "EUR" if "€" in price else "other"

        data = {
            'Direct Link':[dlink],
            'Typee':[type],
            'Neighborhood':[neighborhood],
            'Size':[size],
            'Bedrooms':[bedrooms],
            'Furniture':[furniture],
            'Price':[price],
            'Currency':[curr]
            }
        fulldata.append({
            'Direct Link': dlink,
            'Typee': type,
            'Neighborhood': neighborhood,
            'Size': size,
            'Bedrooms': bedrooms,
            'Furniture': furniture,
            'Price': price,
            'Currency': curr
        })

    
print(fulldata)
df = pd.DataFrame(fulldata)

df.to_excel(r'C:\Users\user\Desktop\scrap_data\tests\test.xlsx')
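For comparison, a minimal sketch of the likely fix: the request and parsing above happen once, before the page loop, so every iteration re-parses the same page-1 soup. Moving the fetch inside the for n loop (reusing the headers and selectors from the post; untested against the current site) would download each page-n URL before scraping it:

# Sketch only: same headers/selectors as above; the key change is that
# requests.get() and BeautifulSoup() now run inside the page loop.
fulldata = []

for n in range(1, int(num_pages) + 1):
    page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(n)
    print(page)

    r = requests.get(page, headers=headers)      # fetch THIS page, not just page-1
    soup = BeautifulSoup(r.text, 'html.parser')  # parse the freshly fetched page

    for section in soup.find_all(class_='property-list-item-container'):
        dlink = section.find('a').get('href')
        price = section.find('p', {'class': 'price '}).text.strip().split()[0]
        fulldata.append({
            'Direct Link': dlink,
            'Price': price,
            # ... the remaining fields exactly as in the inner loop above
        })

df = pd.DataFrame(fulldata)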


