Jun-07-2020, 07:04 AM
Looking for some help scraping a website that requires a login. Essentially the website is to get trading card prices (that I believe are from ebay) but in a format that allows search beyond the 90 days that is on ebays site. Login url is https://members.pwccmarketplace.com/login The url I search from is https://members.pwccmarketplace.com/ I searched the previous posts and found one I thought I could try replicate but to no success. Below is the code, any help whether it could work or not would be appreciated.
#https://stackoverflow.com/questions/47438699/scraping-a-website-with-python-3-that-requires-login import requests from lxml import html from bs4 import BeautifulSoup import unicodecsv as csv import os import sys import io import time import datetime from datetime import datetime from datetime import date import pandas as pd import numpy as np from time import sleep from random import randint from urllib.parse import quote Product_name = [] Price = [] Date_sold = [] url = "https://www.pwccmarketplace.com/login" values = {"email": "[email protected]", "password": "password"} session = requests.Session() r = session.post(url, data=values) Search_name = input("Search for: ") Exclude_terms = input("Exclude these terms (- infront of all, no spaces): ") qstr = quote(Search_name) qstrr = quote(Exclude_terms) Number_pages = int(input("Number of pages you want searched (Number -1): ")) pages = np.arange(1, Number_pages) for page in pages: params = {"Category": 6, "deltreeid": 6, "do": "Delete Tree"} url = "https://www.pwccmarketplace.com/market-price-research?q=" + qstr + "+" + qstrr + "&year_min=2004&year_max=2020&price_min=0&price_max=10000&sort_by=date_desc&sale_type=auction&items_per_page=250&page=" + str(page) result = session.get(url, data=params) soup = BeautifulSoup(result.text, "lxml") search = soup.find_all('tr') sleep(randint(2,10)) for container in search:Any help appreciated