Jun-02-2020, 05:49 PM
I'm trying to scrape data from a game's market and use the data to track it.
So I have the scraper log in, it goes and pulls some items on the market and stores them in a table.
Now I'm trying to have it use those stored items in the table, to compare it to similar items being sold on the market. I have it so after it stores each item in the table, it goes and checks for the item id on each item, then looks them up.
When it goes to look them up again, it is making me log into the game again. Is this because it isn't passing cookies? Also, am I doing all this correctly? I want it to be optimized as best I can.
Here is my code:
So I have the scraper log in, it goes and pulls some items on the market and stores them in a table.
Now I'm trying to have it use those stored items in the table, to compare it to similar items being sold on the market. I have it so after it stores each item in the table, it goes and checks for the item id on each item, then looks them up.
When it goes to look them up again, it is making me log into the game again. Is this because it isn't passing cookies? Also, am I doing all this correctly? I want it to be optimized as best I can.
Here is my code:
from bs4 import BeautifulSoup from lxml import html import requests import re from collections import defaultdict # Start the session session = requests.Session() market_items = defaultdict(dict) compare_prices = defaultdict(dict) # Create the payload username = "EMAIL" password = "PASSWORD" authenticity_token = 0 LOGIN_URL = "https://web.simple-mmo.com/login" URL = "https://web.simple-mmo.com/market/collectables/all" PriceURL = "https://web.simple-mmo.com/market/all/all" def findAveragePrice(): session_requests = requests.session() key_items = market_items.items() t = len(key_items) for key in market_items: lookID = market_items[key]["ID"] print(lookID) payload = { "itemid": lookID, "new_page": "true", "_token": authenticity_token } result = session_requests.get(PriceURL, data = payload, headers = dict(referer = PriceURL)) soup = BeautifulSoup(result.content, 'html.parser') print(result.text) #print(market_items[key]["PRICE"]) pricematch = soup.find_all('div', class_='individual-item') print(pricematch) for match in pricematch: x = match.find('a')['onclick'] title = x.split("retrieveMarketItem(")[1].strip().split(')')[0] ITEMID = title.split(",")[0].lstrip() RdmNum = title.split(",'")[1].lstrip().split("'")[0] price = title.split(" '")[1].lstrip().split("',")[0] player = title.split(" '")[2].lstrip().split("'")[0] time = title.split(" '")[3].lstrip().split("'")[0] compare_prices[key]["PRICE"] print(compare_prices) def main(): session_requests = requests.session() result = session_requests.get(LOGIN_URL) tree = html.fromstring(result.text) authenticity_token = list(set(tree.xpath("//input[@name='_token']/@value")))[0] payload = { "email": username, "password": password, "_token": authenticity_token } result = session_requests.post(LOGIN_URL, data = payload, headers = dict(referer = LOGIN_URL)) result = session_requests.get(URL, headers = dict(referer = URL)) tree = html.fromstring(result.content) bucket_names = tree.xpath("//div[@class='individual-item']/span/text()") soup = BeautifulSoup(result.content, 'html.parser') #print(soup) collectables = soup.find_all('div', class_='individual-item') for collectable in collectables: x = collectable.find('a')['onclick'] #print(x) title = x.split("retrieveMarketItem(")[1].strip().split(')')[0] ITEMID = title.split(",")[0].lstrip() RdmNum = title.split(",'")[1].lstrip().split("'")[0] price = title.split(" '")[1].lstrip().split("',")[0] player = title.split(" '")[2].lstrip().split("'")[0] time = title.split(" '")[3].lstrip().split("'")[0] #print(title) #print("ID: " + ITEMID) #print("UniqueID: " + RdmNum) #print("PRICE: " + price) #print("PLAYER: " + player) #print("TIME: " + time) #market_items["UniqueID"] = RdmNum market_items[RdmNum]["ID"] = ITEMID market_items[RdmNum]["PRICE"] = price market_items[RdmNum]["SELLER"] = player market_items[RdmNum]["TIME"] = time #print(market_items) findAveragePrice() if __name__ == '__main__': main()So right now, all I'm getting returned is the login page. I think its because cookies aren't being passed through? But Im not sure how to fix that.