Hi Pythonians,
I am trying to retrieve data from MagicBricks,
The code runs without errors; however, when the file is saved, it contains no data.
Could you please take a look and advise how I should approach this problem?
I am posting the code below.
I am trying to retrieve data from MagicBricks,
The code runs without errors; however, when the file is saved, it contains no data.
Could you please take a look and advise how I should approach this problem?
I am posting the code below.
import json
import os
import sys

from bs4 import BeautifulSoup
from selenium import webdriver

# Path to the chromedriver executable.
chromedriver = os.path.expanduser(
    "C:/Users/MithunT/OneDrive/Chrome Driver/chromedriver.exe"
)

# File that accumulates the scraped listings as {"property": [...]}.
DATA_FILE = "data.json"

SITE_ROOT = "https://www.magicbricks.com"
SEARCH_URL = (
    "https://www.magicbricks.com/property-for-sale/residential-real-estate"
    "?proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,"
    "Studio-Apartment,Residential-House,Villa,Residential-Plot"
)

# <meta itemprop="..."> values copied verbatim into each record.
_META_FIELDS = (
    "name",
    "description",
    "url",
    "addressLocality",
    "longitude",
    "latitude",
    "numberOfRooms",
    "floorSize",
)


def _attr(tag, key):
    """Return attribute *key* of *tag*, or "" if the tag or attribute is missing."""
    if tag is None:
        return ""
    return tag.get(key, "")


def _parse_card(card, soup):
    """Build one property record (a nested dict) from a listing card element.

    *card* is the listing's <div>; *soup* is the whole page, searched for the
    <span> whose id matches the card's hidden span. Any value that cannot be
    found is left as an empty string instead of raising.
    """
    meta = dict.fromkeys(_META_FIELDS, "")
    for tag in card.find_all("meta"):
        # .get() because not every <meta> carries an itemprop attribute.
        prop = tag.get("itemprop")
        if prop not in meta:
            continue
        content = tag.get("content", "")
        # The url meta holds a site-relative path.
        meta[prop] = SITE_ROOT + content if prop == "url" else content

    hidden = card.find("span", class_="hidden")
    listing_id = _attr(hidden, "id")
    agent = soup.find("span", id=listing_id) if listing_id else None

    return {
        "name": meta["name"],
        "id": listing_id,
        "description": meta["description"],
        "url": meta["url"],
        "price": _attr(hidden, "data-price"),
        "priceInWord": _attr(agent, "data-priced"),
        "location": {
            "cityName": _attr(agent, "data-cityname"),
            "addressLocality": meta["addressLocality"],
            "longitude": meta["longitude"],
            "latitude": meta["latitude"],
        },
        "flatDetails": {
            "numberOfRooms": meta["numberOfRooms"],
            "bathroom": _attr(hidden, "data-bathroom"),
            "bedroom": _attr(hidden, "data-bedroom"),
            "floorSize": meta["floorSize"],
            "floorno": _attr(hidden, "data-floorno"),
            "furnshingstatus": _attr(hidden, "data-furnshingstatus"),
        },
        "agentDetails": {
            "agentName": _attr(agent, "data-soname"),
            "agentCompanyName": _attr(agent, "data-companyname"),
            "agentMaskedmobilenumber": _attr(agent, "data-maskedmobilenumber"),
        },
    }


def _append_records(path, records):
    """Append *records* to the "property" list stored in the JSON file *path*.

    A missing file is treated as an empty {"property": []} document.
    """
    try:
        with open(path, encoding="utf-8") as f:
            feeds = json.load(f)
    except FileNotFoundError:
        feeds = {"property": []}
    feeds.setdefault("property", []).extend(records)
    # Rewrite the file from scratch so no stale bytes can trail the new JSON.
    with open(path, mode="w", encoding="utf-8") as f:
        json.dump(feeds, f)


def get_data(soup):
    """Extract every listing card from *soup* and append the records to data.json.

    Returns the list of records parsed from this page (empty if none matched).
    """
    # A CSS selector matches any element carrying both classes; passing
    # class_='m-srp-card SRCard' to find_all() only matches when the class
    # attribute is exactly that string, so extra or reordered classes would
    # silently yield nothing.
    cards = soup.select("div.m-srp-card.SRCard")
    if not cards:
        # NOTE(review): if this fires on every page, the site markup has
        # probably changed or the cards are rendered after page load (an
        # explicit Selenium wait may be needed) - confirm in the browser.
        print("warning: no listing cards found on this page", file=sys.stderr)
        return []

    records = [_parse_card(card, soup) for card in cards]
    # The original loop appended the last record once per card; store each
    # parsed record instead.
    _append_records(DATA_FILE, records)
    return records


def get_house_links(driver):
    """Load search-result pages 1-99 for each city and scrape every page."""
    cities = ["Hyderabad"]
    for city in cities:
        for page in range(1, 100):
            driver.get(f"{SEARCH_URL}&page={page}&cityName={city}")
            soup = BeautifulSoup(driver.page_source, "html.parser")
            get_data(soup)


def main():
    """Start Chrome, reset data.json, scrape all pages, then close the browser."""
    print('chromedriver path: {}'.format(chromedriver))
    # NOTE(review): passing the driver path positionally is the older Selenium
    # calling convention; newer releases expect a Service object - confirm
    # against the installed Selenium version.
    driver = webdriver.Chrome(chromedriver)
    try:
        # Start every run from an empty result file.
        with open(DATA_FILE, mode="w", encoding="utf-8") as f:
            json.dump({"property": []}, f)
        get_house_links(driver)
    finally:
        # Always close the browser, even if scraping fails part-way.
        driver.quit()


if __name__ == "__main__":
    main()
Larz60+ write Oct-09-2022, 10:34 PM:
Please post all code, output and errors (in their entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Fixed for you this time. Please use BBCode tags on future posts.
Please post all code, output and errors (in their entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Fixed for you this time. Please use BBCode tags on future posts.