![]() |
Code Assistance needed in saving the file - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: Code Assistance needed in saving the file (/thread-38412.html) |
# Code Assistance needed in saving the file - MithunT - Oct-09-2022
#
# Hi Pythonians, I am trying to retrieve data from MagicBricks. The code runs
# fine, however when the file is saved, there is no data stored. Request you
# to have a check and provide your expertise in how I can approach this
# problem. I am posting the code below.

import json
import os
import sys

from bs4 import BeautifulSoup
from selenium import webdriver

# File that accumulates the scraped listings as {"property": [...]}.
DATA_FILE = "data.json"

BASE_URL = 'https://www.magicbricks.com'

# Search-results URL; {page} and {city} are filled in per request.
SEARCH_URL = (
    "https://www.magicbricks.com/property-for-sale/residential-real-estate"
    "?proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,"
    "Studio-Apartment,Residential-House,Villa,Residential-Plot"
    "&page={page}&cityName={city}"
)

# <meta itemprop="..."> values copied out of every listing card.
_META_FIELDS = (
    'name',
    'description',
    'url',
    'addressLocality',
    'longitude',
    'latitude',
    'numberOfRooms',
    'floorSize',
)


def _attr(tag, attribute):
    """Return *attribute* of *tag*, or "" when the tag or attribute is absent."""
    if tag is None:
        return ""
    return tag.get(attribute, "")


def _parse_card(card, soup):
    """Build the record dict for one listing card.

    Args:
        card: the listing's ``div`` element.
        soup: the whole parsed page; used to look up the span that carries
            the price-in-words and agent attributes by the listing id.

    Returns:
        A dict with the keys ``name``, ``id``, ``description``, ``url``,
        ``price``, ``priceInWord``, ``location``, ``flatDetails`` and
        ``agentDetails``. Any value missing from the page is "".
    """
    meta = dict.fromkeys(_META_FIELDS, "")
    for tag in card.find_all('meta'):
        # .get() instead of [] so a <meta> without itemprop/content is
        # skipped rather than raising KeyError.
        key = tag.get('itemprop')
        if key not in meta:
            continue
        value = tag.get('content', "")
        if key == 'url':
            # The page stores a site-relative link.
            value = BASE_URL + value
        meta[key] = value

    hidden = card.find('span', class_='hidden')
    listing_id = _attr(hidden, 'id')
    # Only search by id when there is one to search for.
    agent = soup.find('span', id=listing_id) if listing_id else None

    return {
        "name": meta['name'],
        "id": listing_id,
        "description": meta['description'],
        "url": meta['url'],
        "price": _attr(hidden, 'data-price'),
        "priceInWord": _attr(agent, 'data-priced'),
        "location": {
            "cityName": _attr(agent, 'data-cityname'),
            "addressLocality": meta['addressLocality'],
            "longitude": meta['longitude'],
            "latitude": meta['latitude'],
        },
        "flatDetails": {
            "numberOfRooms": meta['numberOfRooms'],
            "bathroom": _attr(hidden, 'data-bathroom'),
            "bedroom": _attr(hidden, 'data-bedroom'),
            "floorSize": meta['floorSize'],
            "floorno": _attr(hidden, 'data-floorno'),
            "furnshingstatus": _attr(hidden, 'data-furnshingstatus'),
        },
        "agentDetails": {
            "agentName": _attr(agent, 'data-soname'),
            "agentCompanyName": _attr(agent, 'data-companyname'),
            "agentMaskedmobilenumber": _attr(agent, 'data-maskedmobilenumber'),
        },
    }


def _append_properties(records, path=DATA_FILE):
    """Append *records* to the ``property`` list stored in the JSON file *path*.

    The file must already exist and contain an object with a ``property``
    list (the script creates it before scraping starts).
    """
    with open(path, 'r+', encoding='utf-8') as f:
        feeds = json.load(f)
        # Every record is appended. The earlier loop appended the *last*
        # record once per item instead of each item.
        feeds['property'].extend(records)
        f.seek(0)
        json.dump(feeds, f)
        # Drop any leftover bytes should the new document ever be shorter.
        f.truncate()


def get_data(soup):
    """Extract every listing card from *soup* and append it to ``data.json``.

    Args:
        soup: a parsed search-results page.

    Returns:
        None. The records are written to ``DATA_FILE``.
    """
    # NOTE(review): with a space-separated string, BeautifulSoup matches the
    # exact value of the class attribute, so cards whose classes are ordered
    # differently or carry extra classes will not match - confirm against the
    # live markup if this keeps returning nothing.
    cards = soup.find_all('div', class_='m-srp-card SRCard')
    if not cards:
        # Make an empty result visible; otherwise data.json silently stays
        # at {"property": []}.
        print("No listing cards matched 'm-srp-card SRCard'; nothing saved "
              "for this page.")
        return
    _append_properties([_parse_card(card, soup) for card in cards])


def get_house_links(driver, cities=('Hyderabad',), pages=range(1, 100)):
    """Load each search-results page in the browser and save its listings.

    Args:
        driver: a Selenium WebDriver used to fetch the rendered pages.
        cities: city names to search; defaults to Hyderabad only.
        pages: page numbers to request per city; defaults to 1..99.
    """
    for city in cities:
        for page in pages:
            driver.get(SEARCH_URL.format(page=page, city=city))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            get_data(soup)


chromedriver = "C:/Users/MithunT/OneDrive/Chrome Driver/chromedriver.exe"  # path to the chromedriver executable
chromedriver = os.path.expanduser(chromedriver)
print('chromedriver path: {}'.format(chromedriver))
# NOTE(review): sys.path is Python's module search path; adding the driver
# executable to it looks unnecessary - confirm before removing.
sys.path.append(chromedriver)
driver = webdriver.Chrome(chromedriver)

try:
    # Start every run from an empty result file.
    v = {"property": []}
    with open(DATA_FILE, mode='w', encoding='utf-8') as f:
        json.dump(v, f)

    get_house_links(driver)
finally:
    # Always close the browser and the chromedriver process, even on error.
    driver.quit()