Python Forum

Full Version: Code Assistance needed in saving the file
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi Pythonians,

I am trying to retrieve data from MagicBricks.
The code runs fine; however, when the file is saved, no data is stored in it.

Could you please take a look and advise on how I can approach this problem?
I am posting the code below.


import os
from bs4 import BeautifulSoup
from selenium import webdriver
import sys
import json


# Location of the chromedriver executable; expanduser() resolves a leading
# "~" if the path ever contains one.
chromedriver = os.path.expanduser(
    "C:/Users/MithunT/OneDrive/Chrome Driver/chromedriver.exe"
)
print('chromedriver path: {}'.format(chromedriver))

# The executable's path is also appended to the module search path before
# the browser session is started.
sys.path.append(chromedriver)

# Module-level browser session used by the rest of the script.
driver = webdriver.Chrome(chromedriver)


def get_house_links(driver, cities=('Hyderabad',), pages=range(1, 100)):
    """Load MagicBricks sale-listing result pages and pass each to get_data().

    Despite its name, this function collects no links and returns None: for
    every city and every page number it loads the result page in the
    browser, parses the rendered HTML, and hands the parsed page to
    ``get_data``.

    Args:
        driver: Browser driver object providing ``get(url)`` and
            ``page_source``.
        cities: City names inserted into the ``cityName`` query parameter.
        pages: Page numbers to request; the default covers pages 1 to 99.
    """
    url_template = (
        "https://www.magicbricks.com/property-for-sale/residential-real-estate"
        "?proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,"
        "Studio-Apartment,Residential-House,Villa,Residential-Plot"
        "&page={page}&cityName={city}"
    )
    for city in cities:
        for page in pages:
            driver.get(url_template.format(page=page, city=city))
            # Parse what the browser rendered, not a separately fetched copy.
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            get_data(soup)
            
            
            
# itemprop values of <meta> tags that are copied verbatim into a record.
_META_FIELDS = (
    'name',
    'description',
    'addressLocality',
    'longitude',
    'latitude',
    'numberOfRooms',
    'floorSize',
)


def _attr(tag, key):
    """Return attribute *key* of *tag*, or "" if the tag or attribute is absent."""
    if tag is None:
        return ""
    return tag.get(key, "")


def _parse_card(card, soup):
    """Build the record dict for one listing card; missing values stay ""."""
    fields = dict.fromkeys(_META_FIELDS, "")
    url = ""
    for meta in card.find_all('meta'):
        # Not every <meta> carries itemprop/content, so never index blindly.
        prop = meta.get('itemprop')
        content = meta.get('content', "")
        if prop == 'url':
            url = 'https://www.magicbricks.com' + content
        elif prop in fields:
            fields[prop] = content

    hidden = card.find('span', class_='hidden')
    prop_id = _attr(hidden, 'id')
    # The agent/price attributes are read from the span with the same id,
    # searched for on the whole page rather than inside the card.
    agent = soup.find('span', id=prop_id) if prop_id else None

    return {
        "name": fields['name'],
        "id": prop_id,
        "description": fields['description'],
        "url": url,
        "price": _attr(hidden, 'data-price'),
        "priceInWord": _attr(agent, 'data-priced'),
        "location": {
            "cityName": _attr(agent, 'data-cityname'),
            "addressLocality": fields['addressLocality'],
            "longitude": fields['longitude'],
            "latitude": fields['latitude'],
        },
        "flatDetails": {
            "numberOfRooms": fields['numberOfRooms'],
            "bathroom": _attr(hidden, 'data-bathroom'),
            "bedroom": _attr(hidden, 'data-bedroom'),
            "floorSize": fields['floorSize'],
            "floorno": _attr(hidden, 'data-floorno'),
            "furnshingstatus": _attr(hidden, 'data-furnshingstatus'),
        },
        "agentDetails": {
            "agentName": _attr(agent, 'data-soname'),
            "agentCompanyName": _attr(agent, 'data-companyname'),
            "agentMaskedmobilenumber": _attr(agent, 'data-maskedmobilenumber'),
        },
    }


def get_data(soup, output_path="data.json"):
    """Append every listing card found in *soup* to a JSON file.

    Each ``div`` with class ``m-srp-card SRCard`` becomes one record, and
    all records are appended to the ``"property"`` list stored in
    *output_path*. Values that cannot be found on the page are stored as
    empty strings instead of raising.

    Args:
        soup: Parsed page offering ``find_all`` and ``find``, such as a
            BeautifulSoup object.
        output_path: JSON file to extend. It is created when it does not
            exist yet.

    Returns:
        None. When the page holds no listing cards the file is not touched.
    """
    cards = soup.find_all('div', class_='m-srp-card SRCard')
    records = [_parse_card(card, soup) for card in cards]
    if not records:
        return

    try:
        with open(output_path, encoding='utf-8') as f:
            feeds = json.load(f)
    except FileNotFoundError:
        feeds = {}

    # Append each parsed record exactly once.
    feeds.setdefault('property', []).extend(records)

    # Rewrite the whole file so no stale bytes can trail the new JSON.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(feeds, f)
        
        
# Start every run from an empty result file; get_data() appends to the
# "property" list it finds in data.json.
v = {
    "property": [],
}
with open("data.json", mode='w', encoding='utf-8') as f:
    json.dump(v, f)

try:
    get_house_links(driver)
finally:
    # Close the browser and end the driver session even if the crawl fails.
    driver.quit()