Jan-17-2018, 08:26 AM
hi,
I am trying to create a webscraper which will take links from csv file and scrape data.
but whenever i run this code it says object of nonetype has no len().
Below is my code:
error is receive is :
I am trying to create a webscraper which will take links from csv file and scrape data.
but whenever i run this code it says object of nonetype has no len().
Below is my code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import requests from bs4 import BeautifulSoup import time import os import csv file = {} final_data = [] def read_file(): global file f = open ( "Data.csv" ) for line in f.readlines(): data = line.split( "," ) file [data[ 0 ]] = data[ 1 ] def writedata(alldata1, filename): with open ( "./" + filename, "w" )as csvfile: csvfile = csv.writer(csvfile, delimiter = "," ) csvfile.writerow("") for i in range ( 0 , len (alldata1)): csvfile.writerow(alldata1[i]) def parse_data(): global file global final_data for data_obj in file .keys(): link = file [data_obj] #print(link) data = getbyget(link, {}) soup = BeautifulSoup(data, "html.parser" ) get_details = soup.find_all( class_ = "box2-body" )[ 0 ] details = get_details.find_all( id = "ctl00_ContentPlaceHolder1_Lblproposedenddt" ) name = get_details.find_all( id = "ctl00_ContentPlaceHolder1_lblprojectname" ) date = "" names = "" sublist = [] for more in details: date = more.text sublist.append(date) for nme in name: names = nme.text sublist.append(names) get_state = soup.find_all( class_ = "box2-body" )[ 1 ] state = get_state.find_all( id = "ctl00_ContentPlaceHolder1_lblState" ) city = get_state.find_all( id = "ctl00_ContentPlaceHolder1_lbldistrict" ) cities = "" states = "" for statename in state: states = statename.text sublist.append(states) for citys in city: cities = citys.text sublist.append(cities) get_pmptername = soup.find_all( class_ = "box2-body" )[ 2 ] promotername = get_pmptername.find_all( id = "ctl00_ContentPlaceHolder1_Lblpromotername" ) geta = get_pmptername.find_all( "a" ) alinks = "" promote = "" for promoter in promotername: promote = promoter.text sublist.append(promote) for linka in geta: sublist.append(alinks) final_data.append(sublist) print (final_data) def getbyget(url, values): res = requests.get(url, data = values) data = res.text return data def main(): read_file() datas = parse_data() writedata(datas, "UP_DATA.csv" ) main() |
Error:Traceback (most recent call last):
File "C:\Users\prince.bhatia\Desktop\up_additional\up_rera_additional_details - Copy.py", line 80, in <module>
main()
File "C:\Users\prince.bhatia\Desktop\up_additional\up_rera_additional_details - Copy.py", line 79, in main
writedata(datas, "UP_DATA.csv")
File "C:\Users\prince.bhatia\Desktop\up_additional\up_rera_additional_details - Copy.py", line 21, in writedata
for i in range(0, len(alldata1)):
TypeError: object of type 'NoneType' has no len()
i have append data into final_data so it should load data into csv but it throws error for no len(), can someone please help on this?