Python Forum

Full Version: Web Scraping Error : Not getting expected result
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi,

I am a beginner in Python programming. Of late, I have started learning web scraping after completing a Python programming course from Udemy. I was trying to scrape this website but unfortunately could not get the CSV file. Moreover, the total number is also not printing. There may be more errors. Can you please figure it out and help me with a simple explanation? I am just a couple of days old in web scraping.

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Crawl the ProgrammableWeb API directory page by page, collect every API
# name, print the total, and save the names to 'api_detail.csv'.
base_url = 'https://www.programmableweb.com'
url = base_url + '/apis/directory'

api_dict = {}  # running API number -> [API name]; becomes one CSV row each
api_no = 0
while True:
    # A timeout keeps the script from hanging forever on a stalled request.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    apis = soup.find_all('td', {'class': 'views-field views-field-title col-md-3'})

    for api in apis:
        link = api.find('a')
        if link is None:
            # Title cell without a link: there is no name to record.
            continue
        api_no += 1
        # Record the name. The dictionary was previously never filled, which
        # is why the resulting CSV could only ever contain a header.
        api_dict[api_no] = [link.text.strip()]

    # soup.find() returns None when there is no "next page" link (expected on
    # the last page). Calling .get() on None raised AttributeError, so the
    # total and the CSV below were never reached.
    url_tag = soup.find('a', {'title': 'Go to next page'})
    if url_tag is None or not url_tag.get('href'):
        break
    url = base_url + url_tag.get('href')

print('Total APIs: ', api_no)
api_dict_df = pd.DataFrame.from_dict(api_dict, orient='index', columns=['API name'])
# A bare .head() displays nothing when run as a script, so print it.
print(api_dict_df.head())
api_dict_df.to_csv('api_detail.csv')
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Walk the ProgrammableWeb API directory one page at a time, gather each API
# name, report how many were found, and write them to 'api_detail.csv'.
base_url = 'https://www.programmableweb.com'
url = base_url + '/apis/directory'

api_dict = {}  # running API number -> [API name]; one CSV row per entry
api_no = 0
while True:
    # Use a timeout so a stalled connection cannot block the script forever.
    response = requests.get(url, timeout=30)
    # Raise on 4xx/5xx rather than scraping an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    apis = soup.find_all('td', {'class': 'views-field views-field-title col-md-3'})

    for api in apis:
        link = api.find('a')
        if link is None:
            # No anchor inside this cell, so there is no API name to store.
            continue
        api_no += 1
        # Actually store the name: without this the dictionary stays empty
        # and the CSV contains nothing but the column header.
        api_dict[api_no] = [link.text.strip()]

    # When no "next page" link exists (expected on the final page),
    # soup.find() returns None; guard against it instead of crashing with
    # AttributeError before the results are printed and saved.
    url_tag = soup.find('a', {'title': 'Go to next page'})
    if url_tag is None or not url_tag.get('href'):
        break
    url = base_url + url_tag.get('href')

print('Total APIs: ', api_no)
api_dict_df = pd.DataFrame.from_dict(api_dict, orient='index', columns=['API name'])
# Print the preview explicitly; a bare .head() shows nothing in a script.
print(api_dict_df.head())
api_dict_df.to_csv('api_detail.csv')
This code will show what's available in the group you're interested in (it prints out all tr's and td's).
All you have to do is select the elements that you want, and then extract that data.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sys

def scrape_page():
    """Fetch the API directory's first page and print every table cell.

    This is an exploration helper: it dumps each ``<td>`` of each ``<tr>``
    found in the directory table so you can see which elements hold the data
    you want to extract.

    Exits the process with status -1 if the page cannot be fetched or the
    expected table body is not found in the HTML.
    """
    url = 'https://www.programmableweb.com/apis/directory'

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Unable to fetch page, bad status: {response.status_code}")
        sys.exit(-1)

    soup = BeautifulSoup(response.text, 'html.parser')
    # select() returns an empty list when nothing matches; check before
    # indexing so a changed page layout gives a clear message rather than
    # an IndexError.
    tbodies = soup.select('.views-table > tbody:nth-child(2)')
    if not tbodies:
        print("Unable to find the API table in the page")
        sys.exit(-1)

    for n, tr in enumerate(tbodies[0].find_all('tr')):
        for n1, td in enumerate(tr.find_all('td')):
            print(f"\n--------------------- tr_{n}, td_{n1} ---------------------")
            print(td)
(Oct-08-2019, 03:52 AM)Larz60+ Wrote: [ -> ]This code will show what's available in the group you're interested in (it prints out all tr's and td's).
All you have to do is select the elements that you want, and then extract that data.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sys

def scrape_page():
    """Print every cell of the API directory table on the first page.

    Downloads the directory page, locates the table body, and prints each
    ``<td>`` element labelled with its row and column index, so the markup
    can be inspected before writing the real extraction code.

    Exits the process with status -1 when the request does not return HTTP
    200 or when the table body cannot be located.
    """
    url = 'https://www.programmableweb.com/apis/directory'

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Unable to fetch page, bad status: {response.status_code}")
        sys.exit(-1)

    soup = BeautifulSoup(response.text, 'html.parser')
    # An empty result means the selector no longer matches the page; report
    # that explicitly instead of failing with IndexError on [0].
    matches = soup.select('.views-table > tbody:nth-child(2)')
    if not matches:
        print("Unable to find the API table in the page")
        sys.exit(-1)
    tbody = matches[0]

    for n, tr in enumerate(tbody.find_all('tr')):
        for n1, td in enumerate(tr.find_all('td')):
            print(f"\n--------------------- tr_{n}, td_{n1} ---------------------")
            print(td)

Thank you very much for the solution. As you can understand, I am just a few days old in web scraping. Can you also suggest one good tutorial which I should learn from?

Regards,
Ravi
(Oct-08-2019, 07:53 AM)adminravi Wrote: [ -> ]I am just a few days old in web scraping. Can you also suggest one good tutorial which I should learn from?
We have a couple of tutorials here on this topic.
Web-Scraping part-1
Web-Scraping part-2