Python Forum

Full Version: BeautifulSoup and pagination.
I have a script that downloads MP3s from a site. I don't know how to move on to the next page to download the rest of the music after everything on the first page has been downloaded. It seems to me that I should add some numbers to the URL, but I really cannot figure it out.
Here is my code:
#!/usr/bin/python3.4
#-*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import requests
import os
from datetime import datetime


def main():

    start = datetime.now()    

    url = 'https://muzofond.org/search/e%20mantra'
    html = requests.get(url).text

    soup = BeautifulSoup(html, 'lxml')
    
    os.system('clear')
    print('-----' * 10)

    name = []
    href = []
    for x in soup.find_all('a', 'dl'):
        #print(x.get('download'))
        #print(x.get('download'), x.get('href'))
        name.append(x.get('download'))
        href.append(x.get('href'))

    # drop the first entry from each list (the first 'dl' link)
    name.pop(0)
    href.pop(0)
    
    dirname = '/home/mikefromru/music/E-mantra/'

    print('Download...')
    numberSong = len(name)
    # work from the end of the lists, popping each song once it is saved
    while name:
        e_url = requests.get(href[-1], stream=True)
        with open(dirname + name[-1], 'wb') as f:
            f.write(e_url.content)
        print(numberSong, '-', name[-1])
        href.pop()
        name.pop()
        numberSong -= 1

    end = datetime.now()
    total = end - start
    print('The program was working for {} min'.format(str(total)))
    print('done')


if __name__ == '__main__':
    main()
You can do it in a loop with string formatting.
import time

for page in range(1,5):
    time.sleep(2)
    url = 'https://muzofond.org/search/e%20mantra/{}'.format(page)
    print(url)
Output:
https://muzofond.org/search/e%20mantra/1
https://muzofond.org/search/e%20mantra/2
https://muzofond.org/search/e%20mantra/3
https://muzofond.org/search/e%20mantra/4
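To tie that back into the script above, the page loop can wrap the scrape-and-download step. This is only a minimal sketch, assuming the search results really follow that .../{page} pattern; download_page and last_page are made up for illustration, and the 'a', 'dl' selector is just reused from the original code.
import os
import time

import requests
from bs4 import BeautifulSoup


def download_page(url, dirname):
    # same scraping step as the original script, for a single results page
    soup = BeautifulSoup(requests.get(url).text, 'lxml')
    for link in soup.find_all('a', 'dl')[1:]:   # skip the first 'dl' link, as above
        name, href = link.get('download'), link.get('href')
        if not name or not href:
            continue
        with open(os.path.join(dirname, name), 'wb') as f:
            f.write(requests.get(href, stream=True).content)
        print('Saved', name)


last_page = 4   # assumption: check how many result pages the search really has
for page in range(1, last_page + 1):
    time.sleep(2)   # small pause between page requests
    url = 'https://muzofond.org/search/e%20mantra/{}'.format(page)
    download_page(url, '/home/mikefromru/music/E-mantra/')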
If downloads start to block (take a long time) when fetching more than one page,
then it's a fine task for practicing how to launch tasks in parallel.
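A minimal sketch of that parallel idea with concurrent.futures; download_one and the songs list of (name, href) pairs are assumptions here, with the pairs coming from the same BeautifulSoup step as above.
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests


def download_one(name, href, dirname):
    # fetch one file and write it to disk
    response = requests.get(href, stream=True)
    with open(os.path.join(dirname, name), 'wb') as f:
        f.write(response.content)
    return name


def download_all(songs, dirname, max_workers=4):
    # songs: list of (name, href) pairs collected by the scraping step
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(download_one, name, href, dirname)
                   for name, href in songs]
        for future in as_completed(futures):
            print('Finished', future.result())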