Something like this.
It should work for any list of categories; here is one with two 2-page categories and one single-page category.
import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://books.toscrape.com/catalogue/category/books'
# requests has no default timeout, so an unresponsive server would block forever.
REQUEST_TIMEOUT = 10  # seconds
# Exclusive upper bound for the page numbers probed per category (1..99).
MAX_PAGES = 100


def collect_category_pages(categories, max_pages=MAX_PAGES, timeout=REQUEST_TIMEOUT):
    """Return the URLs of every listing page for the given book categories.

    For each category the landing page is fetched first. If it contains no
    ``li.next > a`` link, the category has a single page and the landing
    page's final URL is recorded. Otherwise ``page-1.html``, ``page-2.html``,
    ... are requested in order until one does not answer with status 200.

    Args:
        categories: Iterable of category slugs such as ``'romance_8'``.
        max_pages: Exclusive upper bound for the page numbers that are probed.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list of page URLs, grouped by category in input order.

    Raises:
        requests.HTTPError: If a category landing page returns an error
            status (for example a misspelled slug), instead of silently
            recording a dead URL.
        requests.RequestException: On connection problems or timeouts.
    """
    found = []
    # One session reuses the underlying connection for all requests.
    with requests.Session() as session:
        for category in categories:
            url = f'{BASE_URL}/{category}'
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'lxml')
            next_link = soup.select_one('li.next > a')
            if next_link is None:
                # No "next" link: the category fits on a single page.
                found.append(response.url)
                continue
            # The page count is unknown, so probe until the first non-200 reply.
            for page_nr in range(1, max_pages):
                page = session.get(f'{url}/page-{page_nr}.html', timeout=timeout)
                if page.status_code != 200:
                    break
                found.append(page.url)
    return found


cats = ['romance_8', 'childrens_11', 'travel_2']
pages = collect_category_pages(cats)
>>> pages
['https://books.toscrape.com/catalogue/category/books/romance_8/page-1.html', 'https://books.toscrape.com/catalogue/category/books/romance_8/page-2.html', 'https://books.toscrape.com/catalogue/category/books/childrens_11/page-1.html', 'https://books.toscrape.com/catalogue/category/books/childrens_11/page-2.html', 'http://books.toscrape.com/catalogue/category/books/travel_2/']

Some notes: I also check for single-page categories, i.e. those that have no "next" page link, so that their pages are added to the list as well. It should work for any list of categories, so here is one with a 3-page category and two single-page categories.
cats = ['fantasy_19', 'travel_2', 'science_22']
>>> pages
['https://books.toscrape.com/catalogue/category/books/fantasy_19/page-1.html', 'https://books.toscrape.com/catalogue/category/books/fantasy_19/page-2.html', 'https://books.toscrape.com/catalogue/category/books/fantasy_19/page-3.html', 'http://books.toscrape.com/catalogue/category/books/travel_2/', 'http://books.toscrape.com/catalogue/category/books/science_22/']