The following will extract the page links from the web page in your last post, and print the URLs that reference pages
(indexes for remaining pages)
it will also print download links and zipfile names
the actual download links appear to be like: https://www.flightsim.com/vbfs/fslib.php...&fid=64358
(indexes for remaining pages)
it will also print download links and zipfile names
the actual download links appear to be like: https://www.flightsim.com/vbfs/fslib.php...&fid=64358
import requests
from bs4 import BeautifulSoup


class MyAttempt:
    """Scrape a flightsim.com library search-results page.

    Prints two kinds of links found on the page:
    pagination links (URL contains ``page=``) and download links
    (URL contains ``copyright``).
    """

    def __init__(self):
        # Kick off the scrape immediately: kept in the constructor for
        # backward compatibility with existing ``MyAttempt()`` callers.
        self.build_catalog()

    def build_catalog(self):
        """Fetch the first results page and print pagination/download links.

        If the page cannot be fetched, the failure has already been
        reported by get_page() and we simply return instead of crashing
        (BeautifulSoup(None, ...) would raise TypeError).
        """
        page1_url = 'https://www.flightsim.com/vbfs/fslib.php?searchid=65842563'
        page = self.get_page(page1_url)
        if page is None:
            return
        soup = BeautifulSoup(page, 'lxml')
        for link in soup.findAll('a', href=True):
            url = link['href']
            text = link.text
            if 'page=' in url:
                print(f'page in url: {url}\ntext: {text}\n')
            if 'copyright' in url:
                print(f'actual download link: {url}\ntext: {text}\n')

    def get_page(self, url, timeout=10):
        """Return the raw response body (bytes) for *url*, or None on failure.

        :param url: address to fetch; redirects are NOT followed.
        :param timeout: seconds before the request is abandoned (new,
            defaulted parameter — without it requests.get can hang forever).
        :return: ``response.content`` on HTTP 200, otherwise None.
        """
        ok_status = 200
        page = None
        try:
            response = requests.get(url, allow_redirects=False, timeout=timeout)
        except requests.RequestException as exc:
            # Connection errors / timeouts previously crashed the script.
            print(f'Could not load url: {url} ({exc})')
            return page
        if response.status_code == ok_status:
            page = response.content
        else:
            print(f'Could not load url: {url}')
        return page


if __name__ == '__main__':
    MyAttempt()
# Please note copyright!