Apr-29-2020, 02:09 PM
I saw it in the German forum: https://www.python-forum.de/viewtopic.php?f=21&t=48163
This script should download all the PDFs.
The packages requests, bs4 and xlrd are required.
Have fun!
"""Download the books listed in Springer's free-title spreadsheets.

The script fetches each spreadsheet in ``URLS``, reads the book title
(column 0) and landing-page URL (column 18) of every row, scrapes each
landing page for its download button and stores the linked file in
``DOWNLOAD_PATH``.

Third-party requirements: requests, bs4 and xlrd.
"""
import sys
from itertools import islice
from pathlib import Path, PurePosixPath
from typing import Iterable, Iterator, List, Tuple
from urllib.parse import urljoin, urlsplit

import requests
import xlrd
from bs4 import BeautifulSoup

URLS = {
    "Free Emergency Nursing titles (German)": "https://resource-cms.springernature.com/springer-cms/rest/v1/content/17856246/data/v3",
    "Free English textbook titles (all disciplines)": "https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data/v4",
    "Free German textbook titles (all disciplines)": "https://resource-cms.springernature.com/springer-cms/rest/v1/content/17863240/data/v2",
}
DOWNLOAD_PATH = Path("Download")
DOWNLOAD_PATH.mkdir(exist_ok=True)

# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would hang the whole run.
_REQUEST_TIMEOUT = 60
# Number of bytes written to disk per streamed chunk.
_CHUNK_SIZE = 8192


def get_urls() -> List[Tuple[str, str]]:
    """Return ``(title, landing_page_url)`` pairs from all spreadsheets.

    Every workbook in ``URLS`` is downloaded and the first sheet is read:
    column 0 supplies the title, column 18 the URL.  The first row of each
    sheet is treated as a header and skipped.

    Raises:
        requests.RequestException: if a spreadsheet cannot be downloaded.
    """
    sheets = []
    for kind, url in URLS.items():
        print("Downloading", kind)
        rep = requests.get(url, timeout=_REQUEST_TIMEOUT)
        # Fail loudly here instead of handing an HTML error page to xlrd.
        rep.raise_for_status()
        sheets.append(xlrd.open_workbook(file_contents=rep.content).sheet_by_index(0))

    result = []
    for sheet in sheets:
        rows = zip(sheet.col_slice(0), sheet.col_slice(18))
        # islice skips the header row without raising on an empty sheet.
        for title, url in islice(rows, 1, None):
            result.append((title.value, url.value))
    return result


def get_downloads(pages: Iterable[Tuple[str, str]]) -> Iterator[Tuple[str, str]]:
    """Yield ``(title, download_url)`` for every page that has a download button.

    Each landing page is fetched and searched for the first ``<a>`` element
    with an ``href`` and the CSS class ``c-button``.  Pages without such a
    link are silently skipped; pages that cannot be fetched are reported on
    stderr and skipped, so one broken link does not abort the whole run.
    """
    base = "https://link.springer.com"
    for title, url in pages:
        try:
            rep = requests.get(url, timeout=_REQUEST_TIMEOUT)
            rep.raise_for_status()
        except requests.RequestException as e:
            print(f"Skipping `{title}`: {e}", file=sys.stderr)
            continue
        bs = BeautifulSoup(rep.content, "html.parser")
        if result := bs.find("a", href=True, attrs={"class": "c-button"}):
            # urljoin handles relative as well as absolute hrefs.
            yield title, urljoin(base, result.get("href"))


def downloader(title, url):
    """Download *url* into ``DOWNLOAD_PATH`` using *title* as the file name.

    The file extension is taken from the path component of *url*.  Nothing
    is downloaded when the target file already exists.  Data is streamed
    into a temporary ``.part`` file that is renamed only after the transfer
    has completed, so an interrupted download is retried on the next run
    instead of being mistaken for a finished one.

    Raises:
        requests.RequestException: if the request fails or returns an HTTP
            error status.
        OSError: if the file cannot be written.
    """
    suffix = PurePosixPath(urlsplit(url).path).suffix
    # Path separators in a title would otherwise point into a non-existent
    # sub-directory.  The name is built by concatenation rather than
    # with_suffix(), which would cut off everything after a dot in the title.
    name = str(title).replace("/", "_").replace("\\", "_").strip() or "untitled"
    target = DOWNLOAD_PATH / f"{name}{suffix}"
    if target.exists():
        return

    print(f"Downloading `{title}`")
    partial = target.with_name(target.name + ".part")
    try:
        with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as rep:
            rep.raise_for_status()
            with partial.open("wb") as fd:
                for chunk in rep.iter_content(_CHUNK_SIZE):
                    fd.write(chunk)
        partial.replace(target)
    except BaseException:
        # Also clean up on Ctrl-C, then let the error propagate unchanged.
        partial.unlink(missing_ok=True)
        raise


def download():
    """Download every book listed in the spreadsheets.

    A failure for a single book is printed to stderr and does not stop the
    remaining downloads.
    """
    links_to_books = get_urls()
    print(f"Downloading {len(links_to_books)} eBooks")
    for title, url in get_downloads(links_to_books):
        try:
            downloader(title, url)
        except Exception as e:  # top-level boundary: report and keep going
            print(e, file=sys.stderr)


if __name__ == "__main__":
    download()
Almost dead, but too lazy to die: https://sourceserver.info
All humans together. We don't need politicians!