Hi, I'm trying to get a value from the soup that I created.
Here's what I found:
var next_chapter = "https://www.lelscan-vf.com/manga/vinland-saga/165"
But the problem is that I would like to get the value assigned to "var next_chapter".
Here's what I tried (even if it's nonsense):
a= soup.find('var next_chapter')
NextUrl = a['var next_chapter']
Thanks for helping
Did you fetch the page?
and then create soup from the html?
Please show complete code
Here it is:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import os
# Only when run as a script: switch to the project folder.
if __name__ == '__main__':
    def dossier():
        """Change the working directory to the Web-Scrapper folder."""
        os.chdir("C://Users//Ridha//Desktop//Web-Scrapper")
    dossier()
# Common prefix of the site's manga URLs (not referenced by the code shown here).
moit="https://www.lelscan-vf.com/manga/"
# Example chapter page (Vinland Saga, chapter 164).
url="https://www.lelscan-vf.com/manga/vinland-saga/164"
def Navigate(url):
    """Download a chapter page and extract its image links.

    Args:
        url: Address of the chapter page to fetch.

    Returns:
        A two-item list ``[soup, links]``: the parsed ``BeautifulSoup``
        document and the list of image URLs produced by ``Img``.  The
        ``Site.Navigate`` caller in Download.py unpacks exactly these two
        values.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    # The links used to be computed and then thrown away; hand them back
    # together with the soup so the caller can actually download them.
    links = Img(soup)
    return [soup, links]
def Img(soup):
    """Collect the ``data-src`` URLs of the page's scan images.

    Only ``<img>`` tags whose ``class`` attribute is exactly
    ``img-responsive`` are considered.

    Args:
        soup: Parsed page (a ``BeautifulSoup`` document).

    Returns:
        A list of URL strings in document order, with surrounding
        whitespace removed.  Tags without a usable ``data-src`` attribute
        are skipped instead of raising ``KeyError``.
    """
    links = []
    for tag in soup.find_all('img'):
        # The class attribute is compared as a list, so a tag carrying any
        # additional class (or no class at all) is ignored.
        if tag.get('class') != ['img-responsive']:
            continue
        source = (tag.get('data-src') or '').strip()
        if source:
            links.append(source)
    return links
def Next(soup):
    """Return the URL of the next chapter announced by the page.

    The link is not stored in a tag attribute: it is a JavaScript
    assignment in the page source, e.g.
    ``var next_chapter = "https://.../165"``.  ``soup.find`` looks up tags
    by name, so it cannot locate a script variable; the value is pulled
    out of the page markup with a regular expression instead.

    Args:
        soup: Parsed page (any object whose ``str()`` is the page markup).

    Returns:
        The next chapter's URL, or the string ``"End"`` when the variable
        is missing or empty.
    """
    import re  # local import: the function needs nothing else from the file

    # Accept either quote style; \1 makes the closing quote match the opener.
    match = re.search(r'var\s+next_chapter\s*=\s*(["\'])(.*?)\1', str(soup))
    next_url = match.group(2).strip() if match else ""
    return next_url or "End"
If you want to try the code, just type Navigate("https://www.lelscan-vf.com/manga/vinland-saga/164") in the console.
It'll give you the soup, and the variable is in the soup. I would like to get it and return it. Thanks for helping, mate.
The URL works for me, Larz60+.
You are doing unnecessary work, Reldaing.
In the Img(soup) function, it could just be:
image = soup.find('img', class_="img-responsive scan-page")
Here you go directly to the img tag and take the URL out of its src attribute.
Example.
import requests
from bs4 import BeautifulSoup
# Fetch one chapter page and print the address of its scan image.
url = 'https://www.lelscan-vf.com/manga/vinland-saga/164'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'lxml')
# Look up the first <img> whose class is "img-responsive scan-page".
# NOTE(review): find() returns None when nothing matches, in which case the
# next line raises AttributeError.
image = soup.find('img', class_="img-responsive scan-page")
# strip() removes any whitespace around the attribute value.
image_url = image.get('src').strip()
print(image_url)
Output:
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/164/01.png
For next_chapter, you can just take the URL and increment the chapter number by one.
import requests
from bs4 import BeautifulSoup
def manga(start_img, stop_img, title='vinland-saga'):
    """Print the scan image URL found on each chapter page in a range.

    Args:
        start_img: First chapter number to visit.
        stop_img: Chapter number to stop before (exclusive, as in
            ``range``).
        title: Manga slug used in the site URL.  Defaults to the series
            the example was written for, so existing two-argument calls
            behave as before.

    Chapters whose page has no matching ``<img>`` (or an empty ``src``)
    are skipped rather than aborting the whole run with
    ``AttributeError``.
    """
    for page in range(start_img, stop_img):
        url = f'https://www.lelscan-vf.com/manga/{title}/{page}'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'lxml')
        image = soup.find('img', class_="img-responsive scan-page")
        if image is None:
            # No scan on this page (e.g. the chapter does not exist).
            continue
        image_url = (image.get('src') or '').strip()
        if image_url:
            print(image_url)
if __name__ == '__main__':
    # Chapters 164 through 168: manga() passes these to range(), which
    # excludes the stop value.
    start_img = 164
    stop_img = 169
    manga(start_img, stop_img)
Output:
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/164/01.png
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/165/01.png
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/166/01.png
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/167/01.png
https://lelscan-vf.com/uploads/manga/vinland-saga/chapters/168/01.png
So maybe your task is to download these images; try that. Or, if you just want the URLs, this should do it.
Hey, sorry for bothering you, but my teacher wants it to be like this. Then it'll be referenced in the main file "Download.py". He handled many manga websites, like mangareader, etc., and he used them in a class. The URL that I pasted is only an example; it could be any of the mangas on the website.
Here's the main code. He made other imports, but I'm going to give you only the one that I tried. If you need the others to try it, just tell me. Thanks a lot. (PS: You only have to instantiate the class and then call its InitSoup() method.)
import requests
import urllib.request
import os
from PIL import Image
import shutil
# Only when run as a script: switch to the project folder.
if __name__ == '__main__':
    def dossier():
        """Change the working directory to the Web-Scrapper folder."""
        os.chdir("C://Users//Ridha//Desktop//Web-Scrapper")
    dossier()
# Site-specific helpers; Site calls LS.Navigate and LS.Next.
import lelscan as LS
# Root folder in which Site.Initialisation creates one sub-folder per title.
path = r"C:\Users\Ridha\Desktop\MangaScrapper"
def Download(download_url,name):
    """Fetch ``download_url`` and save its bytes as ``<name>.jpg``.

    Args:
        download_url: Address of the image, or the sentinel ``"End"``,
            in which case nothing is downloaded.
        name: Output path without extension; ``.jpg`` is appended.
    """
    if download_url == "End":
        return
    # The request carries a browser-like User-Agent header.
    req = urllib.request.Request(download_url, headers={'User-Agent': 'Mozilla/5.0'})
    # Context managers close the response and the output file even when
    # reading or writing fails part-way through.
    with urllib.request.urlopen(req) as response:
        web_byte = response.read()
    with open(name + '.jpg', 'wb') as image_file:
        image_file.write(web_byte)
class Site:
    """Walk through a manga chapter by chapter and download its images.

    The string ``"End"`` stored in ``url`` is the stop signal for every
    method of the class.
    """

    def __init__(self,url,Titre):
        """Store the start URL and the title, and reset the working state."""
        self.url, self.Titre = url, Titre
        # Parsed page of the current chapter; filled in by Navigate().
        self.soup = ""
        # Image links of the current chapter; filled in by Navigate().
        self.ListeLiens = []
        # Not used by any method shown in this class.
        self.chapter = ""
        # Running image number, used to build the output file names.
        self.compteur = 0

    def Initialisation(self):
        """Enter the title's folder under ``path`` (creating it if absent)
        and write the title to ``Titre.txt``."""
        os.chdir(path)
        already_there = os.listdir()
        if self.Titre not in already_there:
            os.mkdir(self.Titre)
        os.chdir(self.Titre)
        with open('Titre.txt','w+') as handle:
            handle.write(self.Titre)

    def Navigate(self):
        """Load the current page through the lelscan helper module.

        Does nothing when ``url`` does not contain ``lelscan``.
        """
        if 'lelscan' not in self.url:
            return
        self.soup, self.ListeLiens = LS.Navigate(self.url)

    def Next(self):
        """Record the current URL in ``LastUrl.txt`` and move to the next one.

        Does nothing once ``url`` is ``"End"``.
        """
        if self.url == "End":
            return
        with open("LastUrl.txt","w+") as handle:
            handle.write(self.url)
        if 'lelscan' in self.url:
            self.url = LS.Next(self.soup)

    def DownloadListe(self):
        """Download every link of the current chapter.

        Files are named with the running counter, zero-padded to five
        digits.
        """
        for lien in self.ListeLiens:
            # Bump the counter before each download so that a failure
            # half-way leaves it at the last attempted image.
            self.compteur = self.compteur + 1
            Download(lien, f"{self.compteur:05d}")

    def InitSoup(self):
        """Run the prepare / fetch / download / advance cycle until ``url``
        becomes ``"End"``."""
        while True:
            if self.url == "End":
                break
            self.Initialisation()
            self.Navigate()
            self.DownloadListe()
            self.Next()