# Apr-17-2020, 01:51 PM
import re

import requests
from bs4 import BeautifulSoup, SoupStrainer

# Page that is downloaded and parsed by main().
SPEC_URL = (
    "http://docs.oasis-open.org/office/v1.2/os/"
    "OpenDocument-v1.2-os-part1.html#__RefHeading__1415340_253892949"
)
# File the extracted text is written to (created/truncated in the CWD).
OUTPUT_FILENAME = "basliklar.txt"
# Upper bound on how long the HTTP request may block, in seconds.
REQUEST_TIMEOUT_SECONDS = 30


def main():
    """Download the page at SPEC_URL and dump text of its table elements.

    The page is parsed with the ``lxml`` parser. Every ``<tr>`` and ``<td>``
    element is visited in document order:

    * if the element has no next sibling, its text is written as one line
      to ``basliklar.txt`` (after a ``"baslik, link"`` header line);
    * otherwise the list of every ``href`` found on the page's ``<a>``
      elements is printed to stdout.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened or written.
    """
    # Fetch the document; fail fast instead of hanging forever or silently
    # parsing an HTTP error page.
    response = requests.get(SPEC_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    # Anchors on the page look like:
    #   <a href="#__RefHeading__1419338_253892949">19.905 xhtml:about</a>
    containers = soup.find_all(["tr", "td"])

    # The href list does not depend on the current container, so build it
    # once instead of re-scanning the whole document on every iteration.
    links = [link.get("href") for link in soup.find_all("a")]

    # The context manager closes the file even if a write raises; an explicit
    # encoding keeps non-ASCII text from depending on the platform default.
    with open(OUTPUT_FILENAME, "w", encoding="utf-8") as f:
        f.write("baslik, link\n")

        # Extract each heading and the data that belongs to it.
        # Original author's note: there is no data corresponding to the tag
        # (tag = container.nextSibling.text).
        for container in containers:
            if container.next_sibling is None:
                f.write(container.text + "\n")
            else:
                # NOTE(review): this prints the same full list for every
                # container that has a sibling, and the links are never
                # written to the file despite the "link" header column --
                # confirm whether pairing each heading with its own link
                # was intended.
                print(links)


if __name__ == "__main__":
    main()