Resolving YouTube search links
Save this as searchyoutube.py in the same folder (credits: searchyt by LBLZR_, https://github.com/LaBlazer/searchyt):

import requests
import logging
import json
import re

class searchyt(object):
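    # Scrapes YouTube's search results page (the pbj=1 JSON variant) directly,
    # without using the official Data API, by mimicking a browser's request headers.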
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
    config_regexp = re.compile(r'ytcfg\.set\(({.+?})\);')

    def __init__(self):
        self.req = requests.Session()
        self.log = logging.getLogger("ytsearch")
        headers = {"connection": "keep-alive",
                    "pragma": "no-cache",
                    "cache-control": "no-cache",
                    "upgrade-insecure-requests": "1",
                    "user-agent": searchyt.ua,
                    "accept": "*/*",
                    "accept-language": "en-US,en;q=0.9",
                    "referer": "https://www.youtube.com/",
                    "dnt": "1",
                    "maxResults": "500"}
        self.req.headers.update(headers)
        self._populate_headers()
    
    def _populate_headers(self):
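        # Fetch the YouTube homepage, extract the ytcfg configuration blob and
        # send its values back as the X-YouTube-*/X-SPF-* request headers the site expects.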
        resp = self.req.get("https://www.youtube.com/")

        if resp.status_code != 200:
            self.log.debug(resp.text)
            raise Exception(f"error while scraping youtube (response code {resp.status_code})")

        result = searchyt.config_regexp.search(resp.text)
        if not result:
            self.log.debug(resp.text)
            raise Exception("error while searching for configuration")

        config = json.loads(result.group(1))
        if not config:
            self.log.debug(resp.text)
            raise Exception("error while parsing headers")

        updated_headers = {
            "x-spf-referer": "https://www.youtube.com/",
            "x-spf-previous": "https://www.youtube.com/",
            "x-youtube-utc-offset": "120",
            "x-youtube-client-name": str(config["INNERTUBE_CONTEXT_CLIENT_NAME"]),
            "x-youtube-variants-checksum": str(config["VARIANTS_CHECKSUM"]),
            "x-youtube-page-cl" : str(config["PAGE_CL"]),
            "x-youtube-client-version": str(config["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
            "x-youtube-page-label": str(config["PAGE_BUILD_LABEL"])
        }
        self.log.debug(f"Headers: {updated_headers}")
        self.req.headers.update(updated_headers)

    def _traverse_data(self, data, match):
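        # Recursively walk nested dicts/lists and yield every value stored
        # under a key equal to `match`.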
        # list
        if isinstance(data, list):
            for d in data:
                if isinstance(d, (dict, list)):
                    yield from self._traverse_data(d, match)
            return
        
        # dict
        for key, value in data.items():
            # yield the value whenever the key matches
            if key == match:
                yield value
            if isinstance(value, (dict, list)):
                yield from self._traverse_data(value, match)

    def _parse_videos(self, json_result):
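        # The pbj=1 response is a JSON array whose second element holds the
        # search data; pull out every videoRenderer and keep the title,
        # channel name, video id and largest listed thumbnail URL.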
        try:
            json_dict = json.loads(json_result)[1]

            #self.log.debug(json_dict)
            videos = []
            for v in self._traverse_data(json_dict, "videoRenderer"):
                vid = {}
                vid['title'] = v['title']['runs'][0]['text']
                vid['author'] = v['ownerText']['runs'][0]['text']
                vid['id'] = v["videoId"]
                vid['thumb'] = v['thumbnail']['thumbnails'][-1]['url'].split('?', maxsplit=1)[0]
                videos.append(vid)

            return videos
        except Exception:
            self.log.debug(json_result)
            raise

    def search(self, query):
        if not isinstance(query, str):
            raise Exception("search query must be a string")

        # pbj=1 makes /results return the page data as JSON instead of HTML
        resp = self.req.get("https://www.youtube.com/results", params={"search_query": query, "pbj": "1"})

        if resp.status_code != 200:
            self.log.debug(resp.text)
            raise Exception(f"error while getting search results page (status code {resp.status_code})")

        return self._parse_videos(resp.text)
Then, as an example of how to use it:

import searchyoutube

syt = searchyoutube.searchyt()

def find_it(searchtext):
    # Build a numbered, human-readable listing of every search result.
    results = syt.search(searchtext)
    found = []
    for i, video in enumerate(results, start=1):
        title = video.get("title")
        video_id = video.get("id")
        url = f"https://www.youtube.com/watch?v={video_id}"
        found.append(f"{i}:\n{title}\n{video_id}\n{url}")
    return '\n'.join(found)

print(find_it("Movie Trailer 2020"))
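Since the thread is about resolving a search into a watch link, here is a minimal sketch of pulling just the top hit's URL out of the result list (`top_link` is only an illustrative name, not part of the module):

import searchyoutube

syt = searchyoutube.searchyt()

def top_link(searchtext):
    # Return the watch URL of the first search result, or None if nothing matched.
    results = syt.search(searchtext)
    if not results:
        return None
    return f"https://www.youtube.com/watch?v={results[0]['id']}"

print(top_link("Movie Trailer 2020"))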