Subtitle Downloader (Subdb and Opensubtitles)

hbknjr · Oct-23-2017, 04:50 PM

I recently discovered the OpenSubtitles API, so here's a script that uses SubDB and OpenSubtitles to download subtitles.

import argparse
import os
import hashlib
import urllib.request as request
import struct
import gzip
from xmlrpc.client import ServerProxy


# File extensions treated as video files. Every entry is lowercase and keeps
# its leading dot, which is the form os.path.splitext() returns.
VIDEO_FORMATS = [
    '.3g2', '.3gp', '.3gp2', '.3gpp',
    '.60d',
    '.ajp', '.asf', '.asx', '.avchd', '.avi',
    '.bik', '.bix', '.box',
    '.cam',
    '.divx', '.dmf', '.dv', '.dvr-ms',
    '.evo',
    '.flc', '.fli', '.flic', '.flv', '.flx',
    '.gvi', '.gvp',
    '.h264',
    '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e', '.m4v',
    '.mjp', '.mjpeg', '.mjpg', '.mkv',
    '.moov', '.mov', '.movhd', '.movie', '.movx',
    '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2',
    '.mxf',
    '.nsv', '.nut',
    '.ogg', '.ogm', '.omf',
    '.ps',
    '.qt',
    '.ram', '.rm', '.rmvb',
    '.swf',
    '.ts',
    '.vfw', '.vid', '.video', '.viv', '.vivo', '.vob', '.vro',
    '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx',
    '.x264', '.xvid',
]


class SubDB:
    """Small client for the SubDB subtitle service.

    API Link : https://goo.gl/n9B5J4
    """

    # Kept as class attributes so the static get_lang() and the instance
    # methods share one definition; `instance.BASEURL` / `instance.HEADERS`
    # still resolve exactly as before.
    BASEURL = "http://api.thesubdb.com/?action=download&hash="
    LANGUAGES_URL = 'http://api.thesubdb.com/?action=languages'
    HEADERS = {
        'User-Agent': 'SubDB/1.0 (PPY/0.1; https://projectpy.ml/)',
    }
    # Returned by get_lang() when the live language list cannot be fetched.
    FALLBACK_LANGUAGES = ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']

    def get_hash(self, name):
        """Return the MD5 hex digest of the first and last 64 KiB of a file.

        Originally from : https://goo.gl/n9B5J4

        :param name: Path of the file to hash.
        :return: Hex digest string.
        """
        readsize = 64 * 1024
        size = os.path.getsize(name)
        with open(name, 'rb') as f:
            data = f.read(readsize)
            # Clamp the offset: seeking 64 KiB back from the end raises
            # OSError when the file is shorter than that. For larger files
            # this is the same position as seek(-readsize, os.SEEK_END).
            f.seek(max(0, size - readsize))
            data += f.read(readsize)
        return hashlib.md5(data).hexdigest()

    def download_subtitles(self, path, lang='en'):
        """Download subtitles for a video and save them next to it as .srt.

        HTTP and network errors raised by urlopen are deliberately not
        caught here; they propagate to the caller.

        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: None
        """
        url = self.BASEURL + self.get_hash(path) + "&language=" + lang
        req = request.Request(url, headers=self.HEADERS)
        with request.urlopen(req) as res:
            if res.getcode() != 200:
                return
            # Read the body before creating the .srt so a failed transfer
            # does not leave an empty subtitle file behind.
            content = res.read()
        with open(os.path.splitext(path)[0] + '.srt', "wb") as sub_file:
            sub_file.write(content)
        print("[SubDB]Subtitles Found for: " + path)

    @staticmethod
    def get_lang():
        """Return the subtitle languages offered by SubDB.

        Falls back to FALLBACK_LANGUAGES when the service cannot be reached,
        answers with an error status, or returns an unusable body.

        :return: List of available subtitle languages
        """
        req = request.Request(SubDB.LANGUAGES_URL, headers=SubDB.HEADERS)
        try:
            with request.urlopen(req) as res:
                if res.getcode() == 200:
                    body = res.read().decode('utf-8')
                    langs = [code.strip() for code in body.split(',') if code.strip()]
                    if langs:
                        return langs
        except (OSError, UnicodeDecodeError):
            # urllib's URLError and HTTPError are OSError subclasses; in
            # either case the static list below is used instead.
            pass
        return list(SubDB.FALLBACK_LANGUAGES)


class OpenSub:
    """Client for the OpenSubtitles XML-RPC API.

    Some Class Methods Are Copied From : https://goo.gl/7Yohyj
    API Link : https://goo.gl/ZN8Wc1
    """

    # Keywords searched for in the video file name by get_tags().
    TAGS = ['bluray', 'cam', 'dvb', 'dvd', 'hd-dvd', 'hdtv', 'ppv', 'telecine', 'telesync', 'tv', 'vhs', 'vod',
            'web-dl', 'webrip', 'workprint']
    OPENSUBTITLES_SERVER = 'http://api.opensubtitles.org/xml-rpc'
    USER_AGENT = 'OSTestUserAgentTemp'                              # Only for testing
    # Sentinel strings get_hash() returns instead of a real hash.
    HASH_ERRORS = ('IOError', 'SizeError')

    def __init__(self):
        """Create the XML-RPC proxy and try to log in with self.user / self.passw."""
        self.xmlrpc = ServerProxy(self.OPENSUBTITLES_SERVER,
                                  allow_none=True)
        self.language = 'en'
        self.token = None
        self.data = {}                     # last raw XML-RPC response
        self.user = ''                     # your username
        self.passw = ''                    # your password
        if self.login(self.user, self.passw):
            print("Login Successful")
        else:
            print("OpenSubtitles login Failed. This could reduce the download limit")

    def _get_from_data_or_none(self, key):
        """Return ``self.data[key]`` if the response status starts with 200.

        :param key: Key to read from the last response stored in self.data.
        :return: The value, or None when the status is missing or not 200.
        """
        # A missing or empty status is treated like a failed call instead of
        # raising AttributeError / IndexError.
        status = (self.data.get('status') or '').split()
        if status and status[0] == '200':
            return self.data.get(key)
        return None

    def login(self, username, password):
        """Log in and remember the session token.

        :return: The token if login is ok, otherwise None.
        """
        self.data = self.xmlrpc.LogIn(username, password,
                                      self.language, self.USER_AGENT)
        token = self._get_from_data_or_none('token')
        if token:
            self.token = token
        return token

    def logout(self):
        """Log out of the current session.

        :return: True if the response status contains '200', otherwise False.
        """
        data = self.xmlrpc.LogOut(self.token)
        return '200' in (data.get('status') or '')

    def search_subtitles(self, params):
        """Run SearchSubtitles with the current token.

        :param params: List of search payload dicts (see create_payload).
        :return: The response's 'data' entry, or None if the status is not 200.
        """
        self.data = self.xmlrpc.SearchSubtitles(self.token, params)
        return self._get_from_data_or_none('data')

    def download_subtitles(self, path, lang=''):
        """Search for subtitles for a video and save the best match as .srt.

        Does nothing when the file cannot be hashed, the search returns
        nothing, or no result carries a usable download link.

        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: None
        """
        payload = self.create_payload(path, lang)
        if payload['moviehash'] in self.HASH_ERRORS:
            # get_hash() signalled a failure; the sentinel is not a hash and
            # must not be sent to the server as one.
            return
        search_result = self.search_subtitles([payload])
        if not search_result:
            return
        dllink = self.analyse_result(search_result)
        if not dllink:
            # No record had a positive score, so there is nothing to fetch.
            return
        with request.urlopen(dllink) as gzfile:
            # Download and decompress fully before touching the .srt so a
            # failed transfer does not leave an empty subtitle file behind.
            content = gzip.decompress(gzfile.read())
        try:
            with open(os.path.splitext(path)[0] + '.srt', 'wb') as sub_file:
                sub_file.write(content)
            print("[OpenSubtitles]Subtitles Found for: " + path)
        except PermissionError:
            print("Permision Error: when creating subtitles for {}:".format(path))

    def analyse_result(self, result):
        """Pick the download link of the highest-scoring search record.

        :param result: Search result to find appropriate subtitles
        :return: Download Link of best match for subtitles, or None when no
                 record has a score greater than 0.
        """
        score = 0
        dllink = None
        for record in result:
            record_score = record.get('Score', 0)
            if record_score > score:
                score = record_score
                dllink = record.get('SubDownloadLink')
                # Prints the record's 'sublanguageid' entry for each new best match.
                print(record.get('sublanguageid'))
        return dllink

    def get_tags(self, path):
        """Collect the known tags that occur in the video's file name.

        Matching is a plain lowercase substring test against self.TAGS.

        :param path: The video file path for which to download subs.
        :return: tags on video
        """
        name = os.path.basename(path).lower()
        return [word for word in self.TAGS if word in name]

    def create_payload(self, path, lang):
        """Build the search parameters for one video file.

        NOTE(review): `lang` is passed through unchanged as 'sublanguageid';
        confirm the API accepts the two-letter codes this script's CLI uses.

        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: Payload containing data about file
        """
        payload = {}
        payload['moviebytesize'] = str(os.path.getsize(path))
        payload['sublanguageid'] = lang
        tags = self.get_tags(path)
        if tags:
            payload['tags'] = ','.join(tags)
        payload['moviehash'] = self.get_hash(path)
        return payload

    def get_hash(self, path):
        """Compute the 64-bit hash used to look a video up on OpenSubtitles.

        Original from: http://goo.gl/qqfM0

        The hash is the file size plus the sum of the first and the last
        64 KiB read as 64-bit integers, truncated to 64 bits.

        :param path: Path of the video file.
        :return: 16-digit lowercase hex string, "IOError" if the file cannot
                 be opened, or "SizeError" if it is smaller than 128 KiB.
        """
        size = os.path.getsize(path)
        block_size = 65536
        # Explicit little-endian layout so the result does not depend on the
        # byte order of the machine running the script.
        block_format = '<%dq' % (block_size // struct.calcsize('<q'))

        try:
            f = open(path, "rb")
        except IOError:
            return "IOError"

        # The with-block closes the file on every path, including the early
        # "SizeError" return and a failed unpack.
        with f:
            if size < block_size * 2:
                return "SizeError"

            filehash = size
            for offset in (0, max(0, size - block_size)):
                f.seek(offset, 0)
                filehash += sum(struct.unpack(block_format, f.read(block_size)))
                filehash &= 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number

        return "%016x" % filehash


def down_sub(pathlist, lang=''):
    """Driver function to find subtitles with SubDB and OpenSubtitle.

    Each video is tried on SubDB first; if that raises, OpenSubtitles is
    used as a fallback. Videos that already have an .srt next to them are
    skipped.

    :param pathlist: Iterable of video file paths.
    :param lang: Subtitle language passed to both downloaders.
    """
    downloader_subdb = SubDB()              # SUBDB
    downloader_os = OpenSub()                 # OpenSubtitles
    for path in pathlist:
        if os.path.exists(os.path.splitext(path)[0] + '.srt'):
            continue
        try:
            downloader_subdb.download_subtitles(path, lang)
        except Exception:
            # `except Exception` rather than a bare except, so Ctrl-C and
            # SystemExit still stop the run.
            try:
                downloader_os.download_subtitles(path, lang)
            except Exception as error:
                # Report and move on so one bad file does not abort the batch.
                print("No subtitles downloaded for {}: {}".format(path, error))


def is_video(filepath):
    """Return True if the path's extension is listed in VIDEO_FORMATS.

    The extension is lowercased before the lookup, so 'Movie.MKV' is
    recognised just like 'movie.mkv'.

    :param filepath: Path or file name to check.
    :return: True for a known video extension, otherwise False.
    """
    ext = os.path.splitext(filepath)[1].lower()
    return ext in VIDEO_FORMATS


def recursive_search(directory, all_vids=None):
    """Collect the paths of all video files below a directory.

    :param directory: Path of Directory to be searched recursively
    :param all_vids:  Optional list to append the results to; a new list is
                      created when omitted.
    :return: List of path strings of all video files in the directory and
             its subdirectories (the same list object as all_vids if given).
    """
    # A fresh list per top-level call: a mutable default would be shared
    # between calls and keep growing.
    if all_vids is None:
        all_vids = []
    try:
        for entry in os.scandir(directory):
            if entry.is_dir():
                # Pass the accumulator down explicitly so nested results
                # land in the same list.
                recursive_search(entry.path, all_vids)
            elif entry.is_file() and is_video(entry.path):
                all_vids.append(entry.path)
    except PermissionError as e:
        # Unreadable directories are reported and skipped.
        print(e)
    return all_vids


def main():
    """Command-line entry point.

    Reads a path and an optional language from the command line, then
    downloads subtitles for that video file or for every video found under
    that directory. Any other kind of path is ignored.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='Path to the file or directory', type=str)
    parser.add_argument('-l', '--language',
                        help='Languages of subtitle as per ISO 639-1 codes',
                        choices=SubDB.get_lang())
    options = parser.parse_args()

    target = options.path
    # English is used when no language was given.
    language = options.language if options.language else 'en'

    if os.path.isdir(target):
        videos = recursive_search(target)
    elif is_video(target):
        videos = [target]
    else:
        return
    down_sub(videos, language)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

I've also recycled my minor project into a hub for sharing Python scripts and projects.

It's a Django-based website; check it out.
ProjectPy

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	YouTube Video Downloader	BlazingWarlord	1	5,329	May-31-2021, 09:57 AM Last Post: DeaD_EyE
	The Python Tkinter Youtube Audio Downloader for Linux	rootVIII	0	3,100	Dec-19-2018, 11:33 PM Last Post: rootVIII

Subtitle Downloader (Subdb and Opensubtitles)

User Panel Messages

Announcements