Aug-26-2018, 07:50 PM
Odd. I'm confused. I'll post code but no one is going to want to figure out the error. The code is borrowed from something on GitHub (mostly) and I've been away from it for a while. So it's not fresh in my head. Anywho, here it is:
import struct
import datetime

# Maps an EBML element ID (marker bits included) to (data type, display name).
matroska_tags = {
    0x1A45DFA3: ('MASTER', 'EBML'),
    0x4286: ('UINT', 'EBML Version'),
    0x42F7: ('UINT', 'EBML Read Version'),
    0x42F2: ('UINT', 'EBML Max ID Length'),
    0x42F3: ('UINT', 'EBML Max Size Length'),
    0x4282: ('STRING', 'Doc Type'),
    0x4287: ('UINT', 'Doc Type Version'),
    0x4285: ('UINT', 'Doc Type Read Version'),
    0xEC: ('BINARY', 'Void Data'),
    0xBF: ('BINARY', 'CRC-32'),
    0x18538067: ('MASTER', 'Segment'),
    0x114D9B74: ('MASTER', 'Seek Head'),
    0x4DBB: ('MASTER', 'Seek'),
    0x53AB: ('BINARY', 'Seek ID'),
    0x53AC: ('UINT', 'Seek Position'),
    0x1549A966: ('MASTER', 'Information'),
    0x73A4: ('BINARY', 'Segment UID'),
    0x7384: ('STRING-8', 'Segment Filename'),
    0x3CB923: ('BINARY', 'Previous UID'),
    0x3C83AB: ('STRING-8', 'Previous Filename'),
    0x3EB923: ('BINARY', 'Next UID'),
    0x3E83BB: ('STRING-8', 'Next Filename'),
    0x4444: ('BINARY', 'Segment Family'),
    0x6924: ('MASTER', 'Chapter Translate'),
    0x69FC: ('UINT', 'Chapter Translate Edition UID'),
    0x69BF: ('UINT', 'Chapter Translate Codec'),
    0x69A5: ('BINARY', 'Chapter Translate ID'),
    0x2AD7B1: ('UINT', 'Timecode Scale'),
    0x4489: ('FLOAT', 'Duration'),
    0x4461: ('DATE', 'Date UTC'),
    0x7BA9: ('STRING-8', 'Title'),
    0x4D80: ('STRING-8', 'Muxing App'),
    0x5741: ('STRING-8', 'Writing App'),
    0x1F43B675: ('MASTER', 'Cluster'),
    0xE7: ('UINT', 'Timecode'),
    0x5854: ('MASTER', 'Silent Tracks'),
    0x58D7: ('UINT', 'Silent Track Number'),
    0xA7: ('UINT', 'Position'),
    0xAB: ('UINT', 'Previous Size'),
    0xA3: ('BINARY', 'Simple Block'),
    0xA0: ('MASTER', 'Block Group'),
    0xA1: ('BINARY', 'Block'),
    0x75A1: ('MASTER', 'Block Additions'),
    0xA6: ('MASTER', 'Block More'),
    0xEE: ('UINT', 'Block Add ID'),
    0xA5: ('BINARY', 'Block Additional'),
    0x9B: ('UINT', 'Block Duration'),
    0xFA: ('UINT', 'Reference Priority'),
    0xFB: ('SINT', 'Reference Block'),
    0xA4: ('BINARY', 'Codec State'),
    0x75A2: ('SINT', 'Discard Padding'),
    0x8E: ('MASTER', 'Slices'),
    0xE8: ('MASTER', 'Time Slice'),
    0xCC: ('UINT', 'Lace Number'),
    0x1654AE6B: ('MASTER', 'Tracks'),
    0xAE: ('MASTER', 'Track Entry'),
    0xD7: ('UINT', 'Track Number'),
    0x73C5: ('UINT', 'Track UID'),
    0x83: ('UINT', 'Track Type'),
    0xB9: ('UINT', 'Flag Enabled'),
    0x88: ('UINT', 'Flag Default'),
    0x55AA: ('UINT', 'Flag Lacing'),
    0x6DE7: ('UINT', 'Min Cache'),
    0x6DF8: ('UINT', 'Max Cache'),
    0x23E383: ('UINT', 'Default Duration'),
    0x234E7A: ('UINT', 'Default Decoded Field Duration'),
    0x55EE: ('UINT', 'Max Block Addition ID'),
    0x536E: ('STRING-8', 'Name'),
    0x22B59C: ('STRING', 'Language'),
    0x86: ('STRING', 'Codec ID'),
    0x63A2: ('BINARY', 'Codec Private'),
    0x258688: ('STRING-8', 'Codec Name'),
    0x7446: ('UINT', 'Attachment Length'),
    0xAA: ('UINT', 'Codec Decode All'),
    0x6FAB: ('UINT', 'Track Overlay'),
    0x56AA: ('UINT', 'Codec Delay'),
    0x56BB: ('UINT', 'Seek Preroll'),
    0x6624: ('MASTER', 'Track Translate'),
    0x66FC: ('UINT', 'Track Translate Edition UID'),
    0x66BF: ('UINT', 'Track Translate Codec'),
    0x66A5: ('BINARY', 'Track Translate Track ID'),
    0xE0: ('MASTER', 'Video'),
    0x9A: ('UINT', 'Interlaced'),
    0x9D: ('UINT', 'Field Order'),
    0x53B8: ('UINT', 'Stereo Mode'),
    0x53C0: ('UINT', 'Alpha Mode'),
    0xB0: ('UINT', 'Pixel Height'),
    0xBA: ('UINT', 'Pixel Width'),
}


def get_ebml_id(file):
    """Read one EBML element ID from *file* and return it as an int.

    The ID is 1 to 4 bytes long; the position of the first set bit in the
    leading byte gives the length. The marker bits are kept in the returned
    value, which is how the keys of ``matroska_tags`` are written.

    Returns None at end of file, on a truncated ID, or when the leading byte
    does not announce a length of 1 to 4 bytes.
    """
    first = file.read(1)
    if not first:
        return None
    lead = first[0]
    for extra in range(4):
        if lead & (0x80 >> extra):
            rest = file.read(extra)
            if len(rest) != extra:
                return None
            # Always an int, including the 1-byte case, so that dictionary
            # lookups against the integer keys of matroska_tags can succeed.
            return int.from_bytes(first + rest, 'big')
    return None


def get_size(file):
    """Read one EBML data-size field from *file* and return it as an int.

    The field is 1 to 8 bytes long; the position of the first set bit in the
    leading byte gives the length, and that marker bit is removed from the
    returned value.

    Returns None at end of file, on a truncated field, or when the leading
    byte is zero (no length marker).
    """
    first = file.read(1)
    if not first:
        return None
    lead = first[0]
    for extra in range(8):
        marker = 0x80 >> extra
        if lead & marker:
            rest = file.read(extra)
            if len(rest) != extra:
                return None
            # (marker - 1) masks off the marker bit and everything above it.
            return int.from_bytes(bytes([lead & (marker - 1)]) + rest, 'big')
    return None


def read_integer(file, length, signed=False):
    """Read a big-endian integer of *length* bytes from *file*.

    With ``signed=True`` the bytes are interpreted as two's complement over
    the full ``8 * length`` bits. A zero-length integer yields 0.
    """
    return int.from_bytes(file.read(length), 'big', signed=signed)


def read_float(file, length):
    """Read a big-endian IEEE float of *length* bytes from *file*.

    Lengths 4 and 8 are decoded as single and double precision. A length of
    0 yields 0.0. For any other length the payload is skipped, so the stream
    stays aligned on the next element, and None is returned.
    """
    if length == 4:
        return struct.unpack('>f', file.read(4))[0]
    if length == 8:
        return struct.unpack('>d', file.read(8))[0]
    if length == 0:
        return 0.0
    file.seek(length, 1)
    return None


def parse(file, _from=0, _to=None):
    """Print every known element found in the byte range [_from, _to).

    file  -- seekable binary file object positioned anywhere.
    _from -- absolute offset at which to start reading elements.
    _to   -- absolute offset at which to stop; None means end of file.
             Values past the end of the file are clamped to the file size.

    Elements whose ID is not in ``matroska_tags`` are skipped. A master
    element has its name printed and its children parsed recursively, never
    beyond ``_to``. Parsing stops early if an ID or size cannot be read.
    Always returns None; the output is the printed ``name: value`` lines.
    """
    file.seek(0, 2)  # whence=2: go to the end to learn the file size
    file_end = file.tell()
    _to = file_end if _to is None else min(_to, file_end)
    file.seek(_from, 0)

    while file.tell() < _to:
        element_id = get_ebml_id(file)
        size = get_size(file)
        if element_id is None or size is None:
            # Corrupt or truncated data: there is no reliable way to find the
            # next element, so stop instead of guessing.
            break

        try:
            _type, _name = matroska_tags[element_id]
        except KeyError:
            file.seek(size, 1)  # unknown element: jump over its payload
            continue

        if _type == 'MASTER':
            print(_name + ':')
            loc = file.tell()
            # Children live in the master's payload; keep honouring the
            # caller's upper bound while descending.
            parse(file, loc, min(loc + size, _to))
            continue

        if _type == 'SINT':
            value = read_integer(file, size, True)
        elif _type == 'UINT':
            value = read_integer(file, size)
        elif _type == 'FLOAT':
            value = read_float(file, size)
        elif _type == 'STRING-8':
            # Strings may be padded with trailing NUL bytes.
            value = file.read(size).rstrip(b'\0').decode('utf-8', 'replace')
        elif _type == 'STRING':
            value = file.read(size).rstrip(b'\0').decode('ascii', 'replace')
        elif _type == 'DATE':
            # Stored as nanoseconds relative to 2001-01-01; timedelta works
            # in microseconds.
            micro = read_integer(file, size, True) / 1000.0
            value = (datetime.datetime(2001, 1, 1)
                     + datetime.timedelta(microseconds=micro))
        else:  # 'BINARY'
            value = file.read(size)

        print(_name + ':', value)


if __name__ == '__main__':
    with open('D:\\Bent.mkv', 'rb') as f:
        # One call walks every element in the first 512 bytes; repeated
        # calls are not needed because parse() no longer stops after the
        # first master element.
        parse(f, 0, 512)

# The part I added today is on lines 159 to 163. It allows the parsing to complete. Without the
except
I get the key error. But I need to figure out how to use the seek position to find the info I want. This is a Matroska EBML parser, by the way.