Odd. I'm confused. I'll post code but no one is going to want to figure out the error. The code is borrowed from something on GitHub (mostly) and I've been away from it for a while. So it's not fresh in my head. Anywho, here it is:
import struct, datetime
# Lookup table used by the parser: EBML element ID -> (payload type, name).
# The payload type is one of 'MASTER', 'UINT', 'SINT', 'FLOAT', 'STRING',
# 'STRING-8', 'DATE' or 'BINARY' and selects how the element body is decoded.
# IDs are stored as ints that include the length-marker bits of the ID.
matroska_tags = {
    # EBML header
    0x1A45DFA3: ('MASTER', 'EBML'),
    0x4286: ('UINT', 'EBML Version'),
    0x42F7: ('UINT', 'EBML Read Version'),
    0x42F2: ('UINT', 'EBML Max ID Length'),
    0x42F3: ('UINT', 'EBML Max Size Length'),
    0x4282: ('STRING', 'Doc Type'),
    0x4287: ('UINT', 'Doc Type Version'),
    0x4285: ('UINT', 'Doc Type Read Version'),
    0xEC: ('BINARY', 'Void Data'),
    0xBF: ('BINARY', 'CRC-32'),
    # Segment and seek index
    0x18538067: ('MASTER', 'Segment'),
    0x114D9B74: ('MASTER', 'Seek Head'),
    0x4DBB: ('MASTER', 'Seek'),
    0x53AB: ('BINARY', 'Seek ID'),
    0x53AC: ('UINT', 'Seek Position'),
    # Segment information
    0x1549A966: ('MASTER', 'Information'),
    0x73A4: ('BINARY', 'Segment UID'),
    0x7384: ('STRING-8', 'Segment Filename'),
    0x3CB923: ('BINARY', 'Previous UID'),
    0x3C83AB: ('STRING-8', 'Previous Filename'),
    0x3EB923: ('BINARY', 'Next UID'),
    0x3E83BB: ('STRING-8', 'Next Filename'),
    0x4444: ('BINARY', 'Segment Family'),
    0x6924: ('MASTER', 'Chapter Translate'),
    0x69FC: ('UINT', 'Chapter Translate Edition UID'),
    0x69BF: ('UINT', 'Chapter Translate Codec'),
    0x69A5: ('BINARY', 'Chapter Translate ID'),
    0x2AD7B1: ('UINT', 'Timecode Scale'),
    0x4489: ('FLOAT', 'Duration'),
    0x4461: ('DATE', 'Date UTC'),
    0x7BA9: ('STRING-8', 'Title'),
    0x4D80: ('STRING-8', 'Muxing App'),
    0x5741: ('STRING-8', 'Writing App'),
    # Cluster and blocks
    0x1F43B675: ('MASTER', 'Cluster'),
    0xE7: ('UINT', 'Timecode'),
    0x5854: ('MASTER', 'Silent Tracks'),
    0x58D7: ('UINT', 'Silent Track Number'),
    0xA7: ('UINT', 'Position'),
    0xAB: ('UINT', 'Previous Size'),
    0xA3: ('BINARY', 'Simple Block'),
    0xA0: ('MASTER', 'Block Group'),
    0xA1: ('BINARY', 'Block'),
    0x75A1: ('MASTER', 'Block Additions'),
    0xA6: ('MASTER', 'Block More'),
    0xEE: ('UINT', 'Block Add ID'),
    0xA5: ('BINARY', 'Block Additional'),
    0x9B: ('UINT', 'Block Duration'),
    0xFA: ('UINT', 'Reference Priority'),
    0xFB: ('SINT', 'Reference Block'),
    0xA4: ('BINARY', 'Codec State'),
    0x75A2: ('SINT', 'Discard Padding'),
    0x8E: ('MASTER', 'Slices'),
    0xE8: ('MASTER', 'Time Slice'),
    0xCC: ('UINT', 'Lace Number'),
    # Tracks
    0x1654AE6B: ('MASTER', 'Tracks'),
    0xAE: ('MASTER', 'Track Entry'),
    0xD7: ('UINT', 'Track Number'),
    0x73C5: ('UINT', 'Track UID'),
    0x83: ('UINT', 'Track Type'),
    0xB9: ('UINT', 'Flag Enabled'),
    0x88: ('UINT', 'Flag Default'),
    0x55AA: ('UINT', 'Flag Lacing'),
    0x6DE7: ('UINT', 'Min Cache'),
    0x6DF8: ('UINT', 'Max Cache'),
    0x23E383: ('UINT', 'Default Duration'),
    0x234E7A: ('UINT', 'Default Decoded Field Duration'),
    0x55EE: ('UINT', 'Max Block Addition ID'),
    0x536E: ('STRING-8', 'Name'),
    0x22B59C: ('STRING', 'Language'),
    0x86: ('STRING', 'Codec ID'),
    0x63A2: ('BINARY', 'Codec Private'),
    0x258688: ('STRING-8', 'Codec Name'),
    # 0x7446 is AttachmentLink in the Matroska spec (was 'Attachment Length').
    0x7446: ('UINT', 'Attachment Link'),
    0xAA: ('UINT', 'Codec Decode All'),
    0x6FAB: ('UINT', 'Track Overlay'),
    0x56AA: ('UINT', 'Codec Delay'),
    0x56BB: ('UINT', 'Seek Preroll'),
    0x6624: ('MASTER', 'Track Translate'),
    0x66FC: ('UINT', 'Track Translate Edition UID'),
    0x66BF: ('UINT', 'Track Translate Codec'),
    0x66A5: ('BINARY', 'Track Translate Track ID'),
    # Video settings
    0xE0: ('MASTER', 'Video'),
    0x9A: ('UINT', 'Interlaced'),
    0x9D: ('UINT', 'Field Order'),
    0x53B8: ('UINT', 'Stereo Mode'),
    0x53C0: ('UINT', 'Alpha Mode'),
    # 0xB0 is PixelWidth and 0xBA is PixelHeight; the two names were swapped.
    0xB0: ('UINT', 'Pixel Width'),
    0xBA: ('UINT', 'Pixel Height'),
}
def get_ebml_id(file):
    """Read one EBML element ID from *file* and return it as an int.

    The position of the highest set bit in the first byte gives the width of
    the ID (1 to 4 bytes). The marker bit is kept in the returned value, so a
    one-byte ID such as ``0xEC`` comes back as ``0xEC``.

    Args:
        file: Binary file-like object positioned at the start of an ID.

    Returns:
        The ID as an int, or ``None`` when the first byte has none of its top
        four bits set (not a valid 1-4 byte ID).

    Raises:
        EOFError: If the stream ends before the whole ID has been read.
    """
    first = file.read(1)
    if not first:
        raise EOFError('end of file while reading an EBML element ID')
    lead = first[0]
    for width, marker in enumerate((0x80, 0x40, 0x20, 0x10), start=1):
        if lead & marker:
            rest = file.read(width - 1)
            if len(rest) != width - 1:
                raise EOFError('truncated EBML element ID')
            # Always return an int: the one-byte case used to return the raw
            # bytes object, which never matches an int dictionary key.
            return int.from_bytes(first + rest, 'big')
    return None
def get_size(file):
    """Read one EBML variable-length size field from *file*.

    The position of the highest set bit in the first byte gives the width of
    the field (1 to 8 bytes). That marker bit is cleared and the remaining
    bits, followed by the extra bytes, form a big-endian unsigned integer.

    Args:
        file: Binary file-like object positioned at the start of a size field.

    Returns:
        The decoded size as an int.

    Raises:
        EOFError: If the stream ends before the whole field has been read.
        ValueError: If the first byte is 0x00, which encodes no valid width.
    """
    first = file.read(1)
    if not first:
        raise EOFError('end of file while reading an EBML element size')
    lead = first[0]
    for width in range(1, 9):
        marker = 0x80 >> (width - 1)
        if lead & marker:
            rest = file.read(width - 1)
            if len(rest) != width - 1:
                raise EOFError('truncated EBML element size')
            # marker - 1 keeps only the data bits below the marker bit.
            # NOTE(review): an all-ones value ("unknown size" in EBML) is
            # returned as a plain number, as before - confirm whether callers
            # need to treat it specially.
            return int.from_bytes(bytes([lead & (marker - 1)]) + rest, 'big')
    raise ValueError('invalid EBML element size: leading byte is 0x00')
def read_integer(file, length, signed=False):
    """Read a big-endian integer of *length* bytes from *file*.

    Args:
        file: Binary file-like object positioned at the integer payload.
        length: Number of payload bytes. Zero yields 0.
        signed: When true, interpret the bytes as two's complement.

    Returns:
        The decoded int.

    Raises:
        EOFError: If fewer than *length* bytes could be read.
    """
    data = file.read(length)
    if len(data) != length:
        raise EOFError('truncated integer payload')
    # The sign bit is the top bit of the full payload (8 * length bits); the
    # previous 7 * length computation decoded e.g. a single 0xFF byte as 127.
    return int.from_bytes(data, 'big', signed=signed)
def read_float(file, length):
    """Read a big-endian IEEE float of *length* bytes from *file*.

    Args:
        file: Binary file-like object positioned at the float payload.
        length: Payload size in bytes; 4 and 8 are decoded, 0 means 0.0.

    Returns:
        The decoded float, ``0.0`` for a zero-length payload, or ``None`` for
        any other length.
    """
    if length == 0:
        # An empty float payload stands for the value zero.
        return 0.0
    if length == 4:
        return struct.unpack('>f', file.read(4))[0]
    if length == 8:
        return struct.unpack('>d', file.read(8))[0]
    # Unsupported width: still consume the payload so the caller's stream
    # position stays aligned with the next element.
    file.read(length)
    return None
def parse(file, _from=0, _to=None):
    """Print the first recognised EBML element found in ``[_from, _to)``.

    Seeks to *_from*, then reads element headers one after another. Elements
    whose ID is not in ``matroska_tags`` are skipped. The first recognised
    element is decoded according to its type, printed as ``"<name>: <value>"``
    and the function returns. To walk further, call again starting from
    ``file.tell()``.

    For a 'MASTER' element the function recurses into the element body, so
    the first recognised child is printed first and the master itself is then
    printed with the recursive call's return value (always ``None``).

    Args:
        file: Seekable binary file-like object.
        _from: Absolute offset at which to start reading.
        _to: Absolute offset at which to stop; ``None`` means end of file.

    Returns:
        ``None`` in every case.
    """
    if _to is None:
        # The default used to reach ``file.tell() < None`` and raise
        # TypeError; treat "no limit" as "up to the end of the file".
        file.seek(0, 2)
        _to = file.tell()
    file.seek(_from, 0)
    while file.tell() < _to:
        element_id = get_ebml_id(file)
        size = get_size(file)
        try:
            _type, _name = matroska_tags[element_id]
        except KeyError:
            # Unknown element: jump over its payload and try the next one.
            file.seek(size, 1)
            continue
        value = 0
        if _type == 'SINT':
            value = read_integer(file, size, True)
        elif _type == 'UINT':
            value = read_integer(file, size)
        elif _type == 'FLOAT':
            value = read_float(file, size)
        elif _type == 'STRING-8':
            value = file.read(size).decode('utf-8')
        elif _type == 'STRING':
            value = file.read(size).decode('ascii')
        elif _type == 'DATE':
            # The payload is divided by 1000 to get microseconds and applied
            # as an offset from 2001-01-01.
            micro = read_integer(file, size, True) / 1000.0
            value = datetime.datetime(2001, 1, 1) + datetime.timedelta(microseconds=micro)
        elif _type == 'BINARY':
            value = file.read(size)
        elif _type == 'MASTER':
            loc = file.tell()
            value = parse(file, loc, loc + size)
        # Returning here means one recognised element is handled per call.
        return print(_name + ':', value)
    return None
# Call parse() 1024 times, each time resuming at the current file position
# and never reading element headers at or beyond byte offset 512.
with open('D:\\Bent.mkv', 'rb') as f:
    for _ in range(1024):
        parse(f, f.tell(), 512)
The part I added today is on lines 159 to 163. It allows the parsing to complete. Without the `except` I get the KeyError. But I need to figure out how to use the seek position to find the info I want. This is a Matroska EBML parser, by the way.