Python Forum
JS Buffer.from VS struct.pack
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
JS Buffer.from VS struct.pack
#1
I'm trying to port a little bit of a JS library so I can work with it in Python. It allows you to create Celeste (the game) maps through code. Celeste maps are saved as binary files, so the JS library uses Buffer.from. The only parts of the library I have been converting are the parts required to write a map, not to read one. The code for this is in bin.js (the "logic") and BinaryWriter.js (which creates the buffers and the file stream).

My python implementation of the binary writer uses struct.pack instead:
class Writer():
    """Write primitive values to a binary file in little-endian byte order.

    The file is opened when the instance is created and stays open until
    ``close`` is called. The instance can also be used as a context manager,
    in which case the file is closed when the ``with`` block exits.
    """

    def __init__(self, name):
        """Open ``name`` for binary writing, truncating any existing file."""
        self.file = open(name, "wb")

    def __enter__(self):
        """Return the writer itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file; exceptions from the ``with`` body still propagate."""
        self.close()

    def write_String(self, data):
        """Write ``data`` as UTF-8, preceded by its variable-length byte count."""
        encoded = data.encode('utf-8')
        # The prefix has to describe the bytes that follow it. len(data)
        # counts characters, which is smaller than the byte count as soon as
        # the text contains a non-ASCII character.
        self.write_var_length(len(encoded))
        self.file.write(encoded)

    def write_var_length(self, length):
        """Write ``length`` in 7-bit groups, least significant group first.

        Every byte except the last has its high bit set to signal that another
        byte follows.
        """
        b = []

        while length > 127:
            b.append(length & 127 | 0b10000000)
            # Integer shift instead of float division: stays exact for any size.
            length >>= 7

        b.append(length)
        self.file.write(bytes(b))

    def write_UInt8(self, data):
        """Write ``data`` as an unsigned 8-bit integer."""
        self.file.write(struct.pack("<B", data))

    def write_Uint16(self, data):
        """Write ``data`` as an unsigned 16-bit little-endian integer."""
        self.file.write(struct.pack("<H", data))

    def write_Int16(self, data):
        """Write ``data`` as a signed 16-bit little-endian integer."""
        self.file.write(struct.pack("<h", data))

    def write_Int32(self, data):
        """Write ``data`` as a signed 32-bit little-endian integer."""
        self.file.write(struct.pack("<i", data))

    def write_Float(self, data):
        """Write ``data`` as a 32-bit little-endian float."""
        self.file.write(struct.pack("<f", data))

    def write(self, data, type="string"):
        """Write ``data`` using the encoding selected by ``type``.

        ``type`` is one of "string", "uint8", "uint16", "int16", "int32" or
        "float". Any other value writes ``data`` to the file unchanged, so in
        that case ``data`` must already be a bytes-like object.
        """
        handlers = {
            "string": self.write_String,
            "uint8": self.write_UInt8,
            "uint16": self.write_Uint16,
            "int16": self.write_Int16,
            "int32": self.write_Int32,
            "float": self.write_Float,
        }
        handler = handlers.get(type)

        if handler is None:
            self.file.write(data)
        else:
            handler(data)

    def close(self):
        """Close the underlying file."""
        self.file.close()
(I've tried both little endian and big endian and found that little endian produced a closer result to what I needed)

And this is the python version of bin.js:
def get_attribute_names(element):
    """Return the entries of ``element`` that are treated as attributes.

    Entries whose key starts with an underscore and entries whose value is
    ``None`` are left out. The result is a new dict that keeps the order of
    ``element``.
    """
    return {
        key: value
        for key, value in element.items()
        # Identity check: "!= None" can be overridden by the value's own
        # comparison methods, "is not None" cannot.
        if not key.startswith('_') and value is not None
    }


class Encoder():
    """Serialise a nested element dictionary through a writer object.

    The writer passed to ``__init__`` only needs a ``write(data, type)``
    method. Elements are dicts with a ``"__name"`` entry and an optional
    ``"__children"`` list of further elements.
    """

    # Integer encodings, tried in order. The first one whose inclusive range
    # contains the value is used; its position + 1 is written as the type tag.
    ranges = [
        {"type": 'uint8', "range": [0, 255]},
        {"type": 'int16', "range": [-32768, 32767]},
        {"type": 'int32', "range": [-2147483648, 2147483647]}
    ]

    def __init__(self, writer):
        """Store the writer that every ``encode_*`` method writes to."""
        self.f = writer

    def populate_encode_key_names(self, d, seen):
        """Count the names, keys and string values of ``d`` and its children.

        ``seen`` is updated in place and maps each string to the number of
        times it was met. New entries are appended in the order they are first
        encountered.
        """
        name = d["__name"]
        seen[name] = seen.get(name, 0) + 1

        for key, value in d.items():
            # Keys with a double-underscore prefix are structural, not names.
            if not key.startswith('__'):
                seen[key] = seen.get(key, 0) + 1

            # The value stored under 'innerText' is never put in the table.
            if isinstance(value, str) and key != 'innerText':
                seen[value] = seen.get(value, 0) + 1

        for child in d.get("__children", []):
            self.populate_encode_key_names(child, seen)

    def encode_element(self, element, lookup):
        """Write ``element`` (a dict, or a list of dicts) and its children.

        ``lookup`` maps strings to their index in the lookup table; strings
        that are not in it are written as index 0.
        """
        if isinstance(element, list):
            for el in element:
                self.encode_element(el, lookup)
            return

        attrs = get_attribute_names(element)
        children = element.get("__children", [])

        self.f.write(lookup.get(element.get("__name"), 0), "uint16")
        self.f.write(len(attrs), "uint8")

        for key, value in attrs.items():
            self.f.write(lookup.get(key, 0), "uint16")
            self.encode_value(key, value, lookup)

        self.f.write(len(children), "uint16")
        self.encode_element(children, lookup)

    def encode_value(self, attr, value, lookup):
        """Write a one-byte type tag followed by ``value``.

        Tags: 0 bool, 1 uint8, 2 int16, 3 int32, 4 float, 5 lookup index,
        6 plain string, 7 run-length encoded string. ``attr`` is not used.

        Raises:
            ValueError: if an integer does not fit any entry of ``ranges``.
            TypeError: if ``value`` is not a bool, int, float or str.
        """
        # bool is a subclass of int, so it has to be tested first.
        if isinstance(value, bool):
            self.f.write(0, "uint8")
            self.f.write(1 if value else 0, "uint8")

        elif isinstance(value, float):
            self.f.write(4, "uint8")
            self.f.write(value, "float")

        elif isinstance(value, int):
            for tag, spec in enumerate(Encoder.ranges, start=1):
                low, high = spec["range"]

                if low <= value <= high:
                    self.f.write(tag, "uint8")
                    self.f.write(value, spec["type"])
                    # Stop at the smallest fitting type. Without this a value
                    # such as 0 is written once per range that contains it.
                    break
            else:
                # The attribute key is already written at this point, so
                # writing nothing would silently corrupt the output.
                raise ValueError(
                    "integer %r does not fit in any supported range" % (value,))

        elif isinstance(value, str):
            # NOTE(review): 0 doubles as "not in the lookup", so a string that
            # really sits at index 0 is written inline as well - confirm this
            # matches the reference implementation.
            index = lookup.get(value, 0)

            if index == 0:
                encoded_value = self.encode_run_length(value)
                encoded_length = len(encoded_value)

                # Use the run-length form only when it is shorter and its
                # length still fits the upper bound of the int16 range.
                if (encoded_length < len(value)
                        and encoded_length <= Encoder.ranges[1]["range"][1]):
                    self.f.write(7, "uint8")
                    self.f.write(encoded_length, "uint16")
                    self.f.write(encoded_value, "plain")
                else:
                    self.f.write(6, "uint8")
                    self.f.write(value, "string")
            else:
                self.f.write(5, "uint8")
                self.f.write(index, "uint16")

        else:
            raise TypeError(
                "cannot encode value of type %s" % type(value).__name__)

    def encode_run_length(self, string):
        """Return ``string`` run-length encoded as ``(count, code)`` byte pairs.

        A run is split once it reaches 255 repetitions so that the count fits
        in one byte. An empty string gives ``b""``.

        Raises:
            ValueError: if a character's code point is above 255, because it
                cannot be stored in a single byte.
        """
        if not string:
            return b""

        count = 0
        res = []
        current = ord(string[0])

        for char in map(ord, string):
            if char != current or count == 255:
                res.append(count)
                res.append(current)
                count = 1
                current = char
            else:
                count += 1

        # Flush the run that was still open when the input ended.
        res.append(count)
        res.append(current)

        return bytes(res)

class CelesteMap():
    """Write a map dictionary to a binary file.

    Creating an instance opens the output file and writes the header string
    straight away. ``write_file`` then writes the lookup table and the
    element tree, and closes the file.
    """

    def __init__(self, file_name="./custom_map.bin"):
        """Open ``file_name`` for writing and emit the header string."""
        self.header = "CELESTE MAP"

        self.f = Writer(file_name)
        self.e = Encoder(self.f)

        self.f.write(self.header)

    def write_file(self, data=None):
        """Encode ``data`` into the file and close it.

        ``data`` must be the already formatted root element: a dict with
        ``"_package"`` and ``"__name"`` entries.

        Raises:
            ValueError: if ``data`` is None. The file is left open in that
                case, so the call can be repeated with real data.
        """
        if data is None:  # or not isinstance(data, World)):
            raise ValueError("Data cannot be None!")

        try:
            seen = {}
            # data = data.to_formatted_data()

            self.e.populate_encode_key_names(data, seen)

            # The position of a string in this list is the index written
            # wherever the string is referenced.
            lookup = list(seen)
            lookup_dict = {k: i for i, k in enumerate(lookup)}

            self.f.write(data["_package"], "string")
            self.f.write(len(lookup), "uint16")

            for entry in lookup:
                self.f.write(entry, "string")

            self.e.encode_element(data, lookup_dict)
        finally:
            # Release the file handle even when encoding fails part-way.
            self.close()

    def close(self):
        """Close the underlying writer."""
        self.f.close()
(CelesteMap.write_file is equivalent to encode in bin.js)
The map data I have been testing with is:
{'_package': 'test', '__name': 'Map', '__children': [{'__name': 'levels', '__children': [{'_size': [40, 23], 'name': 'room_0', 'musicLayer1': False, 'musicLayer2': False, 'musicLayer3': False, 'musicLayer4': False, 'musicProgress': '', 'ambienceProgress': '', 'dark': False, 'space': False, 'underwater': False, 'whisper': False, 'disableDownTransition': False, 'delayAltMusicFade': False, 'music': 'music_oldsite_awake', 'altMusic': '', 'windPattern': 'None', 'cameraOffsetX': 0, 'cameraOffsetY': 0, '__name': 'level', 'x': 0, 'y': 0, 'c': 0, 'width': 40, 'height': 23, '__children': [{'__name': 'solids', 'innerText': 'dd\ndd'}, {'__name': 'bg', 'innerText': ''}, {'__name': 'objtiles', 'innerText': ''}, {'__name': 'fgtiles', 'tileset': 'Scenery'}, {'__name': 'bgtiles', 'tileset': 'Scenery'}, {'__name': 'entities', '__children': []}, {'__name': 'triggers', '__children': []}, {'__name': 'fgdecals', 'tileset': 'Scenery', '__children': []}, {'__name': 'bgdecals', 'tileset': 'Scenery', '__children': []}]}]}, {'__name': 'Style', '__children': [{'__name': 'Foregrounds', '__children': []}, {'__name': 'Backgrounds', '__children': []}]}, {'__name': 'Filler', '__children': []}]}
(This is the already-formatted data, which is why I have commented out data = data.to_formatted_data().)

The output bin files are almost identical with a few minor differences which means I unfortunately can't load the map. From write_file I have printed parameters, values of for loops, if statements, etc, for every function call to make sure the values at that line are exactly the value in the JS library. I don't think there is any line in the python script that will not have the same values as the respective line in the JS script.
That's why I think the problem is something to do with a difference in struct.pack versus Buffer.from but I don't use either enough to know what that difference is.

I created a pastebin of the output of both scripts (JS top, python bottom).
If anyone could help out I would really appreciate it!
Reply
#2
Why are you testing with a map? Test by writing an unsigned int or a string. I would write a test for each data type and compare the resulting files.
Reply
#3
(Apr-05-2021, 03:14 PM)deanhystad Wrote: Why are you testing with a map? Test by writing an unsigned int or a string. I would write a test for each data type and compare the resulting files.

Just tested both versions with the same inputs and (unfortunately?) got the same output file from both.

I have seen that write_UInt8 is called about 15 times more than it should be. I found this issue before but, for some reason, never figured out why, so that's what I'm looking at now. It may well be the same for some other functions too.
Reply
#4
Ok, I think I fixed it. I missed a break in a for loop, so it was calling the write function more times than it needed to.
The map now actually loads in the map editor, which is good. It doesn't look how I expected it to, but I don't think it's an issue with the code.

Edit: Found the reason. I deleted a bit of code that multiplied the room size by 8 because I didn't know why it was doing that. Turns out it's because the game "builds" in pixels, not blocks, and a block is 8x8 pixels so to make a room 10x10 it needs to actually be 80x80.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Python Struct Question, decimal 10, \n and \x0a. 3python 2 648 Aug-11-2023, 09:29 AM
Last Post: 3python
  Read buffer from bluetooth frohr 2 2,077 Jun-01-2022, 01:31 PM
Last Post: frohr
  Seperate output buffer matt_the_hall 2 2,314 Mar-15-2021, 08:44 PM
Last Post: matt_the_hall
  PyAudio Buffer Data Calculation MclarenF1 0 2,100 Aug-21-2020, 10:55 AM
Last Post: MclarenF1
  struct.unpack failed Roro 2 3,278 Jun-13-2020, 05:28 PM
Last Post: DreamingInsanity
  Pack integer values as single bytes in a struct bhdschmidt 3 2,275 Jun-09-2020, 09:23 PM
Last Post: bhdschmidt
  struct.decode() and '\0' deanhystad 1 3,142 Apr-09-2020, 04:13 PM
Last Post: TomToad
  Additional buffer for Windows constantin01 0 1,362 Mar-31-2020, 10:24 AM
Last Post: constantin01
  Getting MemoryError frames.write(buffer) kunwarsingh 0 1,568 Feb-10-2020, 09:39 PM
Last Post: kunwarsingh
  How to remove empty struct from matlab file in python? python_newbie09 0 2,353 Jun-25-2019, 12:13 PM
Last Post: python_newbie09

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020