Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
LZS Decompression
#1
I have a large amount of data that was compressed using LZS compression. I have written and combined existing code to decompress this data. The decompression requires that the data is read at the bit level and then recombined into bytes to be written to file. My current code is way too slow. Can anyone suggest a faster way of decompressing LZS compressed data?

#!/usr/bin/env python
#-*- coding:utf-8 -*-

##############################################################
# Lempel-Ziv-Stac decompression
# BitReader and RingList classes
# 
# Copyright (C) 2011  Filippo Valsorda - FiloSottile
# filosottile.wiki gmail.com - www.pytux.it
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# 
##############################################################

import collections

class BitReader:
    """
    Presents a sequence of byte values as a stream of bits, most
    significant bit of each byte first.

    The input is an iterable of integers (for example a ``bytes`` or
    ``bytearray`` object, or a list of ints); only the low 8 bits of
    each value are used.  The data is kept packed in a bytearray and a
    cursor tracks the next unread bit, so construction costs one byte
    of storage per input byte and multi-bit reads are done with a
    single integer extraction instead of one call per bit.
    """
    def __init__(self, bytes):
        # NOTE: the parameter name shadows the builtin; it is kept so
        # that existing keyword callers keep working.
        self._data = bytearray(byte & 0xFF for byte in bytes)
        self._pos = 0                        # index of the next unread bit
        self._total = len(self._data) * 8    # total number of bits held

    def getBit(self):
        """Return the next bit as a bool; IndexError when exhausted."""
        pos = self._pos
        if pos >= self._total:
            raise IndexError("read past the end of the bit stream")
        self._pos = pos + 1
        return bool((self._data[pos >> 3] >> (7 - (pos & 7))) & 1)

    def getBits(self, num):
        """
        Return the next `num` bits as an unsigned int, first bit read
        being the most significant.  A non-positive `num` reads
        nothing and returns 0.  Raises IndexError when fewer than
        `num` bits remain.
        """
        if num <= 0:
            return 0
        start = self._pos
        stop = start + num
        if stop > self._total:
            # A bit-by-bit reader would have drained the stream before
            # failing; leave the cursor at the end to match that.
            self._pos = self._total
            raise IndexError("read past the end of the bit stream")
        self._pos = stop
        # Pull out just the bytes that overlap the field, then drop the
        # unwanted trailing bits and mask off the leading ones.
        last = (stop + 7) >> 3
        chunk = int.from_bytes(self._data[start >> 3:last], "big")
        return (chunk >> (last * 8 - stop)) & ((1 << num) - 1)

    def getByte(self):
        """Return the next 8 bits as an int in range 0..255."""
        return self.getBits(8)

    def __len__(self):
        """Number of bits not yet read."""
        return self._total - self._pos
        
class RingList:
    """
    Fixed-capacity FIFO buffer: once `length` items are stored, every
    further append discards the oldest item.

    Backed by ``collections.deque(maxlen=length)``, which performs the
    eviction itself.  A `length` of 0 yields a buffer that stays
    empty; a `length` of None yields an unbounded buffer.
    """
    def __init__(self, length):
        self.__max__ = length
        self.__data__ = collections.deque(maxlen=length)

    @property
    def __full__(self):
        # Read-only view of the old bookkeeping flag, kept for
        # backward compatibility with code that inspected it.
        return len(self.__data__) == self.__max__

    def append(self, x):
        """Add `x` as the newest item, evicting the oldest when full."""
        self.__data__.append(x)

    def get(self):
        """Return the underlying deque (not a copy), oldest item first."""
        return self.__data__

    def size(self):
        """Number of items currently stored."""
        return len(self.__data__)

    def maxsize(self):
        """Capacity passed to the constructor."""
        return self.__max__

    def __len__(self):
        return len(self.__data__)

    def __iter__(self):
        # Without an explicit iterator Python falls back to calling
        # __getitem__(0), (1), ... until IndexError, which never comes
        # because __getitem__ returns None past the end.
        return iter(self.__data__)

    def __getitem__(self, n):
        """
        Return item `n` (0 is the oldest, -1 the newest).  An index at
        or beyond the current size returns None; a negative index that
        reaches before the oldest item raises IndexError.
        """
        if n >= len(self.__data__):
            return None

        return self.__data__[n]
        
def LZSDecompress(data, window=None):
    """
    Decompress a Lempel-Ziv-Stac (LZS) bit stream.

    data   -- the compressed input, passed unchanged to BitReader
              (an iterable of byte values such as a bytes object).
    window -- optional history buffer (pre-populated dictionary); it
              must support append() and negative indexing, as
              RingList does.  It is updated in place, so the caller
              can keep it to carry history into a later call.  When
              omitted, a fresh RingList(2048) is created for this call
              only, so separate calls never share history.

    Returns the decompressed byte values as a list of ints.

    Decoding stops at the end marker (a 7-bit offset of zero).  If the
    input runs out before the marker is seen, the error raised by
    BitReader propagates to the caller.
    """
    # A default of RingList(2048) in the signature would be built once
    # at definition time and silently shared by every call.
    if window is None:
        window = RingList(2048)

    reader = BitReader(data)
    result = []

    while True:
        if not reader.getBit():
            # Literal: flag bit 0 followed by the raw 8-bit value.
            char = reader.getByte()
            result.append(char)
            window.append(char)
            continue

        # Back-reference: flag bit 1, then one bit choosing the offset
        # width (1 -> 7 bits, 0 -> 11 bits).
        if reader.getBit():
            offset = reader.getBits(7)
            if offset == 0:
                # EOF marker
                break
        else:
            # NOTE(review): an 11-bit offset of 0 is not rejected; it
            # indexes window[0] (the oldest entry) below.
            offset = reader.getBits(11)

        # Length: 2 bits encode 2..4; the value 5 means "read 2 more
        # bits" (5..7, or 8 to continue); after that 4-bit groups are
        # added until one is below 15.
        length = reader.getBits(2) + 2
        if length > 4:
            length += reader.getBits(2)
            if length > 7:
                while True:
                    nibble = reader.getBits(4)
                    length += nibble
                    if nibble < 15:
                        break

        # Copy one value at a time: the source may overlap the values
        # being written (offset < length), which repeats a pattern.
        for _ in range(length):
            char = window[-offset]
            result.append(char)
            window.append(char)

    return result
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Compression/Decompression Code not working Zanbezi 3 3,653 May-10-2017, 04:46 PM
Last Post: nilamo

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020