From 5cc4256a931b98ea167291397421d0db60c5d40c Mon Sep 17 00:00:00 2001 From: pacien Date: Sun, 2 Dec 2018 00:22:56 +0100 Subject: implement lzss block --- src/blocks/lzssblock.nim | 31 +++++-- src/huffman/huffmantree.nim | 10 +-- src/lzss/lzsschain.nim | 17 ++-- src/lzss/lzssencoder.nim | 2 +- src/lzsshuffman/lzsshuffmandecoder.nim | 34 ++++++++ src/lzsshuffman/lzsshuffmanencoder.nim | 34 ++++++++ src/lzsshuffman/lzsshuffmanstats.nim | 32 ++++++++ src/lzsshuffman/lzsshuffmansymbol.nim | 34 ++++++++ tests/tlzss.nim | 9 --- tests/tlzsshuffman.nim | 144 +++++++++++++++++++++++++++++++++ 10 files changed, 315 insertions(+), 32 deletions(-) create mode 100644 src/lzsshuffman/lzsshuffmandecoder.nim create mode 100644 src/lzsshuffman/lzsshuffmanencoder.nim create mode 100644 src/lzsshuffman/lzsshuffmanstats.nim create mode 100644 src/lzsshuffman/lzsshuffmansymbol.nim create mode 100644 tests/tlzsshuffman.nim diff --git a/src/blocks/lzssblock.nim b/src/blocks/lzssblock.nim index f68f665..b23cee2 100644 --- a/src/blocks/lzssblock.nim +++ b/src/blocks/lzssblock.nim @@ -14,19 +14,38 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import ../bitio/bitreader, ../bitio/bitwriter +import lists +import ../bitio/integers, ../bitio/bitreader, ../bitio/bitwriter +import ../lzss/lzsschain, ../lzss/lzssencoder +import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder, ../huffman/huffmandecoder +import ../lzsshuffman/lzsshuffmanstats, ../lzsshuffman/lzsshuffmandecoder, ../lzsshuffman/lzsshuffmanencoder + +const maxDataByteLength = 32_000 type LzssBlock* = object - discard + lzssChain: LzssChain proc readSerialised*(bitReader: BitReader): LzssBlock = - discard + let symbolHuffmanTree = huffmantree.deserialise(bitReader, uint16) + let positionHuffmanTree = huffmantree.deserialise(bitReader, uint16) + let symbolDecoder = symbolHuffmanTree.decoder() + let positionDecoder = positionHuffmanTree.decoder() + LzssBlock(lzssChain: readChain(bitReader, symbolDecoder, positionDecoder, maxDataByteLength)) proc writeSerialisedTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) = - discard + let (symbolStats, positionStats) = aggregateStats(lzssBlock.lzssChain) + let symbolHuffmanTree = buildHuffmanTree(symbolStats) + let positionHuffmanTree = buildHuffmanTree(positionStats) + let symbolEncoder = symbolHuffmanTree.encoder(uint16) + let positionEncoder = positionHuffmanTree.encoder(uint16) + symbolHuffmanTree.serialise(bitWriter) + positionHuffmanTree.serialise(bitWriter) + lzssBlock.lzssChain.writeChain(symbolEncoder, positionEncoder, bitWriter) proc readRaw*(bitReader: BitReader): LzssBlock = - discard + let byteBuf = bitReader.readSeq(maxDataByteLength, uint8) + LzssBlock(lzssChain: lzssEncode(byteBuf.data)) proc writeRawTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) = - discard + let byteSeq = lzssBlock.lzssChain.decode() + bitWriter.writeSeq(byteSeq.len * wordBitLength, byteSeq) diff --git a/src/huffman/huffmantree.nim b/src/huffman/huffmantree.nim index 58a840e..f3fce1b 100644 --- a/src/huffman/huffmantree.nim +++ b/src/huffman/huffmantree.nim @@ -31,6 +31,11 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object of leaf: value*: T +proc maxValue*[T](node: HuffmanTreeNode[T]): T = + case node.kind: + of branch: node.maxChildValue + of leaf: node.value + proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] = HuffmanTreeNode[T]( kind: branch, left: left, right: right, @@ -45,11 +50,6 @@ proc `==`*[T](a, b: HuffmanTreeNode[T]): bool = of branch: a.left == b.left and a.right == b.right of leaf: a.value == b.value -proc maxValue*[T](node: HuffmanTreeNode[T]): T = - case node.kind: - of branch: node.maxChildValue - of leaf: node.value - proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] = let valueBitLength = bitReader.readBits(valueLengthFieldBitLength, uint8).int proc readNode(): HuffmanTreeNode[T] = diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim index 8b49914..8ebcb1a 100644 --- a/src/lzss/lzsschain.nim +++ b/src/lzss/lzsschain.nim @@ -15,7 +15,7 @@ # along with this program. If not, see . import lists, tables, sugar -import ../bitio/integers, ../huffman/huffmantree +import ../bitio/integers import listpolyfill, lzssnode const maxChainByteLength = 32_000 * wordBitLength @@ -26,6 +26,11 @@ type LzssChain* = proc lzssChain*(): LzssChain = initSinglyLinkedList[LzssNode]() +proc lzssChain*(chainArray: openArray[LzssNode]): LzssChain = + var chain = lzssChain() + for node in chainArray: chain.append(node) + chain + proc decode*(lzssChain: LzssChain): seq[uint8] = result = newSeqOfCap[uint8](maxChainByteLength) for node in lzssChain.items: @@ -35,13 +40,3 @@ proc decode*(lzssChain: LzssChain): seq[uint8] = of reference: let absolutePos = result.len - node.relativePos result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1)) - -proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] = - result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]()) - for node in lzssChain.items: - case node.kind: - of character: - result.characters.inc(node.character) - of reference: - result.lengths.inc(node.length) - result.positions.inc(node.relativePos) diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim index 8b750fb..82fbe7b 100644 --- a/src/lzss/lzssencoder.nim +++ b/src/lzss/lzssencoder.nim @@ -17,7 +17,7 @@ import lists import listpolyfill, matchtable, lzssnode, lzsschain -const matchGroupLength = 3 +const matchGroupLength* = 3 const maxRefByteLength = high(uint8).int + matchGroupLength let emptySinglyLinkedList = initSinglyLinkedList[int]() diff --git a/src/lzsshuffman/lzsshuffmandecoder.nim b/src/lzsshuffman/lzsshuffmandecoder.nim new file mode 100644 index 0000000..cd71914 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmandecoder.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../bitio/bitreader +import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain +import ../huffman/huffmantree, ../huffman/huffmandecoder +import lzsshuffmansymbol + +proc readChain*(bitReader: BitReader, symbolDecoder, positionDecoder: HuffmanDecoder[uint16], maxDataByteLength: int): LzssChain = + var chain = lzssChain() + var (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, 0) + while not symbol.isEndMarker(): + if byteCursor > maxDataByteLength: raise newException(IOError, "lzss block too long") + if symbol.isCharacter(): + chain.append(lzssCharacter(symbol.uint8)) + else: + let position = positionDecoder.decode(bitReader) + chain.append(unpackLzssReference(symbol, position)) + (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, byteCursor + 1) + chain diff --git a/src/lzsshuffman/lzsshuffmanencoder.nim b/src/lzsshuffman/lzsshuffmanencoder.nim new file mode 100644 index 0000000..ea89f85 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmanencoder.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../bitio/bitwriter +import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain, ../lzss/lzssencoder +import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder +import lzsshuffmansymbol + +proc writeSymbol(bitWriter: BitWriter, encodedSymbol: tuple[bitLength: int, value: uint16]) = + bitWriter.writeBits(encodedSymbol.bitLength, encodedSymbol.value) + +proc writeChain*(lzssChain: LzssChain, symbolEncoder, positionEncoder: HuffmanEncoder[uint16, uint16], bitWriter: BitWriter) = + for node in lzssChain.items: + case node.kind: + of character: + bitWriter.writeSymbol(symbolEncoder.encode(node.character)) + of reference: + bitWriter.writeSymbol(symbolEncoder.encode(shiftLzssLength(node.length))) + bitWriter.writeSymbol(positionEncoder.encode(node.relativePos.uint16)) + bitWriter.writeSymbol(symbolEncoder.encode(endSymbol)) diff --git a/src/lzsshuffman/lzsshuffmanstats.nim b/src/lzsshuffman/lzsshuffmanstats.nim new file mode 100644 index 0000000..037ce5f --- /dev/null +++ b/src/lzsshuffman/lzsshuffmanstats.nim @@ -0,0 +1,32 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import tables, lists +import ../lzss/lzssnode, ../lzss/lzsschain +import lzsshuffmansymbol + +proc aggregateStats*(chain: LzssChain): tuple[symbolTable, positionTable: CountTableRef[uint16]] = + var (symbolTable, positionTable) = (newCountTable[uint16](), newCountTable[uint16]()) + for node in chain.items: + case node.kind: + of character: + symbolTable.inc(node.character) + of reference: + symbolTable.inc(shiftLzssLength(node.length)) + positionTable.inc(node.relativePos.uint16) + symbolTable.inc(endSymbol) + if positionTable.len < 1: positionTable.inc(0) # ensure non-empty tree + (symbolTable, positionTable) diff --git a/src/lzsshuffman/lzsshuffmansymbol.nim b/src/lzsshuffman/lzsshuffmansymbol.nim new file mode 100644 index 0000000..cfd3fe4 --- /dev/null +++ b/src/lzsshuffman/lzsshuffmansymbol.nim @@ -0,0 +1,34 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import lists +import ../lzss/lzssnode, ../lzss/lzssencoder + +type Symbol* = uint16 + +const endSymbol* = 256.Symbol + +proc isEndMarker*(symbol: Symbol): bool = + symbol == endSymbol + +proc isCharacter*(symbol: Symbol): bool = + symbol < endSymbol + +proc unpackLzssReference*(symbol: Symbol, position: uint16): LzssNode = + lzssReference(symbol.int - endSymbol.int - 1 + matchGroupLength, position.int) + +proc shiftLzssLength*(length: int): uint16 = + (length + endSymbol.int + 1 - matchGroupLength).uint16 diff --git a/tests/tlzss.nim b/tests/tlzss.nim index 7325d5b..b6b3c51 100644 --- a/tests/tlzss.nim +++ b/tests/tlzss.nim @@ -66,15 +66,6 @@ suite "lzsschain": test "decode": check chain().decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5] - test "stats": - let stats = chain().stats() - check stats.characters == newCountTable(concat( - repeat(0'u8, 2), repeat(1'u8, 2), repeat(2'u8, 1), repeat(3'u8, 1), repeat(4'u8, 1), repeat(5'u8, 3))) - check stats.lengths == newCountTable(concat( - repeat(3, 2), repeat(4, 1))) - check stats.positions == newCountTable(concat( - repeat(3, 1), repeat(6, 1), repeat(8, 1))) - suite "lzssencoder": test "commonPrefixLength": check commonPrefixLength([], [], 0, 10) == 0 diff --git a/tests/tlzsshuffman.nim b/tests/tlzsshuffman.nim new file mode 100644 index 0000000..f771f31 --- /dev/null +++ b/tests/tlzsshuffman.nim @@ -0,0 +1,144 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import unittest, tables, lists, sequtils, streams +import bitio/bitwriter, bitio/bitreader +import lzss/listpolyfill, lzss/lzssnode, lzss/lzsschain +import huffman/huffmantree, huffman/huffmantreebuilder, huffman/huffmanencoder, huffman/huffmandecoder +import lzsshuffman/lzsshuffmansymbol, lzsshuffman/lzsshuffmanstats, lzsshuffman/lzsshuffmanencoder, lzsshuffman/lzsshuffmandecoder + +suite "lzsshuffmansymbol": + test "isEndMarker": + check 'a'.Symbol.isEndMarker() == false + check endSymbol.isEndMarker() + + test "isCharacter": + check 'a'.Symbol.isCharacter() + check endSymbol.isCharacter() == false + check 300.Symbol.isCharacter() == false + + test "unpackLzssReference": + check unpackLzssReference(257.Symbol, 10) == lzssReference(3, 10) + check unpackLzssReference(300.Symbol, 10) == lzssReference(46, 10) + + test "shiftLzssLength": + check shiftLzssLength(3) == 257'u16 + check shiftLzssLength(10) == 264'u16 + +suite "lzsshuffmanstats": + test "aggretateStats": + let chain = lzssChain([ + lzssCharacter(0), lzssCharacter(1), lzssCharacter(2), + lzssCharacter(3), lzssCharacter(4), lzssCharacter(5), + lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1), + lzssReference(3, 8), lzssCharacter(5), + lzssReference(3, 3), lzssCharacter(5)]) + let (symbolTable, positionTable) = chain.aggregateStats() + check symbolTable == newCountTable(concat( + repeat(0'u16, 2), repeat(1'u16, 2), repeat(2'u16, 1), repeat(3'u16, 1), repeat(4'u16, 1), repeat(5'u16, 3), + repeat(endSymbol.uint16, 1), + repeat(257'u16, 2), repeat(258'u16, 1))) + check positionTable == newCountTable(concat( + repeat(3'u16, 1), repeat(6'u16, 1), repeat(8'u16, 1))) + +suite "lzsshuffmanencoder": + test "writeChain (empty)": + let chain = lzssChain(newSeq[LzssNode]()) + let symbolTree = huffmanLeaf(endSymbol.uint16) + let positionTree = huffmanLeaf(0'u16) + let stream = newStringStream() + defer: stream.close() + let bitWriter = stream.bitWriter() + writeChain(chain, symbolTree.encoder(uint16), positionTree.encoder(uint16), bitWriter) + bitWriter.flush() + stream.setPosition(0) + check stream.atEnd() + + test "writeChain (minimal)": + let chain = lzssChain([ + lzssCharacter(0), lzssCharacter(1), lzssCharacter(2), + lzssReference(3, 3), lzssReference(3, 4)]) + let symbolTree = huffmanBranch( + huffmanBranch( + huffmanLeaf(0'u16), + huffmanLeaf(1'u16)), + huffmanBranch( + huffmanLeaf(257'u16), + huffmanBranch( + huffmanLeaf(2'u16), + huffmanLeaf(256'u16)))) + let positionTree = huffmanBranch( + huffmanLeaf(3'u16), + huffmanLeaf(4'u16)) + let stream = newStringStream() + defer: stream.close() + let bitWriter = stream.bitWriter() + writeChain(chain, symbolTree.encoder(uint16), positionTree.encoder(uint16), bitWriter) + bitWriter.flush() + stream.setPosition(0) + let bitReader = stream.bitReader() + check bitReader.readBits(2, uint8) == 0b00'u8 # char 0 + check bitReader.readBits(2, uint8) == 0b10'u8 # char 1 + check bitReader.readBits(3, uint8) == 0b011'u8 # char 2 + check bitReader.readBits(2, uint8) == 0b01'u8 # ref len 3 + check bitReader.readBits(1, uint8) == 0b0'u8 # ref pos 3 + check bitReader.readBits(2, uint8) == 0b01'u8 # ref len 3 + check bitReader.readBits(1, uint8) == 0b1'u8 # ref pos 4 + check bitReader.readBits(3, uint8) == 0b111'u8 # eof + +suite "lzsshuffmandecoder": + test "readChain (empty)": + let symbolTree = huffmanLeaf(endSymbol.uint16) + let positionTree = huffmanLeaf(0'u16) + let stream = newStringStream() + defer: stream.close() + stream.write(0'u8) # eof + stream.setPosition(0) + let bitReader = stream.bitReader() + let result = readChain(bitReader, symbolTree.decoder(), positionTree.decoder(), 32_000) + check toSeq(result.items).len == 0 + + test "readChain (minimal)": + let symbolTree = huffmanBranch( + huffmanBranch( + huffmanLeaf(0'u16), + huffmanLeaf(1'u16)), + huffmanBranch( + huffmanLeaf(257'u16), + huffmanBranch( + huffmanLeaf(2'u16), + huffmanLeaf(256'u16)))) + let positionTree = huffmanBranch( + huffmanLeaf(3'u16), + huffmanLeaf(4'u16)) + let stream = newStringStream() + defer: stream.close() + let bitWriter = stream.bitWriter() + bitWriter.writeBits(2, 0b00'u8) + bitWriter.writeBits(2, 0b10'u8) + bitWriter.writeBits(3, 0b011'u8) + bitWriter.writeBits(2, 0b01'u8) + bitWriter.writeBits(1, 0b0'u8) + bitWriter.writeBits(2, 0b01'u8) + bitWriter.writeBits(1, 0b1'u8) + bitWriter.writeBits(3, 0b111'u8) + bitWriter.flush() + stream.setPosition(0) + let bitReader = stream.bitReader() + let result = readChain(bitReader, symbolTree.decoder(), positionTree.decoder(), 32_000) + check toSeq(result.items) == [ + lzssCharacter(0), lzssCharacter(1), lzssCharacter(2), + lzssReference(3, 3), lzssReference(3, 4)] -- cgit v1.2.3