From 5cc4256a931b98ea167291397421d0db60c5d40c Mon Sep 17 00:00:00 2001
From: pacien
Date: Sun, 2 Dec 2018 00:22:56 +0100
Subject: implement lzss block
---
src/blocks/lzssblock.nim | 31 +++++--
src/huffman/huffmantree.nim | 10 +--
src/lzss/lzsschain.nim | 17 ++--
src/lzss/lzssencoder.nim | 2 +-
src/lzsshuffman/lzsshuffmandecoder.nim | 34 ++++++++
src/lzsshuffman/lzsshuffmanencoder.nim | 34 ++++++++
src/lzsshuffman/lzsshuffmanstats.nim | 32 ++++++++
src/lzsshuffman/lzsshuffmansymbol.nim | 34 ++++++++
tests/tlzss.nim | 9 ---
tests/tlzsshuffman.nim | 144 +++++++++++++++++++++++++++++++++
10 files changed, 315 insertions(+), 32 deletions(-)
create mode 100644 src/lzsshuffman/lzsshuffmandecoder.nim
create mode 100644 src/lzsshuffman/lzsshuffmanencoder.nim
create mode 100644 src/lzsshuffman/lzsshuffmanstats.nim
create mode 100644 src/lzsshuffman/lzsshuffmansymbol.nim
create mode 100644 tests/tlzsshuffman.nim
diff --git a/src/blocks/lzssblock.nim b/src/blocks/lzssblock.nim
index f68f665..b23cee2 100644
--- a/src/blocks/lzssblock.nim
+++ b/src/blocks/lzssblock.nim
@@ -14,19 +14,38 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
-import ../bitio/bitreader, ../bitio/bitwriter
+import lists
+import ../bitio/integers, ../bitio/bitreader, ../bitio/bitwriter
+import ../lzss/lzsschain, ../lzss/lzssencoder
+import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder, ../huffman/huffmandecoder
+import ../lzsshuffman/lzsshuffmanstats, ../lzsshuffman/lzsshuffmandecoder, ../lzsshuffman/lzsshuffmanencoder
+
+const maxDataByteLength = 32_000
type LzssBlock* = object
- discard
+ lzssChain: LzssChain
proc readSerialised*(bitReader: BitReader): LzssBlock =
- discard
+ let symbolHuffmanTree = huffmantree.deserialise(bitReader, uint16)
+ let positionHuffmanTree = huffmantree.deserialise(bitReader, uint16)
+ let symbolDecoder = symbolHuffmanTree.decoder()
+ let positionDecoder = positionHuffmanTree.decoder()
+ LzssBlock(lzssChain: readChain(bitReader, symbolDecoder, positionDecoder, maxDataByteLength))
proc writeSerialisedTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) =
- discard
+ let (symbolStats, positionStats) = aggregateStats(lzssBlock.lzssChain)
+ let symbolHuffmanTree = buildHuffmanTree(symbolStats)
+ let positionHuffmanTree = buildHuffmanTree(positionStats)
+ let symbolEncoder = symbolHuffmanTree.encoder(uint16)
+ let positionEncoder = positionHuffmanTree.encoder(uint16)
+ symbolHuffmanTree.serialise(bitWriter)
+ positionHuffmanTree.serialise(bitWriter)
+ lzssBlock.lzssChain.writeChain(symbolEncoder, positionEncoder, bitWriter)
proc readRaw*(bitReader: BitReader): LzssBlock =
- discard
+ let byteBuf = bitReader.readSeq(maxDataByteLength, uint8)
+ LzssBlock(lzssChain: lzssEncode(byteBuf.data))
proc writeRawTo*(lzssBlock: LzssBlock, bitWriter: BitWriter) =
- discard
+ let byteSeq = lzssBlock.lzssChain.decode()
+ bitWriter.writeSeq(byteSeq.len * wordBitLength, byteSeq)
diff --git a/src/huffman/huffmantree.nim b/src/huffman/huffmantree.nim
index 58a840e..f3fce1b 100644
--- a/src/huffman/huffmantree.nim
+++ b/src/huffman/huffmantree.nim
@@ -31,6 +31,11 @@ type HuffmanTreeNode*[T: SomeUnsignedInt] = ref object
of leaf:
value*: T
+proc maxValue*[T](node: HuffmanTreeNode[T]): T =
+ case node.kind:
+ of branch: node.maxChildValue
+ of leaf: node.value
+
proc huffmanBranch*[T](left, right: HuffmanTreeNode[T]): HuffmanTreeNode[T] =
HuffmanTreeNode[T](
kind: branch, left: left, right: right,
@@ -45,11 +50,6 @@ proc `==`*[T](a, b: HuffmanTreeNode[T]): bool =
of branch: a.left == b.left and a.right == b.right
of leaf: a.value == b.value
-proc maxValue*[T](node: HuffmanTreeNode[T]): T =
- case node.kind:
- of branch: node.maxChildValue
- of leaf: node.value
-
proc deserialise*[T](bitReader: BitReader, valueType: typedesc[T]): HuffmanTreeNode[T] =
let valueBitLength = bitReader.readBits(valueLengthFieldBitLength, uint8).int
proc readNode(): HuffmanTreeNode[T] =
diff --git a/src/lzss/lzsschain.nim b/src/lzss/lzsschain.nim
index 8b49914..8ebcb1a 100644
--- a/src/lzss/lzsschain.nim
+++ b/src/lzss/lzsschain.nim
@@ -15,7 +15,7 @@
# along with this program. If not, see .
import lists, tables, sugar
-import ../bitio/integers, ../huffman/huffmantree
+import ../bitio/integers
import listpolyfill, lzssnode
const maxChainByteLength = 32_000 * wordBitLength
@@ -26,6 +26,11 @@ type LzssChain* =
proc lzssChain*(): LzssChain =
initSinglyLinkedList[LzssNode]()
+proc lzssChain*(chainArray: openArray[LzssNode]): LzssChain =
+ var chain = lzssChain()
+ for node in chainArray: chain.append(node)
+ chain
+
proc decode*(lzssChain: LzssChain): seq[uint8] =
result = newSeqOfCap[uint8](maxChainByteLength)
for node in lzssChain.items:
@@ -35,13 +40,3 @@ proc decode*(lzssChain: LzssChain): seq[uint8] =
of reference:
let absolutePos = result.len - node.relativePos
result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
-
-proc stats*(lzssChain: LzssChain): tuple[characters: CountTableRef[uint8], lengths, positions: CountTableRef[int]] =
- result = (newCountTable[uint8](), newCountTable[int](), newCountTable[int]())
- for node in lzssChain.items:
- case node.kind:
- of character:
- result.characters.inc(node.character)
- of reference:
- result.lengths.inc(node.length)
- result.positions.inc(node.relativePos)
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
index 8b750fb..82fbe7b 100644
--- a/src/lzss/lzssencoder.nim
+++ b/src/lzss/lzssencoder.nim
@@ -17,7 +17,7 @@
import lists
import listpolyfill, matchtable, lzssnode, lzsschain
-const matchGroupLength = 3
+const matchGroupLength* = 3
const maxRefByteLength = high(uint8).int + matchGroupLength
let emptySinglyLinkedList = initSinglyLinkedList[int]()
diff --git a/src/lzsshuffman/lzsshuffmandecoder.nim b/src/lzsshuffman/lzsshuffmandecoder.nim
new file mode 100644
index 0000000..cd71914
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmandecoder.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../bitio/bitreader
+import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain
+import ../huffman/huffmantree, ../huffman/huffmandecoder
+import lzsshuffmansymbol
+
+proc readChain*(bitReader: BitReader, symbolDecoder, positionDecoder: HuffmanDecoder[uint16], maxDataByteLength: int): LzssChain =
+ var chain = lzssChain()
+ var (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, 0)
+ while not symbol.isEndMarker():
+ if byteCursor > maxDataByteLength: raise newException(IOError, "lzss block too long")
+ if symbol.isCharacter():
+ chain.append(lzssCharacter(symbol.uint8))
+ else:
+ let position = positionDecoder.decode(bitReader)
+ chain.append(unpackLzssReference(symbol, position))
+ (symbol, byteCursor) = (symbolDecoder.decode(bitReader).Symbol, byteCursor + 1)
+ chain
diff --git a/src/lzsshuffman/lzsshuffmanencoder.nim b/src/lzsshuffman/lzsshuffmanencoder.nim
new file mode 100644
index 0000000..ea89f85
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmanencoder.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../bitio/bitwriter
+import ../lzss/listpolyfill, ../lzss/lzssnode, ../lzss/lzsschain, ../lzss/lzssencoder
+import ../huffman/huffmantree, ../huffman/huffmantreebuilder, ../huffman/huffmanencoder
+import lzsshuffmansymbol
+
+proc writeSymbol(bitWriter: BitWriter, encodedSymbol: tuple[bitLength: int, value: uint16]) =
+ bitWriter.writeBits(encodedSymbol.bitLength, encodedSymbol.value)
+
+proc writeChain*(lzssChain: LzssChain, symbolEncoder, positionEncoder: HuffmanEncoder[uint16, uint16], bitWriter: BitWriter) =
+ for node in lzssChain.items:
+ case node.kind:
+ of character:
+ bitWriter.writeSymbol(symbolEncoder.encode(node.character))
+ of reference:
+ bitWriter.writeSymbol(symbolEncoder.encode(shiftLzssLength(node.length)))
+ bitWriter.writeSymbol(positionEncoder.encode(node.relativePos.uint16))
+ bitWriter.writeSymbol(symbolEncoder.encode(endSymbol))
diff --git a/src/lzsshuffman/lzsshuffmanstats.nim b/src/lzsshuffman/lzsshuffmanstats.nim
new file mode 100644
index 0000000..037ce5f
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmanstats.nim
@@ -0,0 +1,32 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import tables, lists
+import ../lzss/lzssnode, ../lzss/lzsschain
+import lzsshuffmansymbol
+
+proc aggregateStats*(chain: LzssChain): tuple[symbolTable, positionTable: CountTableRef[uint16]] =
+ var (symbolTable, positionTable) = (newCountTable[uint16](), newCountTable[uint16]())
+ for node in chain.items:
+ case node.kind:
+ of character:
+ symbolTable.inc(node.character)
+ of reference:
+ symbolTable.inc(shiftLzssLength(node.length))
+ positionTable.inc(node.relativePos.uint16)
+ symbolTable.inc(endSymbol)
+ if positionTable.len < 1: positionTable.inc(0) # ensure non-empty tree
+ (symbolTable, positionTable)
diff --git a/src/lzsshuffman/lzsshuffmansymbol.nim b/src/lzsshuffman/lzsshuffmansymbol.nim
new file mode 100644
index 0000000..cfd3fe4
--- /dev/null
+++ b/src/lzsshuffman/lzsshuffmansymbol.nim
@@ -0,0 +1,34 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import lists
+import ../lzss/lzssnode, ../lzss/lzssencoder
+
+type Symbol* = uint16
+
+const endSymbol* = 256.Symbol
+
+proc isEndMarker*(symbol: Symbol): bool =
+ symbol == endSymbol
+
+proc isCharacter*(symbol: Symbol): bool =
+ symbol < endSymbol
+
+proc unpackLzssReference*(symbol: Symbol, position: uint16): LzssNode =
+ lzssReference(symbol.int - endSymbol.int - 1 + matchGroupLength, position.int)
+
+proc shiftLzssLength*(length: int): uint16 =
+ (length + endSymbol.int + 1 - matchGroupLength).uint16
diff --git a/tests/tlzss.nim b/tests/tlzss.nim
index 7325d5b..b6b3c51 100644
--- a/tests/tlzss.nim
+++ b/tests/tlzss.nim
@@ -66,15 +66,6 @@ suite "lzsschain":
test "decode":
check chain().decode() == @[0'u8, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 4, 5, 0, 5, 5, 0, 5, 5]
- test "stats":
- let stats = chain().stats()
- check stats.characters == newCountTable(concat(
- repeat(0'u8, 2), repeat(1'u8, 2), repeat(2'u8, 1), repeat(3'u8, 1), repeat(4'u8, 1), repeat(5'u8, 3)))
- check stats.lengths == newCountTable(concat(
- repeat(3, 2), repeat(4, 1)))
- check stats.positions == newCountTable(concat(
- repeat(3, 1), repeat(6, 1), repeat(8, 1)))
-
suite "lzssencoder":
test "commonPrefixLength":
check commonPrefixLength([], [], 0, 10) == 0
diff --git a/tests/tlzsshuffman.nim b/tests/tlzsshuffman.nim
new file mode 100644
index 0000000..f771f31
--- /dev/null
+++ b/tests/tlzsshuffman.nim
@@ -0,0 +1,144 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+import unittest, tables, lists, sequtils, streams
+import bitio/bitwriter, bitio/bitreader
+import lzss/listpolyfill, lzss/lzssnode, lzss/lzsschain
+import huffman/huffmantree, huffman/huffmantreebuilder, huffman/huffmanencoder, huffman/huffmandecoder
+import lzsshuffman/lzsshuffmansymbol, lzsshuffman/lzsshuffmanstats, lzsshuffman/lzsshuffmanencoder, lzsshuffman/lzsshuffmandecoder
+
+suite "lzsshuffmansymbol":
+ test "isEndMarker":
+ check 'a'.Symbol.isEndMarker() == false
+ check endSymbol.isEndMarker()
+
+ test "isCharacter":
+ check 'a'.Symbol.isCharacter()
+ check endSymbol.isCharacter() == false
+ check 300.Symbol.isCharacter() == false
+
+ test "unpackLzssReference":
+ check unpackLzssReference(257.Symbol, 10) == lzssReference(3, 10)
+ check unpackLzssReference(300.Symbol, 10) == lzssReference(46, 10)
+
+ test "shiftLzssLength":
+ check shiftLzssLength(3) == 257'u16
+ check shiftLzssLength(10) == 264'u16
+
+suite "lzsshuffmanstats":
+ test "aggretateStats":
+ let chain = lzssChain([
+ lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+ lzssCharacter(3), lzssCharacter(4), lzssCharacter(5),
+ lzssReference(4, 6), lzssCharacter(0), lzssCharacter(1),
+ lzssReference(3, 8), lzssCharacter(5),
+ lzssReference(3, 3), lzssCharacter(5)])
+ let (symbolTable, positionTable) = chain.aggregateStats()
+ check symbolTable == newCountTable(concat(
+ repeat(0'u16, 2), repeat(1'u16, 2), repeat(2'u16, 1), repeat(3'u16, 1), repeat(4'u16, 1), repeat(5'u16, 3),
+ repeat(endSymbol.uint16, 1),
+ repeat(257'u16, 2), repeat(258'u16, 1)))
+ check positionTable == newCountTable(concat(
+ repeat(3'u16, 1), repeat(6'u16, 1), repeat(8'u16, 1)))
+
+suite "lzsshuffmanencoder":
+ test "writeChain (empty)":
+ let chain = lzssChain(newSeq[LzssNode]())
+ let symbolTree = huffmanLeaf(endSymbol.uint16)
+ let positionTree = huffmanLeaf(0'u16)
+ let stream = newStringStream()
+ defer: stream.close()
+ let bitWriter = stream.bitWriter()
+ writeChain(chain, symbolTree.encoder(uint16), positionTree.encoder(uint16), bitWriter)
+ bitWriter.flush()
+ stream.setPosition(0)
+ check stream.atEnd()
+
+ test "writeChain (minimal)":
+ let chain = lzssChain([
+ lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+ lzssReference(3, 3), lzssReference(3, 4)])
+ let symbolTree = huffmanBranch(
+ huffmanBranch(
+ huffmanLeaf(0'u16),
+ huffmanLeaf(1'u16)),
+ huffmanBranch(
+ huffmanLeaf(257'u16),
+ huffmanBranch(
+ huffmanLeaf(2'u16),
+ huffmanLeaf(256'u16))))
+ let positionTree = huffmanBranch(
+ huffmanLeaf(3'u16),
+ huffmanLeaf(4'u16))
+ let stream = newStringStream()
+ defer: stream.close()
+ let bitWriter = stream.bitWriter()
+ writeChain(chain, symbolTree.encoder(uint16), positionTree.encoder(uint16), bitWriter)
+ bitWriter.flush()
+ stream.setPosition(0)
+ let bitReader = stream.bitReader()
+ check bitReader.readBits(2, uint8) == 0b00'u8 # char 0
+ check bitReader.readBits(2, uint8) == 0b10'u8 # char 1
+ check bitReader.readBits(3, uint8) == 0b011'u8 # char 2
+ check bitReader.readBits(2, uint8) == 0b01'u8 # ref len 3
+ check bitReader.readBits(1, uint8) == 0b0'u8 # ref pos 3
+ check bitReader.readBits(2, uint8) == 0b01'u8 # ref len 3
+ check bitReader.readBits(1, uint8) == 0b1'u8 # ref pos 4
+ check bitReader.readBits(3, uint8) == 0b111'u8 # eof
+
+suite "lzsshuffmandecoder":
+ test "readChain (empty)":
+ let symbolTree = huffmanLeaf(endSymbol.uint16)
+ let positionTree = huffmanLeaf(0'u16)
+ let stream = newStringStream()
+ defer: stream.close()
+ stream.write(0'u8) # eof
+ stream.setPosition(0)
+ let bitReader = stream.bitReader()
+ let result = readChain(bitReader, symbolTree.decoder(), positionTree.decoder(), 32_000)
+ check toSeq(result.items).len == 0
+
+ test "readChain (minimal)":
+ let symbolTree = huffmanBranch(
+ huffmanBranch(
+ huffmanLeaf(0'u16),
+ huffmanLeaf(1'u16)),
+ huffmanBranch(
+ huffmanLeaf(257'u16),
+ huffmanBranch(
+ huffmanLeaf(2'u16),
+ huffmanLeaf(256'u16))))
+ let positionTree = huffmanBranch(
+ huffmanLeaf(3'u16),
+ huffmanLeaf(4'u16))
+ let stream = newStringStream()
+ defer: stream.close()
+ let bitWriter = stream.bitWriter()
+ bitWriter.writeBits(2, 0b00'u8)
+ bitWriter.writeBits(2, 0b10'u8)
+ bitWriter.writeBits(3, 0b011'u8)
+ bitWriter.writeBits(2, 0b01'u8)
+ bitWriter.writeBits(1, 0b0'u8)
+ bitWriter.writeBits(2, 0b01'u8)
+ bitWriter.writeBits(1, 0b1'u8)
+ bitWriter.writeBits(3, 0b111'u8)
+ bitWriter.flush()
+ stream.setPosition(0)
+ let bitReader = stream.bitReader()
+ let result = readChain(bitReader, symbolTree.decoder(), positionTree.decoder(), 32_000)
+ check toSeq(result.items) == [
+ lzssCharacter(0), lzssCharacter(1), lzssCharacter(2),
+ lzssReference(3, 3), lzssReference(3, 4)]
--
cgit v1.2.3