aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpacien2018-11-27 20:26:35 +0100
committerpacien2018-11-27 20:26:35 +0100
commit3d44208aaaeca516eb08a90c98635543cae2bd4d (patch)
tree1ec243c7286c95d2532eaf66ebfa28c2c7fdc713
parentd353e8312b59818cdae5771549c92c1dc6427c71 (diff)
downloadgziplike-3d44208aaaeca516eb08a90c98635543cae2bd4d.tar.gz
implement lzss encoding
-rw-r--r--src/lzsschain.nim36
-rw-r--r--src/lzssencoder.nim58
-rw-r--r--src/lzssnode.nim39
-rw-r--r--src/matchtable.nim32
-rw-r--r--src/polyfill.nim42
-rw-r--r--tests/tlzsschain.nim30
-rw-r--r--tests/tlzssencoder.nim62
-rw-r--r--tests/tlzssnode.nim26
-rw-r--r--tests/tmatchtable.nim35
-rw-r--r--tests/tpolyfill.nim27
10 files changed, 387 insertions, 0 deletions
diff --git a/src/lzsschain.nim b/src/lzsschain.nim
new file mode 100644
index 0000000..8203cb8
--- /dev/null
+++ b/src/lzsschain.nim
@@ -0,0 +1,36 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists, tables, sugar
18import polyfill, integers, lzssnode
19
20const maxChainByteLength = 32_000 * wordBitLength
21
22type LzssChain* =
23 SinglyLinkedList[LzssNode]
24
25proc lzssChain*(): LzssChain =
26 initSinglyLinkedList[LzssNode]()
27
28proc decode*(lzssChain: LzssChain): seq[uint8] =
29 result = newSeqOfCap[uint8](maxChainByteLength)
30 for node in lzssChain.items:
31 case node.kind:
32 of character:
33 result.add(node.character)
34 of reference:
35 let absolutePos = result.len - node.relativePos
36 result.add(result.toOpenArray(absolutePos, absolutePos + node.length - 1))
diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim
new file mode 100644
index 0000000..05f3a16
--- /dev/null
+++ b/src/lzssencoder.nim
@@ -0,0 +1,58 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists
18import polyfill, matchtable, lzssnode, lzsschain
19
20const matchGroupLength = 3
21const maxRefByteLength = high(uint8).int + matchGroupLength
22let emptySinglyLinkedList = initSinglyLinkedList[int]()
23
24proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
25 result = skipFirst
26 let maxPrefixLength = min(min(a.len, b.len), maxLength)
27 while result < maxPrefixLength and a[result] == b[result]: result += 1
28
29proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
30 for startIndex in candidatePos.items:
31 let prefixLength = commonPrefixLength(
32 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength)
33 if prefixLength > result.length: result = (prefixLength, startIndex)
34 if prefixLength >= maxRefByteLength: return
35
36proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
37 for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
38 let group = buffer[cursor..<(cursor + matchGroupLength)]
39 matchTable.addMatch(group, cursor)
40
41proc lzssEncode*(buf: openArray[uint8]): LzssChain =
42 result = initSinglyLinkedList[LzssNode]()
43 let matchTable = initMatchTable(seq[uint8], int)
44 var cursor = 0
45 while cursor < buf.len() - matchGroupLength:
46 let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
47 let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
48 if prefix.length > 0:
49 result.append(lzssReference(prefix.length, cursor - prefix.pos))
50 cursor += prefix.length
51 else:
52 result.append(lzssCharacter(buf[cursor]))
53 cursor += 1
54 if cursor - prefix.length >= matchGroupLength:
55 matchTable.addGroups(buf, cursor - prefix.length - matchGroupLength, cursor)
56 while cursor < buf.len:
57 result.append(lzssCharacter(buf[cursor]))
58 cursor += 1
diff --git a/src/lzssnode.nim b/src/lzssnode.nim
new file mode 100644
index 0000000..de5958d
--- /dev/null
+++ b/src/lzssnode.nim
@@ -0,0 +1,39 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17type LzssNodeKind* = enum
18 character,
19 reference
20
21type LzssNode* = object
22 case kind*: LzssNodeKind
23 of character:
24 character*: uint8
25 of reference:
26 length*: int
27 relativePos*: int
28
29proc lzssCharacter*(value: uint8): LzssNode =
30 LzssNode(kind: character, character: value)
31
32proc lzssReference*(length, relativePos: int): LzssNode =
33 LzssNode(kind: reference, length: length, relativePos: relativePos)
34
35proc `==`*(a, b: LzssNode): bool =
36 if a.kind != b.kind: return false
37 case a.kind:
38 of character: a.character == b.character
39 of reference: a.length == b.length and a.relativePos == b.relativePos
diff --git a/src/matchtable.nim b/src/matchtable.nim
new file mode 100644
index 0000000..5be652c
--- /dev/null
+++ b/src/matchtable.nim
@@ -0,0 +1,32 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import tables, lists
18import polyfill
19
20type MatchTable*[K, V] =
21 TableRef[K, SinglyLinkedList[V]]
22
23proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
24 newTable[K, SinglyLinkedList[V]]()
25
26proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
27 matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
28
29proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
30 var matchList = matchTable.matchList(pattern)
31 polyfill.prepend(matchList, value)
32 matchTable[pattern] = matchList
diff --git a/src/polyfill.nim b/src/polyfill.nim
new file mode 100644
index 0000000..b252953
--- /dev/null
+++ b/src/polyfill.nim
@@ -0,0 +1,42 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17import lists
18
19# https://github.com/nim-lang/Nim/pull/9805
20
21proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
22 ## prepends a node to `L`. Efficiency: O(1).
23 n.next = L.head
24 L.head = n
25 if L.tail == nil: L.tail = n
26
27proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
28 ## prepends a node to `L`. Efficiency: O(1).
29 polyfill.prepend(L, newSinglyLinkedNode(value))
30
31proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
32 ## appends a node `n` to `L`. Efficiency: O(1).
33 n.next = nil
34 if L.tail != nil:
35 assert(L.tail.next == nil)
36 L.tail.next = n
37 L.tail = n
38 if L.head == nil: L.head = n
39
40proc append*[T](L: var SinglyLinkedList[T], value: T) =
41 ## appends a value to `L`. Efficiency: O(1).
42 append(L, newSinglyLinkedNode(value))
diff --git a/tests/tlzsschain.nim b/tests/tlzsschain.nim
new file mode 100644
index 0000000..241a0f1
--- /dev/null
+++ b/tests/tlzsschain.nim
@@ -0,0 +1,30 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License