aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpacien2018-12-03 18:16:04 +0100
committerpacien2018-12-03 18:16:04 +0100
commitb616e8f5773631945962d4b1256f8f2d575e0da1 (patch)
treec3b2f731ad3ef692ff9edb133f852d190e07ad2f
parent200cb18aafa7f62fdca37ae8952b5e9c5bb3f25f (diff)
downloadgziplike-b616e8f5773631945962d4b1256f8f2d575e0da1.tar.gz
optimise lzss prefix lookup with custom hashmap
-rw-r--r--src/lzss/lzssencoder.nim27
-rw-r--r--src/lzss/matchring.nim37
-rw-r--r--src/lzss/matchtable.nim32
-rw-r--r--tests/tlzss.nim71
4 files changed, 107 insertions, 60 deletions
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
index 36e0c7e..72f5081 100644
--- a/src/lzss/lzssencoder.nim
+++ b/src/lzss/lzssencoder.nim
@@ -14,36 +14,37 @@
14# You should have received a copy of the GNU Affero General Public License 14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>. 15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 16
17import matchtable, lzssnode, lzsschain 17import matchring, matchtable, lzssnode, lzsschain
18 18
19const matchGroupLength* = 3 19const matchGroupLength* = 3
20const maxRefByteLength = high(uint8).int + matchGroupLength 20const maxRefByteLength = high(uint8).int + matchGroupLength
21 21
22proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = 22proc matchGroup(buf: openArray[uint8], startIndex: int): array[matchGroupLength, uint8] =
23 result = skipFirst 23 [buf[startIndex], buf[startIndex + 1], buf[startIndex + 2]]
24
25proc commonPrefixLength*(a, b: openArray[uint8], maxLength: int): int =
24 let maxPrefixLength = min(min(a.len, b.len), maxLength) 26 let maxPrefixLength = min(min(a.len, b.len), maxLength)
25 while result < maxPrefixLength and a[result] == b[result]: result += 1 27 while result < maxPrefixLength and a[result] == b[result]: result += 1
26 28
27proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = 29proc longestPrefix*(candidatePos: MatchRing, searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
28 for startIndex in candidatePos: 30 for startIndex in candidatePos.items:
29 let prefixLength = commonPrefixLength( 31 let prefixLength = commonPrefixLength(
30 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) 32 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, maxRefByteLength)
31 if prefixLength > result.length: result = (prefixLength, startIndex) 33 if prefixLength > result.length: result = (prefixLength, startIndex)
32 if prefixLength >= maxRefByteLength: return 34 if prefixLength >= maxRefByteLength: return
33 35
34proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = 36proc addGroups*(matchTable: var MatchTable, buf: openArray[uint8], fromPosIncl, toPosExcl: int) =
35 for cursor in fromPosIncl..(toPosExcl - matchGroupLength): 37 for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
36 let group = buffer[cursor..<(cursor + matchGroupLength)] 38 matchTable.addMatch(buf.matchGroup(cursor), cursor)
37 matchTable.addMatch(group, cursor)
38 39
39proc lzssEncode*(buf: openArray[uint8]): LzssChain = 40proc lzssEncode*(buf: openArray[uint8]): LzssChain =
40 result = newSeqOfCap[LzssNode](buf.len) 41 result = newSeqOfCap[LzssNode](buf.len)
41 let matchTable = initMatchTable(seq[uint8], int) 42 var matchTable = initMatchTable()
42 var cursor = 0 43 var cursor = 0
43 while cursor < buf.len() - matchGroupLength: 44 while cursor < buf.len() - matchGroupLength:
44 let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) 45 let probableMatches = matchTable.candidates(buf.matchGroup(cursor))
45 let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) 46 let prefix = probableMatches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
46 if prefix.length > 0: 47 if prefix.length >= matchGroupLength:
47 result.add(lzssReference(prefix.length, cursor - prefix.pos)) 48 result.add(lzssReference(prefix.length, cursor - prefix.pos))
48 cursor += prefix.length 49 cursor += prefix.length
49 else: 50 else:
diff --git a/src/lzss/matchring.nim b/src/lzss/matchring.nim
new file mode 100644
index 0000000..a2d45f7
--- /dev/null
+++ b/src/lzss/matchring.nim
@@ -0,0 +1,37 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17const matchLimit* = 4
18
19type MatchRing* = object
20 offset, size: int
21 indices: array[matchLimit, int]
22
23proc initMatchRing*(): MatchRing =
24 MatchRing()
25
26proc addMatch*(ring: var MatchRing, index: int) =
27 if ring.size < matchLimit:
28 ring.indices[ring.size] = index
29 ring.size += 1
30 else:
31 let ringIndex = (ring.offset + ring.size) mod matchLimit
32 ring.indices[ringIndex] = index
33 ring.offset = (ring.offset + 1) mod ring.indices.len
34
35iterator items*(ring: MatchRing): int {.closure.} =
36 for i in countdown(ring.size - 1, 0):
37 yield ring.indices[(ring.offset + i) mod ring.indices.len]
diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim
index cc04f49..879f47d 100644
--- a/src/lzss/matchtable.nim
+++ b/src/lzss/matchtable.nim
@@ -15,25 +15,23 @@
15# along with this program. If not, see <https://www.gnu.org/licenses/>. 15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 16
17import tables 17import tables
18import matchring
18 19
19type MatchTable*[K, V] = ref object 20const matchGroupLength = 3
20 matchLimit: int 21const hashShift = 5
21 table: TableRef[K, seq[V]] 22const tableHeight = 0b1 shl 15
22 23
23proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V], matchLimit = 5): MatchTable[K, V] = 24type MatchTable* = object
24 MatchTable[K, V](matchLimit: matchLimit, table: newTable[K, seq[V]]()) 25 table: array[tableHeight, MatchRing]
25 26
26proc len*[K, V](matchTable: MatchTable[K, V]): int = 27proc initMatchTable*(): MatchTable =
27 matchTable.table.len 28 result = MatchTable()
28 29
29proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): seq[V] = 30proc hash(pattern: array[matchGroupLength, uint8]): int =
30 if matchTable.table.hasKey(pattern): 31 ((pattern[0].int shl (hashShift * 2)) xor (pattern[1].int shl hashShift) xor pattern[2].int) mod tableHeight
31 matchTable.table[pattern]
32 else:
33 newSeqOfCap[V](matchTable.matchLimit)
34 32
35proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = 33proc addMatch*(matchTable: var MatchTable, pattern: array[matchGroupLength, uint8], index: int) =
36 var matchList = matchTable.matchList(pattern) 34 matchTable.table[hash(pattern)].addMatch(index)
37 if matchList.len >= matchTable.matchLimit: matchList.del(matchList.len - 1) 35
38 matchList.insert(value) 36proc candidates*(matchTable: MatchTable, pattern: array[matchGroupLength, uint8]): MatchRing =
39 matchTable.table[pattern] = matchList 37 matchTable.table[hash(pattern)]
diff --git a/tests/tlzss.nim b/tests/tlzss.nim
index ad667e5..39a89c6 100644
--- a/tests/tlzss.nim
+++ b/tests/tlzss.nim
@@ -14,25 +14,36 @@
14# You should have received a copy of the GNU Affero General Public License 14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>. 15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 16
17import unittest, sequtils, tables, lists 17import unittest, sequtils, tables, lists, algorithm
18import lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder 18import lzss/matchring, lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder
19 19
20suite "matchtable": 20suite "matchring":
21 test "matchList": 21 test "items (empty)":
22 let matchTable = initMatchTable(seq[int], int) 22 var ring = initMatchRing()
23 check matchTable.matchList(@[0, 1, 2]).len == 0 23 check toSeq(ring.items).len == 0
24
25 test "addMatch, items (partial)":
26 var ring = initMatchRing()
27 let items = [0, 1, 2]
28 for i in items: ring.addMatch(i)
29 check toSeq(ring.items) == items.reversed()
24 30
31 test "addMatch, items (rolling)":
32 var ring = initMatchRing()
33 let items = toSeq(0..13)
34 for i in items: ring.addMatch(i)
35 check toSeq(ring.items) == items[^matchLimit..<items.len].reversed()
36
37suite "matchtable":
25 test "addMatch": 38 test "addMatch":
26 let matchTable = initMatchTable(seq[int], int) 39 var matchTable = initMatchTable()
27 matchTable.addMatch(@[0, 1, 2], 42) 40 matchTable.addMatch([0'u8, 1, 2], 42)
28 matchTable.addMatch(@[2, 1, 0], 24) 41 matchTable.addMatch([2'u8, 1, 0], 24)
29 check matchTable.len == 2 42 check toSeq(matchTable.candidates([0'u8, 1, 2]).items) == [42]
30 check matchTable.matchList(@[0, 1, 2]) == [42] 43 check toSeq(matchTable.candidates([2'u8, 1, 0]).items) == [24]
31 check matchTable.matchList(@[2, 1, 0]) == [24] 44 matchTable.addMatch([0'u8, 1, 2], 1337)
32 matchTable.addMatch(@[0, 1, 2], 1337) 45 check toSeq(matchTable.candidates([0'u8, 1, 2]).items) == [1337, 42]
33 check matchTable.len == 2 46 check toSeq(matchTable.candidates([2'u8, 1, 0]).items) == [24]
34 check matchTable.matchList(@[0, 1, 2]) == [1337, 42]
35 check matchTable.matchList(@[2, 1, 0]) == [24]
36 47
37suite "lzssnode": 48suite "lzssnode":
38 test "equality": 49 test "equality":
@@ -54,11 +65,11 @@ suite "lzsschain":
54 65
55suite "lzssencoder": 66suite "lzssencoder":
56 test "commonPrefixLength": 67 test "commonPrefixLength":
57 check commonPrefixLength([], [], 0, 10) == 0 68 check commonPrefixLength([], [], 10) == 0
58 check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 0, 10) == 2 69 check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 10) == 2
59 check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 1, 10) == 2 70 check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 10) == 2
60 check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 1, 10) == 2 71 check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 10) == 2
61 check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 1, 3) == 3 72 check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 3) == 3
62 73
63 test "longestPrefix": 74 test "longestPrefix":
64 let buffer = [ 75 let buffer = [
@@ -67,22 +78,22 @@ suite "lzssencoder":
67 0, 1, 2, 78 0, 1, 2,
68 0, 1, 2, 3, 4] 79 0, 1, 2, 3, 4]
69 var candidatePos = [0, 4, 8] 80 var candidatePos = [0, 4, 8]
70 let result = longestPrefix(candidatePos, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1)) 81 var matchRing = initMatchRing()
82 for pos in candidatePos: matchRing.addMatch(pos)
83 let result = longestPrefix(matchRing, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1))
71 check result.pos == 4 84 check result.pos == 4
72 check result.length == 4 85 check result.length == 4
73