From b616e8f5773631945962d4b1256f8f2d575e0da1 Mon Sep 17 00:00:00 2001
From: pacien
Date: Mon, 3 Dec 2018 18:16:04 +0100
Subject: optimise lzss prefix lookup with custom hashmap
---
src/lzss/lzssencoder.nim | 27 +++++++++---------
src/lzss/matchring.nim | 37 +++++++++++++++++++++++++
src/lzss/matchtable.nim | 32 ++++++++++------------
tests/tlzss.nim | 71 ++++++++++++++++++++++++++++--------------------
4 files changed, 107 insertions(+), 60 deletions(-)
create mode 100644 src/lzss/matchring.nim
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
index 36e0c7e..72f5081 100644
--- a/src/lzss/lzssencoder.nim
+++ b/src/lzss/lzssencoder.nim
@@ -14,36 +14,37 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import matchtable, lzssnode, lzsschain
+import matchring, matchtable, lzssnode, lzsschain
const matchGroupLength* = 3
const maxRefByteLength = high(uint8).int + matchGroupLength
-proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
- result = skipFirst
+proc matchGroup(buf: openArray[uint8], startIndex: int): array[matchGroupLength, uint8] =  # builds the fixed-size lookup key from consecutive bytes of buf
+ [buf[startIndex], buf[startIndex + 1], buf[startIndex + 2]]  # reads startIndex..startIndex+2; buf must hold at least three bytes from startIndex
+
+proc commonPrefixLength*(a, b: openArray[uint8], maxLength: int): int =  # number of equal leading elements of a and b, capped by maxLength and by the shorter input; counts up from the zero-initialised result
let maxPrefixLength = min(min(a.len, b.len), maxLength)
while result < maxPrefixLength and a[result] == b[result]: result += 1
-proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
- for startIndex in candidatePos:
+proc longestPrefix*(candidatePos: MatchRing, searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =  # returns the longest common prefix found and the searchBuf start index it came from; (0, 0) if no candidate shares any byte
+ for startIndex in candidatePos.items:  # candidates are visited in the order the ring iterator yields them
let prefixLength = commonPrefixLength(
- searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength)
+ searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, maxRefByteLength)  # compares from startIndex to the end of searchBuf, at most maxRefByteLength bytes
if prefixLength > result.length: result = (prefixLength, startIndex)
if prefixLength >= maxRefByteLength: return
-proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
+proc addGroups*(matchTable: var MatchTable, buf: openArray[uint8], fromPosIncl, toPosExcl: int) =  # records one table entry per cursor from fromPosIncl through toPosExcl - matchGroupLength (inclusive)
for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
- let group = buffer[cursor..<(cursor + matchGroupLength)]
- matchTable.addMatch(group, cursor)
+ matchTable.addMatch(buf.matchGroup(cursor), cursor)  # key: the bytes starting at cursor; stored value: cursor itself
proc lzssEncode*(buf: openArray[uint8]): LzssChain =
result = newSeqOfCap[LzssNode](buf.len)
- let matchTable = initMatchTable(seq[uint8], int)
+ var matchTable = initMatchTable()
var cursor = 0
while cursor < buf.len() - matchGroupLength:
- let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
- let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
- if prefix.length > 0:
+ let probableMatches = matchTable.candidates(buf.matchGroup(cursor))
+ let prefix = probableMatches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
+ if prefix.length >= matchGroupLength:
result.add(lzssReference(prefix.length, cursor - prefix.pos))
cursor += prefix.length
else:
diff --git a/src/lzss/matchring.nim b/src/lzss/matchring.nim
new file mode 100644
index 0000000..a2d45f7
--- /dev/null
+++ b/src/lzss/matchring.nim
@@ -0,0 +1,37 @@
+# gzip-like LZSS compressor
+# Copyright (C) 2018 Pacien TRAN-GIRARD
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+const matchLimit* = 4  # capacity of a MatchRing: the number of indices it retains
+
+type MatchRing* = object  # fixed-capacity ring of indices; once full, adding overwrites the oldest entry
+ offset, size: int  # offset: slot holding the oldest stored entry; size: number of stored entries
+ indices: array[matchLimit, int]  # backing storage for the stored indices
+
+proc initMatchRing*(): MatchRing =  # returns an empty ring
+ MatchRing()  # no fields are set explicitly, so offset and size start at their zero defaults
+
+proc addMatch*(ring: var MatchRing, index: int) =  # stores index in the ring, replacing the oldest entry when the ring is full
+ if ring.size < matchLimit:  # still room: fill the slots in order
+ ring.indices[ring.size] = index
+ ring.size += 1
+ else:  # full: overwrite one slot and rotate
+ let ringIndex = (ring.offset + ring.size) mod matchLimit  # with size == matchLimit this reduces to ring.offset
+ ring.indices[ringIndex] = index
+ ring.offset = (ring.offset + 1) mod ring.indices.len  # the next slot now holds the oldest entry
+
+iterator items*(ring: MatchRing): int {.closure.} =  # yields the stored indices, most recently added first
+ for i in countdown(ring.size - 1, 0):  # walk logical positions from last to first
+ yield ring.indices[(ring.offset + i) mod ring.indices.len]  # map the logical position to its physical slot, wrapping around
diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim
index cc04f49..879f47d 100644
--- a/src/lzss/matchtable.nim
+++ b/src/lzss/matchtable.nim
@@ -15,25 +15,23 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import tables
+import matchring
-type MatchTable*[K, V] = ref object
- matchLimit: int
- table: TableRef[K, seq[V]]
+const matchGroupLength = 3  # number of bytes in a lookup pattern
+const hashShift = 5  # left shift, in bits, applied between successive pattern bytes by hash
+const tableHeight = 0b1 shl 15  # number of buckets in the table (2^15 = 32768)
-proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V], matchLimit = 5): MatchTable[K, V] =
- MatchTable[K, V](matchLimit: matchLimit, table: newTable[K, seq[V]]())
+type MatchTable* = object  # fixed-size table of match rings, indexed by the hash of a byte pattern
+ table: array[tableHeight, MatchRing]  # one ring per hash value; patterns with equal hashes share a ring
-proc len*[K, V](matchTable: MatchTable[K, V]): int =
- matchTable.table.len
+proc initMatchTable*(): MatchTable =  # returns a table with no recorded matches
+ result = MatchTable()  # default construction; no field is set explicitly
-proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): seq[V] =
- if matchTable.table.hasKey(pattern):
- matchTable.table[pattern]
- else:
- newSeqOfCap[V](matchTable.matchLimit)
+proc hash(pattern: array[matchGroupLength, uint8]): int =  # maps a pattern to its bucket index in 0 ..< tableHeight
+ ((pattern[0].int shl (hashShift * 2)) xor (pattern[1].int shl hashShift) xor pattern[2].int) mod tableHeight  # shift the bytes by 10, 5 and 0 bits, xor them, then reduce modulo tableHeight
-proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
- var matchList = matchTable.matchList(pattern)
- if matchList.len >= matchTable.matchLimit: matchList.del(matchList.len - 1)
- matchList.insert(value)
- matchTable.table[pattern] = matchList
+proc addMatch*(matchTable: var MatchTable, pattern: array[matchGroupLength, uint8], index: int) =  # records index in the ring of the bucket that pattern hashes to
+ matchTable.table[hash(pattern)].addMatch(index)  # delegates to the ring's own addMatch
+
+proc candidates*(matchTable: MatchTable, pattern: array[matchGroupLength, uint8]): MatchRing =  # returns the ring stored for pattern's hash; entries are keyed by hash only, so they may stem from other patterns
+ matchTable.table[hash(pattern)]  # plain bucket lookup; the pattern bytes themselves are not compared here
diff --git a/tests/tlzss.nim b/tests/tlzss.nim
index ad667e5..39a89c6 100644
--- a/tests/tlzss.nim
+++ b/tests/tlzss.nim
@@ -14,25 +14,36 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import unittest, sequtils, tables, lists
-import lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder
+import unittest, sequtils, tables, lists, algorithm
+import lzss/matchring, lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder
-suite "matchtable":
- test "matchList":
- let matchTable = initMatchTable(seq[int], int)
- check matchTable.matchList(@[0, 1, 2]).len == 0
+suite "matchring":
+ test "items (empty)":
+ var ring = initMatchRing()
+ check toSeq(ring.items).len == 0
+
+ test "addMatch, items (partial)":
+ var ring = initMatchRing()
+ let items = [0, 1, 2]
+ for i in items: ring.addMatch(i)
+ check toSeq(ring.items) == items.reversed()
+ test "addMatch, items (rolling)":
+ var ring = initMatchRing()
+ let items = toSeq(0..13)
+ for i in items: ring.addMatch(i)
+ check toSeq(ring.items) == items[^matchLimit..