From b616e8f5773631945962d4b1256f8f2d575e0da1 Mon Sep 17 00:00:00 2001 From: pacien Date: Mon, 3 Dec 2018 18:16:04 +0100 Subject: optimise lzss prefix lookup with custom hashmap --- src/lzss/lzssencoder.nim | 27 +++++++++--------- src/lzss/matchring.nim | 37 +++++++++++++++++++++++++ src/lzss/matchtable.nim | 32 ++++++++++------------ tests/tlzss.nim | 71 ++++++++++++++++++++++++++++-------------------- 4 files changed, 107 insertions(+), 60 deletions(-) create mode 100644 src/lzss/matchring.nim diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim index 36e0c7e..72f5081 100644 --- a/src/lzss/lzssencoder.nim +++ b/src/lzss/lzssencoder.nim @@ -14,36 +14,37 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import matchtable, lzssnode, lzsschain +import matchring, matchtable, lzssnode, lzsschain const matchGroupLength* = 3 const maxRefByteLength = high(uint8).int + matchGroupLength -proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = - result = skipFirst +proc matchGroup(buf: openArray[uint8], startIndex: int): array[matchGroupLength, uint8] = + [buf[startIndex], buf[startIndex + 1], buf[startIndex + 2]] + +proc commonPrefixLength*(a, b: openArray[uint8], maxLength: int): int = let maxPrefixLength = min(min(a.len, b.len), maxLength) while result < maxPrefixLength and a[result] == b[result]: result += 1 -proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = - for startIndex in candidatePos: +proc longestPrefix*(candidatePos: MatchRing, searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = + for startIndex in candidatePos.items: let prefixLength = commonPrefixLength( - searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) + searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, maxRefByteLength) if prefixLength > result.length: result = (prefixLength, startIndex) if prefixLength >= maxRefByteLength: return -proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = +proc addGroups*(matchTable: var MatchTable, buf: openArray[uint8], fromPosIncl, toPosExcl: int) = for cursor in fromPosIncl..(toPosExcl - matchGroupLength): - let group = buffer[cursor..<(cursor + matchGroupLength)] - matchTable.addMatch(group, cursor) + matchTable.addMatch(buf.matchGroup(cursor), cursor) proc lzssEncode*(buf: openArray[uint8]): LzssChain = result = newSeqOfCap[LzssNode](buf.len) - let matchTable = initMatchTable(seq[uint8], int) + var matchTable = initMatchTable() var cursor = 0 while cursor < buf.len() - matchGroupLength: - let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) - let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) - if prefix.length > 0: + let probableMatches = matchTable.candidates(buf.matchGroup(cursor)) + let prefix = probableMatches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) + if prefix.length >= matchGroupLength: result.add(lzssReference(prefix.length, cursor - prefix.pos)) cursor += prefix.length else: diff --git a/src/lzss/matchring.nim b/src/lzss/matchring.nim new file mode 100644 index 0000000..a2d45f7 --- /dev/null +++ b/src/lzss/matchring.nim @@ -0,0 +1,37 @@ +# gzip-like LZSS compressor +# Copyright (C) 2018 Pacien TRAN-GIRARD +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +const matchLimit* = 4 + +type MatchRing* = object + offset, size: int + indices: array[matchLimit, int] + +proc initMatchRing*(): MatchRing = + MatchRing() + +proc addMatch*(ring: var MatchRing, index: int) = + if ring.size < matchLimit: + ring.indices[ring.size] = index + ring.size += 1 + else: + let ringIndex = (ring.offset + ring.size) mod matchLimit + ring.indices[ringIndex] = index + ring.offset = (ring.offset + 1) mod ring.indices.len + +iterator items*(ring: MatchRing): int {.closure.} = + for i in countdown(ring.size - 1, 0): + yield ring.indices[(ring.offset + i) mod ring.indices.len] diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim index cc04f49..879f47d 100644 --- a/src/lzss/matchtable.nim +++ b/src/lzss/matchtable.nim @@ -15,25 +15,23 @@ # along with this program. If not, see . import tables +import matchring -type MatchTable*[K, V] = ref object - matchLimit: int - table: TableRef[K, seq[V]] +const matchGroupLength = 3 +const hashShift = 5 +const tableHeight = 0b1 shl 15 -proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V], matchLimit = 5): MatchTable[K, V] = - MatchTable[K, V](matchLimit: matchLimit, table: newTable[K, seq[V]]()) +type MatchTable* = object + table: array[tableHeight, MatchRing] -proc len*[K, V](matchTable: MatchTable[K, V]): int = - matchTable.table.len +proc initMatchTable*(): MatchTable = + result = MatchTable() -proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): seq[V] = - if matchTable.table.hasKey(pattern): - matchTable.table[pattern] - else: - newSeqOfCap[V](matchTable.matchLimit) +proc hash(pattern: array[matchGroupLength, uint8]): int = + ((pattern[0].int shl (hashShift * 2)) xor (pattern[1].int shl hashShift) xor pattern[2].int) mod tableHeight -proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = - var matchList = matchTable.matchList(pattern) - if matchList.len >= matchTable.matchLimit: matchList.del(matchList.len - 1) - matchList.insert(value) - matchTable.table[pattern] = matchList +proc addMatch*(matchTable: var MatchTable, pattern: array[matchGroupLength, uint8], index: int) = + matchTable.table[hash(pattern)].addMatch(index) + +proc candidates*(matchTable: MatchTable, pattern: array[matchGroupLength, uint8]): MatchRing = + matchTable.table[hash(pattern)] diff --git a/tests/tlzss.nim b/tests/tlzss.nim index ad667e5..39a89c6 100644 --- a/tests/tlzss.nim +++ b/tests/tlzss.nim @@ -14,25 +14,36 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import unittest, sequtils, tables, lists -import lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder +import unittest, sequtils, tables, lists, algorithm +import lzss/matchring, lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder -suite "matchtable": - test "matchList": - let matchTable = initMatchTable(seq[int], int) - check matchTable.matchList(@[0, 1, 2]).len == 0 +suite "matchring": + test "items (empty)": + var ring = initMatchRing() + check toSeq(ring.items).len == 0 + + test "addMatch, items (partial)": + var ring = initMatchRing() + let items = [0, 1, 2] + for i in items: ring.addMatch(i) + check toSeq(ring.items) == items.reversed() + test "addMatch, items (rolling)": + var ring = initMatchRing() + let items = toSeq(0..13) + for i in items: ring.addMatch(i) + check toSeq(ring.items) == items[^matchLimit..