aboutsummaryrefslogtreecommitdiff
path: root/src/lzss/lzssencoder.nim
blob: 36e0c7e544c73648577234570b29eb123bd0de14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# gzip-like LZSS compressor
# Copyright (C) 2018  Pacien TRAN-GIRARD
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import matchtable, lzssnode, lzsschain

const
  # Number of consecutive bytes forming a "group": groups are the keys under
  # which buffer positions are recorded in the match table, and the number of
  # leading bytes taken as already equal when measuring a candidate match.
  matchGroupLength* = 3

  # Upper bound on the length of a single match: the largest uint8 value (255)
  # plus the group length, i.e. 258 bytes.
  # NOTE(review): presumably chosen so that (length - matchGroupLength) fits
  # in one byte when a reference is serialised -- confirm against the writer.
  maxRefByteLength = high(uint8).int + matchGroupLength

proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
  ## Returns the length of the common prefix of `a` and `b`, capped at
  ## `maxLength`.
  ##
  ## The first `skipFirst` elements are not compared: the caller vouches that
  ## they are already known to be equal, and the scan resumes right after them.
  ##
  ## The result is always within `0 .. min(a.len, b.len, maxLength)` (or `0`
  ## when that bound is negative). A `skipFirst` lying outside of that range is
  ## clamped to it rather than being returned as-is, so the reported length can
  ## never exceed what the inputs actually contain.
  let limit = max(0, min(min(a.len, b.len), maxLength))
  # Clamp the starting offset: without this, a `skipFirst` larger than the
  # bound would be returned unchanged, and a negative one would index out of
  # range in the loop below.
  result = max(0, min(skipFirst, limit))
  while result < limit and a[result] == b[result]: result += 1

proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
  ## Picks, among the start indices listed in `candidatePos`, the one whose
  ## content in `searchBuf` (from that index up to the end of `searchBuf`)
  ## shares the longest prefix with `lookAheadBuf`.
  ##
  ## The first `matchGroupLength` bytes of every candidate are taken as already
  ## matching and are not compared again; lengths are capped at
  ## `maxRefByteLength`.
  ##
  ## Returns the `(length, pos)` of the best candidate. On a tie the earliest
  ## candidate in `candidatePos` is kept. When `candidatePos` is empty the
  ## zero-initialised tuple `(0, 0)` is returned.
  let lastSearchIndex = searchBuf.len - 1
  for candidate in candidatePos:
    let matched = commonPrefixLength(
      searchBuf.toOpenArray(candidate, lastSearchIndex), lookAheadBuf,
      skipFirst = matchGroupLength, maxLength = maxRefByteLength)
    if matched > result.length:
      result.length = matched
      result.pos = candidate
    # A match of maximal length cannot be improved upon: stop scanning.
    if matched >= maxRefByteLength:
      break

proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
  ## Registers in `matchTable` every group of `matchGroupLength` consecutive
  ## bytes of `buffer` that lies entirely within `fromPosIncl ..< toPosExcl`,
  ## each one associated with the index of its first byte.
  ##
  ## Nothing is added when the range is shorter than one group.
  let lastGroupStart = toPosExcl - matchGroupLength
  for groupStart in fromPosIncl..lastGroupStart:
    let groupEnd = groupStart + matchGroupLength
    matchTable.addMatch(buffer[groupStart..<groupEnd], groupStart)

proc lzssEncode*(buf: openArray[uint8]): LzssChain =
  ## Encodes `buf` into a chain of LZSS nodes.
  ##
  ## The buffer is scanned from left to right. At each position, the group of
  ## `matchGroupLength` bytes under the cursor is looked up in a match table
  ## holding the positions of previously seen groups. If one of them yields a
  ## match, a reference node (length, backward distance) is emitted and the
  ## cursor jumps past the matched bytes; otherwise a single character node is
  ## emitted. Positions closer than `matchGroupLength` bytes to the end of the
  ## buffer are never looked up and are emitted as character nodes.
  result = newSeqOfCap[LzssNode](buf.len)
  let
    matchTable = initMatchTable(seq[uint8], int)
    lastIndex = buf.len - 1
    lookupEnd = buf.len - matchGroupLength
  var cursor = 0
  while cursor < lookupEnd:
    let group = buf[cursor..<(cursor + matchGroupLength)]
    let candidates = matchTable.matchList(group)
    # Search buffer: everything before the cursor; look-ahead: the rest.
    let prefix = longestPrefix(
      candidates,
      buf.toOpenArray(0, cursor - 1),
      buf.toOpenArray(cursor, lastIndex))
    if prefix.length > 0:
      result.add(lzssReference(prefix.length, cursor - prefix.pos))
      inc(cursor, prefix.length)
    else:
      result.add(lzssCharacter(buf[cursor]))
      inc(cursor)
    # `indexBase` is the pre-advance cursor after a reference, and the
    # post-advance cursor after a single character (prefix.length is then 0).
    # NOTE(review): after a reference the range below starts at a position
    # that looks to have been registered by the previous step already --
    # confirm that addMatch tolerates (or wants) the duplicate.
    let indexBase = cursor - prefix.length
    if indexBase >= matchGroupLength:
      matchTable.addGroups(buf, indexBase - matchGroupLength, cursor)
  # Trailing bytes that were not looked up are emitted verbatim.
  for tail in cursor..<buf.len:
    result.add(lzssCharacter(buf[tail]))