M.Hiroi's Home Page

Common Lisp Programming

お気楽 Common Lisp プログラミング入門 : 自作ライブラリ編

サンプルプログラム

[ Common Lisp | library ]

LZ78 符号

LZ 符号は Jacob Ziv 氏と Abraham Lempel 氏によって開発された「適応型辞書法」によるデータ圧縮アルゴリズムです。LZ 符号には多数のバリエーションが存在しますが、「LZ77 符号」と「LZ78 符号」の 2 つに大別されます。LZ77 符号は 1977 年に、LZ78 符号は 1978 年に発表されました。

両者の符号は互いに関係があるものの、辞書の作成方法はまったく異なっているので、混同しないように注意してください。LZ77 符号は「スライド辞書法」、 LZ78 符号は「動的辞書法」と呼ばれています。LZ78 符号の詳しい説明は以下に示す拙作のページをお読みください。

●LZW 符号

LZ77 符号と同様に、LZ78 符号にも多数のバリエーションが存在します。その中で広く用いられている符号が 1984 年 T. Welch 氏によって開発された「LZW 符号」です。LZW 符号ではスライド窓の使用をやめて、これまでに出現した記号列を辞書に登録することで大域的な辞書を作成します。

なお、今回のプログラムでは辞書に「トライ (trie)」を使っていますが、ハッシュ表や TST (Ternary Search Tree) を使うと、実行時間はもう少し速くなります。

;;;
;;; lzw.lisp : LZW 符号
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(require :utils)
(use-package '(:bitio :utils))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; 節
(defstruct node sym code (bros nil) (child nil))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c) (return xs))))

;;; 子を挿入する
(defun insert-child (nd c num)
  (setf (node-child nd)
        (make-node :sym c :code num :bros (node-child nd))))

;;; LZW 符号の符号化
(defun encode (fin fout)
  (let ((buff (vector-tabulate (lambda (x) (make-node :sym x :code x)) 256))
        (num 256)
        (p nil) (q nil))
    (setf p (aref buff (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (putbits fout dic-bits (node-code p))
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (putbits fout dic-bits (node-code p))
         (when (< num dic-size)
           (insert-child p c num)
           (incf num))
         (setf p (aref buff c)))
        (t
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; 記号列の出力
(defun output (buff n fout)
  (cond
   ((null (cdr (aref buff n)))
    (write-byte n fout)
    (values n 1))
   (t
    (multiple-value-bind
     (m i)
     (output buff (cdr (aref buff n)) fout)
     (write-byte (car (aref buff n)) fout)
     (values m (1+ i))))))

;;; LZW 符号の復号
(defun decode (fin fout size)
  (let ((buff (make-array dic-size :initial-element nil))
        (num 256)
        (p nil) (q nil)
        (c nil) (i nil))
    ;; 初期化
    (dotimes (x 256) (setf (aref buff x) (cons x nil)))
    ;; 最初の記号を読み込む
    (setf p (getbits fin dic-bits))
    (multiple-value-setq
     (c i)
     (output buff p fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (getbits fin dic-bits))
        (cond
         ((< q num)
          (multiple-value-setq
           (c i)
           (output buff q fout))
          (when (< num dic-size)
            (setf (aref buff num) (cons c p))
            (incf num)))
         (t
          (setf (aref buff num) (cons c p))
          (incf num)
          (multiple-value-setq
           (c i)
           (output buff q fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZW 符号 + CBT 符号

;;;
;;; lzw1.lisp : LZW 符号 + CBT 符号
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(require :utils)
(use-package '(:bitio :utils))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; 節
(defstruct node sym code (bros nil) (child nil))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c) (return xs))))

;;; 子を挿入する
(defun insert-child (nd c num)
  (setf (node-child nd)
        (make-node :sym c :code num :bros (node-child nd))))

;;; 符号語長
(defvar *code-count* nil)
(defvar *dic-bits* nil)

;;; 初期化
(defun init-dic-bits ()
  (setf *dic-bits* 9
        *code-count* 256))

;;; 更新
(defun update-dic-bits ()
  (incf *code-count*)
  (when (> *code-count* (1- (ash 1 *dic-bits*)))
    (incf *dic-bits*)))

;;; 辞書番号の符号化
(defun encode1 (fout n)
  (cond
   ((< *dic-bits* dic-bits)
    (cbt-encode fout n *code-count* *dic-bits*)
    (update-dic-bits))
   (t
    (putbits fout dic-bits n))))

;;; 辞書番号の復号
(defun decode1 (fin)
  (if (< *dic-bits* dic-bits)
      (prog1
          (cbt-decode fin *code-count* *dic-bits*)
        (update-dic-bits))
    (getbits fin dic-bits)))

;;; LZW 符号の符号化
(defun encode (fin fout)
  (let ((buff (vector-tabulate (lambda (x) (make-node :sym x :code x)) 256))
        (num 256)
        (p nil) (q nil))
    (init-dic-bits)
    (setf p (aref buff (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (encode1 fout (node-code p))
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (encode1 fout (node-code p))
         (when (< num dic-size)
           (insert-child p c num)
           (incf num))
         (setf p (aref buff c)))
        (t
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; 記号列の出力
(defun output (buff n fout)
  (cond
   ((null (cdr (aref buff n)))
    (write-byte n fout)
    (values n 1))
   (t
    (multiple-value-bind
     (m i)
     (output buff (cdr (aref buff n)) fout)
     (write-byte (car (aref buff n)) fout)
     (values m (1+ i))))))

;;; LZW 符号の復号
(defun decode (fin fout size)
  (let ((buff (make-array dic-size :initial-element nil))
        (num 256)
        (p nil) (q nil)
        (c nil) (i nil))
    ;; 初期化
    (dotimes (x 256) (setf (aref buff x) (cons x nil)))
    (init-dic-bits)
    ;; 最初の記号を読み込む
    (setf p (decode1 fin))
    (multiple-value-setq
     (c i)
     (output buff p fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (decode1 fin))
        (cond
         ((< q num)
          (multiple-value-setq
           (c i)
           (output buff q fout))
          (when (< num dic-size)
            (setf (aref buff num) (cons c p))
            (incf num)))
         (t
          (setf (aref buff num) (cons c p))
          (incf num)
          (multiple-value-setq
           (c i)
           (output buff q fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZW 符号 + BinaryRangeCoder

;;;
;;; lzwrc.lisp : LZW 符号 + RangeCoder
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :brc)
(require :utils)
(use-package '(:brc :rangecoder :utils))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; ファイルサイズの書き込み
(defun write-file-size (out size)
  (write-byte (logand (ash size -24) #xff) out)
  (write-byte (logand (ash size -16) #xff) out)
  (write-byte (logand (ash size -8) #xff) out)
  (write-byte (logand size #xff) out))

;;; ファイルサイズの読み込み
(defun read-file-size (in)
  (+ (ash (read-byte in) 24)
     (ash (read-byte in) 16)
     (ash (read-byte in) 8)
     (read-byte in)))

;;; 節
(defstruct node sym code (bros nil) (child nil))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c) (return xs))))

;;; 子を挿入する
(defun insert-child (nd c num)
  (setf (node-child nd)
        (make-node :sym c :code num :bros (node-child nd))))

;;; LZW 符号の符号化
(defun encode (rc fin)
  (let ((gm (initialize-gamma-model dic-size))
        (buff (vector-tabulate (lambda (x) (make-node :sym x :code x)) 256))
        (num 256)
        (p nil) (q nil))
    (setf p (aref buff (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (gamma-encode rc gm (node-code p) :limit #x200 :inc 4)
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (gamma-encode rc gm (node-code p) :limit #x200 :inc 4)
         (when (< num dic-size)
           (insert-child p c num)
           (incf num))
         (setf p (aref buff c)))
        (t
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-byte-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (write-file-size fout size)
          (when (plusp size)
            (call-with-range-encoder
             fout
             (lambda (rc)
               (encode rc fin))))))))))

;;; 記号列の出力
(defun output (buff n fout)
  (cond
   ((null (cdr (aref buff n)))
    (write-byte n fout)
    (values n 1))
   (t
    (multiple-value-bind
     (m i)
     (output buff (cdr (aref buff n)) fout)
     (write-byte (car (aref buff n)) fout)
     (values m (1+ i))))))

;;; LZW 符号の復号
(defun decode (rc fout size)
  (let ((gm (initialize-gamma-model dic-size))
        (buff (make-array dic-size :initial-element nil))
        (num 256)
        (p nil) (q nil)
        (c nil) (i nil))
    ;; 初期化
    (dotimes (x 256) (setf (aref buff x) (cons x nil)))
    ;; 最初の記号を読み込む
    (setf p (gamma-decode rc gm :limit #x200 :inc 4))
    (multiple-value-setq
     (c i)
     (output buff p fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (gamma-decode rc gm :limit #x200 :inc 4))
        (cond
         ((< q num)
          (multiple-value-setq
           (c i)
           (output buff q fout))
          (when (< num dic-size)
            (setf (aref buff num) (cons c p))
            (incf num)))
         (t
          (setf (aref buff num) (cons c p))
          (incf num)
          (multiple-value-setq
           (c i)
           (output buff q fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-byte-input-file
   in-file
   (lambda (fin)
     (let ((size (read-file-size fin)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (call-with-range-decoder
             fin
             (lambda (rc)
               (decode rc fout size))))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZT 符号

LZT 符号は 1987 年に Tischer 氏によって開発されました。おもな改良点は、辞書が満杯になった場合、長い間使われていない (最長時間未使用 : Least Recently Used) 語を取り除くことで辞書の空きスペースを作るところです。このような操作を「LRU スキーム」と呼びます。

LZT 符号は LRU スキームを行うため符号化と復号に時間がかかることになりますが、少ないメモリでも高い圧縮率を期待することができます。また、データの局所的な偏在も辞書に反映することが可能になります。

;;;
;;; lzt.lisp : LZT 符号
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(require :utils)
(use-package '(:bitio :utils))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; 節
(defstruct node sym code (bros nil) (child nil) (parent nil))

;;; 辞書
(defvar *dic-table* nil)
(defvar *dic-num* nil)

;;; キュー
(defvar *next* nil)
(defvar *prev* nil)

;;; 初期化
(defun init-dic ()
  (setf *dic-table* (make-array dic-size :initial-element nil))
  (dotimes (x 256)
    (setf (aref *dic-table* x) (make-node :sym x :code x)))
  (setf *dic-num* 256)
  (setf *next* (make-array dic-size :initial-element 0))
  (setf *prev* (make-array dic-size :initial-element 0)))

;;; 節を求める
(defun get-node (num) (aref *dic-table* num))

;;; 節をキューから削除
;;;  p <--> n <--> q
;;; => p <--> q
(defun delete-node (nd)
  (let* ((n (node-code nd))
         (p (aref *prev* n))
         (q (aref *next* n)))
    (setf (aref *next* p) q
          (aref *prev* q) p)))

;;; 後ろへ追加
;;; p <--> H (0) <-->
;;; => p <--> n <--> H (0) <-->
(defun push-back (nd)
  (let ((n (node-code nd))
        (p (aref *prev* 0)))
    (setf (aref *prev* 0) n
          (aref *next* p) n
          (aref *next* n) 0
          (aref *prev* n) p)))

;;; 節を後ろへ移動
(defun move-node (nd)
  (delete-node nd)
  (push-back nd))

;;; 葉を探す
(defun search-leaf ()
  (do ((n (aref *next* 0) (aref *next* n)))
      ((zerop n))
      (unless (node-child (get-node n))
        (return n))))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c)
        (return xs))))

;;; 子を削除する
(defun delete-child (nd)
  (let ((p (node-parent nd)))
    (if (eql (node-child p) nd)
        (setf (node-child p) (node-bros nd))
      (do ((xs (node-child p) (node-bros xs)))
          ((null xs))
          (when (eql (node-bros xs) nd)
            (setf (node-bros xs) (node-bros nd))
            (return))))))

;;; 子を挿入する
(defun insert-child (nd c)
  (cond
   ((< *dic-num* dic-size)
    (let ((new-nd (make-node :sym c :code *dic-num* :bros (node-child nd) :parent nd)))
      (push-back new-nd)
      (setf (node-child nd) new-nd
            (aref *dic-table* *dic-num*) new-nd)
      (incf *dic-num*)))
   (t
    ;; 葉を探す
    (let ((leaf (get-node (search-leaf))))
      (delete-child leaf)
      (setf (node-sym leaf) c
            (node-bros leaf) (node-child nd)
            (node-child leaf) nil
            (node-parent leaf) nd
            (node-child nd) leaf)
      (move-node leaf)))))

;;; 辞書番号のチェック
(defun check-code (n)
  (if (= *dic-num* dic-size)
      (/= (search-leaf) n)
    (< n *dic-num*)))

;;; LZT 符号の符号化
(defun encode (fin fout)
  (init-dic)
  (let ((p nil) (q nil))
    (setf p (get-node (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (putbits fout dic-bits (node-code p))
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (putbits fout dic-bits (node-code p))
         (insert-child p c)
         (setf p (get-node c)))
        (t
         (move-node q)
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; 記号列の出力
(defun output (nd fout)
  (cond
   ((null (node-parent nd))
    (write-byte (node-sym nd) fout)
    (values (node-sym nd) 1))
   (t
    (multiple-value-bind
     (m i)
     (output (node-parent nd) fout)
     (write-byte (node-sym nd) fout)
     (move-node nd)
     (values m (1+ i))))))

;;; LZT 符号の復号
(defun decode (fin fout size)
  (init-dic)
  (let ((p nil) (q nil)
        (c nil) (i nil))
    ;; 最初の記号を読み込む
    (setf p (getbits fin dic-bits))
    (multiple-value-setq
     (c i)
     (output (get-node p) fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (getbits fin dic-bits))
        (cond
         ((check-code q)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))
          (insert-child (get-node p) c))
         (t
          (insert-child (get-node p) c)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZT 符号 + CBT 符号

;;;
;;; lzt1.lisp : LZT 符号 + CBT 符号
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(require :utils)
(use-package '(:bitio :utils))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; 節
(defstruct node sym code (bros nil) (child nil) (parent nil))

;;; 辞書
(defvar *dic-table* nil)
(defvar *dic-num* nil)

;;; キュー
(defvar *next* nil)
(defvar *prev* nil)

;;; 初期化
(defun init-dic ()
  (setf *dic-table* (make-array dic-size :initial-element nil))
  (dotimes (x 256)
    (setf (aref *dic-table* x) (make-node :sym x :code x)))
  (setf *dic-num* 256)
  (setf *next* (make-array dic-size :initial-element 0))
  (setf *prev* (make-array dic-size :initial-element 0)))

;;; 節を求める
(defun get-node (num) (aref *dic-table* num))

;;; 節をキューから削除
;;;  p <--> n <--> q
;;; => p <--> q
(defun delete-node (nd)
  (let* ((n (node-code nd))
         (p (aref *prev* n))
         (q (aref *next* n)))
    (setf (aref *next* p) q
          (aref *prev* q) p)))

;;; 後ろへ追加
;;; p <--> H (0) <-->
;;; => p <--> n <--> H (0) <-->
(defun push-back (nd)
  (let ((n (node-code nd))
        (p (aref *prev* 0)))
    (setf (aref *prev* 0) n
          (aref *next* p) n
          (aref *next* n) 0
          (aref *prev* n) p)))

;;; 節を後ろへ移動
(defun move-node (nd)
  (delete-node nd)
  (push-back nd))

;;; 葉を探す
(defun search-leaf ()
  (do ((n (aref *next* 0) (aref *next* n)))
      ((zerop n))
      (unless (node-child (get-node n))
        (return n))))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c)
        (return xs))))

;;; 子を削除する
(defun delete-child (nd)
  (let ((p (node-parent nd)))
    (if (eql (node-child p) nd)
        (setf (node-child p) (node-bros nd))
      (do ((xs (node-child p) (node-bros xs)))
          ((null xs))
          (when (eql (node-bros xs) nd)
            (setf (node-bros xs) (node-bros nd))
            (return))))))

;;; 子を挿入する
(defun insert-child (nd c)
  (cond
   ((< *dic-num* dic-size)
    (let ((new-nd (make-node :sym c :code *dic-num* :bros (node-child nd) :parent nd)))
      (push-back new-nd)
      (setf (node-child nd) new-nd
            (aref *dic-table* *dic-num*) new-nd)
      (incf *dic-num*)))
   (t
    ;; 葉を探す
    (let ((leaf (get-node (search-leaf))))
      (delete-child leaf)
      (setf (node-sym leaf) c
            (node-bros leaf) (node-child nd)
            (node-child leaf) nil
            (node-parent leaf) nd
            (node-child nd) leaf)
      (move-node leaf)))))

;;; 辞書番号のチェック
(defun check-code (n)
  (if (= *dic-num* dic-size)
      (/= (search-leaf) n)
    (< n *dic-num*)))

;;; 符号語長
(defvar *code-count* nil)
(defvar *dic-bits* nil)

;;; 初期化
(defun init-dic-bits ()
  (setf *dic-bits* 9
        *code-count* 256))

;;; 更新
(defun update-dic-bits ()
  (incf *code-count*)
  (when (> *code-count* (1- (ash 1 *dic-bits*)))
    (incf *dic-bits*)))

;;; 辞書番号の符号化
(defun encode1 (fout n)
  (cond
   ((< *dic-bits* dic-bits)
    (cbt-encode fout n *code-count* *dic-bits*)
    (update-dic-bits))
   (t
    (putbits fout dic-bits n))))

;;; 辞書番号の復号
(defun decode1 (fin)
  (if (< *dic-bits* dic-bits)
      (prog1
          (cbt-decode fin *code-count* *dic-bits*)
        (update-dic-bits))
    (getbits fin dic-bits)))

;;; LZT 符号の符号化
(defun encode (fin fout)
  (init-dic)
  (init-dic-bits)
  (let ((p nil) (q nil))
    (setf p (get-node (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (encode1 fout (node-code p))
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (encode1 fout (node-code p))
         (insert-child p c)
         (setf p (get-node c)))
        (t
         (move-node q)
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; 記号列の出力
(defun output (nd fout)
  (cond
   ((null (node-parent nd))
    (write-byte (node-sym nd) fout)
    (values (node-sym nd) 1))
   (t
    (multiple-value-bind
     (m i)
     (output (node-parent nd) fout)
     (write-byte (node-sym nd) fout)
     (move-node nd)
     (values m (1+ i))))))

;;; LZT 符号の復号
(defun decode (fin fout size)
  (init-dic)
  (init-dic-bits)
  (let ((p nil) (q nil)
        (c nil) (i nil))
    ;; 最初の記号を読み込む
    (setf p (decode1 fin))
    (multiple-value-setq
     (c i)
     (output (get-node p) fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (decode1 fin))
        (cond
         ((check-code q)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))
          (insert-child (get-node p) c))
         (t
          (insert-child (get-node p) c)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZT 符号 + Binary RangeCoder

;;;
;;; lztrc.lisp : LZT 符号 + BinaryRangeCoder
;;;
;;; Copyright (c) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :brc)
(use-package '(:brc :rangecoder))

;;; 定数
(defconstant dic-bits 13)
(defconstant dic-size (ash 1 dic-bits))

;;; ファイルサイズの書き込み
(defun write-file-size (out size)
  (write-byte (logand (ash size -24) #xff) out)
  (write-byte (logand (ash size -16) #xff) out)
  (write-byte (logand (ash size -8) #xff) out)
  (write-byte (logand size #xff) out))

;;; ファイルサイズの読み込み
(defun read-file-size (in)
  (+ (ash (read-byte in) 24)
     (ash (read-byte in) 16)
     (ash (read-byte in) 8)
     (read-byte in)))

;;; 節
(defstruct node sym code (bros nil) (child nil) (parent nil))

;;; 辞書
(defvar *dic-table* nil)
(defvar *dic-num* nil)

;;; キュー
(defvar *next* nil)
(defvar *prev* nil)

;;; 初期化
(defun init-dic ()
  (setf *dic-table* (make-array dic-size :initial-element nil))
  (dotimes (x 256)
    (setf (aref *dic-table* x) (make-node :sym x :code x)))
  (setf *dic-num* 256)
  (setf *next* (make-array dic-size :initial-element 0))
  (setf *prev* (make-array dic-size :initial-element 0)))

;;; 節を求める
(defun get-node (num) (aref *dic-table* num))

;;; 節をキューから削除
;;;  p <--> n <--> q
;;; => p <--> q
(defun delete-node (nd)
  (let* ((n (node-code nd))
         (p (aref *prev* n))
         (q (aref *next* n)))
    (setf (aref *next* p) q
          (aref *prev* q) p)))

;;; 後ろへ追加
;;; p <--> H (0) <-->
;;; => p <--> n <--> H (0) <-->
(defun push-back (nd)
  (let ((n (node-code nd))
        (p (aref *prev* 0)))
    (setf (aref *prev* 0) n
          (aref *next* p) n
          (aref *next* n) 0
          (aref *prev* n) p)))

;;; 節を後ろへ移動
(defun move-node (nd)
  (delete-node nd)
  (push-back nd))

;;; 葉を探す
(defun search-leaf ()
  (do ((n (aref *next* 0) (aref *next* n)))
      ((zerop n))
      (unless (node-child (get-node n))
        (return n))))

;;; 子を探す
(defun search-child (nd c)
  (do ((xs (node-child nd) (node-bros xs)))
      ((null xs))
      (when (= (node-sym xs) c)
        (return xs))))

;;; 子を削除する
(defun delete-child (nd)
  (let ((p (node-parent nd)))
    (if (eql (node-child p) nd)
        (setf (node-child p) (node-bros nd))
      (do ((xs (node-child p) (node-bros xs)))
          ((null xs))
          (when (eql (node-bros xs) nd)
            (setf (node-bros xs) (node-bros nd))
            (return))))))

;;; 子を挿入する
(defun insert-child (nd c)
  (cond
   ((< *dic-num* dic-size)
    (let ((new-nd (make-node :sym c :code *dic-num* :bros (node-child nd) :parent nd)))
      (push-back new-nd)
      (setf (node-child nd) new-nd
            (aref *dic-table* *dic-num*) new-nd)
      (incf *dic-num*)))
   (t
    ;; 葉を探す
    (let ((leaf (get-node (search-leaf))))
      (delete-child leaf)
      (setf (node-sym leaf) c
            (node-bros leaf) (node-child nd)
            (node-child leaf) nil
            (node-parent leaf) nd
            (node-child nd) leaf)
      (move-node leaf)))))

;;; 辞書番号のチェック
(defun check-code (n)
  (if (= *dic-num* dic-size)
      (/= (search-leaf) n)
    (< n *dic-num*)))

;;; LZT 符号の符号化
(defun encode (rc fin)
  (init-dic)
  (let ((gm (initialize-gamma-model dic-size))
        (p nil)
        (q nil))
    (setf p (get-node (read-byte fin nil)))
    (loop
     (let ((c (read-byte fin nil)))
       (unless c
         (gamma-encode rc gm (node-code p) :limit #x200 :inc 4)
         (return))
       (setf q (search-child p c))
       (cond
        ((null q)
         (gamma-encode rc gm (node-code p) :limit #x200 :inc 4)
         (insert-child p c)
         (setf p (get-node c)))
        (t
         (move-node q)
         (setf p q)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-byte-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (write-file-size fout size)
          (when (plusp size)
            (call-with-range-encoder
             fout
             (lambda (rc)
               (encode rc fin))))))))))

;;; 記号列の出力
(defun output (nd fout)
  (cond
   ((null (node-parent nd))
    (write-byte (node-sym nd) fout)
    (values (node-sym nd) 1))
   (t
    (multiple-value-bind
     (m i)
     (output (node-parent nd) fout)
     (write-byte (node-sym nd) fout)
     (move-node nd)
     (values m (1+ i))))))

;;; LZT 符号の復号
(defun decode (rc fout size)
  (init-dic)
  (let ((gm (initialize-gamma-model dic-size))
        (p nil) (q nil)
        (c nil) (i nil))
    ;; 最初の記号を読み込む
    (setf p (gamma-decode rc gm :limit #x200 :inc 4))
    (multiple-value-setq
     (c i)
     (output (get-node p) fout))
    (decf size i)
    (do ()
        ((zerop size))
        (setf q (gamma-decode rc gm :limit #x200 :inc 4))
        (cond
         ((check-code q)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))
          (insert-child (get-node p) c))
         (t
          (insert-child (get-node p) c)
          (multiple-value-setq
           (c i)
           (output (get-node q) fout))))
        (setf p q)
        (decf size i))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-byte-input-file
   in-file
   (lambda (fin)
     (let ((size (read-file-size fin)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (call-with-range-decoder
             fin
             (lambda (rc)
               (decode rc fout size))))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

Copyright (C) 2023 Makoto Hiroi
All rights reserved.

[ Common Lisp | library ]