M.Hiroi's Home Page

Common Lisp Programming

お気楽 Common Lisp プログラミング入門 : 自作ライブラリ編

サンプルプログラム

[ Common Lisp | library ]

LZ77 符号

LZ 符号は Jacob Ziv 氏と Abraham Lempel 氏によって開発された「適応型辞書法」によるデータ圧縮アルゴリズムです。LZ 符号には多数のバリエーションが存在しますが、「LZ77 符号」と「LZ78 符号」の 2 つに大別されます。LZ77 符号は 1977 年に、LZ78 符号は 1978 年に発表されました。

両者の符号は互いに関係があるものの、辞書の作成方法はまったく異なっているので、混同しないように注意してください。LZ77 符号は「スライド辞書法」、 LZ78 符号は「動的辞書法」と呼ばれています。LZ77 符号の詳しい説明は以下に示す拙作のページをお読みください。

●LZSS 符号

;;;
;;; lzss.py : LZSS coding
;;;
;;; Copyright (C) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(use-package :bitio)

;;; 定数
(defconstant min-len 3)
(defconstant max-len 18)
(defconstant len-bits 4)
(defconstant pos-bits 13)
(defconstant slide-size (ash 1 pos-bits))
(defconstant slide-size2 (* slide-size 2))
(defconstant slide-limit (+ slide-size2 max-len))

;;; スライド窓
(defstruct slide-window stream buff ht data-size)

(defun initialize-slide-window (s)
  (let* ((buff (make-array slide-limit :element-type 'unsigned-byte))
         (data-size (read-sequence buff s)))
    (make-slide-window
     :stream s
     :buff buff
     :ht (make-hash-table)
     :data-size data-size)))

;;; ハッシュ値を求める
(defun hash-value (sw rp)
  (do ((buff (slide-window-buff sw))
       (value 0)
       (i 0 (1+ i)))
      ((>= i min-len) value)
      (setf value
            (+ (* value 256) (aref buff (+ rp i))))))

;;; データ入力
(defun read-data (sw start size)
  (read-sequence
   (slide-window-buff sw)
   (slide-window-stream sw)
   :start start
   :end (+ start size)))

;;; データ移動
(defun move-data (sw to from size)
  (let ((buff (slide-window-buff sw)))
    (dotimes (n size)
      (setf (aref buff (+ to n)) (aref buff (+ from n))))))

;;; スライド窓の更新
(defun update-slide (sw rp)
  (cond
   ((< (slide-window-data-size sw) slide-limit)
    ;; EOF
    rp)
   (t
    ;; buffer update
    (move-data sw 0 slide-size (+ slide-size max-len))
    (setf (slide-window-data-size sw)
          (read-data sw (+ slide-size max-len) slide-size))
    ;; hash update
    (maphash
     (lambda (k v)
       (if (< (car v) slide-size)
           (remhash k (slide-window-ht sw))
         (do ((xs v (cdr xs)))
             ((null xs))
             (decf (car xs) slide-size)
             (when (and (consp (cdr xs))
                        (< (cadr xs) slide-size))
               (setf (cdr xs) nil)))))
     (slide-window-ht sw))
    (- rp slide-size))))

;;; データの挿入
(defun insert-data (sw rp)
  (let ((value (hash-value sw rp))
        (ht (slide-window-ht sw)))
    (if (gethash value ht)
        (push rp (gethash value ht))
      (setf (gethash value ht) (list rp)))))

;;; 最長一致列の探索
(defun longest-match (buff s1 s2 &key (start 0) (limit max-len))
  (do ((i start (1+ i)))
      ((>= i limit) i)
      (unless (= (aref buff (+ s1 i)) (aref buff (+ s2 i)))
        (return i))))

(defun search-data (sw rp)
  (let ((buff (slide-window-buff sw))
        (value (hash-value sw rp))
        (low-limit (- rp slide-size))
        (match-len 0)
        (match-pos 0))
    (do ((xs (gethash value (slide-window-ht sw)) (cdr xs)))
        ((or (null xs)
             (< (car xs) low-limit)))
        (when (and (< (+ rp (max min-len match-len)) (slide-window-data-size sw))
                   (= (aref buff (+ rp match-len))
                      (aref buff (+ (car xs) match-len)))
          (let ((n (longest-match buff rp (car xs) :start 3)))
            (when (< match-len n)
              (setf match-len n
                    match-pos (car xs))
              (when (= n max-len)
                (return)))))))
    ;; データの終端をチェック
    (when (>= match-len (- (slide-window-data-size sw) rp))
      (setf match-len (- (slide-window-data-size sw) rp)))
    (values match-len match-pos)))

;;; LZSS 符号 : 符号化
(defun encode (fin fout)
  (do ((sw (initialize-slide-window fin))
       (rp 0))
      ((>= rp (slide-window-data-size sw)))
      (multiple-value-bind
       (match-len match-pos)
       (search-data sw rp)
       (cond
        ((< match-len min-len)
         (setf match-len 1)
         (putbit fout 0)
         (putbits fout 8 (aref (slide-window-buff sw) rp)))
        (t
         (putbit fout 1)
         (putbits fout len-bits (- match-len min-len))
         (putbits fout pos-bits (- rp match-pos 1))))
       ;;
       (do ((n match-len (1- n)))
           ((zerop n))
           (insert-data sw rp)
           (incf rp)
           (when (>= rp slide-size2)
             (setf rp (update-slide sw rp)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; LZSS 符号 : 復号
(defun decode (fin fout size)
  (do ((buff (make-array (ash 1 pos-bits) :element-type 'unsigned-byte))
       (rp 0))
      ((zerop size))
      (cond
       ((= (getbit fin) 1)
        (let ((num (+ (getbits fin len-bits) min-len))
              (pos (- rp (1+ (getbits fin pos-bits)))))
          (when (minusp pos)
            (incf pos (length buff)))
          (do ((num num (1- num)))
              ((zerop num))
              (let ((c (aref buff pos)))
                (write-byte c fout)
                (setf (aref buff rp) c)
                (incf pos)
                (incf rp)
                (when (>= pos (length buff))
                  (setf pos 0))
                (when (>= rp (length buff))
                  (setf rp 0))))
          (decf size num)))
       (t
        (let ((c (getbits fin 8)))
          (write-byte c fout)
          (setf (aref buff rp) c)
          (incf rp)
          (when (>= rp (length buff))
            (setf rp 0)))
        (decf size)))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZB 符号

;;;
;;; lzb.py : LZB coding
;;;
;;; Copyright (C) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :bitio)
(use-package :bitio)

;;; 定数
(defconstant min-len 3)
(defconstant max-len 256)
(defconstant pos-bits 16)                   ; 13 (8k), 15 (32k), 16 (64k)
(defconstant slide-size (ash 1 pos-bits))
(defconstant slide-size2 (* slide-size 2))
(defconstant slide-limit (+ slide-size2 max-len))

;;;
;;; 距離の符号化 (CBT 符号)
;;;
(defvar *pos-bits* 0)

;;; 更新
(defun update-pos-bits (rp)
  (when (< *pos-bits* pos-bits)
    (do ()
        ((<= rp (1- (ash 1 *pos-bits*))))
        (incf *pos-bits*))))

;;; 符号化
(defun pos-encode (fout rp pos)
  (if (< 1 *pos-bits* pos-bits)
      (cbt-encode fout pos rp *pos-bits*)
    (putbits fout pos-bits pos)))

;;; 復号
(defun pos-decode (fin rp)
  (if (< 1 *pos-bits* pos-bits)
      (cbt-decode fin rp *pos-bits*)
    (getbits fin pos-bits)))

;;; スライド窓
(defstruct slide-window stream buff ht data-size)

(defun initialize-slide-window (s)
  (let* ((buff (make-array slide-limit :element-type 'unsigned-byte))
         (data-size (read-sequence buff s)))
    (make-slide-window
     :stream s
     :buff buff
     :ht (make-hash-table)
     :data-size data-size)))

;;; ハッシュ値を求める
(defun hash-value (sw rp)
  (do ((buff (slide-window-buff sw))
       (value 0)
       (i 0 (1+ i)))
      ((>= i min-len) value)
      (setf value
            (+ (* value 256) (aref buff (+ rp i))))))

;;; データ入力
(defun read-data (sw start size)
  (read-sequence
   (slide-window-buff sw)
   (slide-window-stream sw)
   :start start
   :end (+ start size)))

;;; データ移動
(defun move-data (sw to from size)
  (let ((buff (slide-window-buff sw)))
    (dotimes (n size)
      (setf (aref buff (+ to n)) (aref buff (+ from n))))))

;;; スライド窓の更新
(defun update-slide (sw rp)
  (cond
   ((< (slide-window-data-size sw) slide-limit)
    ;; EOF
    rp)
   (t
    ;; buffer update
    (move-data sw 0 slide-size (+ slide-size max-len))
    (setf (slide-window-data-size sw)
          (read-data sw (+ slide-size max-len) slide-size))
    ;; hash update
    (maphash
     (lambda (k v)
       (if (< (car v) slide-size)
           (remhash k (slide-window-ht sw))
         (do ((xs v (cdr xs)))
             ((null xs))
             (decf (car xs) slide-size)
             (when (and (consp (cdr xs))
                        (< (cadr xs) slide-size))
               (setf (cdr xs) nil)))))
     (slide-window-ht sw))
    (- rp slide-size))))

;;; データの挿入
(defun insert-data (sw rp)
  (let ((value (hash-value sw rp))
        (ht (slide-window-ht sw)))
    (if (gethash value ht)
        (push rp (gethash value ht))
      (setf (gethash value ht) (list rp)))))

;;; 最長一致列の探索
(defun longest-match (buff s1 s2 &key (start 0) (limit max-len))
  (do ((i start (1+ i)))
      ((>= i limit) i)
      (unless (= (aref buff (+ s1 i)) (aref buff (+ s2 i)))
        (return i))))

(defun search-data (sw rp)
  (let ((buff (slide-window-buff sw))
        (value (hash-value sw rp))
        (low-limit (- rp slide-size))
        (match-len 0)
        (match-pos 0))
    (do ((xs (gethash value (slide-window-ht sw)) (cdr xs)))
        ((or (null xs)
             (< (car xs) low-limit)))
        (when (and (< (+ rp (max min-len match-len)) (slide-window-data-size sw))
                   (= (aref buff (+ rp match-len))
                      (aref buff (+ (car xs) match-len)))
          (let ((n (longest-match buff rp (car xs) :start 3)))
            (when (< match-len n)
              (setf match-len n
                    match-pos (car xs))
              (when (= n max-len)
                (return)))))))
    ;; データの終端をチェック
    (when (>= match-len (- (slide-window-data-size sw) rp))
      (setf match-len (- (slide-window-data-size sw) rp)))
    (values match-len match-pos)))

;;; LZSS 符号 : 符号化
(defun encode (fin fout)
  (do ((sw (initialize-slide-window fin))
       (rp 0))
      ((>= rp (slide-window-data-size sw)))
      (multiple-value-bind
       (match-len match-pos)
       (search-data sw rp)
       (cond
        ((< match-len min-len)
         (setf match-len 1)
         (putbit fout 0)
         (putbits fout 8 (aref (slide-window-buff sw) rp)))
        (t
         (putbit fout 1)
         (gamma-encode fout (- match-len min-len))
         (pos-encode fout rp (- rp match-pos 1))))
       ;;
       (do ((n match-len (1- n)))
           ((zerop n))
           (insert-data sw rp)
           (incf rp)
           (when (>= rp slide-size2)
             (setf rp (update-slide sw rp))))
       (update-pos-bits rp))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (setf *pos-bits* 0)
  (call-with-bit-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (putbits fout 32 size)
          (when (plusp size)
            (encode fin fout))))))))

;;; LZSS 符号 : 復号
(defun decode (fin fout size)
  (do ((buff (make-array (ash 1 pos-bits) :element-type 'unsigned-byte))
       (rp 0))
      ((zerop size))
      (cond
       ((= (getbit fin) 1)
        (let ((num (+ (gamma-decode fin) min-len))
              (pos (- rp (1+ (pos-decode fin rp)))))
          (when (minusp pos)
            (incf pos (length buff)))
          (do ((num num (1- num)))
              ((zerop num))
              (let ((c (aref buff pos)))
                (write-byte c fout)
                (setf (aref buff rp) c)
                (incf pos)
                (incf rp)
                (when (>= pos (length buff))
                  (setf pos 0))
                (when (>= rp (length buff))
                  (setf rp 0))))
          (decf size num)))
       (t
        (let ((c (getbits fin 8)))
          (write-byte c fout)
          (setf (aref buff rp) c)
          (incf rp)
          (when (>= rp (length buff))
            (setf rp 0)))
        (decf size)))
      (update-pos-bits rp)))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (setf *pos-bits* 0)
  (call-with-bit-input-file
   in-file
   (lambda (fin)
     (let ((size (getbits fin 32)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (decode fin fout size))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

●LZRC 符号

;;;
;;; lzrc.py : LZSS + Binary Range Coder
;;;
;;; Copyright (C) 2023 Makoto Hiroi
;;;
;;; Released under the MIT license
;;; https://opensource.org/license/mit/
;;;
(require :brc)
(use-package '(:brc :rangecoder))

;;; 定数
(defconstant min-len 4)
(defconstant max-len 256)
(defconstant pos-bits 16)                   ; 13 (8k), 15 (32k), 16 (64k)
(defconstant slide-size (ash 1 pos-bits))
(defconstant slide-size2 (* slide-size 2))
(defconstant slide-limit (+ slide-size2 max-len))

;;; ファイルサイズの書き込み
(defun write-file-size (out size)
  (write-byte (logand (ash size -24) #xff) out)
  (write-byte (logand (ash size -16) #xff) out)
  (write-byte (logand (ash size -8) #xff) out)
  (write-byte (logand size #xff) out))

;;; ファイルサイズの読み込み
(defun read-file-size (in)
  (+ (ash (read-byte in) 24)
     (ash (read-byte in) 16)
     (ash (read-byte in) 8)
     (read-byte in)))

;;; スライド窓
(defstruct slide-window stream buff ht data-size)

(defun initialize-slide-window (s)
  (let* ((buff (make-array slide-limit :element-type 'unsigned-byte))
         (data-size (read-sequence buff s)))
    (make-slide-window
     :stream s
     :buff buff
     :ht (make-hash-table)
     :data-size data-size)))

;;; ハッシュ値を求める
(defun hash-value (sw rp)
  (do ((buff (slide-window-buff sw))
       (value 0)
       (i 0 (1+ i)))
      ((>= i min-len) value)
      (setf value
            (+ (* value 256) (aref buff (+ rp i))))))

;;; データ入力
(defun read-data (sw start size)
  (read-sequence
   (slide-window-buff sw)
   (slide-window-stream sw)
   :start start
   :end (+ start size)))

;;; データ移動
(defun move-data (sw to from size)
  (let ((buff (slide-window-buff sw)))
    (dotimes (n size)
      (setf (aref buff (+ to n)) (aref buff (+ from n))))))

;;; スライド窓の更新
(defun update-slide (sw rp)
  (cond
   ((< (slide-window-data-size sw) slide-limit)
    ;; EOF
    rp)
   (t
    ;; buffer update
    (move-data sw 0 slide-size (+ slide-size max-len))
    (setf (slide-window-data-size sw)
          (read-data sw (+ slide-size max-len) slide-size))
    ;; hash update
    (maphash
     (lambda (k v)
       (if (< (car v) slide-size)
           (remhash k (slide-window-ht sw))
         (do ((xs v (cdr xs)))
             ((null xs))
             (decf (car xs) slide-size)
             (when (and (consp (cdr xs))
                        (< (cadr xs) slide-size))
               (setf (cdr xs) nil)))))
     (slide-window-ht sw))
    (- rp slide-size))))

;;; データの挿入
(defun insert-data (sw rp)
  (let ((value (hash-value sw rp))
        (ht (slide-window-ht sw)))
    (if (gethash value ht)
        (push rp (gethash value ht))
      (setf (gethash value ht) (list rp)))))

;;; 最長一致列の探索
(defun longest-match (buff s1 s2 &key (start 0) (limit max-len))
  (do ((i start (1+ i)))
      ((>= i limit) i)
      (unless (= (aref buff (+ s1 i)) (aref buff (+ s2 i)))
        (return i))))

(defun search-data (sw rp)
  (let ((buff (slide-window-buff sw))
        (value (hash-value sw rp))
        (low-limit (- rp slide-size))
        (match-len 0)
        (match-pos 0))
    (do ((xs (gethash value (slide-window-ht sw)) (cdr xs)))
        ((or (null xs)
             (< (car xs) low-limit)))
        (when (and (< (+ rp (max min-len match-len)) (slide-window-data-size sw))
                   (= (aref buff (+ rp match-len))
                      (aref buff (+ (car xs) match-len)))
          (let ((n (longest-match buff rp (car xs) :start 3)))
            (when (< match-len n)
              (setf match-len n
                    match-pos (car xs))
              (when (= n max-len)
                (return)))))))
    ;; データの終端をチェック
    (when (>= match-len (- (slide-window-data-size sw) rp))
      (setf match-len (- (slide-window-data-size sw) rp)))
    (values match-len match-pos)))

;;; LZRC 符号 : 符号化
(defun encode (rc fin)
  (do ((freq-flag (initialize-alpha-model 2))
       (freq-code (initialize-binary-model 256))
       (freq-len  (initialize-gamma-model (1+ (- max-len min-len))))
       (freq-pos  (initialize-gamma-model slide-size))
       (sw (initialize-slide-window fin))
       (rp 0))
      ((>= rp (slide-window-data-size sw)))
      (multiple-value-bind
       (match-len match-pos)
       (search-data sw rp)
       (cond
        ((< match-len min-len)
         (setf match-len 1)
         (alpha-encode rc freq-flag 0)
         (bm-encode rc freq-code (aref (slide-window-buff sw) rp)))
        (t
         (alpha-encode rc freq-flag 1)
         (gamma-encode rc freq-len (- match-len min-len))
         (gamma-encode rc freq-pos (- rp match-pos 1))))
       ;;
       (do ((n match-len (1- n)))
           ((zerop n))
           (insert-data sw rp)
           (incf rp)
           (when (>= rp slide-size2)
             (setf rp (update-slide sw rp)))))))

;;; ファイルの符号化
(defun encode-file (in-file out-file)
  (call-with-byte-output-file
   out-file
   (lambda (fout)
     (call-with-byte-input-file
      in-file
      (lambda (fin)
        (let ((size (file-length fin)))
          (write-file-size fout size)
          (when (plusp size)
            (call-with-range-encoder
             fout
             (lambda (rc)
               (encode rc fin))))))))))

;;; LZSS 符号 : 復号
(defun decode (rc fout size)
  (do ((freq-flag (initialize-alpha-model 2))
       (freq-code (initialize-binary-model 256))
       (freq-len  (initialize-gamma-model (1+ (- max-len min-len))))
       (freq-pos  (initialize-gamma-model slide-size))
       (buff (make-array (ash 1 pos-bits) :element-type 'unsigned-byte))
       (rp 0))
      ((zerop size))
      (cond
       ((= (alpha-decode rc freq-flag) 1)
        (let ((num (+ (gamma-decode rc freq-len) min-len))
              (pos (- rp (1+ (gamma-decode rc freq-pos)))))
          (when (minusp pos)
            (incf pos (length buff)))
          (do ((num num (1- num)))
              ((zerop num))
              (let ((c (aref buff pos)))
                (write-byte c fout)
                (setf (aref buff rp) c)
                (incf pos)
                (incf rp)
                (when (>= pos (length buff))
                  (setf pos 0))
                (when (>= rp (length buff))
                  (setf rp 0))))
          (decf size num)))
       (t
        (let ((c (bm-decode rc freq-code)))
          (write-byte c fout)
          (setf (aref buff rp) c)
          (incf rp)
          (when (>= rp (length buff))
            (setf rp 0)))
        (decf size)))))

;;; ファイルの復号
(defun decode-file (in-file out-file)
  (call-with-byte-input-file
   in-file
   (lambda (fin)
     (let ((size (read-file-size fin)))
       (call-with-byte-output-file
        out-file
        (lambda (fout)
          (when (plusp size)
            (call-with-range-decoder
             fin
             (lambda (rc)
               (decode rc fout size))))))))))

;;; 簡単なテスト
(defun test ()
  (let ((files '("alice29.txt" "asyoulik.txt" "cp.html" "fields.c" "grammar.lsp"
                 "kennedy.xls" "lcet10.txt" "plrabn12.txt" "ptt5" "sum" "xargs.1")))
    (format t "----- encode -----~%")
    (time
     (dolist (file files)
       (encode-file (format nil "./canterbury/~a" file)
                    (format nil "~a.en" file))))
    ;;
    (format t "----- decode -----~%")
    (time
     (dolist (file files)
       (decode-file (format nil "~a.en" file)
                    (format nil "~a.de" file))))))

Copyright (C) 2023 Makoto Hiroi
All rights reserved.

[ Common Lisp | library ]