Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement: External/GGUF #84

Merged
merged 7 commits into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion caten.asd
Original file line number Diff line number Diff line change
@@ -1,12 +1,39 @@
;; Top-level ASDF system for Caten.
;; Depends on the "caten.apis", "caten.nn" and "caten.test-suite" systems and
;; adds the single file source/caten-user on top of them.
(asdf:defsystem "caten"
:description "Programmable Deep Learning Framework"
:author "hikettei <[email protected]>"
:version "0.0"
:licence "MIT"
:depends-on
("caten.apis" "caten.nn" "caten.test-suite")
:serial t
:components ((:file "source/caten-user"))
;; Testing this system requires running test-op on each of the systems below first.
:in-order-to
((test-op
(asdf:test-op "caten.apis")
(asdf:test-op "caten.nn")
(asdf:test-op "caten.test-suite"))))
(asdf:test-op "caten.test-suite"))))

;; External systems for Caten.
;; Systems that include non-portable dependencies (e.g. CUDA, Metal), or whose maintenance cannot be guaranteed, are kept separate from caten.
;; Optional Metal backend, defined as a secondary system of "caten".
;; Depends on "cl-metal" and "caten.apis"; its only component is external/backends/metal.
(asdf:defsystem "caten/metal"
:description "Metal extension for Caten"
:author "hikettei <[email protected]>"
:licence "MIT"
:depends-on ("cl-metal" "caten.apis")
:serial t
:components
((:file "external/backends/metal")))

;; GGUF file reader, defined as a secondary system of "caten".
;; All components live under external/gguf and, because of :serial t, are
;; loaded in the order listed (the package definition first).
(asdf:defsystem "caten/gguf"
:description "[gguf](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) format translator."
:author "hikettei <[email protected]>"
:depends-on ("caten.apis" "babel" "fast-io")
:serial t
:pathname "external/gguf"
:components
((:file "package")
(:file "helpers")
(:file "metadata")
(:file "tensor-info")
(:file "gguf-file")
(:file "dequantize")))
8 changes: 0 additions & 8 deletions external/backends/metal.asd

This file was deleted.

4 changes: 2 additions & 2 deletions external/backends/metal.lisp
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
(defpackage :caten/external.backends.metal
;; Package for the Metal backend. It uses the caten/ajit, caten/air and
;; caten/avm packages together with cffi and cl-metal, imports dtype/cast from
;; caten/common.dtype, and exports the Metal symbol.
(defpackage :caten/metal
(:use :cl :caten/ajit :caten/air :caten/avm :cffi :cl-metal)
(:import-from
:caten/common.dtype
#:dtype/cast)
(:export
#:Metal))

(in-package :caten/external.backends.metal)
(in-package :caten/metal)

;; Device subclass for the Metal backend.
;; DEVICE-ID defaults to 0, can be supplied with the :id initarg, and is
;; read/written through METAL-DEVICE-ID.
(defclass Metal (Device)
((device-id :initform 0 :initarg :id :accessor metal-device-id)))
Expand Down
38 changes: 38 additions & 0 deletions external/gguf/dequantize.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
(in-package :caten/gguf)

;; [TODO] Implement QOPs in external/qnn
;; [TODO] Optimize array manipulation by implementing Lisp JIT
;; Currently, caten/gguf dequantizes the weights, so all computations are carried out in floating point.
;; https://github.com/ggerganov/ggml/blob/fca1caafea7de9fbd7efc733b9818f9cf2da3050/src/ggml-quants.c
;; https://github.com/99991/pygguf/blob/main/gguf.py
;; Generic entry point that turns the raw data of one tensor into a Lisp array.
;; Methods specialize on TYPE-ID with EQL specializers (e.g. :f32, :f16), read
;; the values from ALIGNED-BUFFER, and take the element count from the
;; dimensions stored in TENSOR-INFO.
(defgeneric dequantize (type-id aligned-buffer tensor-info) (:documentation "Return: Dequantized Buffer"))

;;(defun buffer-sizeof (tensor-info block-size elements-per-block)
;; (declare (optimize (speed 3)) (type fixnum block-size elements-per-block))
;; (floor (the fixnum (/ (the fixnum (* block-size (the fixnum (apply #'* (tensor-info-dimensions tensor-info))))) elements-per-block))))

(defmethod dequantize ((type-id (eql :f32)) aligned-buffer tensor-info)
  "Reads one value with READF32-LE per element of TENSOR-INFO from ALIGNED-BUFFER
and returns them as a fresh single-float vector. A value decoded as
:not-a-number is stored as 0.0."
  (declare (optimize (speed 3)))
  (let* ((n-elements (reduce #'* (tensor-info-dimensions tensor-info)))
         (result (make-array n-elements :element-type 'single-float)))
    ;; [TODO] Optimize this by rendering C kernel
    (loop for idx from 0 below n-elements
          do (let ((raw (readf32-le aligned-buffer)))
               (setf (aref result idx)
                     (if (eql raw :not-a-number) 0.0 raw))))
    result))

(defmethod dequantize ((type-id (eql :f16)) aligned-buffer tensor-info)
  "Reads one value with READF16-LE per element of TENSOR-INFO from ALIGNED-BUFFER
and returns them as a fresh single-float vector. A value decoded as
:not-a-number is stored as 0.0."
  (declare (optimize (speed 3)))
  (let* ((n-elements (reduce #'* (tensor-info-dimensions tensor-info)))
         (result (make-array n-elements :element-type 'single-float)))
    ;; [TODO] Optimize this by rendering C kernel
    (loop for idx from 0 below n-elements
          do (let ((raw (readf16-le aligned-buffer)))
               (setf (aref result idx)
                     (if (eql raw :not-a-number) 0.0 raw))))
    result))
61 changes: 61 additions & 0 deletions external/gguf/gguf-file.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
(in-package :caten/gguf)
;; [TODO] The fastest gguf parser!

;; Corresponds to https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
;; In-memory representation of a parsed GGUF file.
;; VERSION, TENSOR-COUNT and METADATA-KV-COUNT hold the header fields;
;; METADATA and TENSOR-INFO hold the parsed entries (MAKE-GGUF stores lists there).
(defclass GGUF ()
((version :type fixnum :initarg :version :accessor gguf-version)
(tensor-count :type (unsigned-byte 64) :initarg :tensor-count :accessor gguf-tensor-count)
(metadata-kv-count :type (unsigned-byte 64) :initarg :metadata-kv-count :accessor gguf-metadata-kv-count)
(metadata :initarg :metadata :accessor gguf-metadata)
(tensor-info :initarg :tensor-info :accessor gguf-tensor-info)))

(defmethod print-object ((gguf gguf) stream)
  "Prints a short summary of GGUF to STREAM: its version and the number of
metadata entries and tensor-info entries it holds."
  (let* ((version (gguf-version gguf))
         (n-metadata (length (gguf-metadata gguf)))
         (n-tensors (length (gguf-tensor-info gguf))))
    (format stream "<GGUF
version=~a
metadata: ~a datum
tensor-info: ~a tensors
>"
            version n-metadata n-tensors)))

(defun parse-header (buffer)
  "Reads the next four bytes from BUFFER and returns them decoded as a UTF-8 string."
  (declare (type input-buffer buffer))
  (let ((magic (make-array 4 :element-type '(unsigned-byte 8))))
    ;; Fill the octet vector in file order before decoding it.
    (dotimes (i 4)
      (setf (aref magic i) (fast-read-byte buffer)))
    (babel:octets-to-string magic :encoding :utf-8)))

(defun make-gguf (stream)
  "GGUF File Structure:
https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
[Magic Number (4 Byte)] | [GGUF Version (4 Byte)] | [Tensor_Count (8 Byte)] | [Metadata_KV_Count (8 Byte)] | [Rest_data]

Parses STREAM (read byte-wise through fast-io) and returns a GGUF instance
holding the header fields, the parsed metadata and the parsed tensor-info.
Signals an error when the magic number is not GGUF."
  (with-fast-input (buffer nil stream)
    (let ((header (parse-header buffer)))
      (declare (type string header))
      ;; Validate the magic number before reading anything else, so that a
      ;; non-GGUF or truncated input is reported as such instead of failing
      ;; later with an unrelated read error.
      (assert (string= header "GGUF") () "Expecting the header to be GGUF, but got ~a.~%The given stream is not a gguf format.~%~a" header stream)
      (let* ((version (readu32-le buffer))
             (tensor-count (readu64-le buffer))
             (metadata-kv-count (readu64-le buffer))
             (metadata (parse-metadata-kv buffer metadata-kv-count))
             ;; The "general.alignment" key is optional; 32 is used when it is absent.
             (alignment-kv (find "general.alignment" metadata :key #'metadata-key :test #'equal))
             (alignment (if alignment-kv (metadata-value alignment-kv) 32))
             (tensors (parse-tensor-info buffer tensor-count alignment stream))
             (gguf (make-instance 'gguf :version version :tensor-count tensor-count :metadata-kv-count metadata-kv-count
                                        :metadata metadata :tensor-info tensors)))
        (declare (type (unsigned-byte 64) version tensor-count))
        (assert (= metadata-kv-count (length metadata)) () "The number of parsed metadata is invalid. Parsed ~a, but expected ~a" (length metadata) metadata-kv-count)
        (assert (= tensor-count (length tensors)) () "The number of parsed tensor-info is invalid. Parsed ~a, but expected ~a" (length tensors) tensor-count)
        ;; Processing [Rest_Data]
        ;; Rest_Data consists of two parts:
        ;; [tensor_info] | [rest_of_the_file]
        gguf))))

(defun load-gguf (pathname)
"Opens the file at PATHNAME as a stream of (unsigned-byte 8) and returns the GGUF object produced by MAKE-GGUF."
(with-open-file (stream pathname :element-type '(unsigned-byte 8))
(make-gguf stream)))
15 changes: 15 additions & 0 deletions external/gguf/helpers.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(in-package :caten/gguf)

;; Maximum number of characters of a printed value that the print-object
;; methods of this package show; longer output is cut at this length.
(defparameter *print-object-omit-threshold* 200)
(defun gguf-string (buffer)
  "Reads a length-prefixed string from BUFFER: a little-endian u64 byte count
followed by that many bytes, decoded as UTF-8.
Returns two values: the decoded string and the byte count."
  (declare (type input-buffer buffer) (optimize (speed 3)))
  (let* ((byte-count (readu64-le buffer))
         (bytes (loop repeat byte-count collect (fast-read-byte buffer)))
         (octets (make-array byte-count
                             :element-type '(unsigned-byte 8)
                             :initial-contents bytes))
         (decoded (babel:octets-to-string octets :encoding :utf-8)))
    (values (the string decoded) byte-count)))

;; Little-endian float readers: each reads an unsigned integer of the matching
;; width from BUFFER and passes it to the corresponding caten/common.dtype decoder.
;; The three readers and the float32/float64 decoders are proclaimed inline;
;; decode-float16 is not part of the inline list.
(declaim (inline readf16-le readf32-le readf64-le caten/common.dtype:decode-float32 caten/common.dtype:decode-float64))
(defun readf16-le (buffer) (caten/common.dtype:decode-float16 (readu16-le buffer)))
(defun readf32-le (buffer) (caten/common.dtype:decode-float32 (readu32-le buffer)))
(defun readf64-le (buffer) (caten/common.dtype:decode-float64 (readu64-le buffer)))
82 changes: 82 additions & 0 deletions external/gguf/metadata.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
(in-package :caten/gguf)

(defun gguf-metadata-value-type (indicator)
  "Maps INDICATOR, the integer value-type tag stored in a GGUF metadata entry,
to the keyword naming that type.
Signals an error (via ECASE) when INDICATOR is not one of 0..12."
  (ecase indicator
    (0 :uint8)
    (1 :int8)
    (2 :uint16)
    (3 :int16)
    (4 :uint32)
    (5 :int32)
    (6 :float32)
    (7 :bool)
    (8 :string)
    (9 :array)
    (10 :uint64)
    ;; 11 is INT64 in the GGUF specification. It used to map to :int16, which
    ;; made signed 64-bit values be read as 16-bit ones.
    (11 :int64)
    (12 :float64)))

(defun value-type->lisp-type (dtype)
  "Returns the Lisp type specifier for the metadata value type DTYPE:
SIMPLE-ARRAY for :array, STRING for :string, and otherwise whatever
caten/common.dtype:dtype->lisp returns for DTYPE."
  (cond ((eql dtype :array) 'simple-array)
        ((eql dtype :string) 'string)
        (t (caten/common.dtype:dtype->lisp dtype))))

(defun gguf-metadata-value (buffer dtype)
  "Reads one metadata value of type DTYPE (a keyword such as those returned by
GGUF-METADATA-VALUE-TYPE) from BUFFER and returns it.
Integers and floats are read with the matching little-endian reader, :bool
becomes T or NIL, :string is read with GGUF-STRING, and :array is read
recursively into a vector of its elements.
Signals an error for an unknown DTYPE (via ECASE) or for a bool byte other
than 0 or 1."
  (declare (type keyword dtype) (type input-buffer buffer))
  (ecase dtype
    (:uint8 (readu8-le buffer))
    (:int8 (read8-le buffer))
    (:uint16 (readu16-le buffer))
    (:int16 (read16-le buffer))
    (:uint32 (readu32-le buffer))
    (:int32 (read32-le buffer))
    (:uint64 (readu64-le buffer))
    (:int64 (read64-le buffer))
    (:float32 (readf32-le buffer))
    (:float64 (readf64-le buffer))
    (:bool
     (let ((val (readu8-le buffer)))
       (case val
         (0 nil)
         (1 t)
         (otherwise (error "gguf-metadata-value: Invalid bool value ~a" val)))))
    (:string (gguf-string buffer))
    (:array
     ;; Layout: element type tag (u32), element count (u64), then the elements.
     (let* ((value-type (gguf-metadata-value-type (readu32-le buffer)))
            (len (readu64-le buffer))
            ;; Collect exactly one entry per element. The previous version also
            ;; collected LEN itself on every iteration, which interleaved the
            ;; length with the real values and doubled the size of the result.
            (elements (loop repeat len collect (gguf-metadata-value buffer value-type))))
       (make-array (length elements) :initial-contents elements)))))

;; One key/value entry of the GGUF metadata section.
;; Instances are created positionally with (make-metadata key value-type value).
(defstruct (Metadata
(:constructor make-metadata (key value-type value)))
;; Name of the entry.
(key key :type string)
;; Keyword naming the type of VALUE.
(value-type value-type :type keyword)
;; The decoded value.
;; NOTE(review): string values only satisfy this type if they are simple arrays — confirm what GGUF-STRING returns.
(value value :type (or number boolean simple-array)))

(defmethod print-object ((metadata Metadata) stream)
  "Prints METADATA to STREAM as <Metadata[type] key -> value>.
The printed value is cut at *PRINT-OBJECT-OMIT-THRESHOLD* characters (with
a trailing ...), and array values show their element type and total size
in place of the bare type keyword."
  (let* ((printed (format nil "~a" (metadata-value metadata)))
         (shown (if (< (length printed) *print-object-omit-threshold*)
                    printed
                    (format nil "~a..." (subseq printed 0 *print-object-omit-threshold*))))
         (type (metadata-value-type metadata))
         (label (if (eql type :array)
                    (progn
                      (assert (arrayp (metadata-value metadata)))
                      (format nil "Array{~a x ~a}"
                              (array-element-type (metadata-value metadata))
                              (array-total-size (metadata-value metadata))))
                    type)))
    (format stream "<Metadata[~a] ~a -> ~a>" label (metadata-key metadata) shown)))

(defun make-gguf-metadata (buffer)
  "Reads one metadata entry from BUFFER, in order: the key string, the u32
value-type tag, then the value itself. Returns it as a Metadata structure."
  (declare (type input-buffer buffer))
  (let* ((key (gguf-string buffer))
         (value-type (gguf-metadata-value-type (readu32-le buffer)))
         (value (gguf-metadata-value buffer value-type)))
    (make-metadata key value-type value)))

(defun parse-metadata-kv (buffer metadata-kv-count)
  "Reads METADATA-KV-COUNT consecutive metadata entries from BUFFER and returns
them as a list, in file order."
  (declare (type input-buffer buffer) (type fixnum metadata-kv-count))
  (let ((entries '()))
    (dotimes (i metadata-kv-count (nreverse entries))
      (declare (ignorable i))
      (push (make-gguf-metadata buffer) entries))))
25 changes: 25 additions & 0 deletions external/gguf/package.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
(defpackage :caten/gguf
  (:documentation "[GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) to Caten Translator.")
  (:use :cl :fast-io)
  (:export
   ;; Entry points
   #:load-gguf
   #:make-gguf

   ;; GGUF class and its accessors.
   ;; gguf-metadata and gguf-tensor-info are exported alongside the other
   ;; accessors so that the parsed contents are reachable without `::`.
   #:GGUF
   #:gguf-version
   #:gguf-tensor-count
   #:gguf-metadata-kv-count
   #:gguf-metadata
   #:gguf-tensor-info

   ;; Metadata structure
   #:metadata
   #:metadata-key
   #:metadata-value-type
   #:metadata-value

   ;; Tensor-Info structure
   #:tensor-info
   #:tensor-info-name
   #:tensor-info-n-dimension
   #:tensor-info-dimensions
   #:tensor-info-ggml-type
   #:tensor-info-relative-offset
   #:tensor-info-absolute-offset
   #:tensor-info-buffer))

(in-package :caten/gguf)
88 changes: 88 additions & 0 deletions external/gguf/tensor-info.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
(in-package :caten/gguf)

(defun ggml-type (indicator)
  "Maps INDICATOR, the integer ggml type tag stored in a GGUF tensor-info entry,
to the keyword naming that tensor type.
Signals an error (via ECASE) for tags that are not listed, including the
removed tags 4 and 5."
  (ecase indicator
    (0 :F32)
    (1 :F16)
    (2 :Q4_0)
    (3 :Q4_1)
    ;; 4 support has been removed
    ;; 5 support has been removed
    (6 :Q5_0)
    (7 :Q5_1)
    (8 :Q8_0)
    (9 :Q8_1)
    (10 :Q2_K)
    ;; 11 is Q3_K in the ggml type enum. It used to map to :Q2_K, duplicating
    ;; tag 10 and mislabelling Q3_K tensors.
    (11 :Q3_K)
    (12 :Q4_K)
    (13 :Q5_K)
    (14 :Q6_K)
    (15 :Q8_K)
    (16 :IQ2_XXS)
    (17 :IQ2_XS)
    (18 :IQ3_XXS)
    (19 :IQ1_S)
    (20 :IQ4_NL)
    (21 :IQ3_S)
    (22 :IQ2_S)
    (23 :IQ4_XS)
    (24 :I8)
    (25 :I16)
    (26 :I32)
    (27 :I64)
    (28 :F64)
    (29 :IQ1_M)))

;; Description of one tensor stored in a GGUF file.
;; Instances are created positionally with
;; (make-tensor-info name n-dimension dimensions tensor-type offset).
(defstruct (Tensor-Info
(:constructor make-tensor-info (name n-dimension dimensions tensor-type offset)))
(name name :type string)
;; Number of dimensions, and the list of their sizes.
(n-dimension n-dimension :type fixnum)
(dimensions dimensions :type list)
;; Keyword naming the tensor type.
(ggml-type tensor-type :type keyword)
;; Offset as stored in the file.
(relative-offset offset :type fixnum)
;; Starts at 0; PARSE-TENSOR-INFO fills it with the computed file offset.
(absolute-offset 0 :type fixnum)
;; NIL until TENSOR-INFO-REALIZE stores the dequantized data here.
(buffer))

(defmethod print-object ((tensor Tensor-info) stream)
  "Prints TENSOR to STREAM with its name, ggml type and dimensions, followed by
either its buffer contents (cut at *PRINT-OBJECT-OMIT-THRESHOLD* characters)
or, when the buffer is still NIL, a note carrying the relative and absolute offsets."
  (let* ((printed (format nil "~a" (tensor-info-buffer tensor)))
         (shown (if (< (length printed) *print-object-omit-threshold*)
                    printed
                    (subseq printed 0 *print-object-omit-threshold*)))
         (body (if (null (tensor-info-buffer tensor))
                   (format nil "[Not realized, relative_offset=~a, absolute_offset=~a]"
                           (tensor-info-relative-offset tensor)
                           (tensor-info-absolute-offset tensor))
                   (format nil "~a" shown))))
    (format stream "<Tensor-Info{name=~a, ggml-type=:~a, dimensions=~a}~% ~a~%>"
            (tensor-info-name tensor)
            (tensor-info-ggml-type tensor)
            (tensor-info-dimensions tensor)
            body)))

;; Dequantizes the data of TENSOR, stores the result in its BUFFER slot and returns TENSOR.
;; Asserts that TENSOR has not been realized yet (its BUFFER slot must be NIL).
;; The BUFFER argument is not used in the body; the data is read through a new
;; fast-io input created over STREAM.
;; NOTE(review): this assumes that passing the absolute offset to with-fast-input
;; makes the reads start at that position of STREAM — confirm how fast-io treats
;; the offset argument when reading from a stream.
(defmethod tensor-info-realize ((tensor Tensor-info) buffer stream)
(declare (type input-buffer buffer) (optimize (speed 3)))
(assert (null (tensor-info-buffer tensor)) () "The given tensor-info is already realized. ~a" tensor)
(with-fast-input (rest-of-the-file nil stream (tensor-info-absolute-offset tensor))
(setf (tensor-info-buffer tensor) (dequantize (tensor-info-ggml-type tensor) rest-of-the-file tensor))
tensor))

(defun gguf-tensor-info-parse (buffer)
  "Reads one tensor-info entry from BUFFER, in order: the name string, the u32
dimension count, one u64 per dimension, the u32 type tag and the u64 offset.
Returns it as a Tensor-Info structure."
  (declare (type input-buffer buffer))
  (let* ((name (gguf-string buffer))
         (rank (readu32-le buffer))
         (shape (let ((sizes '()))
                  (dotimes (axis rank (nreverse sizes))
                    (declare (ignorable axis))
                    (push (readu64-le buffer) sizes))))
         (dtype (ggml-type (readu32-le buffer)))
         (rel-offset (readu64-le buffer)))
    (make-tensor-info name rank shape dtype rel-offset)))

(defun parse-tensor-info (buffer tensor-count alignment stream)
  "Reads TENSOR-COUNT tensor-info entries from BUFFER, fills in the absolute
offset of each one, realizes each with TENSOR-INFO-REALIZE, and returns the
resulting list."
  (declare (type input-buffer buffer) (type fixnum tensor-count))
  (let ((tensors (loop repeat tensor-count collect (gguf-tensor-info-parse buffer)))
        ;; Position of BUFFER right after the last tensor-info entry.
        (start (buffer-position buffer)))
    ;; Inconveniently, the offset defined in gguf is relative to the end of header and is unaligned.
    ;; we need to compute the absolute file offset ourselves instead.
    (dolist (tensor tensors)
      (let* ((unaligned (+ start (tensor-info-relative-offset tensor)))
             ;; Distance from UNALIGNED up to the next multiple of ALIGNMENT (0 when already aligned).
             (padding (mod (- alignment (mod unaligned alignment)) alignment)))
        (setf (tensor-info-absolute-offset tensor) (+ unaligned padding))))
    (mapcar #'(lambda (tensor-info)
                (tensor-info-realize tensor-info buffer stream))
            tensors)))
4 changes: 4 additions & 0 deletions source/caten-user.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(defpackage :caten-user
  (:documentation "REPL Playground for Caten")
  ;; Each package is listed once; :caten/air previously appeared twice.
  ;; NOTE(review): if the second entry was meant to name a different package, add it here.
  (:use :cl :caten :caten/aasm :caten/air :caten/ajit :caten/nn))
(in-package :caten-user)
Loading