From b003bfaefe5115b30590dda02ea58b040a0fcc96 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sat, 9 Nov 2024 23:19:49 +0900 Subject: [PATCH 01/25] update comment --- source/apis/iseq.lisp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/apis/iseq.lisp b/source/apis/iseq.lisp index 611989225..766182021 100644 --- a/source/apis/iseq.lisp +++ b/source/apis/iseq.lisp @@ -233,7 +233,7 @@ ;; (Forward Mode) First, Simplify the forward graph in :Module/:Func level (dolist (f external-simplifiers) (funcall f forward-graph)) ;; Second, lower an :module into a list of :func - (lower-all forward-graph) ;; lower-all is O(N) + (time (lower-all forward-graph)) ;; SLOW ;; (Backward Mode) First, create a reverse-mode backward tape from the sorted forward graph. ;; the tapes consequent after the allocation of prev-grad. (when (null no-grad) (setf iseq-bw (%make-graph-backward session iseq :iseq-bw iseq-bw))) @@ -268,7 +268,7 @@ (let ((merged-graph (->fast-graph merged-graph))) (lower-all merged-graph) ;; Function-level whole optimization - (dolist (f external-simplifiers) + (dolist (f external-simplifiers) ;; Slow but O(n) (funcall f merged-graph :debug-opt (= 1 (the fixnum (ctx:getenv :PROFILE_SIMPLIFIER))))) ;; verify and complete (verify-graph merged-graph) From 4a27c0e7df35f4e847f3612e5677c58e5a6111bb Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 14:12:00 +0900 Subject: [PATCH 02/25] uncomment --- source/apis/iseq.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/apis/iseq.lisp b/source/apis/iseq.lisp index 766182021..8ec51c6f6 100644 --- a/source/apis/iseq.lisp +++ b/source/apis/iseq.lisp @@ -233,7 +233,7 @@ ;; (Forward Mode) First, Simplify the forward graph in :Module/:Func level (dolist (f external-simplifiers) (funcall f forward-graph)) ;; Second, lower an :module into a list of :func - (time (lower-all forward-graph)) ;; SLOW + (lower-all forward-graph) ;; SLOW ;; (Backward Mode) First, create a reverse-mode backward tape from the sorted forward graph. ;; the tapes consequent after the allocation of prev-grad. (when (null no-grad) (setf iseq-bw (%make-graph-backward session iseq :iseq-bw iseq-bw))) From ee4ceba651850a83a50da171a2c649e54adbc9bc Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 14:24:13 +0900 Subject: [PATCH 03/25] optimization --- source/codegen/scheduler.lisp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 22c18daec..c2ff6d980 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -462,9 +462,9 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (setf (group-items self) (append (group-items p) (group-items self)) (group-reduce-dims self) (or (group-reduce-dims self) (group-reduce-dims p)))) self) - +;; DFA (defun recursive-create-groups (id graph &key (seen)) - (declare (type symbol id) (type graph graph) (type hash-table seen)) + (declare (type symbol id) (type graph graph) (type hash-table seen) (optimize (speed 3))) (when (gethash id seen) (return-from recursive-create-groups)) (setf (gethash id seen) t) (let* ((node (id->value graph id)) @@ -491,13 +491,13 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (let ((mergeable-p-list (loop for parent in parents for parent-return = (car parent) - for nth upfrom 0 + for nth fixnum upfrom 0 if parent-return collect (group-merge-p self graph node parent-return nth) else collect nil))) (assert (= (length mergeable-p-list) (length parents))) - (append + (nconc (list (merge-groups self (map 'list #'car parents) mergeable-p-list)) (loop for p in parents for m in mergeable-p-list From c5321ab3475904cfcb9b07dfc36b0871abcfe9e6 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 14:28:41 +0900 Subject: [PATCH 04/25] opt function --- source/codegen/scheduler.lisp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index c2ff6d980..0b51949fc 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -367,7 +367,10 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (return-from identify-view-type :shrink))) :reshape)) -(defmethod group-merge-p ((self Group) (graph Graph) (node Node) (parent-group Group) nth) +(defun group-merge-p (self graph node parent-group nth) + (declare (type group self) (type graph graph) (type node node) (type group parent-group) + (type fixnum nth) + (optimize (speed 3))) (symbol-macrolet ((->ok (progn (setf (group-reduce-dims self) (or (group-reduce-dims self) (group-reduce-dims parent-group))) @@ -382,7 +385,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (read-node (id->value graph read)) (read-view (car (nth nth (getattr node :_read_views)))) (read-type (group-get-type parent-group))) - (assert (<= (length (nth nth (getattr node :_read_views))) 1)) + (assert (<= (length (the list (nth nth (getattr node :_read_views)))) 1)) ;; Relations between group and parent-group: ;; ``` ;; group=parent | X[write_type]{write_iter} = f(...) @@ -397,6 +400,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f ->ng) (let ((r1 (group-rank self)) (r2 (group-rank parent-group))) + (declare (type fixnum r1 r2)) ;; r2 -> r1 (cond ((or (= r1 0) (= r2 0))->ok) @@ -426,6 +430,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f ->ok) (if (and read-view (some #'identity (getattr read-view :broadcast))) (let ((mask (getattr read-view :broadcast))) + (declare (type list mask)) (when (not (= (length mask) (max r1 r2))) (setf mask (map 'list #'fourth (buffer-views (if c read-type self-type))))) (when (not (= (length mask) (max r1 r2)))->ng) @@ -663,7 +668,7 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (defmethod graph-schedule ((graph Graph)) (let* ((seen (make-hash-table)) - (groups (apply #'append (map 'list #'(lambda (x) (recursive-create-groups x graph :seen seen)) (graph-outputs graph))))) + (groups (time (apply #'append (map 'list #'(lambda (x) (recursive-create-groups x graph :seen seen)) (graph-outputs graph)))))) (mapc #'verify-group groups) (when (>= (ctx:getenv :JIT_DEBUG) 4) (format t "[graph-schedule] Prescheduled ~a groups:~%" (length groups)) From df1fbd40fd275225024178bee8d06e77372194f0 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 14:34:29 +0900 Subject: [PATCH 05/25] optimize --- source/codegen/scheduler.lisp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 0b51949fc..2522af417 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -619,11 +619,12 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre base-graph))) (defun apply-move-after-reduction (schedule-graph) + (declare (type graph schedule-graph) (optimize (speed 3))) (labels ((%newtype (buffer) (caten/avm:make-buffer (buffer-nrank buffer) (loop for s in (buffer-shape buffer) - for nth upfrom 0 + for nth fixnum upfrom 0 for v = (nth nth (buffer-views buffer)) if (and (listp v) (fourth v)) ;; broadcasted collect 1 @@ -632,7 +633,7 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (buffer-stride buffer) (buffer-dtype buffer) (loop for s in (buffer-shape buffer) - for nth upfrom 0 + for nth fixnum upfrom 0 for v = (nth nth (buffer-views buffer)) if (and (listp v) (fourth v)) collect `(0 1 1 t) @@ -668,7 +669,7 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (defmethod graph-schedule ((graph Graph)) (let* ((seen (make-hash-table)) - (groups (time (apply #'append (map 'list #'(lambda (x) (recursive-create-groups x graph :seen seen)) (graph-outputs graph)))))) + (groups (apply #'append (map 'list #'(lambda (x) (recursive-create-groups x graph :seen seen)) (graph-outputs graph))))) (mapc #'verify-group groups) (when (>= (ctx:getenv :JIT_DEBUG) 4) (format t "[graph-schedule] Prescheduled ~a groups:~%" (length groups)) From 1102ca85fba3416c5844c2037deee7d8b49105d8 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 14:43:33 +0900 Subject: [PATCH 06/25] more comments --- source/codegen/scheduler.lisp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 2522af417..45a222571 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -467,7 +467,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (setf (group-items self) (append (group-items p) (group-items self)) (group-reduce-dims self) (or (group-reduce-dims self) (group-reduce-dims p)))) self) -;; DFA +;; depth first serach (defun recursive-create-groups (id graph &key (seen)) (declare (type symbol id) (type graph graph) (type hash-table seen) (optimize (speed 3))) (when (gethash id seen) (return-from recursive-create-groups)) @@ -515,7 +515,8 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (defun apply-schedule-item-fusor (f schedule-graph base-graph &aux (seen) (changed-p t)) (declare (optimize (speed 3)) (type function f) - (type graph schedule-graph base-graph) + (type fastgraph schedule-graph) + (type graph base-graph) (type list seen)) (labels ((parent-groups (self) (assert (node-p self)) @@ -543,6 +544,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (dolist (w (node-writes parent)) (remnode schedule-graph w)))) (mapc #'explore (node-reads self))))) + ;; This loop finishes in the constant time. (loop while changed-p do (setf changed-p nil seen nil) (mapc #'explore (graph-outputs schedule-graph))))) @@ -681,6 +683,7 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (setf (graph-outputs schedule) (graph-outputs graph)) (setf schedule (->fast-graph schedule)) ;; ~~ Rewriting Rules + Post Fusion ~~~~~ + ;; SLOW (apply-reduce+move-fusion schedule graph) (apply-serialize-reduction schedule graph) ;; (TODO: Only execute when MAXIMIZE_MEMORY_LOCALITY=1?) (apply-move-after-reduction schedule) From a31d349132d03edaf850f7539f12fd52a77878d9 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 15:14:11 +0900 Subject: [PATCH 07/25] add: can-split-p and cache id->users --- source/codegen/scheduler.lisp | 41 +++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 45a222571..289091731 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -512,9 +512,26 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f append p))))) ;; ~~~~~~ More Fusion Rules ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ;; [TODO] Rewrite them as a pattern matcher. -(defun apply-schedule-item-fusor (f schedule-graph base-graph &aux (seen) (changed-p t)) +(defun make-can-split-p (schedule-graph) (declare (optimize (speed 3)) - (type function f) + (type fastgraph schedule-graph)) + (let ((in-degrees (make-hash-table)) (out-degrees (make-hash-table))) + (declare (type hash-table in-degrees out-degrees)) + (flet ((butseen (list) + (loop for l in list + for v = (id->value schedule-graph l) + if (and v (symbolp l)) collect v))) + (loop for node in (graph-nodes schedule-graph) do + (setf (gethash (node-id node) in-degrees) (butseen (node-reads node))) + (dolist (r (gethash (node-id node) in-degrees)) + (when (null (find (node-id node) (the list (gethash (node-id r) out-degrees)) :key #'node-id)) + (push node (gethash (node-id r) out-degrees)))))) + (flet ((node->users (node) (<= (length (the list (gethash (node-id node) out-degrees))) 1))) + #'node->users))) + +(defun apply-schedule-item-fusor (f can-split-p schedule-graph base-graph &aux (seen) (changed-p t)) + (declare (optimize (speed 3)) + (type function f can-split-p) (type fastgraph schedule-graph) (type graph base-graph) (type list seen)) @@ -524,8 +541,6 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f for val = (and (symbolp r) (id->value schedule-graph r)) ;; Only :jitable scheduleitems are merged if val collect val)) - (can-split-p (id) - (<= (length (the list (id->users schedule-graph id))) 1)) (explore (id) (let* ((self (id->value schedule-graph id)) (_ (when (or (null self) (find (node-id self) seen)) (return-from explore))) @@ -536,7 +551,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (loop for parent in candidates if (and self-mergeable-p parent (getattr parent :jitable) - (every #'can-split-p (node-writes parent)) + (funcall can-split-p parent) ;; confirm that parent is not used by other nodes except for self (funcall f self parent)) do (let ((merged (merge-schedule-items self parent base-graph))) (setf changed-p t) @@ -549,7 +564,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (setf changed-p nil seen nil) (mapc #'explore (graph-outputs schedule-graph))))) -(defun apply-reduce+move-fusion (schedule-graph base-graph) +(defun apply-reduce+move-fusion (schedule-graph can-split-p base-graph) "Applies the post-loop-fusion to eliminate MOVE after the reduction. ``` Group1 @@ -584,10 +599,11 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre collect item))) (apply-schedule-item-fusor #'(lambda (self parent) (declare (ignore self)) (reduce-w/o-store parent)) + can-split-p schedule-graph base-graph))) -(defun apply-serialize-reduction (schedule-graph base-graph) +(defun apply-serialize-reduction (schedule-graph can-split-p base-graph) (flet ((is-tensor (buffer) (not (every #'(lambda (x) (eql x 1)) (buffer-shape buffer)))) (depend-dims-p (items rank &aux (common-views (make-list rank))) @@ -617,6 +633,7 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (equal (getattr self :reduce-dims) (getattr parent :reduce-dims)) (depend-dims-p (getattr self :items) (getattr self :rank)) (depend-dims-p (getattr self :items) (getattr parent :rank))))))) + can-split-p schedule-graph base-graph))) @@ -682,12 +699,12 @@ If this interrupts the parallelism, AutoScheduler should distribute them and cre (let ((schedule (apply #'make-graph (map 'list #'(lambda (x) (group->schedule x graph)) groups)))) (setf (graph-outputs schedule) (graph-outputs graph)) (setf schedule (->fast-graph schedule)) - ;; ~~ Rewriting Rules + Post Fusion ~~~~~ - ;; SLOW - (apply-reduce+move-fusion schedule graph) - (apply-serialize-reduction schedule graph) ;; (TODO: Only execute when MAXIMIZE_MEMORY_LOCALITY=1?) + ;; ~~ Rewriting Rules + Post Fusion ~~~~~~~~~~~~~~~~~~~~~~ + (let ((can-split-p-cache (make-can-split-p schedule))) ;; Create a hash table for recording the edge and reference counter. + (apply-reduce+move-fusion schedule can-split-p-cache graph) + (apply-serialize-reduction schedule can-split-p-cache graph)) ;; (TODO: Only execute when MAXIMIZE_MEMORY_LOCALITY=1?) (apply-move-after-reduction schedule) - ;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (when (>= (ctx:getenv :JIT_DEBUG) 3) (format t "[graph-schedule] Schedule Graph:~%~a~%" schedule)) schedule))) From ec35cbb98a7825bf3329e233da1f59356766f4a5 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 15:37:23 +0900 Subject: [PATCH 08/25] can split-p is used just to merge jtiable and jitable nodes --- source/codegen/scheduler.lisp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 289091731..4c99de981 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -526,7 +526,9 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (dolist (r (gethash (node-id node) in-degrees)) (when (null (find (node-id node) (the list (gethash (node-id r) out-degrees)) :key #'node-id)) (push node (gethash (node-id r) out-degrees)))))) - (flet ((node->users (node) (<= (length (the list (gethash (node-id node) out-degrees))) 1))) + (flet ((node->users (node) + (let ((users (gethash (node-id node) out-degrees))) + (every #'(lambda (x) (getattr x :jitable)) users)))) #'node->users))) (defun apply-schedule-item-fusor (f can-split-p schedule-graph base-graph &aux (seen) (changed-p t)) @@ -551,7 +553,7 @@ g represents for Graph, b1 for the self buffer, b2 for the parent buffer, mask f (loop for parent in candidates if (and self-mergeable-p parent (getattr parent :jitable) - (funcall can-split-p parent) ;; confirm that parent is not used by other nodes except for self + (funcall can-split-p parent) ;; confirm that the parent is not used by special ops (funcall f self parent)) do (let ((merged (merge-schedule-items self parent base-graph))) (setf changed-p t) From 799a52563d74210602a308af0e3c0b5475e84a83 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 17:22:10 +0900 Subject: [PATCH 09/25] Fix for local memory planner --- source/codegen/memory-planner.lisp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index c44a4e568..5875eca11 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -151,8 +151,8 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in symbolics (loop for node in (graph-nodes schedule-graph) if (not (eql (node-id node) (node-id item))) - append (node-reads node)))) - (constants)) + append (node-reads node))))) + (dolist (o outputs) (setf (gethash o lock-table) t)) (loop for node in blueprint for nth upfrom 0 if (not (eql (node-class node) :Render)) do @@ -160,7 +160,7 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in for typ in (relay-reads (read-type-relay node)) for time = `(,nth ,@(gethash val trace-table)) if (id-is-input-p val base-graph) do (push val outputs) - if (and (symbolp val) (null (find val constants))) + if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) (loop for val in (node-writes node) for typ in (relay-writes (read-type-relay node)) @@ -192,6 +192,7 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) (flet ((newid (id) (or (gethash id alias-map) id))) + (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (bp (getattr item :blueprint)) (setf (node-writes bp) (map 'list #'newid (node-writes bp)) (node-reads bp) (map 'list #'newid (node-reads bp))) From 22d14af0d255f2453ecb07f21538c7e15d25bfd5 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 17:27:25 +0900 Subject: [PATCH 10/25] node-writes are allocated --- source/codegen/jit.lisp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/codegen/jit.lisp b/source/codegen/jit.lisp index 943a3c6be..c0885b212 100644 --- a/source/codegen/jit.lisp +++ b/source/codegen/jit.lisp @@ -239,6 +239,7 @@ caten/codegen overview: (push alloc nodes) (when view (push view nodes))) (push w allocated)) + (dolist (w (node-writes node)) (push w allocated)) (push (make-compiled-kernel-node node graph) nodes) ;; Merging view after the JIT_KERNEL invocation (loop for w in (node-writes node) From fd1a6bc00231b15187ea05aabbea8ea4b0ed544c Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 17:48:37 +0900 Subject: [PATCH 11/25] all nodes including jit must have a read-type --- source/codegen/memory-planner.lisp | 19 +++++++++++++++---- source/codegen/scheduler.lisp | 26 +++++++++++++++----------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 5875eca11..9da179b93 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -90,16 +90,18 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (id2type (make-hash-table)) (lock-table (make-hash-table)) (total-time (length (graph-nodes schedule-graph))) - (outputs (append (graph-outputs schedule-graph) symbolics)) - (constants)) + (outputs (append (graph-outputs schedule-graph) symbolics))) + (dolist (o outputs) (setf (gethash o lock-table) t)) (loop for node in (graph-nodes schedule-graph) for nth upfrom 0 for lock-p = (null (getattr node :jitable)) do + (assert (= (length (getattr node :storage-id-src)) (length (getattr node :read-types)))) + (assert (= (length (getattr node :storage-id-dst)) (length (getattr node :write-types)))) (loop for val in (getattr node :storage-id-src) for typ in (getattr node :read-types) for time = `(,nth ,@(gethash val trace-table)) if (id-is-input-p val base-graph) do (push val outputs) - if (and (symbolp val) (null (find val constants))) + if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) (loop for val in (getattr node :storage-id-dst) for typ in (getattr node :write-types) @@ -111,6 +113,10 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in do (setf (gethash val id2type) typ (gethash val trace-table) (list nth) (gethash val lock-table) lock-p))) + (maphash + #'(lambda (key val) + (format t "~a -> ~a~%" key val)) + trace-table) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) @@ -131,10 +137,15 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) + (maphash + #'(lambda (key val) + (format t "~a -> ~a~%" key val)) + alias-map) (flet ((newid (id) (or (gethash id alias-map) id))) (dolist (node (graph-nodes schedule-graph)) (when (getattr node :jitable) - (setf (getattr node :storage-id-dst) (map 'list #'newid (getattr node :storage-id-dst))))))))) + ;; (setf (getattr node :storage-id-dst) (map 'list #'newid (getattr node :storage-id-dst))) + )))))) (defun run-memory-planner-local (item schedule-graph symbolics base-graph) "Minimizes the number of allocation buffers that are only used in the item." diff --git a/source/codegen/scheduler.lisp b/source/codegen/scheduler.lisp index 4c99de981..63bb89318 100644 --- a/source/codegen/scheduler.lisp +++ b/source/codegen/scheduler.lisp @@ -110,8 +110,8 @@ Otherwise, the scheduled items are relocated to the compiled avm directly. Speci #'concatenate 'string (butlast - (loop for x1 in x - for nth upfrom 0 + (loop for nth upfrom 0 below (max (length x) (length y)) + for x1 = (nth nth x) for y1 = (nth nth y) if (or (eql x1 y1) (null y1)) append (list (format nil "~a" x1) ", ") @@ -185,23 +185,27 @@ Otherwise, the scheduled items are relocated to the compiled avm directly. Speci (writes (items-write-to (group-items group) base-graph)) (allocate-p (find :Allocate (group-items group) :key #'node-type)) (no-symbolic-incremental-p t) - (full-scalar-p t) (rank 0)) + (full-scalar-p t) (rank 0) (id2type (make-hash-table))) ;; Ensure there's no symbolic incremental for the auto scheduler. (dolist (node (group-items group)) - (dolist (r (append (relay-reads (read-type-relay node)) (relay-writes (read-type-relay node)))) - (when r - (when (> (buffer-nrank r) 0) - (setf full-scalar-p nil)) - (setf rank (max rank (buffer-nrank r))) - (dolist (v (buffer-views r)) - (when (and v (third v) (symbolp (third v))) ;; v=(upfrom below by broadcast_p) - (setf no-symbolic-incremental-p nil)))))) + (loop for r in (append (node-reads node) (node-writes node)) + for rt in (append (relay-reads (read-type-relay node)) (relay-writes (read-type-relay node))) + do (setf (gethash r id2type) rt) + if rt do + (when (> (buffer-nrank rt) 0) + (setf full-scalar-p nil)) + (setf rank (max rank (buffer-nrank rt))) + (dolist (v (buffer-views rt)) + (when (and v (third v) (symbolp (third v))) ;; v=(upfrom below by broadcast_p) + (setf no-symbolic-incremental-p nil))))) (make-node :GRAPH :Schedule-Item writes reads :name (make-unique-schedule-name group) :jitable (and (every #'jitable-p (group-items group)) (null full-scalar-p)) :allocate-p (when allocate-p t) :auto-schedule-p (and no-symbolic-incremental-p (null full-scalar-p)) :storage-id-dst writes :storage-id-src reads + :read-types (map 'list #'(lambda (x) (gethash x id2type)) reads) + :write-types (map 'list #'(lambda (x) (gethash x id2type)) writes) :reference-counters (map 'list From d38556960267df0961b90acbd5fda43b1962d4ae Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:03:51 +0900 Subject: [PATCH 12/25] update --- source/codegen/memory-planner.lisp | 69 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 9da179b93..93a1dedd2 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -1,5 +1,5 @@ (defpackage :caten/codegen/memory-planner - (:use :cl :caten/air :caten/avm :caten/codegen/shape-inference :caten/codegen/expr) + (:use :cl :caten/air :caten/avm :caten/codegen/shape-inference :caten/codegen/expr :alexandria) (:export #:run-memory-planner)) (in-package :caten/codegen/memory-planner) @@ -83,6 +83,38 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." ;; If :from is specified => the input should not be destructed. t)))) +(defun rewrite-bp-with-newid (item newid) + (dolist (bp (getattr item :blueprint)) + (setf (node-writes bp) (map 'list newid (node-writes bp)) + (node-reads bp) (map 'list newid (node-reads bp))) + (when (eql (node-type bp) :EXPR) + (dolist (item (graph-nodes (expr-graph (getattr bp :EXPR)))) + (when (eql (node-type item) :AREF) + (setf (getattr item :storage-id) (funcall newid (car (node-writes item)))))))) + ;; Remove Duplicated :DEFINE_GLOBAL + (setf (getattr item :blueprint) + (loop with seen = nil + for item in (getattr item :blueprint) + if (or (not (eql (node-type item) :DEFINE-GLOBAL)) + (null (find (car (node-writes item)) seen))) + collect item + if (eql (node-type item) :DEFINE-GLOBAL) + do (push (car (node-writes item)) seen))) + (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) + (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) + (reads (remove-duplicates reads :key (compose newid #'car))) + (writes (remove-duplicates writes :key (compose newid #'car))) + (seen)) + (flet ((only-unseen (items) + (loop for (id . type) in items + if (null (find (funcall newid id) seen)) + do (push (funcall newid id) seen) and collect (cons id type)))) + (multiple-value-bind (writes reads) (values (only-unseen writes) (only-unseen reads)) + (setf (getattr item :storage-id-src) (map 'list (compose newid #'car) reads) + (getattr item :storage-id-dst) (map 'list (compose newid #'car) writes) + (getattr item :read-types) (map 'list #'cdr reads) + (getattr item :write-types) (map 'list #'cdr writes)))))) + (defmethod run-memory-planner-global ((schedule-graph Graph) (symbolics list) (base-graph Graph)) "write_1, write_2 = f(write_suite_1, write_suite_2, *[dynamic_shape + read_buffers]) The goal of run-memory-planner is to reduce the number of :allocate-p object in schedule-graph, by rewriting write_suite1 and write_suite2." @@ -144,8 +176,7 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (flet ((newid (id) (or (gethash id alias-map) id))) (dolist (node (graph-nodes schedule-graph)) (when (getattr node :jitable) - ;; (setf (getattr node :storage-id-dst) (map 'list #'newid (getattr node :storage-id-dst))) - )))))) + (rewrite-bp-with-newid node #'newid))))))) (defun run-memory-planner-local (item schedule-graph symbolics base-graph) "Minimizes the number of allocation buffers that are only used in the item." @@ -204,36 +235,8 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) (flet ((newid (id) (or (gethash id alias-map) id))) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) - (dolist (bp (getattr item :blueprint)) - (setf (node-writes bp) (map 'list #'newid (node-writes bp)) - (node-reads bp) (map 'list #'newid (node-reads bp))) - (when (eql (node-type bp) :EXPR) - (dolist (item (graph-nodes (expr-graph (getattr bp :EXPR)))) - (when (eql (node-type item) :AREF) - (setf (getattr item :storage-id) (newid (car (node-writes item)))))))) - ;; remove duplicated define-global - (setf (getattr item :blueprint) - (loop with seen = nil - for item in (getattr item :blueprint) - if (or (not (eql (node-type item) :DEFINE-GLOBAL)) - (null (find (car (node-writes item)) seen))) - collect item - if (eql (node-type item) :DEFINE-GLOBAL) - do (push (car (node-writes item)) seen))) - (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) - (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) - (reads (remove-duplicates reads :key (alexandria:compose #'newid #'car))) - (writes (remove-duplicates writes :key (alexandria:compose #'newid #'car))) - (seen)) - (flet ((only-unseen (items) - (loop for (id . type) in items - if (null (find (newid id) seen)) - do (push (newid id) seen) and collect (cons id type)))) - (multiple-value-bind (reads writes) (values (only-unseen reads) (only-unseen writes)) - (setf (getattr item :storage-id-src) (map 'list (alexandria:compose #'newid #'car) reads) - (getattr item :storage-id-dst) (map 'list (alexandria:compose #'newid #'car) writes) - (getattr item :read-types) (map 'list #'cdr reads) - (getattr item :write-types) (map 'list #'cdr writes)))))) + (rewrite-bp-with-newid item #'newid) + ) alias-map))) (defun buffer-sizeof (buffer) From 12b617e0c3ecb393e86794c018e00b741be92628 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:14:12 +0900 Subject: [PATCH 13/25] test --- source/codegen/memory-planner.lisp | 84 +++++++++++++++++------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 93a1dedd2..65d15b7d8 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -83,7 +83,7 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." ;; If :from is specified => the input should not be destructed. t)))) -(defun rewrite-bp-with-newid (item newid) +(defun rewrite-bp-with-newid (item newid no-rewrite) (dolist (bp (getattr item :blueprint)) (setf (node-writes bp) (map 'list newid (node-writes bp)) (node-reads bp) (map 'list newid (node-reads bp))) @@ -100,20 +100,21 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." collect item if (eql (node-type item) :DEFINE-GLOBAL) do (push (car (node-writes item)) seen))) - (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) - (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) - (reads (remove-duplicates reads :key (compose newid #'car))) - (writes (remove-duplicates writes :key (compose newid #'car))) - (seen)) - (flet ((only-unseen (items) - (loop for (id . type) in items - if (null (find (funcall newid id) seen)) - do (push (funcall newid id) seen) and collect (cons id type)))) - (multiple-value-bind (writes reads) (values (only-unseen writes) (only-unseen reads)) - (setf (getattr item :storage-id-src) (map 'list (compose newid #'car) reads) - (getattr item :storage-id-dst) (map 'list (compose newid #'car) writes) - (getattr item :read-types) (map 'list #'cdr reads) - (getattr item :write-types) (map 'list #'cdr writes)))))) + (unless no-rewrite + (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) + (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) + (reads (remove-duplicates reads :key (compose newid #'car))) + (writes (remove-duplicates writes :key (compose newid #'car))) + (seen)) + (flet ((only-unseen (items) + (loop for (id . type) in items + if (null (find (funcall newid id) seen)) + do (push (funcall newid id) seen) and collect (cons id type)))) + (multiple-value-bind (writes reads) (values (only-unseen writes) (only-unseen reads)) + (setf (getattr item :storage-id-src) (map 'list (compose newid #'car) reads) + (getattr item :storage-id-dst) (map 'list (compose newid #'car) writes) + (getattr item :read-types) (map 'list #'cdr reads) + (getattr item :write-types) (map 'list #'cdr writes))))))) (defmethod run-memory-planner-global ((schedule-graph Graph) (symbolics list) (base-graph Graph)) "write_1, write_2 = f(write_suite_1, write_suite_2, *[dynamic_shape + read_buffers]) @@ -145,10 +146,10 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in do (setf (gethash val id2type) typ (gethash val trace-table) (list nth) (gethash val lock-table) lock-p))) - (maphash - #'(lambda (key val) - (format t "~a -> ~a~%" key val)) - trace-table) + ;(maphash + ; #'(lambda (key val) + ; (format t "~a -> ~a~%" key val)) + ; trace-table) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) @@ -161,7 +162,7 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in ;; Set the longest time for the output variables (not to destruct it, and users can see the result) (if (find key outputs) total-time - (apply #'max (gethash key trace-table))) + (apply #'max (gethash key trace-table))) ;; the fragment will be available after t+1. :lock (gethash key lock-table)))) ;; Minimize the peak memory usage (solved (greedy-solve-dsa memory-blocks total-time)) @@ -169,14 +170,19 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - (maphash - #'(lambda (key val) - (format t "~a -> ~a~%" key val)) - alias-map) - (flet ((newid (id) (or (gethash id alias-map) id))) + ;(maphash + ; #'(lambda (key val) + ; (format t "~a -> ~a~%" key val)) + ; alias-map) + (labels ((newid (id) + (if (gethash id alias-map) + (if (eql (gethash id alias-map) id) + id + (newid (gethash id alias-map))) + id))) (dolist (node (graph-nodes schedule-graph)) (when (getattr node :jitable) - (rewrite-bp-with-newid node #'newid))))))) + (rewrite-bp-with-newid node #'newid nil))))))) (defun run-memory-planner-local (item schedule-graph symbolics base-graph) "Minimizes the number of allocation buffers that are only used in the item." @@ -202,17 +208,17 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in for typ in (relay-reads (read-type-relay node)) for time = `(,nth ,@(gethash val trace-table)) if (id-is-input-p val base-graph) do (push val outputs) - if (symbolp val) - do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) + if (symbolp val) + do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) (loop for val in (node-writes node) for typ in (relay-writes (read-type-relay node)) if (id-is-input-p val base-graph) do (push val outputs) - if (and (symbolp val) (null (gethash val trace-table))) - ;; ID2Type -> the variable name and its type - ;; TraceTable -> the variable name and timestamps of the variable (when it's used) - ;; LockTable -> Set T to lock (never become in-place) - do (setf (gethash val id2type) typ - (gethash val trace-table) (list nth)))) + if (and (symbolp val) (null (gethash val trace-table))) + ;; ID2Type -> the variable name and its type + ;; TraceTable -> the variable name and timestamps of the variable (when it's used) + ;; LockTable -> Set T to lock (never become in-place) + do (setf (gethash val id2type) typ + (gethash val trace-table) (list nth)))) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) @@ -233,10 +239,14 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - (flet ((newid (id) (or (gethash id alias-map) id))) + (labels ((newid (id) + (if (gethash id alias-map) + (if (eql (gethash id alias-map) id) + id + (newid (gethash id alias-map))) + id))) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) - (rewrite-bp-with-newid item #'newid) - ) + (rewrite-bp-with-newid item #'newid t)) alias-map))) (defun buffer-sizeof (buffer) From 81da7590f4bbd92236603e615343af47d201d077 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:33:32 +0900 Subject: [PATCH 14/25] reimplement memory planner --- source/codegen/memory-planner.lisp | 188 ++++++++++------------------- 1 file changed, 62 insertions(+), 126 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 65d15b7d8..62cf1cfd8 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -83,7 +83,7 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." ;; If :from is specified => the input should not be destructed. t)))) -(defun rewrite-bp-with-newid (item newid no-rewrite) +(defun rewrite-bp-with-newid (item newid) (dolist (bp (getattr item :blueprint)) (setf (node-writes bp) (map 'list newid (node-writes bp)) (node-reads bp) (map 'list newid (node-reads bp))) @@ -100,125 +100,71 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." collect item if (eql (node-type item) :DEFINE-GLOBAL) do (push (car (node-writes item)) seen))) - (unless no-rewrite - (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) - (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) - (reads (remove-duplicates reads :key (compose newid #'car))) - (writes (remove-duplicates writes :key (compose newid #'car))) - (seen)) - (flet ((only-unseen (items) - (loop for (id . type) in items - if (null (find (funcall newid id) seen)) - do (push (funcall newid id) seen) and collect (cons id type)))) - (multiple-value-bind (writes reads) (values (only-unseen writes) (only-unseen reads)) - (setf (getattr item :storage-id-src) (map 'list (compose newid #'car) reads) - (getattr item :storage-id-dst) (map 'list (compose newid #'car) writes) - (getattr item :read-types) (map 'list #'cdr reads) - (getattr item :write-types) (map 'list #'cdr writes))))))) + (let* ((reads (map 'list #'cons (getattr item :storage-id-src) (getattr item :read-types))) + (writes (map 'list #'cons (getattr item :storage-id-dst) (getattr item :write-types))) + (reads (remove-duplicates reads :key (compose newid #'car))) + (writes (remove-duplicates writes :key (compose newid #'car))) + (seen)) + (flet ((only-unseen (items) + (loop for (id . type) in items + if (null (find (funcall newid id) seen)) + do (push (funcall newid id) seen) and collect (cons id type)))) + (multiple-value-bind (writes reads) (values (only-unseen writes) (only-unseen reads)) + (setf (getattr item :storage-id-src) (map 'list (compose newid #'car) reads) + (getattr item :storage-id-dst) (map 'list (compose newid #'car) writes) + (getattr item :read-types) (map 'list #'cdr reads) + (getattr item :write-types) (map 'list #'cdr writes)))))) -(defmethod run-memory-planner-global ((schedule-graph Graph) (symbolics list) (base-graph Graph)) - "write_1, write_2 = f(write_suite_1, write_suite_2, *[dynamic_shape + read_buffers]) -The goal of run-memory-planner is to reduce the number of :allocate-p object in schedule-graph, by rewriting write_suite1 and write_suite2." - (let* ((trace-table (make-hash-table)) - (id2type (make-hash-table)) - (lock-table (make-hash-table)) - (total-time (length (graph-nodes schedule-graph))) - (outputs (append (graph-outputs schedule-graph) symbolics))) - (dolist (o outputs) (setf (gethash o lock-table) t)) - (loop for node in (graph-nodes schedule-graph) - for nth upfrom 0 - for lock-p = (null (getattr node :jitable)) do - (assert (= (length (getattr node :storage-id-src)) (length (getattr node :read-types)))) - (assert (= (length (getattr node :storage-id-dst)) (length (getattr node :write-types)))) - (loop for val in (getattr node :storage-id-src) - for typ in (getattr node :read-types) - for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (push val outputs) - if (symbolp val) - do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) - (loop for val in (getattr node :storage-id-dst) - for typ in (getattr node :write-types) - if (id-is-input-p val base-graph) do (push val outputs) - if (and (symbolp val) (null (gethash val trace-table))) - ;; ID2Type -> the variable name and its type - ;; TraceTable -> the variable name and timestamps of the variable (when it's used) - ;; LockTable -> Set T to lock (never become in-place) - do (setf (gethash val id2type) typ - (gethash val trace-table) (list nth) - (gethash val lock-table) lock-p))) - ;(maphash - ; #'(lambda (key val) - ; (format t "~a -> ~a~%" key val)) - ; trace-table) - (let* ((memory-blocks - (loop for key in (alexandria:hash-table-keys trace-table) - for typ = (gethash key id2type) - collect - ;; [Note] A memory block lives in the range of [min{t}, max{t}) - ;; Plus, If the same task (e.g.: T0(x) -> T1(x) -> T0(x+1)) is scheduled, the memory block lives from 0 to 2. - (make-memoryblock - key typ - (apply #'min (gethash key trace-table)) - ;; Set the longest time for the output variables (not to destruct it, and users can see the result) - (if (find key outputs) - total-time - (apply #'max (gethash key trace-table))) ;; the fragment will be available after t+1. - :lock (gethash key lock-table)))) - ;; Minimize the peak memory usage - (solved (greedy-solve-dsa memory-blocks total-time)) - ;; Retrive the solution. A hash table of OLD_MEMORY_ID -> NEW_MEMORY_ID - (alias-map (make-hash-table))) - (loop for mb in solved - do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - ;(maphash - ; #'(lambda (key val) - ; (format t "~a -> ~a~%" key val)) - ; alias-map) - (labels ((newid (id) - (if (gethash id alias-map) - (if (eql (gethash id alias-map) id) - id - (newid (gethash id alias-map))) - id))) - (dolist (node (graph-nodes schedule-graph)) - (when (getattr node :jitable) - (rewrite-bp-with-newid node #'newid nil))))))) - -(defun run-memory-planner-local (item schedule-graph symbolics base-graph) - "Minimizes the number of allocation buffers that are only used in the item." - (declare (type node item) (type graph schedule-graph)) - (assert (eql (node-type item) :Schedule-Item)) - (let* ((blueprint (getattr item :blueprint)) +(defun apply-memory-planner (schedule-graph symbolics base-graph) + (declare (type graph schedule-graph)) + (let* ((nodes + (loop for node in (graph-nodes schedule-graph) + if (getattr node :jitable) + append (getattr node :blueprint) + else + collect node)) + (total-time (length nodes)) (trace-table (make-hash-table)) (id2type (make-hash-table)) (lock-table (make-hash-table)) - (total-time (length blueprint)) (outputs ;; a list of buffers that do no changed by the memory-planner (append ;; If the output were read by other kernels, it should be optimized by the global memory-planner. (graph-outputs schedule-graph) - symbolics - (loop for node in (graph-nodes schedule-graph) - if (not (eql (node-id node) (node-id item))) - append (node-reads node))))) + symbolics))) (dolist (o outputs) (setf (gethash o lock-table) t)) - (loop for node in blueprint + (loop for node in nodes for nth upfrom 0 - if (not (eql (node-class node) :Render)) do - (loop for val in (node-reads node) - for typ in (relay-reads (read-type-relay node)) - for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (push val outputs) + if (eql (node-type node) :Schedule-Item) do + (loop for val in (getattr node :storage-id-src) + for typ in (getattr node :read-types) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) if (symbolp val) - do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) - (loop for val in (node-writes node) - for typ in (relay-writes (read-type-relay node)) - if (id-is-input-p val base-graph) do (push val outputs) - if (and (symbolp val) (null (gethash val trace-table))) - ;; ID2Type -> the variable name and its type - ;; TraceTable -> the variable name and timestamps of the variable (when it's used) - ;; LockTable -> Set T to lock (never become in-place) - do (setf (gethash val id2type) typ - (gethash val trace-table) (list nth)))) + do (setf (gethash val id2type) typ (gethash val trace-table) time)) + (loop for val in (getattr node :storage-id-dst) + for typ in (getattr node :write-types) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (and (symbolp val) (null (gethash val trace-table))) + do (setf (gethash val id2type) typ) (gethash val trace-table) (list nth)) + if (and + (not (eql (node-type node) :Schedule-Item)) + (not (eql (node-class node) :Render))) do + (loop for val in (node-reads node) + for typ in (relay-reads (read-type-relay node)) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (symbolp val) + do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) + (loop for val in (node-writes node) + for typ in (relay-writes (read-type-relay node)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (and (symbolp val) (null (gethash val trace-table))) + ;; ID2Type -> the variable name and its type + ;; TraceTable -> the variable name and timestamps of the variable (when it's used) + ;; LockTable -> Set T to lock (never become in-place) + do (setf (gethash val id2type) typ + (gethash val trace-table) (list nth)))) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) @@ -239,15 +185,12 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - (labels ((newid (id) - (if (gethash id alias-map) - (if (eql (gethash id alias-map) id) - id - (newid (gethash id alias-map))) - id))) + (flet ((newid (id) (or (gethash id alias-map) id))) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) - (rewrite-bp-with-newid item #'newid t)) - alias-map))) + (dolist (node (graph-nodes schedule-graph)) + (if (getattr node :jitable) + (rewrite-bp-with-newid node #'newid) + nil)))))) (defun buffer-sizeof (buffer) "Returns the size of the buffer in bits" @@ -296,14 +239,7 @@ The goal of run-memory-planner is to reduce the number of :allocate-p object in (let ((static-graph-p (null symbolics))) (multiple-value-bind (before-count before-size) (when (>= (ctx:getenv :JIT_DEBUG) 2) (evaluate schedule-graph static-graph-p)) - ;; First, applying the memory-planner kernel by kernel. - ;; The goal is to reduce the number of arguments in the kernel. - (dolist (item (graph-nodes schedule-graph)) - (when (and (getattr item :jitable) (getattr item :blueprint)) - (run-memory-planner-local item schedule-graph symbolics base-graph))) - ;; Second, applying the memory-planner in the schedule-graph level - ;; The goal here is to reduce the number of :allocate-p object in schedule-graph. - (run-memory-planner-global schedule-graph symbolics base-graph) + (apply-memory-planner schedule-graph symbolics base-graph) (mapc #'remove-extra-node-writes-to (graph-nodes schedule-graph)) (multiple-value-bind (after-count after-size) (when (>= (ctx:getenv :JIT_DEBUG) 2) (evaluate schedule-graph static-graph-p)) From 9adf8c8dc015772c760d755b9e3f8041855f1b75 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:36:34 +0900 Subject: [PATCH 15/25] rewrite all --- source/codegen/memory-planner.lisp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 62cf1cfd8..019659814 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -188,9 +188,7 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (flet ((newid (id) (or (gethash id alias-map) id))) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (node (graph-nodes schedule-graph)) - (if (getattr node :jitable) - (rewrite-bp-with-newid node #'newid) - nil)))))) + (rewrite-bp-with-newid node #'newid)))))) (defun buffer-sizeof (buffer) "Returns the size of the buffer in bits" From 8b2db4adddbffcbb59a9c3c824116f947deb6074 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:43:12 +0900 Subject: [PATCH 16/25] Exploring recursively, (is it ok for the large graph like Transformer?...) --- source/codegen/memory-planner.lisp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 019659814..ea66449a0 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -185,7 +185,18 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - (flet ((newid (id) (or (gethash id alias-map) id))) + (when (>= 4 (ctx:getenv :JIT_DEBUG)) + (format t "[DEBUG] MemoryPlanner: alias-map~%") + (maphash + #'(lambda (k v) + (format t "~a -> ~a~%" k v)) + alias-map)) + (labels ((newid (id) + (if (gethash id alias-map) + (if (eql (gethash id alias-map) id) + id + (newid (gethash id alias-map))) + id))) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (node (graph-nodes schedule-graph)) (rewrite-bp-with-newid node #'newid)))))) From 98bd4c84616b200580c01c4f2ddf84c3e60657ca Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:44:06 +0900 Subject: [PATCH 17/25] cleanup --- source/codegen/memory-planner.lisp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index ea66449a0..8120ebda6 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -185,18 +185,18 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) - (when (>= 4 (ctx:getenv :JIT_DEBUG)) - (format t "[DEBUG] MemoryPlanner: alias-map~%") - (maphash - #'(lambda (k v) - (format t "~a -> ~a~%" k v)) - alias-map)) (labels ((newid (id) (if (gethash id alias-map) (if (eql (gethash id alias-map) id) id (newid (gethash id alias-map))) id))) + (when (>= 4 (ctx:getenv :JIT_DEBUG)) + (format t "[DEBUG] MemoryPlanner: alias-map~%") + (maphash + #'(lambda (k v) + (format t "|~a -> ~a[~a]~%" k (newid k) v)) + alias-map)) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (node (graph-nodes schedule-graph)) (rewrite-bp-with-newid node #'newid)))))) From aacc6763bab562cfb4107f225ad99365fe068c79 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:47:08 +0900 Subject: [PATCH 18/25] added a comment --- source/codegen/memory-planner.lisp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 8120ebda6..21ef39049 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -185,6 +185,8 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (alias-map (make-hash-table))) (loop for mb in solved do (setf (gethash (memoryblock-id mb) alias-map) (or (memoryblock-answer mb) (memoryblock-id mb)))) + ;; Note(hikettei): is this recursively applied? especially for schedule cached and big graph. + ;; As of this writing(2024/11/10), i am unsure if this is correct. Should be tested by GPT2 in the next pr. (labels ((newid (id) (if (gethash id alias-map) (if (eql (gethash id alias-map) id) From 7a40f4ebcaedd0d8f7b9f77d584808bb2d4007c9 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:49:09 +0900 Subject: [PATCH 19/25] typo --- source/codegen/memory-planner.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 21ef39049..2cead17e5 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -193,7 +193,7 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." id (newid (gethash id alias-map))) id))) - (when (>= 4 (ctx:getenv :JIT_DEBUG)) + (when (>= (ctx:getenv :JIT_DEBUG) 4) (format t "[DEBUG] MemoryPlanner: alias-map~%") (maphash #'(lambda (k v) From 7e5bc094e5bf4c59268ac3176dd80dfc1adbef04 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:52:05 +0900 Subject: [PATCH 20/25] push outputs --- source/codegen/memory-planner.lisp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 2cead17e5..c2589114f 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -138,13 +138,13 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (loop for val in (getattr node :storage-id-src) for typ in (getattr node :read-types) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) (loop for val in (getattr node :storage-id-dst) for typ in (getattr node :write-types) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) if (and (symbolp val) (null (gethash val trace-table))) do (setf (gethash val id2type) typ) (gethash val trace-table) (list nth)) if (and @@ -153,12 +153,12 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (loop for val in (node-reads node) for typ in (relay-reads (read-type-relay node)) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) (loop for val in (node-writes node) for typ in (relay-writes (read-type-relay node)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) if (and (symbolp val) (null (gethash val trace-table))) ;; ID2Type -> the variable name and its type ;; TraceTable -> the variable name and timestamps of the variable (when it's used) From 18fe85c3759ba8b2a13543792f72084b81185906 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:54:31 +0900 Subject: [PATCH 21/25] cleanup --- source/codegen/memory-planner.lisp | 63 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index c2589114f..0165eb096 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -131,40 +131,41 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (append ;; If the output were read by other kernels, it should be optimized by the global memory-planner. (graph-outputs schedule-graph) symbolics))) - (dolist (o outputs) (setf (gethash o lock-table) t)) + ;; Creating a timestamp table for each node and variable. (loop for node in nodes for nth upfrom 0 - if (eql (node-type node) :Schedule-Item) do - (loop for val in (getattr node :storage-id-src) - for typ in (getattr node :read-types) - for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) - if (symbolp val) - do (setf (gethash val id2type) typ (gethash val trace-table) time)) - (loop for val in (getattr node :storage-id-dst) - for typ in (getattr node :write-types) - for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) - if (and (symbolp val) (null (gethash val trace-table))) - do (setf (gethash val id2type) typ) (gethash val trace-table) (list nth)) + if (eql (node-type node) :Schedule-Item) ; Optimization for non-jitable instructions (like: foreign kernel calls, allocation, pause/backward) + do (loop for val in (getattr node :storage-id-src) + for typ in (getattr node :read-types) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (symbolp val) + do (setf (gethash val id2type) typ (gethash val trace-table) time)) + (loop for val in (getattr node :storage-id-dst) + for typ in (getattr node :write-types) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (and (symbolp val) (null (gethash val trace-table))) + do (setf (gethash val id2type) typ) (gethash val trace-table) (list nth)) if (and - (not (eql (node-type node) :Schedule-Item)) - (not (eql (node-class node) :Render))) do - (loop for val in (node-reads node) - for typ in (relay-reads (read-type-relay node)) - for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) - if (symbolp val) - do (setf (gethash val id2type) typ (gethash val trace-table) time)) ;; (incf consume) - (loop for val in (node-writes node) - for typ in (relay-writes (read-type-relay node)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) - if (and (symbolp val) (null (gethash val trace-table))) - ;; ID2Type -> the variable name and its type - ;; TraceTable -> the variable name and timestamps of the variable (when it's used) - ;; LockTable -> Set T to lock (never become in-place) - do (setf (gethash val id2type) typ - (gethash val trace-table) (list nth)))) + (not (eql (node-type node) :Schedule-Item)) ; For jitable and lowered instructions + (not (eql (node-class node) :Render))) + do (loop for val in (node-reads node) + for typ in (relay-reads (read-type-relay node)) + for time = `(,nth ,@(gethash val trace-table)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (symbolp val) + do (setf (gethash val id2type) typ (gethash val trace-table) time)) + (loop for val in (node-writes node) + for typ in (relay-writes (read-type-relay node)) + if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (and (symbolp val) (null (gethash val trace-table))) + ;; ID2Type -> the variable name and its type + ;; TraceTable -> the variable name and timestamps of the variable (when it's used) + ;; LockTable -> Set T to lock (never become in-place) + do (setf (gethash val id2type) typ + (gethash val trace-table) (list nth)))) + (dolist (o outputs) (setf (gethash o lock-table) t)) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) From f7ed6b63d778b8db62b21a04cda2798b869d5d60 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 18:59:19 +0900 Subject: [PATCH 22/25] readable --- source/codegen/memory-planner.lisp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 0165eb096..748415ad0 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -80,10 +80,11 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (let ((node (id->value graph id))) (when (and node (eql (node-type node) :Allocate)) (when (getattr node :from) - ;; If :from is specified => the input should not be destructed. + ;; Memory Planner is not allowed to destruct the input. (like: having a weight/parameter) t)))) (defun rewrite-bp-with-newid (item newid) + "Rewrites the given schedule item with newid" (dolist (bp (getattr item :blueprint)) (setf (node-writes bp) (map 'list newid (node-writes bp)) (node-reads bp) (map 'list newid (node-reads bp))) @@ -195,17 +196,17 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (newid (gethash id alias-map))) id))) (when (>= (ctx:getenv :JIT_DEBUG) 4) - (format t "[DEBUG] MemoryPlanner: alias-map~%") + (format t "[DEBUG] MemoryPlanner: minimized alias-map~%") (maphash #'(lambda (k v) - (format t "|~a -> ~a[~a]~%" k (newid k) v)) + (format t " | newid(~a) = ~a, alias-map[~a] = ~a~%" k (newid k) k v)) alias-map)) (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (node (graph-nodes schedule-graph)) (rewrite-bp-with-newid node #'newid)))))) (defun buffer-sizeof (buffer) - "Returns the size of the buffer in bits" + "Computes the size of the buffer in bits." (assert (every #'numberp (buffer-shape buffer))) (* (apply #'* (buffer-shape buffer)) (caten/common.dtype:dtype/size-of (buffer-dtype buffer)))) From 9ad7faeccc16ff0990768c0bad3de4293d6cca3e Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 19:03:16 +0900 Subject: [PATCH 23/25] cmt --- source/codegen/memory-planner.lisp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index 748415ad0..c76ef4920 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -1,4 +1,6 @@ (defpackage :caten/codegen/memory-planner + (:documentation "`Memory Planner` is a data structure that abstracts the allocation and freeing of memory over time. +Its purpose is to optimize memory allocation by overlapping allocations to minimize the maximum memory (heap_size) required for all the time `t`.") (:use :cl :caten/air :caten/avm :caten/codegen/shape-inference :caten/codegen/expr :alexandria) (:export #:run-memory-planner)) From 71e8dae149c789f871c5a0eaa9486d934e7842f4 Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 19:07:00 +0900 Subject: [PATCH 24/25] Fix: Lock and free for outputs --- source/codegen/memory-planner.lisp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index c76ef4920..f777fd826 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -134,6 +134,7 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." (append ;; If the output were read by other kernels, it should be optimized by the global memory-planner. (graph-outputs schedule-graph) symbolics))) + (dolist (s symbolics) (setf (gethash s lock-table) t)) ;; Creating a timestamp table for each node and variable. (loop for node in nodes for nth upfrom 0 @@ -141,13 +142,13 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." do (loop for val in (getattr node :storage-id-src) for typ in (getattr node :read-types) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (id-is-input-p val base-graph) do (push val outputs) if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) (loop for val in (getattr node :storage-id-dst) for typ in (getattr node :write-types) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (id-is-input-p val base-graph) do (push val outputs) if (and (symbolp val) (null (gethash val trace-table))) do (setf (gethash val id2type) typ) (gethash val trace-table) (list nth)) if (and @@ -156,19 +157,18 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." do (loop for val in (node-reads node) for typ in (relay-reads (read-type-relay node)) for time = `(,nth ,@(gethash val trace-table)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (id-is-input-p val base-graph) do (push val outputs) if (symbolp val) do (setf (gethash val id2type) typ (gethash val trace-table) time)) (loop for val in (node-writes node) for typ in (relay-writes (read-type-relay node)) - if (id-is-input-p val base-graph) do (setf (gethash val lock-table) t) (push val outputs) + if (id-is-input-p val base-graph) do (push val outputs) if (and (symbolp val) (null (gethash val trace-table))) ;; ID2Type -> the variable name and its type ;; TraceTable -> the variable name and timestamps of the variable (when it's used) ;; LockTable -> Set T to lock (never become in-place) do (setf (gethash val id2type) typ (gethash val trace-table) (list nth)))) - (dolist (o outputs) (setf (gethash o lock-table) t)) (let* ((memory-blocks (loop for key in (alexandria:hash-table-keys trace-table) for typ = (gethash key id2type) @@ -203,7 +203,6 @@ MemoryBlock(id) is allocated when t=create, preserved until t become `release`." #'(lambda (k v) (format t " | newid(~a) = ~a, alias-map[~a] = ~a~%" k (newid k) k v)) alias-map)) - (assert (equal outputs (map 'list #'newid outputs)) () "memory-planner: the value of constants are immutable. ~a -> ~a" outputs (map 'list #'newid outputs)) (dolist (node (graph-nodes schedule-graph)) (rewrite-bp-with-newid node #'newid)))))) From 72399b9a729c4924a4a27f842c4db10b67014e6e Mon Sep 17 00:00:00 2001 From: hikettei Date: Sun, 10 Nov 2024 19:10:29 +0900 Subject: [PATCH 25/25] typo --- source/codegen/memory-planner.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/codegen/memory-planner.lisp b/source/codegen/memory-planner.lisp index f777fd826..e0bd205fb 100644 --- a/source/codegen/memory-planner.lisp +++ b/source/codegen/memory-planner.lisp @@ -1,6 +1,6 @@ (defpackage :caten/codegen/memory-planner (:documentation "`Memory Planner` is a data structure that abstracts the allocation and freeing of memory over time. -Its purpose is to optimize memory allocation by overlapping allocations to minimize the maximum memory (heap_size) required for all the time `t`.") +It is responsible for optimizing memory allocation by overlapping allocation to minimize the maximum memory usage (heap_size) required for all the time `t`.") (:use :cl :caten/air :caten/avm :caten/codegen/shape-inference :caten/codegen/expr :alexandria) (:export #:run-memory-planner))