Skip to content

Commit

Permalink
sort r-c-vs
Browse files Browse the repository at this point in the history
  • Loading branch information
behrica committed Oct 15, 2024
1 parent 898f92c commit 87d6243
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 9 deletions.
6 changes: 3 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
"dockerfile": "Dockerfile",
"args": {
"BASE_IMAGE": "temurin-21-tools-deps-jammy",
"USERNAME": "${localEnv:USER}"
"USERNAME": "${localEnv:USER:vscode}"
}
},
"remoteUser": "${localEnv:USER}",
"containerUser": "${localEnv:USER}",
"remoteUser": "${localEnv:USER:vscode}",
"containerUser": "${localEnv:USER:vscode}",
"features": {
"ghcr.io/devcontainers/features/git:1": {}
},
Expand Down
11 changes: 8 additions & 3 deletions src/scicloj/ml/xgboost/csr.clj
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
;; re-implements https://blog.newtum.com/sparse-matrix-in-java/
;; maybe see here, not sure it is the same: https://github.com/scipy/scipy/blob/v1.14.1/scipy/sparse/_csr.py
(ns scicloj.ml.xgboost.csr
(:require
[ tech.v3.datatype :as dt]
Expand All @@ -17,15 +19,18 @@
:row-pointers new-row-pointers})))

(defn ->csr [r-c-vs]
;; data gets sorted by row and column
;; not sure if this is a good idea for performance?

(->
(reduce

(fn [csr [row col value]]
(add-to-csr csr row col value))
{:values (dt/make-list :float)
:column-indices (dt/make-list :int)
:row-pointers (dt/make-list :long [0])}
r-c-vs)
:row-pointers (dt/make-list :long [0])}
(sort-by (juxt first second)
r-c-vs))

(#(assoc % :row-pointers (conj (:row-pointers %)
(count (:values %)))))))
Expand Down
109 changes: 106 additions & 3 deletions test/scicloj/ml/xgboost/csr_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,91 @@
[tech.v3.tensor :as t]))


;; scipy
;;1 0 2 0
;;4 0 0 3
;;3 1 2 0
;;csr=csr_matrix (np.array ([[1,0,2,0],[4,0,0,3],[3,1,2,0]]))

;;>>> csr.data , same as dmatrix.data
;;array ([1, 2, 4, 3, 3, 1, 2])

;; >>> csr.indices, same as dmatrix.colIndex
;;array ([0, 2, 0, 3, 0, 1, 2], dtype=int32)

;;>>> csr.indptr, same as dmatrix.rowHeaders
;;array ([0, 2, 4, 7], dtype=int32)


(deftest ->csr
;; 3. 1. 2.
;;>>> csr=coo_array (([5,8,3,6],([0,1,2,3],[0,1,2,1])),shape= (4,4)) .tocsr ()
;;>>> csr.data
;;array ([5, 8, 3, 6])
;;>>> csr.indices
;;array ([0, 1, 2, 1])
;;>>> csr.indptr
;;array ([0, 1, 2, 3, 4])

(is (=
{:values [5.0 8.0 3.0 6.0], :column-indices [0 1 2 1], :row-pointers [0 1 2 3 4]}
(csr/->csr
;; row,col,value
[[0 0 5]
[1 1 8]
[2 2 3]
[4 1 6]]))))
[3 1 6]]))))


(deftest ->csr-2
  ;; Uses the example matrix from https://en.wikipedia.org/wiki/Sparse_matrix
  ;;
  ;; Reference result computed with scipy:
  ;;   >>> csr = coo_array(([10,20,30,40,50,60,70,80],
  ;;   ...                  ([0,0,1,1,2,2,2,3],[0,1,1,3,2,3,4,5])),
  ;;   ...                 shape=(4,6)).tocsr()
  ;;   >>> csr.todense()
  ;;   array([[10, 20,  0,  0,  0,  0],
  ;;          [ 0, 30,  0, 40,  0,  0],
  ;;          [ 0,  0, 50, 60, 70,  0],
  ;;          [ 0,  0,  0,  0,  0, 80]])
  ;;   >>> csr.data
  ;;   array([10, 20, 30, 40, 50, 60, 70, 80])
  ;;   >>> csr.indices
  ;;   array([0, 1, 1, 3, 2, 3, 4, 5])
  ;;   >>> csr.indptr
  ;;   array([0, 2, 4, 7, 8])
  (let [;; [row col value] triples, already ordered by row then column
        triples  [[0 0 10.0]
                  [0 1 20.0]
                  [1 1 30.0]
                  [1 3 40.0]
                  [2 2 50.0]
                  [2 3 60.0]
                  [2 4 70.0]
                  [3 5 80.0]]
        expected {:values         [10.0 20.0 30.0 40.0 50.0 60.0 70.0 80.0]
                  :column-indices [0 1 1 3 2 3 4 5]
                  :row-pointers   [0 2 4 7 8]}]
    (is (= expected (csr/->csr triples)))))
;;=>


(deftest unsorted
  ;; The triples are shuffled before being handed to csr/->csr; the assertion
  ;; checks that the resulting CSR map is the same as for the ordered input,
  ;; i.e. that the result does not depend on the order of the [row col value]
  ;; triples. No (row, col) pair repeats, so sorting by row then column yields
  ;; exactly one ordering and the expected value is deterministic despite the
  ;; random shuffle.
  (is (= {:values         [10.0 20.0 30.0 40.0 50.0 60.0 70.0 80.0]
          :column-indices [0 1 1 3 2 3 4 5]
          :row-pointers   [0 2 4 7 8]}
         (csr/->csr
          (shuffle
           [[0 0 10.0]
            [0 1 20.0]
            [1 1 30.0]
            [1 3 40.0]
            [2 2 50.0]
            [2 3 60.0]
            [2 4 70.0]
            [3 5 80.0]])))))


(deftest ->dense
(is (=
Expand Down Expand Up @@ -51,6 +128,32 @@
4 4)))))


;; Scratch expression at top level (not inside a deftest or comment form):
;; builds a CSR from four [row col value] triples, expands it with csr/->dense
;; using the arguments 4 4, and wraps the result in a tensor. The value is
;; discarded; the recorded REPL output follows below.
;; NOTE(review): the last triple uses row index 4. If the `4 4` arguments are
;; rows and columns, valid row indices are 0-3 — confirm whether this should
;; be [3 1 6].
(t/->tensor
(csr/->dense
(csr/->csr
[[0 0 5]
[1 1 8]
[2 2 3]
[4 1 6]])
4 4))
;;=> #tech.v3.tensor<object>[4 4]
;; [[5.000 0 0 0]
;; [ 0 8.000 0 0]
;; [ 0 0 3.000 0]
;; [ 0 6.000 0 0]]


(comment
  ;; Scratch expression for the REPL: the same kind of [row col value] triples
  ;; as in the tests, but listed out of row order ([0 0 5] comes last).
  ;; Kept inside `comment` so it is not evaluated when the test namespace loads.
  ;; NOTE(review): the triple [4 1 6] uses row index 4 while ->dense is called
  ;; with 4 4 — confirm whether [3 1 6] was intended.
  (t/->tensor
   (csr/->dense
    (csr/->csr
     [[1 1 8]
      [2 2 3]
      [4 1 6]
      [0 0 5]])
    4 4)))
;;=> #tech.v3.tensor<object>[4 4]
;; [[ 0 0 0 0]
;; [ 0 8.000 0 0]
;; [ 0 0 3.000 0]
;; [5.000 6.000 0 0]]

0 comments on commit 87d6243

Please sign in to comment.