Skip to content

Commit

Permalink
v0.0.8 (#2)
Browse files Browse the repository at this point in the history
* Start updating to recent ggml-sys-bleedingedge.

* Continue half-hearted update to current GGML.
  • Loading branch information
KerfuffleV2 authored Sep 2, 2023
1 parent b11fbc9 commit 6f18440
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 46 deletions.
7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty-ggml"
version = "0.0.7"
version = "0.0.8"
description = "Idiomatic bindings for the GGML library (pre-alpha)"
repository = "https://github.com/KerfuffleV2/rusty-ggml"
keywords = ["deep-learning", "machine-learning", "tensor", "ggml", "ml"]
Expand All @@ -13,14 +13,15 @@ default = ["ggml-sys-bleedingedge/use_cmake"]
no_k_quants = ["ggml-sys-bleedingedge/no_k_quants"]
no_accelerate = ["ggml-sys-bleedingedge/no_accelerate"]
cublas = ["ggml-sys-bleedingedge/cublas"]
hipblas = ["ggml-sys-bleedingedge/hipblas"]
clblast = ["ggml-sys-bleedingedge/clblast"]
openblas = ["ggml-sys-bleedingedge/openblas"]
metal = ["ggml-sys-bleedingedge/metal"]

[dependencies]
ggml-sys-bleedingedge = "=2306220059.0.0"
ggml-sys-bleedingedge = "=2309021811.0.0"
anyhow = "1"
thiserror = "1"
num-traits = "0.2"
num-derive="0.3"
num-derive="0.4"
bytemuck = { version = "1", features = ["extern_crate_alloc"] }
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ See:

Not suitable for general use. Consider this to be pre-alpha code.

Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/600718ebee029aa684c4a6abbe035d21283a446c/smolrwkv/src/ggml/graph.rs
**`v0.0.8` Warning**: This crate has lagged behind recent GGML changes. It compiles and seems to work, but there might be weird stuff I haven't caught.

**Note**: There are special considerations when using GPU features like `cublas` and `hipblas`. See the `ggml-sys-bleedingedge` repo or crate documentation for more information.

Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/189915ec68b28d057b440f75803d3d056e150733/smolrwkv/src/ggml/graph.rs

## Related

For your token sampling needs see https://github.com/KerfuffleV2/llm-samplers ( https://crates.io/crates/llm-samplers )
19 changes: 12 additions & 7 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ pub(crate) struct IContext {
pub(crate) failed: Option<Arc<anyhow::Error>>,
}

// FIXME: YOLO? It's an internal struct and only lives in an Arc.
// NOTE(review): this manually asserts that `IContext` may be moved to another
// thread. Nothing visible at this site proves that is sound; confirm how the
// fields of `IContext` are accessed across threads before relying on it.
unsafe impl Send for IContext {}

impl Drop for IContext {
// Since `IContext` lives inside an `Arc` this will only happen
// when the very last instance of the `Arc` is dropped.
Expand Down Expand Up @@ -405,8 +408,9 @@ impl GContext {
/// Runs the supplied graph using this context.
pub fn compute(&self, graph: &mut GGraph) -> Result<()> {
ensure!(!self.no_alloc, GContextError::NoAlloc);
let n_threads = graph.n_threads;
self.with_icontext_infallible(|ictx| unsafe {
gg::ggml_graph_compute(ictx.gptr(), &mut graph.0)
gg::ggml_graph_compute_with_ctx(ictx.gptr(), &mut graph.graph, n_threads as i32)
})
}

Expand All @@ -416,15 +420,16 @@ impl GContext {
}
}

#[repr(transparent)]
pub struct GGraph(gg::ggml_cgraph);
pub struct GGraph {
n_threads: usize,
graph: gg::ggml_cgraph,
}

impl GGraph {
/// Create a new computation graph with the specified number of threads.
pub fn new(n_threads: usize) -> Self {
let mut graph = unsafe { std::mem::zeroed::<gg::ggml_cgraph>() };
graph.n_threads = n_threads as i32;
Self(graph)
let graph = unsafe { std::mem::zeroed::<gg::ggml_cgraph>() };
Self { n_threads, graph }
}

/// Register a tensor to be processed when the graph is computed.
Expand All @@ -439,7 +444,7 @@ impl GGraph {
tensor
.as_ref()
.with_tensor_infallible(|_ctx, _ictx, tptr| unsafe {
gg::ggml_build_forward_expand(&mut self.0, tptr)
gg::ggml_build_forward_expand(&mut self.graph, tptr)
})
}
}
18 changes: 5 additions & 13 deletions src/gtensor/binary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,12 @@ where
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn conv_1d<
const STRIDE: usize,
const RDIMS: usize,
const ODIMS: usize,
T: AsRef<GTensor<RDIMS>>,
>(
pub fn conv_1d<const RDIMS: usize, const ODIMS: usize, T: AsRef<GTensor<RDIMS>>>(
&self,
rhs: T,
s0: usize,
p0: usize,
d0: usize,
) -> Self
where
Dim<RDIMS>: DimValid,
Expand All @@ -220,8 +218,6 @@ where
DimPair<DIMS, 4>: DimLt,
DimPair<RDIMS, 2>: DimGtE,
DimPair<ODIMS, 2>: DimEq,
DimPair<STRIDE, 1>: DimGtE,
DimPair<STRIDE, 3>: DimLt,
{
let rmd = rhs.as_ref().md.clone();
self.new_binary(rhs, |ctx, ictx, ltptr, rtptr| {
Expand All @@ -234,11 +230,7 @@ where
let mr = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::F32, shp)
.fit_or_die()?;
Ok((mr, unsafe {
if STRIDE == 1 {
gg::ggml_conv_1d_s1_ph(ictx.gptr(), ltptr, rtptr)
} else {
gg::ggml_conv_1d_s2_ph(ictx.gptr(), ltptr, rtptr)
}
gg::ggml_conv_1d(ictx.gptr(), ltptr, rtptr, s0 as i32, p0 as i32, d0 as i32)
}))
})
}
Expand Down
9 changes: 8 additions & 1 deletion src/gtensor/matmul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,14 @@ macro_rules! mk_gmulmatinstances {
GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, shp)
.fit_or_die()?;
unsafe {
Ok((mr, gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr)))
let t = gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr);
// FIXME: Horrible hack to pretend mul_mat has the old non-broadcasting behavior.
let real_dims = (*t).ne.iter().take_while(|i| **i != 1).collect::<Vec<_>>().len();
if real_dims != $o {
Err(GTensorError::InvalidOperation)?;
}
(*t).n_dims = $o;
Ok((mr, t))
}
})
}
Expand Down
6 changes: 5 additions & 1 deletion src/gtensor/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ where
pub(crate) fn from_ptr(tp: NonNull<gg::ggml_tensor>) -> Self {
let (tr, tp) = (unsafe { tp.as_ref() }, tp.as_ptr());
let (op, typ, shape) = {
assert_eq!(DIMS, tr.n_dims as usize, "Unexpected number of dimensions!");
assert_eq!(
DIMS, tr.n_dims as usize,
"Unexpected number of dimensions {:?}!",
tr.ne
);
let mut shp = [0; DIMS];
shp.iter_mut()
.zip(tr.ne[0..DIMS].iter())
Expand Down
96 changes: 76 additions & 20 deletions src/gtensor/unary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,6 @@ where
/// ```
[neg, ggml_neg],

/// Perform LayerNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::rms_norm] function.
[rms_norm, ggml_rms_norm],

/// Perform RMSNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.rms_norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::norm] function.
[norm, ggml_norm],

/// Elementwise step operation on tensor `A`.
/// Returns a new tensor.
///
Expand Down Expand Up @@ -229,6 +211,38 @@ where
[soft_max, ggml_soft_max],
}

/// Apply the LayerNorm operation to tensor `A`.
/// Returns a new tensor.
///
/// `eps` is forwarded unchanged to `ggml_norm`.
///
/// `a.norm(eps)`
///
/// For more information and a comparison with the [GTensor::rms_norm] function, see
/// [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md).
pub fn norm(&self, eps: f32) -> Self {
    self.new_unary(|ctx, ictx, tptr| {
        // The memory request is estimated from this tensor's own type and shape.
        let request = GMemoryRequest::estimate_tensor_request_ictx(
            ctx,
            ictx,
            self.md.typ,
            self.md.shape,
        )
        .fit_or_die()?;
        // NOTE(review): assumes the context pointer and `tptr` handed to this
        // closure are valid for the duration of the FFI call; confirm in `new_unary`.
        let normed = unsafe { gg::ggml_norm(ictx.gptr(), tptr, eps) };
        Ok((request, normed))
    })
}

/// Apply the RMSNorm operation to tensor `A`.
/// Returns a new tensor.
///
/// `eps` is forwarded unchanged to `ggml_rms_norm`.
///
/// `a.rms_norm(eps)`
///
/// For more information and a comparison with the [GTensor::norm] function, see
/// [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md).
pub fn rms_norm(&self, eps: f32) -> Self {
    self.new_unary(|ctx, ictx, tptr| {
        // The memory request is estimated from this tensor's own type and shape.
        let request = GMemoryRequest::estimate_tensor_request_ictx(
            ctx,
            ictx,
            self.md.typ,
            self.md.shape,
        )
        .fit_or_die()?;
        // NOTE(review): assumes the context pointer and `tptr` handed to this
        // closure are valid for the duration of the FFI call; confirm in `new_unary`.
        let normed = unsafe { gg::ggml_rms_norm(ictx.gptr(), tptr, eps) };
        Ok((request, normed))
    })
}

/// Elementwise `mean` of tensor `A`.
/// Returns a new tensor.
///
Expand Down Expand Up @@ -384,7 +398,40 @@ where
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn rope(self, n_past: usize, n_dims: usize, mode: usize) -> Self {
pub fn rope(self, n_past: usize, n_dims: usize, mode: usize, n_ctx: usize) -> Self {
self.new_unary(|ctx, ictx, tptr| {
// Creates a view plus a i32 tensor with three items.
let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []);
let mr2 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::I32, [3]);
let mr = (mr1 + mr2).fit_or_die()?;
unsafe {
Ok((
mr,
gg::ggml_rope(
ictx.gptr(),
tptr,
n_past as i32,
n_dims as i32,
mode as i32,
n_ctx as i32,
),
))
}
})
}

/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn rope_custom(
self,
n_past: usize,
n_dims: usize,
mode: usize,
n_ctx: usize,
freq_base: f32,
freq_scale: f32,
) -> Self {
self.new_unary(|ctx, ictx, tptr| {
// Creates a view plus a i32 tensor with three items.
let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []);
Expand All @@ -393,7 +440,16 @@ where
unsafe {
Ok((
mr,
gg::ggml_rope(ictx.gptr(), tptr, n_past as i32, n_dims as i32, mode as i32),
gg::ggml_rope_custom(
ictx.gptr(),
tptr,
n_past as i32,
n_dims as i32,
mode as i32,
n_ctx as i32,
freq_base,
freq_scale,
),
))
}
})
Expand Down

0 comments on commit 6f18440

Please sign in to comment.