Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ggml: new gpu kernels + extends ggml_leaky_relu + ggml_pad #621

Merged
merged 32 commits
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1a9f93a
add new cuda kernels and new op ggml_pad
FSSRepo Nov 26, 2023
137239c
add ggml_tanh cuda kernel
FSSRepo Nov 27, 2023
3921196
Merge branch 'master' of https://github.com/FSSRepo/ggml into sync-ma…
FSSRepo Dec 7, 2023
c6676aa
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 7, 2023
3cf37e6
remove old broadcast impl
FSSRepo Dec 7, 2023
566c6ac
restore some changes
FSSRepo Dec 7, 2023
a10e406
cuda: optimized im2col + group_norm kernels
FSSRepo Dec 7, 2023
1e5cdfb
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 7, 2023
bf6321d
extend ggml_leaky -> ggml_leaky_relu
FSSRepo Dec 7, 2023
b7e07dc
Merge branch 'sync-master' of https://github.com/FSSRepo/ggml into sy…
FSSRepo Dec 7, 2023
d5ca625
fix some code issues
FSSRepo Dec 7, 2023
1b6a52d
cuda: concat support 4 dims
FSSRepo Dec 7, 2023
09706d2
cuda: fix ggml_acc + add backends ops test
FSSRepo Dec 7, 2023
0f8bf26
restore ggml_pad + add backend op test
FSSRepo Dec 7, 2023
b5de391
metal : implement GGML_OP_ACC
ggerganov Dec 8, 2023
6303f0d
ggml : fix bug in ggml_upscale
ggerganov Dec 8, 2023
473f254
metal : add ggml_upscale
ggerganov Dec 8, 2023
6cae037
metal : add ggml_tanh
ggerganov Dec 8, 2023
69e719e
metal : add ggml_gelu_quick
ggerganov Dec 8, 2023
14d71dd
ggml : make ggml_pad more general purpose
ggerganov Dec 8, 2023
14a3445
metal : add ggml_pad
ggerganov Dec 8, 2023
b6f0c35
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 8, 2023
ca48db5
ggml_leaky_relu as regular op + fix indentation
FSSRepo Dec 8, 2023
28893bb
cuda: ggml_acc admits all op_params
FSSRepo Dec 8, 2023
b3e6e66
negative_slope better pass param
FSSRepo Dec 8, 2023
cbe125b
metal : add ggml_leaky_relu
ggerganov Dec 10, 2023
d0641c6
metal : add ggml_group_norm
ggerganov Dec 10, 2023
ad5d579
cuda : minor
slaren Dec 10, 2023
b9a77fa
ggml : add GGML_OP_LEAKY_RELU to ggml_compute_backward
slaren Dec 10, 2023
1914017
metal : soft max, tanh, supports_op fixes
slaren Dec 12, 2023
020b5ef
test-backend-ops : add sentinels between tensors to detect overflows
slaren Dec 12, 2023
f91e484
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ __pycache__/

# Model files
ggml-model-f16.bin
*.bat
2 changes: 1 addition & 1 deletion examples/yolo/yolov3-tiny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const
}
result = ggml_add(ctx, result, ggml_repeat(ctx, layer.biases, result));
if (layer.activate) {
result = ggml_leaky(ctx, result);
result = ggml_leaky_relu(ctx, result, 0.1f, true);
}
return result;
}
Expand Down
20 changes: 16 additions & 4 deletions include/ggml/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,9 @@ extern "C" {
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_ARGSORT,
GGML_OP_LEAKY_RELU,

GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
Expand Down Expand Up @@ -463,7 +465,6 @@ extern "C" {
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_SILU,
GGML_UNARY_OP_LEAKY,

GGML_UNARY_OP_COUNT,
};
Expand Down Expand Up @@ -793,6 +794,9 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);

// dst = a
// view(dst, nb1, nb2, nb3, offset) += b
// return dst
GGML_API struct ggml_tensor * ggml_acc(
struct ggml_context * ctx,
struct ggml_tensor * a,
Expand Down Expand Up @@ -957,15 +961,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_leaky(
GGML_API struct ggml_tensor * ggml_leaky_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
struct ggml_tensor * a, float negative_slope, bool inplace);

GGML_API struct ggml_tensor * ggml_relu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

// TODO: double-check this computation is correct
GGML_API struct ggml_tensor * ggml_gelu(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand Down Expand Up @@ -1549,6 +1552,15 @@ extern "C" {
struct ggml_tensor * a,
int scale_factor);

// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
GGML_API struct ggml_tensor * ggml_pad(
struct ggml_context * ctx,
struct ggml_tensor * a,
int p0,
int p1,
int p2,
int p3);

// sort rows
enum ggml_sort_order {
GGML_SORT_ASC,
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ if (GGML_CUBLAS)
endif()

# required for dynamic parallelism
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
# set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

if (GGML_STATIC)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
Expand Down
Loading