Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ggml: new gpu kernels + extends ggml_leaky_relu + ggml_pad #621

Merged
merged 32 commits
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1a9f93a
add new cuda kernels and new op ggml_pad
FSSRepo Nov 26, 2023
137239c
add ggml_tanh cuda kernel
FSSRepo Nov 27, 2023
3921196
Merge branch 'master' of https://github.com/FSSRepo/ggml into sync-ma…
FSSRepo Dec 7, 2023
c6676aa
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 7, 2023
3cf37e6
remove old broadcast impl
FSSRepo Dec 7, 2023
566c6ac
restore some changes
FSSRepo Dec 7, 2023
a10e406
cuda: optimized im2col + group_norm kernels
FSSRepo Dec 7, 2023
1e5cdfb
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 7, 2023
bf6321d
extend ggml_leaky -> ggml_leaky_relu
FSSRepo Dec 7, 2023
b7e07dc
Merge branch 'sync-master' of https://github.com/FSSRepo/ggml into sy…
FSSRepo Dec 7, 2023
d5ca625
fix some code issues
FSSRepo Dec 7, 2023
1b6a52d
cuda: concat support 4 dims
FSSRepo Dec 7, 2023
09706d2
cuda: fix ggml_acc + add backends ops test
FSSRepo Dec 7, 2023
0f8bf26
restore ggml_pad + add backend op test
FSSRepo Dec 7, 2023
b5de391
metal : implement GGML_OP_ACC
ggerganov Dec 8, 2023
6303f0d
ggml : fix bug in ggml_upscale
ggerganov Dec 8, 2023
473f254
metal : add ggml_upscale
ggerganov Dec 8, 2023
6cae037
metal : add ggml_tanh
ggerganov Dec 8, 2023
69e719e
metal : add ggml_gelu_quick
ggerganov Dec 8, 2023
14d71dd
ggml : make ggml_pad more general purpose
ggerganov Dec 8, 2023
14a3445
metal : add ggml_pad
ggerganov Dec 8, 2023
b6f0c35
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 8, 2023
ca48db5
ggml_leaky_relu as regular op + fix indentation
FSSRepo Dec 8, 2023
28893bb
cuda: ggml_acc admits all op_params
FSSRepo Dec 8, 2023
b3e6e66
negative_slope better pass param
FSSRepo Dec 8, 2023
cbe125b
metal : add ggml_leaky_relu
ggerganov Dec 10, 2023
d0641c6
metal : add ggml_group_norm
ggerganov Dec 10, 2023
ad5d579
cuda : minor
slaren Dec 10, 2023
b9a77fa
ggml : add GGML_OP_LEAKY_RELU to ggml_compute_backward
slaren Dec 10, 2023
1914017
metal : soft max, tanh, supports_op fixes
slaren Dec 12, 2023
020b5ef
test-backend-ops : add sentinels between tensors to detect overflows
slaren Dec 12, 2023
f91e484
Merge branch 'ggerganov:master' into sync-master
FSSRepo Dec 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ __pycache__/

# Model files
ggml-model-f16.bin
*.bat
2 changes: 1 addition & 1 deletion examples/yolo/yolov3-tiny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const
}
result = ggml_add(ctx, result, ggml_repeat(ctx, layer.biases, result));
if (layer.activate) {
result = ggml_leaky(ctx, result);
result = ggml_leaky_relu(ctx, result, 0.1f, true);
}
return result;
}
Expand Down
20 changes: 16 additions & 4 deletions include/ggml/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,9 @@ extern "C" {
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_ARGSORT,
GGML_OP_LEAKY_RELU,

GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
Expand Down Expand Up @@ -463,7 +465,6 @@ extern "C" {
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_SILU,
GGML_UNARY_OP_LEAKY,

GGML_UNARY_OP_COUNT,
};
Expand Down Expand Up @@ -793,6 +794,9 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);

// dst = a
// view(dst, nb1, nb2, nb3, offset) += b
// return dst
GGML_API struct ggml_tensor * ggml_acc(
struct ggml_context * ctx,
struct ggml_tensor * a,
Expand Down Expand Up @@ -957,15 +961,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_leaky(
GGML_API struct ggml_tensor * ggml_leaky_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
struct ggml_tensor * a, float negative_slope, bool inplace);

GGML_API struct ggml_tensor * ggml_relu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

// TODO: double-check this computation is correct
GGML_API struct ggml_tensor * ggml_gelu(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand Down Expand Up @@ -1549,6 +1552,15 @@ extern "C" {
struct ggml_tensor * a,
int scale_factor);

// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
GGML_API struct ggml_tensor * ggml_pad(
struct ggml_context * ctx,
struct ggml_tensor * a,
int p0,
int p1,
int p2,
int p3);

// sort rows
enum ggml_sort_order {
GGML_SORT_ASC,
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ if (GGML_CUBLAS)
endif()

# required for dynamic parallelism
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
# set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

if (GGML_STATIC)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
Expand Down
Loading