replaced fnc with static algo

pjreddie · Apr 26, 2022 · cc734c0 · cc734c0
1 parent b1ab3da
commit cc734c0
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 25 deletions.
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,8 @@ ehthumbs.db
 Icon?
 Thumbs.db
 *.swp
+
+# Generated Libs
+libdarknet.a
+libdarknet.so
+.idea/
diff --git a/Makefile b/Makefile
@@ -7,9 +7,11 @@ DEBUG=0
 ARCH= -gencode arch=compute_30,code=sm_30 \
       -gencode arch=compute_35,code=sm_35 \
       -gencode arch=compute_50,code=[sm_50,compute_50] \
-      -gencode arch=compute_52,code=[sm_52,compute_52]
+      -gencode arch=compute_52,code=[sm_52,compute_52] \
+#      -gencode arch=compute_75,code=[sm_75,compute_75] # RTX 20 series
 #      -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
 
+
 # This is what I use, uncomment if you know your arch and want to specify
 # ARCH= -gencode arch=compute_52,code=compute_52
 

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
@@ -145,31 +145,40 @@ void cudnn_convolutional_setup(layer *l)
     }
     #endif
 
-    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
-            l->srcTensorDesc,
-            l->weightDesc,
-            l->convDesc,
-            l->dstTensorDesc,
-            CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
-            2000000000,
-            &l->fw_algo);
-    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
-            l->weightDesc,
-            l->ddstTensorDesc,
-            l->convDesc,
-            l->dsrcTensorDesc,
-            CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
-            2000000000,
-            &l->bd_algo);
-    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
-            l->srcTensorDesc,
-            l->ddstTensorDesc,
-            l->convDesc,
-            l->dweightDesc,
-            CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
-            2000000000,
-            &l->bf_algo);
+    #if CUDNN_MAJOR >= 8
+    // The following functions were removed in cuDNN 8+ and have no drop-in replacement. Assigning fixed algorithms
+    // is the minimal intervention necessary for the project to work with cuDNN 8+ (CUDA 11+).
+
+    //    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
+    //            l->srcTensorDesc,
+    //            l->weightDesc,
+    //            l->convDesc,
+    //            l->dstTensorDesc,
+    //            CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
+    //            2000000000,
+    //            &l->fw_algo);
+        l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+    //    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
+    //            l->weightDesc,
+    //            l->ddstTensorDesc,
+    //            l->convDesc,
+    //            l->dsrcTensorDesc,
+    //            CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
+    //            2000000000,
+    //            &l->bd_algo);
+        l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
+    //    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
+    //            l->srcTensorDesc,
+    //            l->ddstTensorDesc,
+    //            l->convDesc,
+    //            l->dweightDesc,
+    //            CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
+    //            2000000000,
+    //            &l->bf_algo);
+        l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
+    #endif
 }
+
 #endif
 #endif