From 5ce4891406d32d8b49ee44253bd1bd1f1ca34c54 Mon Sep 17 00:00:00 2001
From: Glenn <gljenkins@cardiffmet.ac.uk>
Date: Sat, 6 Sep 2025 18:36:44 +0100
Subject: [PATCH] Replace FindCUDA with FindCUDAToolkit (fix #2833) (#3090)

* Use CUDAToolkit, fix #2833

* make cuda optional

* Setting CUDA_ARCHITECTURES to native to remove warning message about Policy CMP0104

* Changed CUDA_ARCHITECTURES to - all - as suggested.

* cuDNN not found on Windows 10, added to FindCUDNN based on find_cudnn.txt in test_for_cudnn. Now working on Windows 10 with default install paths.

* Fixed the test_for_cuda to use CUDAToolkit

* Fixed test_for_cudnn to use CUDAToolkit, removed reference to find_package(CUDA ...)

* Cleaned up tests and added a note regarding CUDA_PROPAGATE_HOST_FLAGS, do we still need this?

* Cleaned up tests and added a note regarding CUDA_PROPAGATE_HOST_FLAGS, do we still need this?

* I need to test this on a machine which doesn't have CUDA setup, I wonder if there's any easy way to run that?

* Fix cudnn finding on windows

* fix the path finding on windows

* Make sure cmake knows how to find nvcc

* set cuda arch to something appropriate by default in a portable way

* Make the installed version of dlib work too

* change back to this version

* update CI to use cmake 3.17

* remove unused files

---------

Co-authored-by: Tobias Fischer <info@tobiasfischer.info>
Co-authored-by: Davis King <davis@dlib.net>
---
 .github/workflows/build_cpp.yml               |  14 +-
 CMakeLists.txt                                |   2 +-
 dlib/CMakeLists.txt                           | 249 +++++-------------
 dlib/cmake_utils/FindCUDNN.cmake              |  81 ++++++
 dlib/cmake_utils/dlibConfig.cmake.in          |   4 +
 dlib/cmake_utils/test_for_cuda/CMakeLists.txt |  14 -
 dlib/cmake_utils/test_for_cuda/cuda_test.cu   |  21 --
 .../cmake_utils/test_for_cudnn/CMakeLists.txt |  18 --
 .../cmake_utils/test_for_cudnn/find_cudnn.txt |  24 --
 tools/python/dlib/__init__.py.in              |  15 +-
 10 files changed, 169 insertions(+), 273 deletions(-)
 create mode 100644 dlib/cmake_utils/FindCUDNN.cmake
 delete mode 100644 dlib/cmake_utils/test_for_cuda/CMakeLists.txt
 delete mode 100644 dlib/cmake_utils/test_for_cuda/cuda_test.cu
 delete mode 100644 dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
 delete mode 100644 dlib/cmake_utils/test_for_cudnn/find_cudnn.txt

diff --git a/.github/workflows/build_cpp.yml b/.github/workflows/build_cpp.yml
index 28ee56f8c3..a0035ca09d 100644
--- a/.github/workflows/build_cpp.yml
+++ b/.github/workflows/build_cpp.yml
@@ -30,7 +30,7 @@ defaults:
     working-directory: dlib/test
 
 jobs:
-  ubuntu-22-04-gcc-default-cmake-3-10-ffmpeg5:
+  ubuntu-22-04-gcc-default-cmake-3-17-ffmpeg5:
     runs-on: 'ubuntu-22.04'
     steps:
     - uses: actions/checkout@v2
@@ -40,18 +40,18 @@ jobs:
         sudo apt update
         sudo apt install libwebp-dev make yasm
         
-    - name: Cache cmake 3.10.0
+    - name: Cache cmake 3.17.0
       uses: actions/cache@v3
       id: cache-cmake-download
       with:
         # cache this folder:
-        path: ~/cmake-3.10.0-Linux-x86_64
-        key: cmake-3.10.0_try3
+        path: ~/cmake-3.17.0-Linux-x86_64
+        key: cmake-3.17.0_try3
         
     - run: |
         # Get the minimum version of cmake dlib supports
-        wget https://cmake.org/files/v3.10/cmake-3.10.0-Linux-x86_64.tar.gz
-        tar -xf cmake-3.10.0-Linux-x86_64.tar.gz -C ~
+        wget https://cmake.org/files/v3.17/cmake-3.17.0-Linux-x86_64.tar.gz
+        tar -xf cmake-3.17.0-Linux-x86_64.tar.gz -C ~
       if: steps.cache-cmake-download.outputs.cache-hit != 'true'
 
     - name: Cache FFmpeg 5
@@ -76,7 +76,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        ~/cmake-3.10.0-Linux-x86_64/bin/cmake -DCMAKE_PREFIX_PATH=/home/runner/ffmpeg-n5.1.3_installation .. 
+        ~/cmake-3.17.0-Linux-x86_64/bin/cmake -DCMAKE_PREFIX_PATH=/home/runner/ffmpeg-n5.1.3_installation .. 
     
     - name: Build just tests
       run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 79c938d677..ca00843f18 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.10.0)
+cmake_minimum_required(VERSION 3.17.0)
 
 project(dlib_project)
 
diff --git a/dlib/CMakeLists.txt b/dlib/CMakeLists.txt
index d1ac826ce3..04b5d92059 100644
--- a/dlib/CMakeLists.txt
+++ b/dlib/CMakeLists.txt
@@ -4,7 +4,7 @@
 #
 
 
-cmake_minimum_required(VERSION 3.10.0)
+cmake_minimum_required(VERSION 3.17.0)
 
 set(CMAKE_DISABLE_SOURCE_CHANGES ON)
 set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
@@ -13,7 +13,7 @@ if(POLICY CMP0077)
    cmake_policy(SET CMP0077 NEW)
 endif()
 
-project(dlib)
+project(dlib LANGUAGES C CXX)
 
 set(CPACK_PACKAGE_NAME "dlib")
 set(CPACK_PACKAGE_VERSION_MAJOR "20")
@@ -26,6 +26,7 @@ if (NOT TARGET dlib)
    message(STATUS "Compiling dlib version: ${VERSION}")
 endif()
 
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake_utils)
 
 include(cmake_utils/set_compiler_specific_options.cmake)
 
@@ -104,17 +105,6 @@ elseif(BUILD_SHARED_LIBS)
    endif()
 endif()
 
-
-if (CMAKE_VERSION VERSION_LESS "3.9.0")
-   # Set only because there are old target_link_libraries() statements in the
-   # FindCUDA.cmake file that comes with CMake that error out if the new behavior
-   # is used.  In newer versions of CMake we can instead set CUDA_LINK_LIBRARIES_KEYWORD which fixes this issue.
-   cmake_policy(SET CMP0023 OLD)
-else()
-   set(CUDA_LINK_LIBRARIES_KEYWORD PUBLIC)
-endif()
-
-
 macro (enable_preprocessor_switch option_name)
    list(APPEND active_preprocessor_switches "-D${option_name}")
 endmacro()
@@ -249,7 +239,6 @@ if (NOT TARGET dlib)
       option(DLIB_USE_BLAS ${DLIB_USE_BLAS_STR} ON)
       option(DLIB_USE_LAPACK ${DLIB_USE_LAPACK_STR} ON)
       option(DLIB_USE_CUDA ${DLIB_USE_CUDA_STR} ON)
-      set(DLIB_USE_CUDA_COMPUTE_CAPABILITIES 50 CACHE STRING ${DLIB_USE_CUDA_COMPUTE_CAPABILITIES_STR})
       option(DLIB_PNG_SUPPORT ${DLIB_PNG_SUPPORT_STR} ON)
       option(DLIB_GIF_SUPPORT ${DLIB_GIF_SUPPORT_STR} ON)
       option(DLIB_WEBP_SUPPORT ${DLIB_WEBP_SUPPORT_STR} ON)
@@ -649,178 +638,75 @@ if (NOT TARGET dlib)
 
 
       if (DLIB_USE_CUDA)
-         find_package(CUDA 7.5)
-
-         if (CUDA_VERSION VERSION_GREATER 9.1 AND CMAKE_VERSION VERSION_LESS 3.12.2)
-            # This bit of weirdness is to work around a bug in cmake 
-            list(REMOVE_ITEM CUDA_CUBLAS_LIBRARIES "CUDA_cublas_device_LIBRARY-NOTFOUND")
-         endif()
-
-
-         if (CUDA_FOUND AND MSVC AND NOT CUDA_CUBLAS_LIBRARIES AND "${CMAKE_SIZEOF_VOID_P}" EQUAL "4")
-            message(WARNING "You have CUDA installed, but we can't use it unless you put visual studio in 64bit mode.")
-            set(CUDA_FOUND 0)
-         endif()
-
-         if (NOT CUDA_CUBLAS_LIBRARIES)
-            message(STATUS "Found CUDA, but CMake was unable to find the cuBLAS libraries that should be part of every basic CUDA "
-               "install. Your CUDA install is somehow broken or incomplete. Since cuBLAS is required for dlib to use CUDA we won't use CUDA.")
-            set(CUDA_FOUND 0)
-         endif()
-
-         if (CUDA_FOUND)
-
-            # There is some bug in cmake that causes it to mess up the
-            # -std=c++11 option if you let it propagate it to nvcc in some
-            # cases.  So instead we disable this and manually include
-            # things from CMAKE_CXX_FLAGS in the CUDA_NVCC_FLAGS list below.
-            if (APPLE)
-               set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-               # Grab all the -D flags from CMAKE_CXX_FLAGS so we can pass them
-               # to nvcc.
-               string(REGEX MATCHALL "-D[^ ]*" FLAGS_FOR_NVCC "${CMAKE_CXX_FLAGS}")
-
-               # Check if we are being built as part of a pybind11 module. 
-               if (COMMAND pybind11_add_module)
-                  # Don't export unnecessary symbols.
-                  list(APPEND FLAGS_FOR_NVCC "-Xcompiler=-fvisibility=hidden")
-               endif()
-            endif()
-
-            set(CUDA_HOST_COMPILATION_CPP ON)
-            string(REPLACE "," ";" DLIB_CUDA_COMPUTE_CAPABILITIES ${DLIB_USE_CUDA_COMPUTE_CAPABILITIES})
-            foreach(CAP ${DLIB_CUDA_COMPUTE_CAPABILITIES})
-                list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_${CAP},code=[sm_${CAP},compute_${CAP}]")
-            endforeach()
-            # Note that we add __STRICT_ANSI__ to avoid freaking out nvcc with gcc specific
-            # magic in the standard C++ header files (since nvcc uses gcc headers on linux).
-            list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__;-D_MWAITXINTRIN_H_INCLUDED;-D_FORCE_INLINES;${FLAGS_FOR_NVCC}")
-            list(APPEND CUDA_NVCC_FLAGS ${active_preprocessor_switches})
-            if (NOT DLIB_IN_PROJECT_BUILD)
-               LIST(APPEND CUDA_NVCC_FLAGS -DDLIB__CMAKE_GENERATED_A_CONFIG_H_FILE)
-            endif()
-            if (NOT MSVC)
-               list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
-            endif()
-            if (CMAKE_POSITION_INDEPENDENT_CODE)
-               # sometimes this setting isn't propagated to NVCC, which then causes the
-               # compile to fail.  So make sure it's propagated.
-               if (NOT MSVC) # Visual studio doesn't have -fPIC so don't do it in that case.
-                  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
-               endif()
+         find_package(CUDAToolkit)
+      
+         if (CUDAToolkit_FOUND AND CUDAToolkit_NVCC_EXECUTABLE)
+            set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE})
+
+            # Set USER_DID_NOT_SPECIFY_WHAT_CUDA_ARCH_TO_USE before calling
+            # enable_language(CUDA) because enable_language() sets
+            # CMAKE_CUDA_ARCHITECTURES to a default that isn't especially
+            # helpful for most users in newer cmake versions.  E.g. it picks
+            # the oldest supported arch the cuda toolkit you have can build for
+            # which is often so old your GPU can't actually run the resulting
+            # kernels.
+            set(USER_DID_NOT_SPECIFY_WHAT_CUDA_ARCH_TO_USE FALSE)
+            if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND (NOT DEFINED ENV{CUDAARCHS} OR "$ENV{CUDAARCHS}" STREQUAL ""))
+               set(USER_DID_NOT_SPECIFY_WHAT_CUDA_ARCH_TO_USE TRUE)
             endif()
 
-            include(cmake_utils/test_for_cudnn/find_cudnn.txt)
+            enable_language(CUDA)
 
-            if (cudnn AND cudnn_include AND NOT DEFINED cuda_test_compile_worked AND NOT DEFINED cudnn_test_compile_worked)
-               # make sure cuda is really working by doing a test compile
-               message(STATUS "Building a CUDA test project to see if your compiler is compatible with CUDA...")
-
-               set(CUDA_TEST_CMAKE_FLAGS 
-                  "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}"
-                  "-DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}"
-                  "-DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}")
-
-               if (NOT MSVC) # see https://github.com/davisking/dlib/issues/363
-                  list(APPEND CUDA_TEST_CMAKE_FLAGS "-DCUDA_HOST_COMPILER=${CUDA_HOST_COMPILER}")
-               endif()
-
-               try_compile(cuda_test_compile_worked 
-                  ${PROJECT_BINARY_DIR}/cuda_test_build 
-                  ${PROJECT_SOURCE_DIR}/cmake_utils/test_for_cuda cuda_test
-                  CMAKE_FLAGS ${CUDA_TEST_CMAKE_FLAGS}
-                  OUTPUT_VARIABLE try_compile_output_message
-                  )
-               if (NOT cuda_test_compile_worked)
-                  string(REPLACE "\n" "\n   ***   " try_compile_output_message "${try_compile_output_message}")
-                  message(STATUS "*****************************************************************************************************************")
-                  message(STATUS "*** CUDA was found but your compiler failed to compile a simple CUDA program so dlib isn't going to use CUDA. ")
-                  message(STATUS "*** The output of the failed CUDA test compile is shown below: ")
-                  message(STATUS "*** ")
-                  message(STATUS "***   ${try_compile_output_message}")
-                  message(STATUS "*****************************************************************************************************************")
+            # If the user didn't say what cuda arch they want to use try to pick something reasonable
+            if(USER_DID_NOT_SPECIFY_WHAT_CUDA_ARCH_TO_USE)
+               if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
+                  # Auto-detect host GPU(s); safest default on modern CMake.
+                  set(CMAKE_CUDA_ARCHITECTURES native)  # requires CMake ≥ 3.24
                else()
-                  message(STATUS "Building a cuDNN test project to check if you have the right version of cuDNN installed...")
-                  try_compile(cudnn_test_compile_worked 
-                     ${PROJECT_BINARY_DIR}/cudnn_test_build 
-                     ${PROJECT_SOURCE_DIR}/cmake_utils/test_for_cudnn cudnn_test
-                     CMAKE_FLAGS ${CUDA_TEST_CMAKE_FLAGS}
-                     OUTPUT_VARIABLE try_compile_output_message
-                     )
-                  if (NOT cudnn_test_compile_worked)
-                     string(REPLACE "\n" "\n   ***   " try_compile_output_message "${try_compile_output_message}")
-                     message(STATUS "*****************************************************************************************************")
-                     message(STATUS "*** Found cuDNN, but we failed to compile the dlib/cmake_utils/test_for_cudnn project. ")
-                     message(STATUS "*** You either have an unsupported version of cuDNN or something is wrong with your cudDNN install.")
-                     message(STATUS "*** Since a functional cuDNN is not found DLIB WILL NOT USE CUDA. ")
-                     message(STATUS "*** The output of the failed test_for_cudnn build is: ")
-                     message(STATUS "*** ")
-                     message(STATUS "***   ${try_compile_output_message}")
-                     message(STATUS "*****************************************************************************************************")
+                  # Fallback by nvcc version to avoid asking for archs it doesn't know yet
+                  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.1)
+                     # CUDA 11.0 and older (sm_86 only became available in CUDA 11.1)
+                     set(CMAKE_CUDA_ARCHITECTURES 52;60;61;70;75)
+                  elseif (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
+                     # CUDA 11.1 - 11.x
+                     set(CMAKE_CUDA_ARCHITECTURES 60;61;70;75;80;86)
+                  else()
+                     # CUDA 12.x+
+                     # (89/90 are known to every 12.x nvcc; only add newer archs on toolkits that support them.)
+                     set(CMAKE_CUDA_ARCHITECTURES 70;75;80;86;89;90)
                   endif()
                endif()
             endif()
 
-            # Find where cuSOLVER is since the FindCUDA cmake package doesn't
-            # bother to look for it in older versions of cmake.
-            if (NOT CUDA_cusolver_LIBRARY)
-               get_filename_component(cuda_blas_path "${CUDA_CUBLAS_LIBRARIES}" DIRECTORY)
-               find_library(CUDA_cusolver_LIBRARY cusolver HINTS ${cuda_blas_path})
-               # CUDA 10.1 doesn't install symbolic links to libcusolver.so in
-               # the usual place.  This is probably a bug in the cuda
-               # installer.  In any case, If we haven't found cusolver yet go
-               # look in the cuda install folder for it.  New versions of cmake
-               # do this correctly, but older versions need help.
-               if (NOT CUDA_cusolver_LIBRARY)
-                  find_library(CUDA_cusolver_LIBRARY cusolver HINTS 
-                     /usr/local/cuda/lib64/
-                     )
-               endif()
-               mark_as_advanced(CUDA_cusolver_LIBRARY)
-            endif()
-            # Also find OpenMP since cuSOLVER needs it.  Importantly, we only
-            # look for one to link to if our use of BLAS, specifically the
-            # Intel MKL, hasn't already decided what to use.  This is because
-            # it makes the MKL bug out if you link to another openmp lib other
-            # than Intel's when you use the MKL. I'm also not really sure when
-            # explicit linking to openmp became unnecessary, but for
-            # sufficiently older versions of cuda it was needed.  Then in
-            # versions of cmake newer than 3.11 linking to openmp started to
-            # mess up the switches passed to nvcc, so you can't just leave
-            # these "try to link to openmp" statements here going forward.  Fun
-            # times.
-            if (CUDA_VERSION VERSION_LESS "9.1" AND NOT openmp_libraries AND NOT MSVC AND NOT XCODE AND NOT APPLE)
-               find_package(OpenMP)
-               if (OPENMP_FOUND)
-                  set(openmp_libraries ${OpenMP_CXX_FLAGS}) 
-               else()
-                  message(STATUS "*** Didn't find OpenMP, which is required to use CUDA. ***")
-                  set(CUDA_FOUND 0)
+            find_package(CUDNN)
+
+            if(CUDNN_FOUND)
+               set(source_files ${source_files} 
+                  cuda/cuda_dlib.cu 
+                  cuda/cudnn_dlibapi.cpp
+                  cuda/cublas_dlibapi.cpp
+                  cuda/cusolver_dlibapi.cu
+                  cuda/curand_dlibapi.cpp
+                  cuda/cuda_data_ptr.cpp
+                  cuda/gpu_data.cpp
+                  )
+               list (APPEND dlib_needed_private_libraries CUDA::cublas)
+               list (APPEND dlib_needed_private_libraries ${CUDNN_LIBRARY_PATH})
+               list (APPEND dlib_needed_private_libraries CUDA::curand)
+               list (APPEND dlib_needed_private_libraries CUDA::cusolver)
+               list (APPEND dlib_needed_private_libraries CUDA::cudart)
+               if(openmp_libraries)
+                  list (APPEND dlib_needed_private_libraries ${openmp_libraries})
                endif()
-            endif()
-         endif()
 
-         if (CUDA_FOUND AND cudnn AND cuda_test_compile_worked AND cudnn_test_compile_worked AND cudnn_include)
-            set(source_files ${source_files} 
-               cuda/cuda_dlib.cu 
-               cuda/cudnn_dlibapi.cpp
-               cuda/cublas_dlibapi.cpp
-               cuda/cusolver_dlibapi.cu
-               cuda/curand_dlibapi.cpp
-               cuda/cuda_data_ptr.cpp
-               cuda/gpu_data.cpp
-               )
-            list (APPEND dlib_needed_private_libraries ${CUDA_CUBLAS_LIBRARIES})
-            list (APPEND dlib_needed_private_libraries ${cudnn})
-            list (APPEND dlib_needed_private_libraries ${CUDA_curand_LIBRARY})
-            list (APPEND dlib_needed_private_libraries ${CUDA_cusolver_LIBRARY})
-            list (APPEND dlib_needed_private_libraries ${CUDA_CUDART_LIBRARY})
-            if(openmp_libraries)
-               list (APPEND dlib_needed_private_libraries ${openmp_libraries})
-            endif()
-
-            include_directories(${cudnn_include})
-            message(STATUS "Enabling CUDA support for dlib.  DLIB WILL USE CUDA, compute capabilities: ${DLIB_CUDA_COMPUTE_CAPABILITIES}")
+               include_directories(${CUDAToolkit_INCLUDE_DIRS} ${CUDNN_INCLUDE_PATH})
+               message(STATUS "Enabling CUDA support for dlib.  DLIB WILL USE CUDA using cuda arch ${CMAKE_CUDA_ARCHITECTURES}.  If you don't want to use that arch set the CUDAARCHS env var or CMAKE_CUDA_ARCHITECTURES cmake variable.")
+            else()
+               set(DLIB_USE_CUDA OFF CACHE STRING "${DLIB_USE_CUDA_STR}" FORCE)  # no cuDNN -> CUDA sources are not added, so switch the option off
+               toggle_preprocessor_switch(DLIB_USE_CUDA)
+               message(STATUS "DID NOT FIND CUDNN")
+               message(STATUS "Disabling CUDA support for dlib.  DLIB WILL NOT USE CUDA")
+            endif()
          else()
             set(DLIB_USE_CUDA OFF CACHE STRING ${DLIB_USE_BLAS_STR} FORCE )
             toggle_preprocessor_switch(DLIB_USE_CUDA)
@@ -875,15 +761,8 @@ if (NOT TARGET dlib)
          endif()
       endif()
 
-      # Tell CMake to build dlib via add_library()/cuda_add_library()
-      if (DLIB_USE_CUDA)
-         # The old cuda_add_library() command doesn't support CMake's newer dependency
-         # stuff, so we have to set the include path manually still, which we do here.
-         include_directories(${dlib_needed_public_includes})
-         cuda_add_library(dlib ${source_files} )
-      else()
-         add_library(dlib ${source_files} )
-      endif()
+      add_library(dlib ${source_files})
+
 
    endif ()  ##### end of if NOT DLIB_ISO_CPP_ONLY ##########################################################
 
diff --git a/dlib/cmake_utils/FindCUDNN.cmake b/dlib/cmake_utils/FindCUDNN.cmake
new file mode 100644
index 0000000000..c5d88ae6ab
--- /dev/null
+++ b/dlib/cmake_utils/FindCUDNN.cmake
@@ -0,0 +1,81 @@
+# Find the CUDNN libraries
+#
+# The following variables are optionally searched for defaults
+#  CUDNN_ROOT: Base directory where CUDNN is found
+#  CUDNN_INCLUDE_DIR: Directory where CUDNN header is searched for
+#  CUDNN_LIBRARY: Directory where CUDNN library is searched for
+#  CUDNN_STATIC: Are we looking for a static library? (default: no)
+#
+# The following are set after configuration is done:
+#  CUDNN_FOUND
+#  CUDNN_INCLUDE_PATH
+#  CUDNN_LIBRARY_PATH
+#
+
+include(FindPackageHandleStandardArgs)
+
+set(CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} CACHE PATH "Folder containing NVIDIA cuDNN")
+if (DEFINED ENV{CUDNN_ROOT_DIR})  # ENV{...} without a leading $ tests the environment variable itself
+  message(WARNING "CUDNN_ROOT_DIR is deprecated. Please set CUDNN_ROOT instead.")
+endif()
+list(APPEND CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} ${CUDAToolkit_TARGET_DIR} ${CUDA_TOOLKIT_ROOT_DIR})  # CUDAToolkit_TARGET_DIR comes from FindCUDAToolkit; the legacy FindCUDA name is kept for users who set it by hand
+
+# Compatibility layer for CMake < 3.12. CMake >= 3.12 already takes CUDNN_ROOT into account when searching for paths and libraries.
+list(APPEND CMAKE_PREFIX_PATH ${CUDNN_ROOT})
+
+set(CUDNN_INCLUDE_DIR $ENV{CUDNN_INCLUDE_DIR} CACHE PATH "Folder containing NVIDIA cuDNN header files")
+
+set(CUDA_VERSION "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
+
+find_path(CUDNN_INCLUDE_PATH cudnn.h
+  HINTS ${CUDNN_INCLUDE_DIR} ENV CUDNN_INCLUDE_DIR  ENV CUDNN_HOME
+  PATHS /usr/local /usr/local/cuda "C:/Program Files/NVIDIA/CUDNN/*/include/${CUDA_VERSION}" "C:/Program Files/NVIDIA/CUDNN/*/include/*" ENV CPATH
+  PATH_SUFFIXES cuda/include cuda include)
+
+option(CUDNN_STATIC "Look for static CUDNN" OFF)
+if (CUDNN_STATIC)
+  set(CUDNN_LIBNAME "libcudnn_static.a")
+else()
+  set(CUDNN_LIBNAME "cudnn")
+endif()
+
+set(CUDNN_LIBRARY $ENV{CUDNN_LIBRARY} CACHE PATH "Path to the cudnn library file (e.g., libcudnn.so)")
+if (CUDNN_LIBRARY MATCHES ".*cudnn_static.a" AND NOT CUDNN_STATIC)
+  message(WARNING "CUDNN_LIBRARY points to a static library (${CUDNN_LIBRARY}) but CUDNN_STATIC is OFF.")
+endif()
+
+find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME}
+  PATHS ${CUDNN_LIBRARY} /usr/local /usr/local/cuda  "C:/Program Files/NVIDIA/CUDNN/*/lib/${CUDA_VERSION}/x64" "C:/Program Files/NVIDIA/CUDNN/*/lib/${CUDA_VERSION}" "C:/Program Files/NVIDIA/CUDNN/*/lib/*" ENV LD_LIBRARY_PATH
+  PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
+
+find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY_PATH CUDNN_INCLUDE_PATH)
+
+if(CUDNN_FOUND)
+  # Get cuDNN version
+  if(EXISTS ${CUDNN_INCLUDE_PATH}/cudnn_version.h)
+    file(READ ${CUDNN_INCLUDE_PATH}/cudnn_version.h CUDNN_HEADER_CONTENTS)
+  else()
+    file(READ ${CUDNN_INCLUDE_PATH}/cudnn.h CUDNN_HEADER_CONTENTS)
+  endif()
+  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
+               CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
+               CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
+  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
+               CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
+               CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
+  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
+               CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
+               CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
+  # Assemble cuDNN version
+  if(NOT CUDNN_VERSION_MAJOR)
+    set(CUDNN_VERSION "?")
+  else()
+    set(CUDNN_VERSION
+        "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
+  endif()
+endif()
+
+mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE_DIR CUDNN_LIBRARY CUDNN_VERSION)
diff --git a/dlib/cmake_utils/dlibConfig.cmake.in b/dlib/cmake_utils/dlibConfig.cmake.in
index 2667a2e718..4f7ea1ee28 100644
--- a/dlib/cmake_utils/dlibConfig.cmake.in
+++ b/dlib/cmake_utils/dlibConfig.cmake.in
@@ -35,6 +35,10 @@ set(dlib_LIBRARIES dlib::dlib)
 set(dlib_LIBS      dlib::dlib)
 set(dlib_INCLUDE_DIRS "@CMAKE_INSTALL_FULL_INCLUDEDIR@" "@dlib_needed_includes@")
 
+if (@DLIB_USE_CUDA@)
+   find_package(CUDAToolkit)
+endif()
+
 mark_as_advanced(dlib_LIBRARIES)
 mark_as_advanced(dlib_LIBS)
 mark_as_advanced(dlib_INCLUDE_DIRS)
diff --git a/dlib/cmake_utils/test_for_cuda/CMakeLists.txt b/dlib/cmake_utils/test_for_cuda/CMakeLists.txt
deleted file mode 100644
index f5b3b95964..0000000000
--- a/dlib/cmake_utils/test_for_cuda/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-
-cmake_minimum_required(VERSION 3.10.0)
-project(cuda_test)
-
-include_directories(../../cuda)
-add_definitions(-DDLIB_USE_CUDA)
-
-# Override the FindCUDA.cmake setting to avoid duplication of host flags if using a toolchain:
-option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-find_package(CUDA 7.5 REQUIRED)
-set(CUDA_HOST_COMPILATION_CPP ON)
-list(APPEND CUDA_NVCC_FLAGS "-arch=sm_50;-std=c++14;-D__STRICT_ANSI__;-D_MWAITXINTRIN_H_INCLUDED;-D_FORCE_INLINES")
-
-cuda_add_library(cuda_test STATIC cuda_test.cu )
diff --git a/dlib/cmake_utils/test_for_cuda/cuda_test.cu b/dlib/cmake_utils/test_for_cuda/cuda_test.cu
deleted file mode 100644
index fb1ffe0dad..0000000000
--- a/dlib/cmake_utils/test_for_cuda/cuda_test.cu
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright (C) 2015  Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-
-#include "cuda_utils.h"
-#include "cuda_dlib.h"
-
-
-// ------------------------------------------------------------------------------------
-
-__global__ void cuda_add_arrays(const float* a, const float* b, float* out, size_t n)
-{
-   out[0] += a[0]+b[0];
-}
-
-void add_arrays()
-{
-   cuda_add_arrays<<<512,512>>>(0,0,0,0);
-}
-
-// ------------------------------------------------------------------------------------
-
diff --git a/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt b/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
deleted file mode 100644
index 3d748eb562..0000000000
--- a/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-
-cmake_minimum_required(VERSION 3.10.0)
-project(cudnn_test)
-
-# Override the FindCUDA.cmake setting to avoid duplication of host flags if using a toolchain:
-option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-find_package(CUDA 7.5 REQUIRED)
-set(CUDA_HOST_COMPILATION_CPP ON)
-list(APPEND CUDA_NVCC_FLAGS "-arch=sm_50;-std=c++14;-D__STRICT_ANSI__")
-add_definitions(-DDLIB_USE_CUDA)
-
-include(find_cudnn.txt)
-
-if (cudnn_include AND cudnn)
-   include_directories(${cudnn_include})
-   cuda_add_library(cudnn_test STATIC ../../cuda/cudnn_dlibapi.cpp ${cudnn} )
-   target_compile_features(cudnn_test PUBLIC cxx_std_14)
-endif()
diff --git a/dlib/cmake_utils/test_for_cudnn/find_cudnn.txt b/dlib/cmake_utils/test_for_cudnn/find_cudnn.txt
deleted file mode 100644
index b38d5f26da..0000000000
--- a/dlib/cmake_utils/test_for_cudnn/find_cudnn.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-
-message(STATUS "Looking for cuDNN install...")
-# Look for cudnn, we will look in the same place as other CUDA
-# libraries and also a few other places as well.
-find_path(cudnn_include cudnn.h
-    HINTS ${CUDA_INCLUDE_DIRS} ENV CUDNN_INCLUDE_DIR  ENV CUDNN_HOME
-    PATHS /usr/local /usr/local/cuda "C:/Program Files/NVIDIA/CUDNN/*/include/${CUDA_VERSION}" "C:/Program Files/NVIDIA/CUDNN/*/include/*" ENV CPATH
-    PATH_SUFFIXES include
-    )
-get_filename_component(cudnn_hint_path "${CUDA_CUBLAS_LIBRARIES}" PATH)
-find_library(cudnn cudnn
-    HINTS ${cudnn_hint_path} ENV CUDNN_LIBRARY_DIR  ENV CUDNN_HOME 
-    PATHS /usr/local /usr/local/cuda "C:/Program Files/NVIDIA/CUDNN/*/lib/${CUDA_VERSION}" "C:/Program Files/NVIDIA/CUDNN/*/lib/*" ENV LD_LIBRARY_PATH
-    PATH_SUFFIXES lib64 lib x64
-    )
-mark_as_advanced(cudnn cudnn_include)
-
-if (cudnn AND cudnn_include)
-    message(STATUS "Found cuDNN: " ${cudnn})
-else()
-    message(STATUS "*** cuDNN V5.0 OR GREATER NOT FOUND.                                                       ***")
-    message(STATUS "*** Dlib requires cuDNN V5.0 OR GREATER.  Since cuDNN is not found DLIB WILL NOT USE CUDA. ***")
-    message(STATUS "*** If you have cuDNN then set CMAKE_PREFIX_PATH to include cuDNN's folder.                ***")
-endif()
diff --git a/tools/python/dlib/__init__.py.in b/tools/python/dlib/__init__.py.in
index 4a289ed5e0..6aff592629 100644
--- a/tools/python/dlib/__init__.py.in
+++ b/tools/python/dlib/__init__.py.in
@@ -8,13 +8,22 @@ def add_lib_to_dll_path(path):
     """
     try:
         import os
-        os.add_dll_directory(os.path.join(os.path.dirname(path), '../../bin'))
+
+        # Work out where the various bin folders that hold nvidia's dlls are.
+        lib_folder = os.path.dirname(path)
+        dll_folder = lib_folder.replace("/lib/", "/bin/")
+
+        if os.path.isdir(dll_folder):
+            os.add_dll_directory(dll_folder)
+        else:
+            os.add_dll_directory(os.path.dirname(dll_folder))
     except (AttributeError,KeyError,FileNotFoundError):
         pass
 
 if '@DLIB_USE_CUDA@' == 'ON':
-    add_lib_to_dll_path('@cudnn@')
-    add_lib_to_dll_path('@CUDA_CUDART_LIBRARY@')
+    add_lib_to_dll_path('@CUDNN_LIBRARY_PATH@')
+    add_lib_to_dll_path('@CUDA_CUDART@')
+
 
 from _dlib_pybind11 import *
 from _dlib_pybind11 import __version__, __time_compiled__