@@ -53,6 +53,12 @@ if(CUDA_FOUND)
53
53
message (STATUS "CUDA detected: " ${CUDA_VERSION} )
54
54
55
55
# Names of the CUDA GPU generations that CUDA_GENERATION is compared against
# further down in this file.
set (_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing" )
56
# Compute capabilities belonging to each generation, stored as ;-separated
# CMake lists (one "major.minor" entry per element).
+ set (_arch_fermi "2.0" )
57
+ set (_arch_kepler "3.0;3.5;3.7" )
58
+ set (_arch_maxwell "5.0;5.2" )
59
+ set (_arch_pascal "6.0;6.1" )
60
+ set (_arch_volta "7.0" )
61
+ set (_arch_turing "7.5" )
56
62
# "Auto" is only offered when not cross-compiling.
# NOTE(review): presumably because auto-detection has to run a probe on the
# build machine's GPU - confirm against the detection code.
if (NOT CMAKE_CROSSCOMPILING )
57
63
list (APPEND _generations "Auto" )
58
64
endif ()
@@ -70,29 +76,57 @@ if(CUDA_FOUND)
70
76
unset (CUDA_ARCH_PTX CACHE )
71
77
endif ()
72
78
79
# ocv_filter_available_architecture(<result_list> <cc>...)
#
# Stores in <result_list> the space-separated subset of the given compute
# capabilities (e.g. 3.0 7.5) that the configured nvcc accepts. Each candidate
# is checked by compiling cmake/checks/OpenCVDetectCudaArch.cu with
# "-gencode arch=compute_XY,code=sm_XY"; a zero exit status means "supported".
#
# The outcome is stored in the internal cache entry CUDA_SUPPORTED_CC. While
# that entry is defined, its value is returned as-is and nvcc is not invoked.
#
# This is a macro (not a function) because <result_list> is written in the
# caller's scope.
macro(ocv_filter_available_architecture result_list)
  if(DEFINED CUDA_SUPPORTED_CC)
    set(${result_list} "${CUDA_SUPPORTED_CC}")
  else()
    # Start from an empty accumulator so a stale value in the caller's
    # variable cannot leak into the result.
    set(${result_list} "")
    set(CC_LIST ${ARGN})
    foreach(target_arch ${CC_LIST})
      # "7.5" -> "75", the form nvcc expects in compute_XY / sm_XY.
      string(REPLACE "." "" target_arch_short "${target_arch}")
      # The ';' makes this expand to two separate command-line arguments.
      set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
      execute_process(
        COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu"
        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
        RESULT_VARIABLE _nvcc_res
        OUTPUT_VARIABLE _nvcc_out
        ERROR_QUIET
        OUTPUT_STRIP_TRAILING_WHITESPACE
      )
      if(_nvcc_res EQUAL 0)
        set(${result_list} "${${result_list}} ${target_arch}")
      endif()
    endforeach()
    # Quoted on purpose: the accumulated value is empty when nvcc rejected
    # every candidate, and an unquoted empty expansion would drop the argument.
    string(STRIP "${${result_list}}" ${result_list})
    set(CUDA_SUPPORTED_CC "${${result_list}}" CACHE INTERNAL "List of supported compute capability")
  endif()
endmacro()
99
+
100
# ocv_detect_native_cuda_arch(<status> <output>)
#
# Invokes nvcc with CUDA_NVCC_FLAGS on cmake/checks/OpenCVDetectCudaArch.cu
# and the "--run" option, from the CMakeTmp directory of the build tree.
#   <status> - name of the variable receiving the process result
#              (compared against 0 by the callers to detect failure)
#   <output> - name of the variable receiving the captured standard output,
#              with trailing whitespace stripped
# Standard error is discarded.
#
# This is a macro because both result variables are written in the caller's
# scope.
macro(ocv_detect_native_cuda_arch status output)
  execute_process(
    COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
    RESULT_VARIABLE ${status}
    OUTPUT_VARIABLE ${output}
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
endmacro()
106
+
107
# ocv_wipeout_deprecated(<arch_bin_list_var>)
#
# Rewrites, in place, every "2.1" in the variable named by the argument to
# "2.1(2.0)".
# NOTE(review): "2.1(2.0)" looks like the BIN(PTX) notation mentioned in the
# CUDA_ARCH_BIN help text - confirm against where CUDA_ARCH_BIN is parsed.
#
# The input is quoted on purpose: string(REPLACE) concatenates multiple input
# arguments, so an unquoted ;-list would lose its separators
# ("3.0;3.5" -> "3.03.5"), and an unquoted empty value would leave the command
# with too few arguments.
macro(ocv_wipeout_deprecated _arch_bin_list)
  string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} "${${_arch_bin_list}}")
endmacro()
110
+
73
111
# Translate an explicitly requested CUDA_GENERATION into the list of binary
# architectures to build for. No PTX architecture is selected here.
set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi")
  set(__cuda_arch_bin ${_arch_fermi})
elseif(CUDA_GENERATION STREQUAL "Kepler")
  set(__cuda_arch_bin ${_arch_kepler})
elseif(CUDA_GENERATION STREQUAL "Maxwell")
  set(__cuda_arch_bin ${_arch_maxwell})
elseif(CUDA_GENERATION STREQUAL "Pascal")
  set(__cuda_arch_bin ${_arch_pascal})
elseif(CUDA_GENERATION STREQUAL "Volta")
  set(__cuda_arch_bin ${_arch_volta})
elseif(CUDA_GENERATION STREQUAL "Turing")
  set(__cuda_arch_bin ${_arch_turing})
elseif(CUDA_GENERATION STREQUAL "Auto")
  # Ask the probe program which compute capabilities are reported on this machine.
  ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
  if(NOT _nvcc_res EQUAL 0)
    # __cuda_arch_bin is deliberately left untouched on failure.
    message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
  else()
    # Keep only the "major.minor" tokens of the probe output, as a ;-list.
    string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
  endif()
endif()
98
132
@@ -101,28 +135,26 @@ if(CUDA_FOUND)
101
135
set (__cuda_arch_bin "3.2" )
102
136
set (__cuda_arch_ptx "" )
103
137
elseif (AARCH64 )
104
- execute_process ( COMMAND "${CUDA_NVCC_EXECUTABLE} " ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR} /cmake/checks/OpenCVDetectCudaArch.cu" "--run"
105
- WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY} /CMakeTmp/"
106
- RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
107
- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE )
138
+ ocv_detect_native_cuda_arch (_nvcc_res _nvcc_out )
108
139
if (NOT _nvcc_res EQUAL 0 )
109
140
message (STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures." )
110
141
set (__cuda_arch_bin "5.3 6.2 7.2" )
111
142
else ()
112
143
set (__cuda_arch_bin "${_nvcc_out} " )
113
- string (REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin} " )
114
144
endif ()
115
145
set (__cuda_arch_ptx "" )
116
146
else ()
117
- if (CUDA_VERSION VERSION_LESS "9.0" )
118
- set (__cuda_arch_bin "2.0 3.0 3.5 3.7 5.0 5.2 6.0 6.1" )
119
- elseif (CUDA_VERSION VERSION_LESS "10.0" )
120
- set (__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0" )
121
- else ()
122
- set (__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5" )
123
- endif ()
147
+ ocv_filter_available_architecture (__cuda_arch_bin
148
+ ${_arch_fermi}
149
+ ${_arch_kepler}
150
+ ${_arch_maxwell}
151
+ ${_arch_pascal}
152
+ ${_arch_volta}
153
+ ${_arch_turing}
154
+ )
124
155
endif ()
125
156
endif ()
157
# Rewrite every "2.1" entry of the computed list to "2.1(2.0)" before the
# list is published below.
+ ocv_wipeout_deprecated (__cuda_arch_bin )
126
158
127
159
# Publish the computed values as STRING cache entries. No FORCE is used, so a
# value already present in the cache is kept.
set (CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" )
128
160
set (CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for" )
0 commit comments