diff --git a/src/ptx.jl b/src/ptx.jl
index a39524d1..37229f52 100644
--- a/src/ptx.jl
+++ b/src/ptx.jl
@@ -55,11 +55,21 @@ function llvm_machine(target::PTXCompilerTarget)
 end
 
 # the default datalayout does not match the one in the NVPTX user guide
-llvm_datalayout(target::PTXCompilerTarget) = Int===Int64 ?
-    "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"*
-    "-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" :
-    "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"*
-    "-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+llvm_datalayout(target::PTXCompilerTarget) =
+    # little endian
+    "e-" *
+    # on 32-bit systems, use 32-bit pointers.
+    # on 64-bit systems, use 64-bit pointers, but prefer 32-bit indexing.
+    # this is not what the NVPTX user guide recommends, but helps for performance.
+    (Int === Int64 ? "p:64:64:64:32-" : "p:32:32:32-") *
+    # alignment of integer types
+    "i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" *
+    # alignment of floating point types
+    "f32:32:32-f64:64:64-" *
+    # alignment of vector types
+    "v16:16:16-v32:32:32-v64:64:64-v128:128:128-" *
+    # native integer widths
+    "n16:32:64"
 
 have_fma(@nospecialize(target::PTXCompilerTarget), T::Type) = true
 