Commit 92d86f1

Authored by maleadt and ChrisPsa
cuTENSOR plan handling changes. (#2234)
Co-authored-by: Christos Psarras <cpsarras@nvidia.com>
1 parent 6923658 commit 92d86f1

File tree: 8 files changed, +135 -82 lines changed

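In short: the high-level wrappers are renamed and no longer exported (elementwise_binary! and elementwise_trinary! become elementwise_binary_execute! and elementwise_trinary_execute!, permutation! becomes permute!, contraction! becomes contract!, reduction! becomes reduce!), and each operation gains a method that executes a precomputed CuTensorPlan directly, so planning can be separated from execution and plans can be reused.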

lib/cutensor/src/interfaces.jl

Lines changed: 5 additions & 5 deletions
@@ -6,15 +6,15 @@ function Base.:(+)(A::CuTensor, B::CuTensor)
     α = convert(eltype(A), 1.0)
     γ = convert(eltype(B), 1.0)
     C = similar(B)
-    elementwise_binary!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
+    elementwise_binary_execute!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
     C
 end
 
 function Base.:(-)(A::CuTensor, B::CuTensor)
     α = convert(eltype(A), 1.0)
     γ = convert(eltype(B), -1.0)
     C = similar(B)
-    elementwise_binary!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
+    elementwise_binary_execute!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
     C
 end
 
@@ -36,16 +36,16 @@ end
 using LinearAlgebra
 
 function LinearAlgebra.axpy!(a, X::CuTensor, Y::CuTensor)
-    elementwise_binary!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, one(eltype(Y)), Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
+    elementwise_binary_execute!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, one(eltype(Y)), Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
     return Y
 end
 
 function LinearAlgebra.axpby!(a, X::CuTensor, b, Y::CuTensor)
-    elementwise_binary!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, b, Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
+    elementwise_binary_execute!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, b, Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
     return Y
 end
 
 function LinearAlgebra.mul!(C::CuTensor, A::CuTensor, B::CuTensor)
-    contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR_OP_IDENTITY, zero(eltype(C)), C.data, C.inds, CUTENSOR_OP_IDENTITY, CUTENSOR_OP_IDENTITY)
+    contract!(one(eltype(C)), A.data, A.inds, CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR_OP_IDENTITY, zero(eltype(C)), C.data, C.inds, CUTENSOR_OP_IDENTITY, CUTENSOR_OP_IDENTITY)
     return C
 end
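For orientation, these renamed entry points keep the existing CuTensor-level behavior. A rough usage sketch follows; it assumes the CuTensor(data, inds) wrapper with Char mode labels that these methods operate on, and is illustrative rather than part of this diff:

using CUDA, cuTENSOR, LinearAlgebra

# Hypothetical tensors; CuTensor pairs a CuArray with mode labels.
A = CuTensor(CUDA.rand(Float32, 4, 8), ['i', 'j'])
B = CuTensor(CUDA.rand(Float32, 4, 8), ['i', 'j'])
S = A + B            # now lowers to elementwise_binary_execute!
axpy!(2f0, A, B)     # B += 2A, same code path

C = CuTensor(CUDA.zeros(Float32, 4, 4), ['i', 'k'])
D = CuTensor(CUDA.rand(Float32, 8, 4), ['j', 'k'])
mul!(C, A, D)        # contraction over the shared 'j' mode, now via contract!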

lib/cutensor/src/operations.jl

Lines changed: 79 additions & 33 deletions
@@ -1,6 +1,3 @@
-export elementwise_binary!, elementwise_trinary!,
-       permutation!, contraction!, reduction!
-
 const ModeType = AbstractVector{<:Union{Char, Integer}}
 
 # remove the CUTENSOR_ prefix from some common enums,
@@ -13,7 +10,7 @@ const ModeType = AbstractVector{<:Union{Char, Integer}}
 is_unary(op::cutensorOperator_t) = (op ∈ (OP_IDENTITY, OP_SQRT, OP_RELU, OP_CONJ, OP_RCP))
 is_binary(op::cutensorOperator_t) = (op ∈ (OP_ADD, OP_MUL, OP_MAX, OP_MIN))
 
-function elementwise_trinary!(
+function elementwise_trinary_execute!(
     @nospecialize(alpha::Number),
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(beta::Number),
@@ -43,12 +40,7 @@ function elementwise_trinary!(
         plan
     end
 
-    scalar_type = actual_plan.scalar_type
-    cutensorElementwiseTrinaryExecute(handle(), actual_plan,
-                                      Ref{scalar_type}(alpha), A,
-                                      Ref{scalar_type}(beta), B,
-                                      Ref{scalar_type}(gamma), C, D,
-                                      stream())
+    elementwise_trinary_execute!(actual_plan, alpha, A, beta, B, gamma, C, D)
 
     if plan === nothing
         CUDA.unsafe_free!(actual_plan)
@@ -57,6 +49,23 @@ function elementwise_trinary!(
     return D
 end
 
+function elementwise_trinary_execute!(plan::CuTensorPlan,
+                                      @nospecialize(alpha::Number),
+                                      @nospecialize(A::DenseCuArray),
+                                      @nospecialize(beta::Number),
+                                      @nospecialize(B::DenseCuArray),
+                                      @nospecialize(gamma::Number),
+                                      @nospecialize(C::DenseCuArray),
+                                      @nospecialize(D::DenseCuArray))
+    scalar_type = plan.scalar_type
+    cutensorElementwiseTrinaryExecute(handle(), plan,
+                                      Ref{scalar_type}(alpha), A,
+                                      Ref{scalar_type}(beta), B,
+                                      Ref{scalar_type}(gamma), C, D,
+                                      stream())
+    return D
+end
+
 function plan_elementwise_trinary(
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(B::DenseCuArray), Binds::ModeType, opB::cutensorOperator_t,
@@ -104,7 +113,7 @@ function plan_elementwise_trinary(
     CuTensorPlan(desc[], plan_pref[]; workspacePref=workspace)
 end
 
-function elementwise_binary!(
+function elementwise_binary_execute!(
     @nospecialize(alpha::Number),
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(gamma::Number),
@@ -130,11 +139,7 @@ function elementwise_binary!(
         plan
     end
 
-    scalar_type = actual_plan.scalar_type
-    cutensorElementwiseBinaryExecute(handle(), actual_plan,
-                                     Ref{scalar_type}(alpha), A,
-                                     Ref{scalar_type}(gamma), C, D,
-                                     stream())
+    elementwise_binary_execute!(actual_plan, alpha, A, gamma, C, D)
 
     if plan === nothing
         CUDA.unsafe_free!(actual_plan)
@@ -143,6 +148,20 @@ function elementwise_binary!(
     return D
 end
 
+function elementwise_binary_execute!(plan::CuTensorPlan,
+                                     @nospecialize(alpha::Number),
+                                     @nospecialize(A::DenseCuArray),
+                                     @nospecialize(gamma::Number),
+                                     @nospecialize(C::DenseCuArray),
+                                     @nospecialize(D::DenseCuArray))
+    scalar_type = plan.scalar_type
+    cutensorElementwiseBinaryExecute(handle(), plan,
+                                     Ref{scalar_type}(alpha), A,
+                                     Ref{scalar_type}(gamma), C, D,
+                                     stream())
+    return D
+end
+
 function plan_elementwise_binary(
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(C::DenseCuArray), Cinds::ModeType, opC::cutensorOperator_t,
@@ -183,7 +202,7 @@ function plan_elementwise_binary(
     CuTensorPlan(desc[], plan_pref[]; workspacePref=workspace)
 end
 
-function permutation!(
+function permute!(
     @nospecialize(alpha::Number),
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(B::DenseCuArray), Binds::ModeType;
@@ -206,10 +225,7 @@ function permutation!(
         plan
     end
 
-    scalar_type = actual_plan.scalar_type
-    cutensorPermute(handle(), actual_plan,
-                    Ref{scalar_type}(alpha), A, B,
-                    stream())
+    permute!(actual_plan, alpha, A, B)
 
     if plan === nothing
         CUDA.unsafe_free!(actual_plan)
@@ -218,6 +234,17 @@ function permutation!(
     return B
 end
 
+function permute!(plan::CuTensorPlan,
+                  @nospecialize(alpha::Number),
+                  @nospecialize(A::DenseCuArray),
+                  @nospecialize(B::DenseCuArray))
+    scalar_type = plan.scalar_type
+    cutensorPermute(handle(), plan,
+                    Ref{scalar_type}(alpha), A, B,
+                    stream())
+    return B
+end
+
 function plan_permutation(
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(B::DenseCuArray), Binds::ModeType;
@@ -249,7 +276,7 @@ function plan_permutation(
     CuTensorPlan(desc[], plan_pref[]; workspacePref=workspace)
 end
 
-function contraction!(
+function contract!(
     @nospecialize(alpha::Number),
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(B::DenseCuArray), Binds::ModeType, opB::cutensorOperator_t,
@@ -275,11 +302,7 @@ function contraction!(
         plan
     end
 
-    scalar_type = actual_plan.scalar_type
-    cutensorContract(handle(), actual_plan,
-                     Ref{scalar_type}(alpha), A, B,
-                     Ref{scalar_type}(beta), C, C,
-                     actual_plan.workspace, sizeof(actual_plan.workspace), stream())
+    contract!(actual_plan, alpha, A, B, beta, C)
 
     if plan === nothing
         CUDA.unsafe_free!(actual_plan)
@@ -288,6 +311,20 @@ function contraction!(
     return C
 end
 
+function contract!(plan::CuTensorPlan,
+                   @nospecialize(alpha::Number),
+                   @nospecialize(A::DenseCuArray),
+                   @nospecialize(B::DenseCuArray),
+                   @nospecialize(beta::Number),
+                   @nospecialize(C::DenseCuArray))
+    scalar_type = plan.scalar_type
+    cutensorContract(handle(), plan,
+                     Ref{scalar_type}(alpha), A, B,
+                     Ref{scalar_type}(beta), C, C,
+                     plan.workspace, sizeof(plan.workspace), stream())
+    return C
+end
+
 function plan_contraction(
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(B::DenseCuArray), Binds::ModeType, opB::cutensorOperator_t,
@@ -330,7 +367,7 @@ function plan_contraction(
     CuTensorPlan(desc[], plan_pref[]; workspacePref=workspace)
 end
 
-function reduction!(
+function reduce!(
     @nospecialize(alpha::Number),
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(beta::Number),
@@ -353,11 +390,7 @@ function reduction!(
         plan
     end
 
-    scalar_type = actual_plan.scalar_type
-    cutensorReduce(handle(), actual_plan,
-                   Ref{scalar_type}(alpha), A,
-                   Ref{scalar_type}(beta), C, C,
-                   actual_plan.workspace, sizeof(actual_plan.workspace), stream())
+    reduce!(actual_plan, alpha, A, beta, C)
 
     if plan === nothing
         CUDA.unsafe_free!(actual_plan)
@@ -366,6 +399,19 @@ function reduction!(
     return C
 end
 
+function reduce!(plan::CuTensorPlan,
+                 @nospecialize(alpha::Number),
+                 @nospecialize(A::DenseCuArray),
+                 @nospecialize(beta::Number),
+                 @nospecialize(C::DenseCuArray))
+    scalar_type = plan.scalar_type
+    cutensorReduce(handle(), plan,
+                   Ref{scalar_type}(alpha), A,
+                   Ref{scalar_type}(beta), C, C,
+                   plan.workspace, sizeof(plan.workspace), stream())
+    return C
+end
+
 function plan_reduction(
     @nospecialize(A::DenseCuArray), Ainds::ModeType, opA::cutensorOperator_t,
     @nospecialize(C::DenseCuArray), Cinds::ModeType, opC::cutensorOperator_t,
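The split above means a plan can be built once and then executed many times, amortizing the planning cost. A minimal sketch of that pattern, based on the plan_contraction and contract! methods in this file (the sizes, mode labels, and operator choices are illustrative, not part of this diff):

using CUDA, cuTENSOR
using cuTENSOR: plan_contraction, contract!   # no longer exported

dA = CUDA.rand(Float32, 32, 16)               # modes ('i', 'j')
dB = CUDA.rand(Float32, 16, 8)                # modes ('j', 'k')
dC = CUDA.zeros(Float32, 32, 8)               # modes ('i', 'k')
indsA, indsB, indsC = ['i', 'j'], ['j', 'k'], ['i', 'k']
op = cuTENSOR.OP_IDENTITY

# Plan once ...
plan = plan_contraction(dA, indsA, op, dB, indsB, op, dC, indsC, op, op)

# ... and execute it repeatedly with the new plan-accepting method.
for _ in 1:10
    contract!(plan, 1, dA, dB, 0, dC)         # dC = 1*(A contracted with B) + 0*dC
end

# The plan-accepting methods do not free the plan; release it when done,
# as the one-shot wrappers do for their internally created plans.
CUDA.unsafe_free!(plan)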

lib/cutensor/src/types.jl

Lines changed: 0 additions & 2 deletions
@@ -1,7 +1,5 @@
 ## data types
 
-export cutensorComputeDescriptorEnum
-
 @enum cutensorComputeDescriptorEnum begin
     COMPUTE_DESC_16F = 1
     COMPUTE_DESC_32F = 2
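With the export gone, callers qualify the enum through the module, as the updated tests below do. A small illustrative snippet:

using cuTENSOR

# Map a Julia element type to the corresponding compute descriptor,
# which can then be passed as a plan's compute_type.
desc = convert(cuTENSOR.cutensorComputeDescriptorEnum, Float32)
# e.g. cuTENSOR.plan_contraction(...; compute_type=desc)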

lib/cutensor/test/contractions.jl

Lines changed: 13 additions & 12 deletions
@@ -1,5 +1,7 @@
11
@testset "contractions" begin
22

3+
using cuTENSOR: contract!, plan_contraction
4+
35
using LinearAlgebra
46

57
eltypes = [(Float32, Float32, Float32, Float32),
@@ -52,7 +54,7 @@ eltypes = [(Float32, Float32, Float32, Float32),
5254
opB = cuTENSOR.OP_IDENTITY
5355
opC = cuTENSOR.OP_IDENTITY
5456
opOut = cuTENSOR.OP_IDENTITY
55-
dC = contraction!(1, dA, indsA, opA, dB, indsB, opB, 0, dC, indsC, opC, opOut, compute_type=eltyCompute)
57+
dC = contract!(1, dA, indsA, opA, dB, indsB, opB, 0, dC, indsC, opC, opOut, compute_type=eltyCompute)
5658
C = collect(dC)
5759
mC = reshape(permutedims(C, ipC), (loA, loB))
5860
@test mC mA * mB rtol=compute_rtol
@@ -63,7 +65,7 @@ eltypes = [(Float32, Float32, Float32, Float32),
6365
opC = cuTENSOR.OP_IDENTITY
6466
opOut = cuTENSOR.OP_IDENTITY
6567
plan = cuTENSOR.plan_contraction(dA, indsA, opA, dB, indsB, opB, dC, indsC, opC, opOut)
66-
dC = contraction!(1, dA, indsA, opA, dB, indsB, opB, 0, dC, indsC, opC, opOut; plan)
68+
dC = cuTENSOR.contract!(plan, 1, dA, dB, 0, dC)
6769
C = collect(dC)
6870
mC = reshape(permutedims(C, ipC), (loA, loB))
6971
@test mC mA * mB
@@ -73,10 +75,9 @@ eltypes = [(Float32, Float32, Float32, Float32),
7375
opB = cuTENSOR.OP_IDENTITY
7476
opC = cuTENSOR.OP_IDENTITY
7577
opOut = cuTENSOR.OP_IDENTITY
76-
eltypComputeEnum = convert(cutensorComputeDescriptorEnum, eltyCompute)
78+
eltypComputeEnum = convert(cuTENSOR.cutensorComputeDescriptorEnum, eltyCompute)
7779
plan = cuTENSOR.plan_contraction(dA, indsA, opA, dB, indsB, opB, dC, indsC, opC, opOut; compute_type=eltypComputeEnum)
78-
dC = contraction!(1, dA, indsA, opA, dB, indsB, opB,
79-
0, dC, indsC, opC, opOut, plan=plan, compute_type=eltypComputeEnum)
80+
dC = cuTENSOR.contract!(plan, 1, dA, dB, 0, dC)
8081
C = collect(dC)
8182
mC = reshape(permutedims(C, ipC), (loA, loB))
8283
@test mC mA * mB rtol=compute_rtol
@@ -87,14 +88,14 @@ eltypes = [(Float32, Float32, Float32, Float32),
8788
opC = cuTENSOR.OP_IDENTITY
8889
opOut = cuTENSOR.OP_IDENTITY
8990
plan = cuTENSOR.plan_contraction(dA, indsA, opA, dB, indsB, opB, dC, indsC, opC, opOut; jit=cuTENSOR.JIT_MODE_DEFAULT)
90-
dC = contraction!(1, dA, indsA, opA, dB, indsB, opB, 0, dC, indsC, opC, opOut, plan=plan)
91+
dC = cuTENSOR.contract!(plan, 1, dA, dB, 0, dC)
9192
C = collect(dC)
9293
mC = reshape(permutedims(C, ipC), (loA, loB))
9394
@test mC mA * mB
9495

9596
# with non-trivial α
9697
α = rand(eltyCompute)
97-
dC = contraction!(α, dA, indsA, opA, dB, indsB, opB, zero(eltyCompute), dC, indsC, opC, opOut; compute_type=eltyCompute)
98+
dC = contract!(α, dA, indsA, opA, dB, indsB, opB, zero(eltyCompute), dC, indsC, opC, opOut; compute_type=eltyCompute)
9899
C = collect(dC)
99100
mC = reshape(permutedims(C, ipC), (loA, loB))
100101
@test mC α * mA * mB rtol=compute_rtol
@@ -105,7 +106,7 @@ eltypes = [(Float32, Float32, Float32, Float32),
105106
α = rand(eltyCompute)
106107
β = rand(eltyCompute)
107108
copyto!(dC, C)
108-
dD = contraction!(α, dA, indsA, opA, dB, indsB, opB, β, dC, indsC, opC, opOut; compute_type=eltyCompute)
109+
dD = contract!(α, dA, indsA, opA, dB, indsB, opB, β, dC, indsC, opC, opOut; compute_type=eltyCompute)
109110
D = collect(dD)
110111
mC = reshape(permutedims(C, ipC), (loA, loB))
111112
mD = reshape(permutedims(D, ipC), (loA, loB))
@@ -133,7 +134,7 @@ eltypes = [(Float32, Float32, Float32, Float32),
133134
opA = cuTENSOR.OP_CONJ
134135
opB = cuTENSOR.OP_IDENTITY
135136
opOut = cuTENSOR.OP_IDENTITY
136-
dC = contraction!(complex(1.0, 0.0), dA, indsA, opA, dB, indsB, opB,
137+
dC = contract!(complex(1.0, 0.0), dA, indsA, opA, dB, indsB, opB,
137138
0, dC, indsC, opC, opOut; compute_type=eltyCompute)
138139
C = collect(dC)
139140
mC = reshape(permutedims(C, ipC), (loA, loB))
@@ -143,8 +144,8 @@ eltypes = [(Float32, Float32, Float32, Float32),
143144
opA = cuTENSOR.OP_IDENTITY
144145
opB = cuTENSOR.OP_CONJ
145146
opOut = cuTENSOR.OP_IDENTITY
146-
dC = contraction!(complex(1.0, 0.0), dA, indsA, opA, dB, indsB, opB,
147-
complex(0.0, 0.0), dC, indsC, opC, opOut; compute_type=eltyCompute)
147+
dC = contract!(complex(1.0, 0.0), dA, indsA, opA, dB, indsB, opB,
148+
complex(0.0, 0.0), dC, indsC, opC, opOut; compute_type=eltyCompute)
148149
C = collect(dC)
149150
mC = reshape(permutedims(C, ipC), (loA, loB))
150151
@test mC mA*conj(mB) rtol=compute_rtol
@@ -153,7 +154,7 @@ eltypes = [(Float32, Float32, Float32, Float32),
153154
opA = cuTENSOR.OP_CONJ
154155
opB = cuTENSOR.OP_CONJ
155156
opOut = cuTENSOR.OP_IDENTITY
156-
dC = contraction!(one(eltyCompute), dA, indsA, opA, dB, indsB, opB,
157+
dC = contract!(one(eltyCompute), dA, indsA, opA, dB, indsB, opB,
157158
zero(eltyCompute), dC, indsC, opC, opOut; compute_type=eltyCompute)
158159
C = collect(dC)
159160
mC = reshape(permutedims(C, ipC), (loA, loB))
