Skip to content

Commit 7d74f90

Browse files
maleadt and ChrisPsa
authored
Update to CUTENSOR 2.0 (#2178)
Co-authored-by: Christos Psarras <cpsarras@nvidia.com>
1 parent e718b0d commit 7d74f90

21 files changed

+1257
-1134
lines changed

.buildkite/pipeline.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,7 @@ steps:
139139
withenv("JULIA_PKG_PRECOMPILE_AUTO" => 0) do
140140
Pkg.instantiate()
141141
142-
pkgs = [PackageSpec(path=joinpath(pwd(), "lib", lowercase("{{matrix.package}}")))]
143-
if "{{matrix.package}}" == "cuTensorNet"
144-
# cuTensorNet depends on a development version of cuTENSOR
145-
push!(pkgs, PackageSpec(path=joinpath(pwd(), "lib", "cutensor")))
146-
end
147-
Pkg.develop(pkgs)
142+
Pkg.develop(path=joinpath(pwd(), "lib", lowercase("{{matrix.package}}")))
148143
149144
write("LocalPreferences.toml", "[CUDA_Runtime_jll]\nversion = \"{{matrix.cuda}}\"")
150145
end

lib/cutensor/Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "cuTENSOR"
22
uuid = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
33
authors = ["Tim Besard <tim.besard@gmail.com>"]
4-
version = "1.2.1"
4+
version = "2.0"
55

66
[deps]
77
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
@@ -14,6 +14,6 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1414
CEnum = "0.2, 0.3, 0.4"
1515
CUDA = "~5.1"
1616
CUDA_Runtime_Discovery = "0.2"
17-
CUTENSOR_jll = "~1.7"
17+
CUTENSOR_jll = "~2.0"
1818
julia = "1.6"
1919
LinearAlgebra = "1"

lib/cutensor/src/cuTENSOR.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,21 @@ include("libcutensor.jl")
2727

2828
# low-level wrappers
2929
include("error.jl")
30-
include("tensor.jl")
31-
include("wrappers.jl")
30+
include("utils.jl")
31+
include("types.jl")
32+
include("operations.jl")
3233

3334
# high-level integrations
3435
include("interfaces.jl")
3536

3637
# cache for created, but unused handles
37-
const idle_handles = HandleCache{CuContext,Ptr{cutensorHandle_t}}()
38+
const idle_handles = HandleCache{CuContext,cutensorHandle_t}()
3839

3940
function handle()
4041
cuda = CUDA.active_state()
4142

4243
# every task maintains library state per device
43-
LibraryState = @NamedTuple{handle::Ptr{cutensorHandle_t}}
44+
LibraryState = @NamedTuple{handle::cutensorHandle_t}
4445
states = get!(task_local_storage(), :cuTENSOR) do
4546
Dict{CuContext,LibraryState}()
4647
end::Dict{CuContext,LibraryState}

lib/cutensor/src/error.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ function description(err::CUTENSORError)
4141
"insufficient workspace memory for this operation"
4242
elseif err.code == CUTENSOR_STATUS_INSUFFICIENT_DRIVER
4343
"insufficient driver version"
44+
elseif err.code == CUTENSOR_STATUS_IO_ERROR
45+
"file not found"
4446
else
4547
"no description for this error"
4648
end

lib/cutensor/src/interfaces.jl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@ function Base.:(+)(A::CuTensor, B::CuTensor)
66
α = convert(eltype(A), 1.0)
77
γ = convert(eltype(B), 1.0)
88
C = similar(B)
9-
elementwiseBinary!(α, A, CUTENSOR_OP_IDENTITY, γ, B, CUTENSOR_OP_IDENTITY, C, CUTENSOR_OP_ADD)
9+
elementwise_binary!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
10+
C
1011
end
1112

1213
function Base.:(-)(A::CuTensor, B::CuTensor)
1314
α = convert(eltype(A), 1.0)
1415
γ = convert(eltype(B), -1.0)
1516
C = similar(B)
16-
elementwiseBinary!(α, A, CUTENSOR_OP_IDENTITY, γ, B, CUTENSOR_OP_IDENTITY, C, CUTENSOR_OP_ADD)
17+
elementwise_binary!(α, A.data, A.inds, CUTENSOR_OP_IDENTITY, γ, B.data, B.inds, CUTENSOR_OP_IDENTITY, C.data, C.inds, CUTENSOR_OP_ADD)
18+
C
1719
end
1820

1921
function Base.:(*)(A::CuTensor, B::CuTensor)
@@ -33,8 +35,15 @@ end
3335

3436
using LinearAlgebra
3537

36-
LinearAlgebra.axpy!(a, X::CuTensor, Y::CuTensor) = elementwiseBinary!(a, X, CUTENSOR_OP_IDENTITY, one(eltype(Y)), Y, CUTENSOR_OP_IDENTITY, Y, CUTENSOR_OP_ADD)
37-
LinearAlgebra.axpby!(a, X::CuTensor, b, Y::CuTensor) = elementwiseBinary!(a, X, CUTENSOR_OP_IDENTITY, b, Y, CUTENSOR_OP_IDENTITY, Y, CUTENSOR_OP_ADD)
38+
function LinearAlgebra.axpy!(a, X::CuTensor, Y::CuTensor)
39+
elementwise_binary!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, one(eltype(Y)), Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
40+
return Y
41+
end
42+
43+
function LinearAlgebra.axpby!(a, X::CuTensor, b, Y::CuTensor)
44+
elementwise_binary!(a, X.data, X.inds, CUTENSOR_OP_IDENTITY, b, Y.data, Y.inds, CUTENSOR_OP_IDENTITY, Y.data, Y.inds, CUTENSOR_OP_ADD)
45+
return Y
46+
end
3847

3948
function LinearAlgebra.mul!(C::CuTensor, A::CuTensor, B::CuTensor)
4049
contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR_OP_IDENTITY, zero(eltype(C)), C.data, C.inds, CUTENSOR_OP_IDENTITY, CUTENSOR_OP_IDENTITY)

0 commit comments

Comments (0)