CUDA Issue When Converting MPS to MPO

Hello,

I have some calculations that require I convert an MPS to an MPO.

My naive initial attempt to do this was to fill an MPO with copy tensors (the delta tensor) and then contract that with the MPS. This works on the CPU but when I try to convert my code to run on GPU I get the following error. Below is also an MWE.

Any ideas what I’ve done wrong here? I am using ITensors v0.3.49

# MWE.jl
using CUDA, ITensors

function main()

    gpu = cu

    d = 2
    N = 5
    n = d^N
    x = LinRange(1, 2, n)
    y = x .^ 2
    sinds = siteinds(d, N)
    ymps = gpu(MPS(y, sinds))
    ympo = MPO(N)
    for i = 1:N
        s = sinds[i]
        ympo[i] = δ(s, s', s'')
    end

    ympo = gpu(ympo)

    ympo = contract(ymps, ympo; alg="densitymatrix")
end

main()

Full stacktrace

┌ Warning: Performing scalar indexing on task Task (runnable) @0x00007f97c9dbc010.
│ Invocation of getindex resulted in scalar indexing of a GPU array.
│ This is typically caused by calling an iterating implementation of a method.
│ Such implementations *do not* execute on the GPU, but very slowly on the CPU,
│ and therefore are only permitted from the REPL for prototyping purposes.
│ If you did intend to index this array, annotate the caller with @allowscalar.
└ @ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:106
ERROR: LoadError: ArgumentError: cannot take the CPU address of a CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}
Stacktrace:
  [1] unsafe_convert(#unused#::Type{Ptr{Float64}}, x::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
    @ CUDA ~/.julia/packages/CUDA/nbRJk/src/array.jl:345
  [2] gemm!(transA::Char, transB::Char, alpha::Float64, A::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, B::Matrix{Float64}, beta::Float64, C::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
    @ LinearAlgebra.BLAS ~/projects/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:1524
  [3] gemm_wrapper!(C::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, tA::Char, tB::Char, A::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, B::Matrix{Float64}, _add::LinearAlgebra.MulAddMul{true, true, Float64, Float64})
    @ LinearAlgebra ~/projects/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/matmul.jl:674
  [4] mul!
    @ ~/projects/julia-1.9.2/share/julia/stdlib/v1.9/LinearAlgebra/src/matmul.jl:161 [inlined]
  [5] mul!(CM::NDTensors.Unwrap.Exposed{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, AM::NDTensors.Unwrap.Exposed{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, BM::NDTensors.Unwrap.Exposed{Matrix{Float64}, Matrix{Float64}}, α::Float64, β::Float64)
    @ NDTensors.Unwrap ~/.julia/packages/NDTensors/voe3z/src/Unwrap/src/functions/mul.jl:2
  [6] mul!!
    @ ~/.julia/packages/NDTensors/voe3z/src/abstractarray/mul.jl:2 [inlined]
  [7] mul!!
    @ ~/.julia/packages/NDTensors/voe3z/src/abstractarray/mul.jl:10 [inlined]
  [8] _contract!(CT::CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}, AT::Array{Float64, 3}, BT::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, props::NDTensors.ContractionProperties{3, 2, 3}, α::Float64, β::Float64)
    @ NDTensors ~/.julia/packages/NDTensors/voe3z/src/abstractarray/tensoralgebra/contract.jl:171
  [9] _contract!(CT::NDTensors.DenseTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Dense{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, AT::NDTensors.DenseTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Dense{Float64, Vector{Float64}}}, BT::NDTensors.DenseTensor{Float64, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, props::NDTensors.ContractionProperties{3, 2, 3}, α::Float64, β::Float64)
    @ NDTensors ~/.julia/packages/NDTensors/voe3z/src/dense/tensoralgebra/contract.jl:230
 [10] contract!
    @ ~/.julia/packages/NDTensors/voe3z/src/dense/tensoralgebra/contract.jl:213 [inlined]
 [11] contract!(C::NDTensors.DenseTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Dense{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Clabels::Tuple{Int64, Int64, Int64}, A::NDTensors.DiagTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Diag{Float64, Float64}}, Alabels::Tuple{Int64, Int64, Int64}, B::NDTensors.DenseTensor{Float32, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, Blabels::Tuple{Int64, Int64}, α::Float64, β::Float64; convert_to_dense::Bool)
    @ NDTensors ~/.julia/packages/NDTensors/voe3z/src/diag/tensoralgebra/contract.jl:126
 [12] contract!
    @ ~/.julia/packages/NDTensors/voe3z/src/diag/tensoralgebra/contract.jl:88 [inlined]
 [13] contract! (repeats 2 times)
    @ ~/.julia/packages/NDTensors/voe3z/src/diag/tensoralgebra/contract.jl:210 [inlined]
 [14] _contract!!
    @ ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:127 [inlined]
 [15] _contract!!
    @ ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:115 [inlined]
 [16] contract!!
    @ ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:176 [inlined]
 [17] contract!!
    @ ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:145 [inlined]
 [18] contract(tensor1::NDTensors.DenseTensor{Float32, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, labelstensor1::Tuple{Int64, Int64}, tensor2::NDTensors.DiagTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Diag{Float64, Float64}}, labelstensor2::Tuple{Int64, Int64, Int64}, labelsoutput_tensor::Tuple{Int64, Int64, Int64})
    @ NDTensors ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:98
 [19] contract(::Type{NDTensors.CanContract{NDTensors.DenseTensor{Float32, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, NDTensors.DiagTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Diag{Float64, Float64}}}}, tensor1::NDTensors.DenseTensor{Float32, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, labels_tensor1::Tuple{Int64, Int64}, tensor2::NDTensors.DiagTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Diag{Float64, Float64}}, labels_tensor2::Tuple{Int64, Int64, Int64})
    @ NDTensors ~/.julia/packages/NDTensors/voe3z/src/tensoralgebra/generic_tensor_operations.jl:76
 [20] contract
    @ ~/.julia/packages/SimpleTraits/l1ZsK/src/SimpleTraits.jl:331 [inlined]
 [21] _contract(A::NDTensors.DenseTensor{Float32, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, B::NDTensors.DiagTensor{Float64, 3, Tuple{Index{Int64}, Index{Int64}, Index{Int64}}, NDTensors.Diag{Float64, Float64}})
    @ ITensors ~/.julia/packages/ITensors/w2KHv/src/tensor_operations/tensor_algebra.jl:3
 [22] _contract(A::ITensor, B::ITensor)
    @ ITensors ~/.julia/packages/ITensors/w2KHv/src/tensor_operations/tensor_algebra.jl:9
 [23] contract(A::ITensor, B::ITensor)
    @ ITensors ~/.julia/packages/ITensors/w2KHv/src/tensor_operations/tensor_algebra.jl:104
 [24] *
    @ ~/.julia/packages/ITensors/w2KHv/src/tensor_operations/tensor_algebra.jl:91 [inlined]
 [25] contract(::NDTensors.Algorithm{:densitymatrix}, A::MPO, ψ::MPS; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ ITensors ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:715
 [26] contract
    @ ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:677 [inlined]
 [27] contract(A::MPO, ψ::MPS; alg::String, method::String, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ ITensors ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:615
 [28] contract
    @ ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:600 [inlined]
 [29] #contract#1004
    @ ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:664 [inlined]
 [30] contract
    @ ~/.julia/packages/ITensors/w2KHv/src/mps/mpo.jl:664 [inlined]
 [31] main()
    @ Main ~/projects/myscripts/MWE.jl:23
 [32] top-level scope
    @ ~/project/myscripts/MWE.jl:26
 [33] include(fname::String)
    @ Base.MainInclude ./client.jl:478
 [34] top-level scope
    @ REPL[2]:1
in expression starting at /path/to/my/project/myscripts/MWE.jl:26

Try this instead:

      ympo[i] = dense(δ(s, s', s''))

Basically the issue is that δ makes a tensor with a uniform diagonal value of 1, which isn’t being handled properly by our GPU backend. Basically internally our code is allocating it on CPU once you go to contract it with a GPU tensor, and contracting CPU tensors with GPU tensors isn’t defined right now which is leading to the error you are seeing. dense just makes that tensor dense to begin with so it gets transferred to GPU properly by gpu(ympo).

We are designing a better system right now to handle that kind of situation and make the code you wrote above “just work” but it will take some time.

@kmp5 this should get handled better by the new FillArrays-based design, good to keep in mind as something we want to check gets fixed.

1 Like

@Bob could you raise an issue about this on Github so we can keep track of it?

An example like:

using CUDA, ITensors
i, j, k, l = Index.((2, 2, 2, 2))
cu(randomITensor(k, l)) * cu(δ(i, j, k))

is sufficient to reproduce this bug.