CUDA problem while using tdvp

Hi,

I have a somewhat bizarre problem using tdvp with CUDA:

# psi, psi1, H, and dt are defined earlier (not shown); move the states and the Hamiltonian to the GPU:
psi = cu(psi)
psi1 = cu(psi1)
H = cu(H)

# Evolve psi and divide out its overlap with the reference state psi1 at each step:
for nt = 1:1000
    @show nt
    global psi = tdvp(H, dt, psi; time_step=dt)
    global psi = psi / inner(psi1, psi)
end

After the code ran smoothly for the first 371 time steps, it produced the following error (see the end of the post for the full message):

nt = 371
After sweep 1: maxlinkdim=448 maxerr=9.97E-13 current_time=0.01 time=227.93
ERROR: LoadError: UndefVarError: `data` not defined in `NDTensorsCUDAExt`
Stacktrace:
  [1] any(f::Function, E::NDTensors.Expose.Exposed{CuArray{ComplexF64, 1, CUDA.DeviceMemory}, NDTensors.DenseTensor{ComplexF64, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{ComplexF64, CuArray{ComplexF64, 1, CUDA.DeviceMemory}}}})
    @ NDTensorsCUDAExt ~/.julia/packages/NDTensors/zHdA1/ext/NDTensorsCUDAExt/indexing.jl:19

I am quite lost on how to debug this, since the first 371 time steps ran without any problems.

Any idea what this is about?

Thanks,
Zhen

The full error message:

nt = 371
After sweep 1: maxlinkdim=448 maxerr=9.97E-13 current_time=0.01 time=227.93
ERROR: LoadError: UndefVarError: `data` not defined in `NDTensorsCUDAExt`
Stacktrace:
  [1] any(f::Function, E::NDTensors.Expose.Exposed{CuArray{ComplexF64, 1, CUDA.DeviceMemory}, NDTensors.DenseTensor{ComplexF64, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{ComplexF64, CuArray{ComplexF64, 1, CUDA.DeviceMemory}}}})
    @ NDTensorsCUDAExt ~/.julia/packages/NDTensors/zHdA1/ext/NDTensorsCUDAExt/indexing.jl:19
  [2] svd(T::NDTensors.DenseTensor{ComplexF64, 2, Tuple{Index{Int64}, Index{Int64}}, NDTensors.Dense{ComplexF64, CuArray{ComplexF64, 1, CUDA.DeviceMemory}}}; mindim::Int64, maxdim::Int64, cutoff::Float64, use_absolute_cutoff::Nothing, use_relative_cutoff::Nothing, alg::Nothing, min_blockdim::Nothing)
    @ NDTensors ~/.julia/packages/NDTensors/zHdA1/src/linearalgebra/linearalgebra.jl:128
  [3] svd(A::ITensor, Linds::Tuple{Index{Int64}, Index{Int64}, Index{Int64}}; leftdir::Nothing, rightdir::Nothing, lefttags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4}, righttags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4}, mindim::Int64, maxdim::Int64, cutoff::Float64, alg::Nothing, use_absolute_cutoff::Nothing, use_relative_cutoff::Nothing, min_blockdim::Nothing, utags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4}, vtags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4})
    @ ITensors ~/.julia/packages/ITensors/Zs2nC/src/tensor_operations/matrix_decomposition.jl:162
  [4] svd
    @ ~/.julia/packages/ITensors/Zs2nC/src/tensor_operations/matrix_decomposition.jl:110 [inlined]
  [5] factorize_svd(A::ITensor, Linds::Tuple{Index{Int64}, Index{Int64}, Index{Int64}}; singular_values!::Nothing, ortho::String, alg::Nothing, dir::Nothing, mindim::Int64, maxdim::Int64, cutoff::Float64, tags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4}, use_absolute_cutoff::Nothing, use_relative_cutoff::Nothing, min_blockdim::Nothing)
    @ ITensors ~/.julia/packages/ITensors/Zs2nC/src/tensor_operations/matrix_decomposition.jl:615
  [6] factorize(A::ITensor, Linds::Tuple{Index{Int64}, Index{Int64}, Index{Int64}}; mindim::Int64, maxdim::Int64, cutoff::Float64, ortho::String, tags::ITensors.TagSets.GenericTagSet{BitIntegers.UInt256, 4}, plev::Nothing, which_decomp::Nothing, eigen_perturbation::Nothing, svd_alg::Nothing, use_absolute_cutoff::Nothing, use_relative_cutoff::Nothing, min_blockdim::Nothing, singular_values!::Nothing, dir::Nothing)
    @ ITensors ~/.julia/packages/ITensors/Zs2nC/src/tensor_operations/matrix_decomposition.jl:809
  [7] replacebond!(M::MPS, b::Int64, phi::ITensor; normalize::Bool, swapsites::Nothing, ortho::String, which_decomp::Nothing, mindim::Int64, maxdim::Int64, cutoff::Float64, eigen_perturbation::Nothing, svd_alg::Nothing, use_absolute_cutoff::Nothing, use_relative_cutoff::Nothing, min_blockdim::Nothing)
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/mps.jl:563
  [8] replacebond!
    @ ~/.julia/packages/ITensorMPS/hghpU/src/mps.jl:534 [inlined]
  [9] region_update!(nsite_val::Val{2}, reverse_step_val::Val{true}, reduced_operator::ProjMPO, state::MPS, b::Int64; updater::typeof(ITensorMPS.exponentiate_updater), updater_kwargs::@NamedTuple{}, current_time::Float64, time_step::Float64, outputlevel::Int64, normalize::Bool, direction::Base.Order.ReverseOrdering{Base.Order.ForwardOrdering}, noise::Bool, which_decomp::Nothing, svd_alg::Nothing, cutoff::Float64, maxdim::Int64, mindim::Int64, maxtruncerr::Float64)
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/solvers/sweep_update.jl:419
 [10] region_update!(reduced_operator::ProjMPO, state::MPS, b::Int64; updater::Function, updater_kwargs::@NamedTuple{}, nsite::Int64, reverse_step::Bool, current_time::Float64, outputlevel::Int64, time_step::Float64, normalize::Bool, direction::Base.Order.ReverseOrdering{Base.Order.ForwardOrdering}, noise::Bool, which_decomp::Nothing, svd_alg::Nothing, cutoff::Float64, maxdim::Int64, mindim::Int64, maxtruncerr::Float64)
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/solvers/sweep_update.jl:187
 [11] sub_sweep_update(direction::Base.Order.ReverseOrdering{Base.Order.ForwardOrdering}, reduced_operator::ProjMPO, state::MPS; updater::Function, updater_kwargs::@NamedTuple{}, which_decomp::Nothing, svd_alg::Nothing, sweep::Int64, current_time::Float64, time_step::Float64, nsite::Int64, reverse_step::Bool, normalize::Bool, observer!::ITensorMPS.EmptyObserver, outputlevel::Int64, maxdim::Int64, mindim::Int64, cutoff::Float64, noise::Bool)
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/solvers/sweep_update.jl:107
 [12] sweep_update(order::ITensorMPS.TDVPOrder{2, Base.Order.ForwardOrdering()}, reduced_operator::ProjMPO, state::MPS; current_time::Float64, time_step::Float64, kwargs::@Kwargs{updater::typeof(ITensorMPS.exponentiate_updater), updater_kwargs::@NamedTuple{}, nsite::Int64, reverse_step::Bool, sweep::Int64, observer!::ITensorMPS.EmptyObserver, normalize::Bool, outputlevel::Int64, maxdim::Int64, mindim::Int64, cutoff::Float64, noise::Bool})
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/solvers/sweep_update.jl:25
 [13] macro expansion
    @ ~/.julia/packages/ITensorMPS/hghpU/src/solvers/alternating_update.jl:69 [inlined]
 [14] macro expansion
    @ ./timing.jl:421 [inlined]
 [15] alternating_update(operator::MPO, init::MPS; updater::Function, updater_kwargs::@NamedTuple{}, nsweeps::Int64, checkdone::Returns{Bool}, write_when_maxdim_exceeds::Nothing, nsite::Int64, reverse_step::Bool, time_start::Float64, time_step::Float64, order::Int64, observer!::ITensorMPS.EmptyObserver, sweep_observer!::DataFrames.DataFrame, outputlevel::Int64, normalize::Bool, maxdim::Vector{Int64}, mindim::Int64, cutoff::Vector{Float64}, noise::Bool)
    @ ITensorMPS ~/.julia/packages/ITensorMPS/hghpU/src/solvers/alternating_update.jl:68
 [16] alternating_update
    @ ~/.julia/packages/ITensorMPS/hghpU/src/solvers/alternating_update.jl:23 [inlined]
 [17] #tdvp#767
    @ ~/.julia/packages/ITensorMPS/hghpU/src/solvers/tdvp.jl:86 [inlined]

That looks like a bug in our CUDA backend code; it should be a simple fix. We’ll look into it.

Actually, can you share the versions of the packages you have installed (with using Pkg; Pkg.status()), and also share a minimal code we could run to reproduce the error (for example, a code that fully defines psi, psi1, H, dt, etc.)?
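
For reference, a self-contained script along those lines might look something like the sketch below. The model, system size, bond dimensions, and time step here are placeholders for illustration, not the code that triggered the error:

using ITensors, ITensorMPS, CUDA

# Placeholder model: S=1/2 Heisenberg chain (not the original Hamiltonian).
N = 20
sites = siteinds("S=1/2", N)

os = OpSum()
for j in 1:(N - 1)
    global os += 0.5, "S+", j, "S-", j + 1
    global os += 0.5, "S-", j, "S+", j + 1
    global os += "Sz", j, "Sz", j + 1
end
H = MPO(os, sites)

psi = random_mps(ComplexF64, sites; linkdims=10)
psi1 = random_mps(ComplexF64, sites; linkdims=10)
dt = -0.01im  # placeholder time step (real-time evolution)

# Move the states and the Hamiltonian to the GPU.
psi = cu(psi)
psi1 = cu(psi1)
H = cu(H)

for nt = 1:1000
    @show nt
    global psi = tdvp(H, dt, psi; time_step=dt, cutoff=1e-12)
    global psi = psi / inner(psi1, psi)
end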

Dear Matthew,

Thanks for the quick response. Here is my Julia Pkg status:

julia> Pkg.status()
Status `~/.julia/environments/v1.11/Project.toml`
  [336ed68f] CSV v0.10.15
⌃ [052768ef] CUDA v5.5.2
  [442a2c76] FastGaussQuadrature v1.0.2
  [0d1a4710] ITensorMPS v0.3.7
  [9136182c] ITensors v0.8.0
  [15e1cf62] NPZ v0.4.3
  [338f10d5] Observers v0.2.5
Info Packages marked with ⌃ have new versions available and may be upgradable.

And yes, I’ll do some cleanup and put together a minimal code that reproduces the bug.

Zhen

This should be fixed by [NDTensorsCUDAExt] Fix exposed version of any by mtfishman · Pull Request #1652 · ITensor/ITensors.jl · GitHub. Please update your package versions and see if that fixes it for you.
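
For reference, updating the environment and checking that the new versions were picked up would look something like:

julia> using Pkg
julia> Pkg.update()   # pull in the patched NDTensors/ITensors releases
julia> Pkg.status()   # verify the installed versions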