Hello,
I’m running DMRG and time-dependent (TDVP) simulations on the GPU with CUDA. run_dmrg() finishes much faster on the GPU than on the CPU, but tdvp_apply() shows no comparable speedup, and is occasionally even slower on the GPU. Could you help me diagnose why TDVP isn’t benefiting from the GPU?
Thanks.
Here is the code I am running:
using ITensors, ITensorMPS
using ITensorTDVP
using CUDA, BenchmarkTools

CUDA.allowscalar(false)  # error out on scalar indexing of GPU arrays

gpu(x) = cu(x)  # adapt an MPO/MPS to CUDA (GPU) storage
# One cheap sweep to force JIT compilation before the timed runs.
function warmup_dmrg(H, psi0)
    dmrg(H, psi0; nsweeps=1, maxdim=[50], cutoff=[1e-6], noise=[0.0])
    return nothing
end
function run_dmrg(N; hx)
    sites = siteinds("S=1/2", N; conserve_qns=false)

    # Spin-1/2 XXZ-type chain with a transverse field of strength hx.
    os = OpSum()
    for j in 1:(N - 1)
        os += 2, "Sz", j, "Sz", j + 1
        os += 1/2, "S+", j, "S-", j + 1
        os += 1/2, "S-", j, "S+", j + 1
    end
    for j in 1:N
        os += -hx, "Sx", j
    end
    H_cpu = MPO(os, sites)
    psi0_cpu = random_mps(sites; linkdims=10)

    # Transfer the Hamiltonian and initial state to the GPU.
    H = gpu(H_cpu)
    psi0 = gpu(psi0_cpu)

    nsweeps = 22
    maxdim = [10, 20, 30, 40, 50, 80, 100, 200]
    cutoff = [1e-6, 1e-7, 1e-8, 1e-9, 1e-10, 1e-12]
    noise = [0.0]

    warmup_dmrg(H, psi0)  # compile before timing
    t_gpu = @elapsed begin
        E0, psi = dmrg(H, psi0; nsweeps, maxdim, cutoff, noise)
    end
    println("GPU: N=$N, maxdim=$(last(maxdim)), Energy=$E0, Time=$t_gpu")
    return H, E0, psi, sites, t_gpu
end
function tdvp_apply(H, psi; tau, ttotal)
    t = 0.0
    println("t = $t")
    psi_t = psi
    # round, not floor: floating-point division can land just below an integer
    Nt = round(Int, ttotal / tau)
    t_tdvp = @elapsed for nstep in 1:Nt
        t += tau
        psi_t = tdvp(H, tau, psi_t; time_step=tau, nsteps=1, maxdim=800, cutoff=1e-6, normalize=true)
        println("t = $t")
    end
    println("TDVP time = ", t_tdvp)
    return nothing
end
# Driver: a single system size for now.
for N in (100,)
    H, E, psi, sites, t_gpu = run_dmrg(N; hx=6)
    tdvp_apply(H, psi; tau=0.01, ttotal=0.1)
end
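In case it helps, this is the kind of head-to-head check I have in mind: time a single TDVP step on identical inputs on the CPU and on the GPU. It is only a sketch; H_cpu and psi_cpu stand for the pre-transfer MPO/MPS (which run_dmrg currently does not return), and I am assuming cu moves them to the device as above.

# Sketch: compare one TDVP step on CPU vs. GPU with identical inputs.
# H_cpu / psi_cpu are placeholders for the pre-transfer MPO and MPS.
for (label, Hd, pd) in (("CPU", H_cpu, psi_cpu), ("GPU", cu(H_cpu), cu(psi_cpu)))
    tdvp(Hd, 0.01, pd; time_step=0.01, maxdim=800, cutoff=1e-6)  # warm-up / compile
    t = @elapsed begin
        tdvp(Hd, 0.01, pd; time_step=0.01, maxdim=800, cutoff=1e-6)
        label == "GPU" && CUDA.synchronize()  # include pending kernels in the timing
    end
    println("$label, one TDVP step: $t s")
end

If the single GPU step already loses to the CPU at these bond dimensions, I would conclude the workload per step is simply too small for the GPU rather than something being wrong in my loop.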