My testing code is(adapted from here and here)
using ITensors, ITensorMPS
using KrylovKit
using LinearAlgebra
using MKL
using Strided
using BenchmarkTools
include("fuse_inds.jl")
Strided.disable_threads()
ITensors.disable_threaded_blocksparse()
function heisenberg(n)
os = OpSum()
for j in 1:(n - 1)
os += 1 / 2, "S+", j, "S-", j + 1
os += 1 / 2, "S-", j, "S+", j + 1
os += "Sz", j, "Sz", j + 1
end
return os
end
# Sys.CPU_THREADS÷2 for physical cores
function main(n; blas_num_threads=Sys.CPU_THREADS÷2, fuse=true, fuse_detail="binary", qn=true)
BLAS.set_num_threads(blas_num_threads)
# Hilbert space
s = siteinds("S=1/2", n; conserve_qns=qn)
if qn
println("Using quantum numbers")
else
println("No quantum numbers")
end
H = MPO(heisenberg(n), s)
initstate(j) = isodd(j) ? "↑" : "↓"
ψ0 = random_mps(s, initstate; linkdims=10)
if fuse
if fuse_detail == "oneGo"
println("Fuse the indices using a full combiner, eigen")
cb = combiner(s'...)
cbp = combiner(dag(s)...)
print("Fusion time: ")
@disable_warn_order begin
H_full = @btime contract($H) * $cb * $cbp
end
print("Eigen time: ")
@btime eigen($H_full, combinedind($cb), combinedind($cbp); ishermitian=true)
elseif fuse_detail == "binary"
println("Fuse the indices using a binary tree")
T = fusion_tree_binary(s)
print("Fusion(MPO) time: ")
H_full = @btime fuse_inds_binary($H, $T)
print("Fusion(MPS) time: ")
ψ0_full = @btime fuse_inds_binary($ψ0, $T)
elseif fuse_detail == "unbalances"
println("Fuse the indices using an unbalances tree")
T = fusion_tree(s)
print("Fusion(MPO) time: ")
H_full = @btime fuse_inds($H, $T)
print("Fusion(MPS) time: ")
ψ0_full = @btime fuse_inds($ψ0, $T)
end
else
if fuse_detail == "oneGo"
println("Don't fuse the indices, using eigen")
print("Fusion time: ")
@disable_warn_order begin
H_full = @btime contract($H)
end
print("Eigen time: ")
@btime eigen($H_full, $s', dag($s); ishermitian=true)
else
println("Don't fuse the indices")
@disable_warn_order begin
print("Fusion(MPO) time: ")
H_full = @btime contract($H)
print("Fusion(MPS) time: ")
ψ0_full = @btime contract($ψ0)
end
end
end
if !(fuse_detail == "oneGo")
print("Eigsolve time: ")
vals, vecs, info = @btime eigsolve(
$H_full, $ψ0_full, 1, :SR; ishermitian=true, tol=1e-6, krylovdim=30, eager=true
)
end
println("-"^70)
end
# qn-enabled
main(12;fuse=true, fuse_detail="oneGo", qn=true)
main(12;fuse=true, fuse_detail="binary", qn=true)
main(12;fuse=true, fuse_detail="unbalances", qn=true)
main(12;fuse=false, fuse_detail="oneGo", qn=true)
main(12;fuse=false, fuse_detail="unbalances", qn=true)
# qn-disabled
main(12;fuse=true, fuse_detail="oneGo", qn=false)
main(12;fuse=true, fuse_detail="binary", qn=false)
main(12;fuse=true, fuse_detail="unbalances", qn=false)
main(12;fuse=false, fuse_detail="oneGo", qn=false)
main(12;fuse=false, fuse_detail="unbalances", qn=false)
And the results are:
Using quantum numbers
Fuse the indices using a full combiner, eigen
Fusion time: 1.085 s (17149695 allocations: 2.12 GiB)
Eigen time: 435.875 ms (2231 allocations: 167.00 MiB)
----------------------------------------------------------------------
Using quantum numbers
Fuse the indices using a binary tree
Fusion(MPO) time: 17.828 ms (32979 allocations: 80.74 MiB)
Fusion(MPS) time: 201.200 μs (5663 allocations: 827.81 KiB)
Eigsolve time: 5.522 ms (81413 allocations: 8.33 MiB)
----------------------------------------------------------------------
Using quantum numbers
Fuse the indices using an unbalances tree
Fusion(MPO) time: 48.329 ms (34647 allocations: 189.49 MiB)
Fusion(MPS) time: 115.900 μs (2939 allocations: 568.59 KiB)
Eigsolve time: 10.842 ms (86189 allocations: 8.87 MiB)
----------------------------------------------------------------------
Using quantum numbers
Don't fuse the indices, using eigen
Fusion time: 75.049 ms (262683 allocations: 318.43 MiB)
Eigen time: 1.473 s (17316922 allocations: 2.16 GiB)
----------------------------------------------------------------------
Using quantum numbers
Don't fuse the indices
Fusion(MPO) time: 130.517 ms (262683 allocations: 318.43 MiB)
Fusion(MPS) time: 1.502 ms (32489 allocations: 11.59 MiB)
Eigsolve time: 3.328 s (8056509 allocations: 2.31 GiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using a full combiner, eigen
Fusion time: 208.746 ms (877 allocations: 810.92 MiB)
Eigen time: 6.271 s (311 allocations: 1.25 GiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using a binary tree
Fusion(MPO) time: 99.222 ms (1550 allocations: 392.07 MiB)
Fusion(MPS) time: 39.800 μs (886 allocations: 288.69 KiB)
Eigsolve time: 144.600 ms (49954 allocations: 4.95 MiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using an unbalances tree
Fusion(MPO) time: 326.049 ms (1483 allocations: 1.00 GiB)
Fusion(MPS) time: 53.400 μs (829 allocations: 444.03 KiB)
Eigsolve time: 168.598 ms (51376 allocations: 5.08 MiB)
----------------------------------------------------------------------
No quantum numbers
Don't fuse the indices, using eigen
Fusion time: 114.034 ms (555 allocations: 554.84 MiB)
Eigen time: 7.940 s (820 allocations: 1.13 GiB)
----------------------------------------------------------------------
No quantum numbers
Don't fuse the indices
Fusion(MPO) time: 113.643 ms (555 allocations: 554.84 MiB)
Fusion(MPS) time: 62.900 μs (363 allocations: 254.78 KiB)
Eigsolve time: 3.058 s (222698 allocations: 4.32 GiB)
----------------------------------------------------------------------
Also, I’m not sure if it’s the right way to benchmark Julia code given its JIT features and something else.