Comparative Analysis of Exact Diagonalization Strategies in ITensor: Index Fusion Techniques and Computational Efficiency

My testing code is(adapted from here and here)

using ITensors, ITensorMPS
using KrylovKit
using LinearAlgebra
using MKL
using Strided
using BenchmarkTools

include("fuse_inds.jl")

Strided.disable_threads()
ITensors.disable_threaded_blocksparse()

function heisenberg(n)
  os = OpSum()
  for j in 1:(n - 1)
    os += 1 / 2, "S+", j, "S-", j + 1
    os += 1 / 2, "S-", j, "S+", j + 1
    os += "Sz", j, "Sz", j + 1
  end
  return os
end

# Sys.CPU_THREADS÷2 for physical cores
function main(n; blas_num_threads=Sys.CPU_THREADS÷2, fuse=true, fuse_detail="binary", qn=true)

  BLAS.set_num_threads(blas_num_threads)

  # Hilbert space
  s = siteinds("S=1/2", n; conserve_qns=qn)
  if qn
    println("Using quantum numbers")
  else
    println("No quantum numbers")
  end
  H = MPO(heisenberg(n), s)
  initstate(j) = isodd(j) ? "↑" : "↓"
  ψ0 = random_mps(s, initstate; linkdims=10)

  if fuse
    if fuse_detail == "oneGo"
      println("Fuse the indices using a full combiner, eigen")
      cb = combiner(s'...)
      cbp = combiner(dag(s)...)
      print("Fusion time: ")
      @disable_warn_order begin
        H_full = @btime contract($H) * $cb * $cbp
      end
      print("Eigen time: ")
      @btime eigen($H_full, combinedind($cb), combinedind($cbp); ishermitian=true)
    elseif fuse_detail == "binary"
        println("Fuse the indices using a binary tree")
        T = fusion_tree_binary(s)
        print("Fusion(MPO) time: ")
        H_full = @btime fuse_inds_binary($H, $T)
        print("Fusion(MPS) time: ")
        ψ0_full = @btime fuse_inds_binary($ψ0, $T)
    elseif fuse_detail == "unbalances"
        println("Fuse the indices using an unbalances tree")
        T = fusion_tree(s)
        print("Fusion(MPO) time: ")
        H_full = @btime fuse_inds($H, $T)
        print("Fusion(MPS) time: ")
        ψ0_full = @btime fuse_inds($ψ0, $T)
    end
  else
    if fuse_detail == "oneGo"
      println("Don't fuse the indices, using eigen")
      print("Fusion time: ")
      @disable_warn_order begin
        H_full = @btime contract($H)
      end
      print("Eigen time: ")
      @btime eigen($H_full, $s', dag($s); ishermitian=true)
    else
      println("Don't fuse the indices")
      @disable_warn_order begin
        print("Fusion(MPO) time: ")
        H_full = @btime contract($H)
        print("Fusion(MPS) time: ")
        ψ0_full = @btime contract($ψ0)
      end
    end
  end

  if !(fuse_detail == "oneGo")
    print("Eigsolve time: ")
    vals, vecs, info = @btime eigsolve(
      $H_full, $ψ0_full, 1, :SR; ishermitian=true, tol=1e-6, krylovdim=30, eager=true
    )
  end
  println("-"^70)
end
# qn-enabled
main(12;fuse=true, fuse_detail="oneGo", qn=true)
main(12;fuse=true, fuse_detail="binary", qn=true)
main(12;fuse=true, fuse_detail="unbalances", qn=true)    
main(12;fuse=false, fuse_detail="oneGo", qn=true)        
main(12;fuse=false, fuse_detail="unbalances", qn=true)
# qn-disabled
main(12;fuse=true, fuse_detail="oneGo", qn=false)
main(12;fuse=true, fuse_detail="binary", qn=false)
main(12;fuse=true, fuse_detail="unbalances", qn=false)    
main(12;fuse=false, fuse_detail="oneGo", qn=false)        
main(12;fuse=false, fuse_detail="unbalances", qn=false)

And the results are:

Using quantum numbers
Fuse the indices using a full combiner, eigen
Fusion time:   1.085 s (17149695 allocations: 2.12 GiB)
Eigen time:   435.875 ms (2231 allocations: 167.00 MiB)
----------------------------------------------------------------------
Using quantum numbers
Fuse the indices using a binary tree
Fusion(MPO) time:   17.828 ms (32979 allocations: 80.74 MiB)
Fusion(MPS) time:   201.200 μs (5663 allocations: 827.81 KiB)
Eigsolve time:   5.522 ms (81413 allocations: 8.33 MiB)
----------------------------------------------------------------------
Using quantum numbers
Fuse the indices using an unbalances tree
Fusion(MPO) time:   48.329 ms (34647 allocations: 189.49 MiB)
Fusion(MPS) time:   115.900 μs (2939 allocations: 568.59 KiB)
Eigsolve time:   10.842 ms (86189 allocations: 8.87 MiB)
----------------------------------------------------------------------
Using quantum numbers
Don't fuse the indices, using eigen
Fusion time:   75.049 ms (262683 allocations: 318.43 MiB)
Eigen time:   1.473 s (17316922 allocations: 2.16 GiB)
----------------------------------------------------------------------
Using quantum numbers
Don't fuse the indices
Fusion(MPO) time:   130.517 ms (262683 allocations: 318.43 MiB)
Fusion(MPS) time:   1.502 ms (32489 allocations: 11.59 MiB)
Eigsolve time:   3.328 s (8056509 allocations: 2.31 GiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using a full combiner, eigen
Fusion time:   208.746 ms (877 allocations: 810.92 MiB)
Eigen time:   6.271 s (311 allocations: 1.25 GiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using a binary tree
Fusion(MPO) time:   99.222 ms (1550 allocations: 392.07 MiB)
Fusion(MPS) time:   39.800 μs (886 allocations: 288.69 KiB)
Eigsolve time:   144.600 ms (49954 allocations: 4.95 MiB)
----------------------------------------------------------------------
No quantum numbers
Fuse the indices using an unbalances tree
Fusion(MPO) time:   326.049 ms (1483 allocations: 1.00 GiB)
Fusion(MPS) time:   53.400 μs (829 allocations: 444.03 KiB)
Eigsolve time:   168.598 ms (51376 allocations: 5.08 MiB)
----------------------------------------------------------------------
No quantum numbers
Don't fuse the indices, using eigen
Fusion time:   114.034 ms (555 allocations: 554.84 MiB)
Eigen time:   7.940 s (820 allocations: 1.13 GiB)
----------------------------------------------------------------------
No quantum numbers
Don't fuse the indices
Fusion(MPO) time:   113.643 ms (555 allocations: 554.84 MiB)
Fusion(MPS) time:   62.900 μs (363 allocations: 254.78 KiB)
Eigsolve time:   3.058 s (222698 allocations: 4.32 GiB)
----------------------------------------------------------------------

Also, I’m not sure if it’s the right way to benchmark Julia code given its JIT features and something else.