From eb95ffb6c784f536b9f7a75d1a6eec125af58b8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Polack?= <polack@hollved.net>
Date: Mon, 11 Dec 2023 23:20:43 +0100
Subject: [PATCH] regression tests & benchmark

---
 .github/workflows/ci.yaml         |  10 +-
 .github/workflows/regression.yaml |  98 ++++++++++++++++++
 .gitignore                        |   2 +
 Project.toml                      |   3 +-
 benchmark/Project.toml            |  10 ++
 benchmark/benchmarks.jl           |   6 ++
 benchmark/humongous/Project.toml  |  10 ++
 benchmark/humongous/benchmarks.jl |  21 ++++
 benchmark/humongous/run.jl        |  43 ++++++++
 benchmark/load.jl                 |  14 +++
 benchmark/regression/testcases.jl | 158 ++++++++++++++++++++++++++++++
 benchmark/run.jl                  |  14 +++
 test/runtests_runner.jl           |  30 ++++++
 13 files changed, 413 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/regression.yaml
 create mode 100644 benchmark/Project.toml
 create mode 100644 benchmark/benchmarks.jl
 create mode 100644 benchmark/humongous/Project.toml
 create mode 100644 benchmark/humongous/benchmarks.jl
 create mode 100644 benchmark/humongous/run.jl
 create mode 100644 benchmark/load.jl
 create mode 100644 benchmark/regression/testcases.jl
 create mode 100644 benchmark/run.jl
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 881d340382..c449933817 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,11 +21,11 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-example }
-          - {mode: stable,  os: macOS-latest,   payload: noslow         }
-          - {mode: stable,  os: windows-latest, payload: noslow         }
-          - {mode: stable,  os: ubuntu-latest,  payload: noslow-mpi     }
-          - {mode: nightly, os: ubuntu-latest,  payload: noslow         }
+          - {mode: stable,  os: ubuntu-latest,  payload: example-noslow-noregression }
+          - {mode: stable,  os: macOS-latest,   payload: noslow-noregression         }
+          - {mode: stable,  os: windows-latest, payload: noslow-noregression         }
+          - {mode: stable,  os: ubuntu-latest,  payload: mpi-noslow-noregression     }
+          - {mode: nightly, os: ubuntu-latest,  payload: noslow-noregression         }
     env:
       GKS_ENCODING: utf8
       GKSwstype: 100       # Needed for Plots-related tests
diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml
new file mode 100644
index 0000000000..ae2e25eda2
--- /dev/null
+++ b/.github/workflows/regression.yaml
@@ -0,0 +1,98 @@
+name: Regression
+on:
+  push:
+    branches:
+      - master
+    tags: ['*']
+  pull_request:
+  schedule:
+    - cron:  '0 4 * * 6'  # Run every Saturday
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  test:
+    name: Benchmarking ${{ matrix.description }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {description: run,  payload: benchmarks.jl  }
+          - {description: load, payload: load.jl }
+    steps:
+      - name: pr-deleter  # Remove older benchmark comment (pull requests only)
+        if: ${{ github.event_name == 'pull_request' }}
+        uses: maheshrayas/action-pr-comment-delete@v3.0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          org: ${{ github.repository_owner }}
+          repo: ${{ github.event.repository.name }}
+          user: github-actions[bot]
+          issue: ${{ github.event.number }}
+
+      - uses: actions/checkout@v4
+
+      - name: Setup Julia stable
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.9'
+          arch: x64
+
+      - uses: julia-actions/cache@v1
+        with:
+          include-matrix: false
+      - uses: julia-actions/julia-buildpkg@v1
+
+      - name: Install dependencies
+        run: |
+          julia --project=benchmark -e '
+          using Pkg
+          Pkg.develop(PackageSpec(; path=pwd()))
+          Pkg.instantiate()'
+
+      - name: Run benchmarks against master
+        # Remove baseline once merged. Regression tests will only work after this is merged
+        # in master.
+        run: |
+          julia --project=benchmark -e "
+            using BenchmarkCI
+            baseline = \"HEAD\"
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+        if: ${{ github.event_name == 'pull_request' }}
+
+      - name: Run benchmarks against last release
+        # The pushed ref is `github.ref`; the event payload has no `push.ref` field.
+        if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/master' }}
+        run: |
+          julia --project=benchmark -e "
+            import Pkg
+            baseline = \"v\" * Pkg.TOML.parsefile(\"Project.toml\")[\"version\"]
+            script = \"\$(pwd())/benchmark/${{ matrix.payload }}\"
+            using BenchmarkCI
+            BenchmarkCI.judge(; baseline, script, retune=true)"
+
+      - name: Print judgement
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.displayjudgement()'
+
+      - name: Post results
+        run: |
+          julia --project=benchmark -e '
+            using BenchmarkCI
+            BenchmarkCI.postjudge()'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Is report successful  # exits 1 only when >1 output lines contain ':x:' (TODO confirm threshold)
+        run: |
+          res=$(julia --project=benchmark -e '
+                  using BenchmarkCI
+                  BenchmarkCI.displayjudgement()' | grep --count ':x:')
+          if [[ $res -gt 1 ]]; then exit 1; fi
diff --git a/.gitignore b/.gitignore
index f3d553e487..d82b137880 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ Manifest.toml
 /LocalPreferences.toml
 .vscode
 .CondaPkg
+/.benchmarkci
+/benchmark/**/*.json
diff --git a/Project.toml b/Project.toml
index 01ac694ce2..6e2c022865 100644
--- a/Project.toml
+++ b/Project.toml
@@ -128,6 +128,7 @@ ASEconvert = "3da9722f-58c2-4165-81be-b4d7253e8fd2"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
 AtomsIOPython = "9e4c859b-2281-48ef-8059-f50fe53c37b0"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
@@ -150,4 +151,4 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 wannier90_jll = "c5400fa0-8d08-52c2-913f-1e3f656c1ce9"
 
 [targets]
-test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
+test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "BenchmarkTools", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"]
diff --git a/benchmark/Project.toml b/benchmark/Project.toml
new file mode 100644
index 0000000000..6b2665e976
--- /dev/null
+++ b/benchmark/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
+Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
+UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
new file mode 100644
index 0000000000..fcda531cab
--- /dev/null
+++ b/benchmark/benchmarks.jl
@@ -0,0 +1,6 @@
+using BenchmarkTools
+using TestItemRunner
+
+const SUITE = BenchmarkGroup()
+# Run every test item tagged `:regression`; those items fill `SUITE` via `Main.SUITE`.
+@run_package_tests filter=(item -> in(:regression, item.tags))
diff --git a/benchmark/humongous/Project.toml b/benchmark/humongous/Project.toml
new file mode 100644
index 0000000000..3546e8e693
--- /dev/null
+++ b/benchmark/humongous/Project.toml
@@ -0,0 +1,10 @@
+[deps]
+AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
diff --git a/benchmark/humongous/benchmarks.jl b/benchmark/humongous/benchmarks.jl
new file mode 100644
index 0000000000..6d04955361
--- /dev/null
+++ b/benchmark/humongous/benchmarks.jl
@@ -0,0 +1,21 @@
+using BenchmarkTools
+using TestItemRunner
+
+function run_scenario(scenario, complexity)  # run items of `scenario` tagged `complexity`
+    selects(item) = occursin(string(scenario), item.filename) && in(complexity, item.tags)
+    @run_package_tests filter=selects
+end
+
+all_scenarios() = [:AlSiO2H, :Cr19, :Fe2MnAl, :Mn2RuGa, :WFe]
+function make_suite(; scenarios=all_scenarios(), complexity=:debug)
+    # Explicit throws: `@assert` may be disabled, which would silently drop these checks.
+    complexity ∈ (:debug, :small, :full) || throw(ArgumentError("invalid complexity"))
+    all(scenarios .∈ Ref(all_scenarios())) || throw(ArgumentError("unknown scenario"))
+    suite = BenchmarkGroup()
+    for scenario in scenarios
+        suite[scenario] = @benchmarkable run_scenario($scenario, $complexity)
+    end
+    suite
+end
+
+const SUITE = make_suite(; scenarios=[:AlSiO2H])
diff --git a/benchmark/humongous/run.jl b/benchmark/humongous/run.jl
new file mode 100644
index 0000000000..537755d0b6
--- /dev/null
+++ b/benchmark/humongous/run.jl
@@ -0,0 +1,43 @@
+ROOTPATH = abspath(joinpath(@__DIR__, "../.."))
+import Pkg
+Pkg.activate(@__DIR__)
+if !isfile(joinpath(@__DIR__, "Manifest.toml"))
+    Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH))
+    Pkg.instantiate()
+end
+
+import BenchmarkCI
+import LibGit2
+
+"""
+    run_benchmark(; retune=false, baseline="origin/master", target="HEAD", script=nothing)
+
+Judge `target` against `baseline` with BenchmarkCI and display the judgement. Launch with
+```julia
+julia --project=benchmark/humongous -e '
+   include("benchmark/humongous/run.jl")
+   run_benchmark()'
+```
+"""
+function run_benchmark(; retune=false, baseline="origin/master", target="HEAD",
+                       script=nothing)
+    mktempdir(mktempdir()) do workdir  # TestItemRunner needs access to parent directory as well.
+        project = joinpath(ROOTPATH, "benchmark", "humongous")
+        # Workaround to be able to benchmark releases before the use of PkgBenchmark.
+        # WARN: In this case, we need PkgBenchmark to be installed globally.
+        if !isnothing(script) && occursin(ROOTPATH, abspath(script))
+            error("Script should be outside the repository.")
+        end
+        # Without an explicit script, run the default benchmark of this project.
+        source = isnothing(script) ? joinpath(project, "benchmarks.jl") : script
+        script_copy = joinpath(workdir, "benchmarks.jl")
+        cp(source, script_copy)
+
+        LibGit2.clone("https://github.com/epolack/DFTK-testproblems",
+                      joinpath(workdir, "test"))
+
+        BenchmarkCI.judge(; baseline, target, retune, script=script_copy, project)
+
+        BenchmarkCI.displayjudgement()
+    end
+end
diff --git a/benchmark/load.jl b/benchmark/load.jl
new file mode 100644
index 0000000000..3d0e20fc21
--- /dev/null
+++ b/benchmark/load.jl
@@ -0,0 +1,14 @@
+using BenchmarkTools
+
+const SUITE = BenchmarkGroup()
+# Both benchmarks time a fresh `julia` process started on the currently active project.
+julia_cmd = unsafe_string(Base.JLOptions().julia_bin)
+SUITE["load"] = @benchmarkable run(`$julia_cmd \
+                                        --startup-file=no \
+                                        --project=$(Base.active_project()) \
+                                        -e 'using DFTK'`)
+SUITE["precompilation"] =
+    @benchmarkable run(`$julia_cmd \
+                           --startup-file=no \
+                           --project=$(Base.active_project()) \
+                           -e 'Base.compilecache(Base.identify_package("DFTK"))'`)
diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl
new file mode 100644
index 0000000000..62f07642b4
--- /dev/null
+++ b/benchmark/regression/testcases.jl
@@ -0,0 +1,158 @@
+@testsetup module Regression
+using DFTK
+using Unitful
+using UnitfulAtomic
+using AtomsBase
+using ..TestCases: magnesium
+# Benchmark inputs: each entry bundles system data with Ecut, kgrid and a description.
+high_symmetry = let
+    a = 4.474
+    lattice = [[0, a, a], [a, 0, a], [a, a, 0]]u"bohr"
+    x = 6.711
+    y = 2.237
+    atoms = [
+        Atom(:Cu, [0, 0, 0]u"bohr", magnetic_moment=0),
+        Atom(:O,  [x, y, x]u"bohr", magnetic_moment=0),
+        Atom(:O,  [x, y, y]u"bohr", magnetic_moment=0),
+    ]
+    system = periodic_system(atoms, lattice)
+    merge(DFTK.parse_system(system), (; temperature=0.03, Ecut=20, kgrid=[4,4,4],
+          n_electrons=45, description="high_sym"))
+end
+high_kpoints = merge(magnesium, (; kgrid=[13,13,13], Ecut=20, description="high_kpoint"))
+high_Ecut = merge(magnesium, (; kgrid=[4,4,4], Ecut=60, description="high_Ecut"))
+# `description` doubles as the benchmark key in the test items iterating `testcases`.
+testcases = (; high_symmetry, high_kpoints, high_Ecut)
+end
+
+
+@testitem "Hamiltonian application" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using LinearAlgebra
+    using BenchmarkTools
+    using .Main: SUITE
+
+    # Time a kinetic-only Hamiltonian applied to orbitals taken from random QR factors.
+    for case in Regression.testcases
+        (; lattice, atoms, positions, temperature, Ecut, kgrid, n_electrons) = case
+        model = Model(lattice, atoms, positions; temperature, terms=[Kinetic()])
+        basis = PlaneWaveBasis(model; Ecut, kgrid)
+
+        n_bands = cld(n_electrons, 2)
+        ψ = map(basis.kpoints) do kpt
+            n_G = length(G_vectors(basis, kpt))
+            Matrix(qr(randn(ComplexF64, n_G, n_bands)).Q)
+        end
+        filled_occ = DFTK.filled_occupation(model)
+        raw_occ = [filled_occ * rand(n_bands) for _ in basis.kpoints]
+        occupation = raw_occ .* (n_electrons / sum(sum(raw_occ)))  # rescale to n_electrons
+
+        (; ham) = energy_hamiltonian(basis, ψ, occupation)
+        SUITE["ham"][case.description] =
+            @benchmarkable for ik in eachindex($(basis.kpoints))
+                $(ham.blocks)[ik] * $ψ[ik]
+            end
+    end
+end
+
+@testitem "Single SCF step" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    # `tol=1e5` is very loose — presumably so that the SCF stops after its first step.
+    for case in Regression.testcases
+        (; lattice, atoms, positions, temperature, Ecut, kgrid, description) = case
+        basis = PlaneWaveBasis(model_LDA(lattice, atoms, positions; temperature);
+                               Ecut, kgrid)
+        SUITE["scf"][description] = @benchmarkable self_consistent_field($basis; tol=1e5)
+    end
+end
+
+@testitem "Density + symmetrization" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for case in Regression.testcases
+        (; lattice, atoms, positions, temperature, Ecut, kgrid, description) = case
+        basis = PlaneWaveBasis(model_LDA(lattice, atoms, positions; temperature);
+                               Ecut, kgrid)
+        scfres = self_consistent_field(basis; tol=10)
+        (; ρ) = scfres
+        # Benchmark inputs come from this loose SCF run, restricted to occupied orbitals.
+        ψ_occ, occ = DFTK.select_occupied_orbitals(basis, scfres.ψ, scfres.occupation;
+                                                   threshold=1e-6)
+        density_group = SUITE["density"]
+        density_group["ρ"][description] =
+            @benchmarkable compute_density($basis, $ψ_occ, $occ)
+        density_group["sym"][description] = @benchmarkable DFTK.symmetrize_ρ($basis, $ρ)
+    end
+end
+
+@testitem "Basis construction" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for case in Regression.testcases
+        (; lattice, atoms, positions, temperature, Ecut, kgrid, description) = case
+        model = model_LDA(lattice, atoms, positions; temperature)
+        # Only the basis construction is timed; the model is built once, up front.
+        SUITE["basis"][description] =
+            @benchmarkable PlaneWaveBasis($model; Ecut=$Ecut, kgrid=$kgrid)
+    end
+end
+
+@testitem "Sternheimer" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using .Main: SUITE
+
+    for case in Regression.testcases
+        (; lattice, atoms, positions, temperature, Ecut, kgrid, description) = case
+        basis = PlaneWaveBasis(model_LDA(lattice, atoms, positions; temperature);
+                               Ecut, kgrid)
+        scfres = self_consistent_field(basis; tol=10)
+        (; ψ, occupation) = scfres
+        rhs = DFTK.compute_projected_gradient(basis, ψ, occupation)
+        SUITE["response"]["sternheimer"][description] =
+            @benchmarkable DFTK.solve_ΩplusK_split($scfres, $rhs; tol=1e-1)
+    end
+end
+
+@testitem "Response with AD" tags=[:regression] setup=[TestCases, Regression] begin
+    using DFTK
+    using BenchmarkTools
+    using LinearAlgebra
+    using ForwardDiff
+    using .Main: SUITE
+
+    # Helium atom at the centre of a cubic box of side `a` Bohr, in a field of strength `ε`.
+    function make_basis(ε::T; a=10., Ecut=30) where {T}
+        lattice   = T(a) * I(3)
+        atoms     = [ElementPsp(:He; psp=load_psp("hgh/lda/He-q2"))]
+        positions = [[1/2, 1/2, 1/2]]
+        field     = ExternalFromReal(r -> -ε * (r[1] - a/2))
+
+        model = model_DFT(lattice, atoms, positions, [:lda_x, :lda_c_vwn];
+                          extra_terms=[field], symmetries=false)
+        return PlaneWaveBasis(model; Ecut, kgrid=[1, 1, 1])  # no k-point sampling
+    end
+
+    # Dipole moment along the first axis of density `ρ`; requires a diagonal lattice.
+    function dipole(basis, ρ)
+        @assert isdiag(basis.model.lattice)
+        box_length = basis.model.lattice[1, 1]
+        x_centred  = [box_length * (r[1] - 1/2) for r in r_vectors(basis)]
+        return sum(x_centred .* ρ) * basis.dvol
+    end
+
+    # Dipole of the SCF density obtained for field strength `ε`
+    function compute_dipole(ε; tol=1e-2, kwargs...)
+        (; basis, ρ) = self_consistent_field(make_basis(ε; kwargs...); tol)
+        return dipole(basis, ρ)
+    end
+
+    SUITE["response"]["ad"] = @benchmarkable ForwardDiff.derivative($compute_dipole, 0.0)
+end
diff --git a/benchmark/run.jl b/benchmark/run.jl
new file mode 100644
index 0000000000..8ab416ed9b
--- /dev/null
+++ b/benchmark/run.jl
@@ -0,0 +1,14 @@
+ROOTPATH = abspath(joinpath(@__DIR__, ".."))
+import Pkg
+Pkg.activate(@__DIR__)
+if !isfile(joinpath(@__DIR__, "Manifest.toml"))  # no manifest yet: set up the environment
+    Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH))
+    Pkg.instantiate()
+end
+
+using BenchmarkCI
+
+# Remove baseline once merged. Regression tests will only work after this is merged in master.
+BenchmarkCI.judge(; baseline="HEAD")
+
+BenchmarkCI.displayjudgement()
diff --git a/test/runtests_runner.jl b/test/runtests_runner.jl
index 7616bd8741..9e9e4b1976 100644
--- a/test/runtests_runner.jl
+++ b/test/runtests_runner.jl
@@ -29,4 +29,34 @@ function dftk_testfilter(ti)
         return false
     end
 end
+# Redefine TestItemRunner's runner: each item runs in a fresh module and its own @testset.
+using Test
+function TestItemRunner.run_testitem(filepath, use_default_usings, setups, package_name,
+                                     original_code, line, column, test_setup_module_set)
+    mod = Core.eval(Main, :(module $(gensym()) end))
+    # Default environment, when requested: `Test` plus the package itself (if named).
+    if use_default_usings
+        Core.eval(mod, :(using Test))
+
+        if package_name!=""
+            Core.eval(mod, :(using $(Symbol(package_name))))
+        end
+    end
+    # Bring each requested setup module into scope (`using ..<setupmodule>.<m>`).
+    for m in setups
+        Core.eval(mod, Expr(:using, Expr(:., :., :., nameof(test_setup_module_set.setupmodule), m)))
+    end
+    # Pad with newlines/spaces — presumably so positions in `code` match the original file.
+    code = string('\n'^line, ' '^column, original_code)
+
+    TestItemRunner.withpath(filepath) do
+        # Replace the test by the current testset.
+        description = Test.pop_testset().description
+        @testset "$(description)" begin
+            Base.invokelatest(include_string, mod, code, filepath)
+        end
+        Test.push_testset(Test.FallbackTestSet())  # so the parent pops nothing
+    end
+end
+
 @run_package_tests filter=dftk_testfilter verbose=true