Skip to content

Commit

Permalink
Merge branch 'master' into multicomponents
Browse files Browse the repository at this point in the history
  • Loading branch information
epolack committed Dec 21, 2023
2 parents 500978a + ecdfa73 commit 2a0c5b9
Show file tree
Hide file tree
Showing 46 changed files with 864 additions and 435 deletions.
1 change: 1 addition & 0 deletions docs/src/developer/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ For instance, to launch core functionality tests, one can use
using TestEnv # Optional: automatically installs required packages
TestEnv.activate() # for tests in a temporary environment.
using TestItemRunner
cd("test") # By default, the following macro runs everything from the parent folder.
@run_package_tests filter = ti -> :core ti.tags
```
Or to only run the tests of a particular file `serialisation.jl` use
Expand Down
8 changes: 7 additions & 1 deletion src/DFTK.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export mpi_nprocs
export mpi_master
export setup_threading, disable_threading
include("common/timer.jl")
include("common/constants.jl")
include("common/ortho.jl")
include("common/types.jl")
include("common/spherical_bessels.jl")
Expand Down Expand Up @@ -57,6 +58,7 @@ export ElementGaussian
export charge_nuclear
export charge_ionic
export atomic_symbol
export atomic_mass
export n_elec_valence
export n_elec_core
include("elements.jl")
Expand Down Expand Up @@ -207,7 +209,6 @@ export compute_forces_cart
include("postprocess/forces.jl")
export compute_stresses_cart
include("postprocess/stresses.jl")
include("postprocess/phonon.jl")
export compute_dos
export compute_ldos
export plot_dos
Expand All @@ -219,6 +220,11 @@ include("response/chi0.jl")
include("response/hessian.jl")
export compute_current
include("postprocess/current.jl")
export phonon_modes
export phonon_modes_cart
export compute_dynmat
export compute_dynmat_cart
include("postprocess/phonon.jl")

# Workarounds
include("workarounds/dummy_inplace_fft.jl")
Expand Down
5 changes: 4 additions & 1 deletion src/Model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,10 @@ recip_vector_red_to_cart(model::Model, vec) = recip_vector_red_to_cart(model)(ve
recip_vector_cart_to_red(model::Model, vec) = recip_vector_cart_to_red(model)(vec)

#=
Transformations on vectors and covectors are matrices and comatrices.
Transformations on vectors and covectors are matrices and comatrices, i.e. matrices act on
a vector to give a vector and comatrices act on covector to give a covector.
Beware that some quantities (e.g., the force constant matrix) map vectors to covectors and
are therefore not matrices nor comatrices; they fall outside of these methods.
Consider two covectors f and g related by a transformation matrix B. In reduced
coordinates g_red = B_red f_red and in cartesian coordinates we want g_cart = B_cart f_cart.
Expand Down
63 changes: 37 additions & 26 deletions src/PlaneWaveBasis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,37 +129,48 @@ Base.Broadcast.broadcastable(basis::PlaneWaveBasis) = Ref(basis)

Base.eltype(::PlaneWaveBasis{T}) where {T} = T


function Kpoint(spin::Integer, coordinate::AbstractVector{<:Real},
recip_lattice::AbstractMatrix{T}, fft_size, Ecut;
variational=true, architecture::AbstractArchitecture) where {T}
mapping = Int[]
Gvecs_k = Vec3{Int}[]
k = Vec3{T}(coordinate)
# provide a rough hint so that the arrays don't have to be resized so much
n_guess = div(prod(fft_size), 8)
sizehint!(mapping, n_guess)
sizehint!(Gvecs_k, n_guess)
for (i, G) in enumerate(G_vectors(fft_size))
if !variational || norm2(recip_lattice * (G + k)) / 2 Ecut
push!(mapping, i)
push!(Gvecs_k, G)
end
end
Gvecs_k = to_device(architecture, Gvecs_k)

mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
Kpoint(spin, k, mapping, mapping_inv, Gvecs_k)
end
function Kpoint(basis::PlaneWaveBasis, coordinate::AbstractVector, spin::Int)
Kpoint(spin, coordinate, basis.model.recip_lattice, basis.fft_size, basis.Ecut;
basis.variational, basis.architecture)
end

@timing function build_kpoints(model::Model{T}, fft_size, kcoords, Ecut;
variational=true,
architecture::AbstractArchitecture) where {T}
kpoints_per_spin = [Kpoint[] for _ = 1:model.n_spin_components]
for k in kcoords
k = Vec3{T}(k) # rationals are sloooow
mapping = Int[]
Gvecs_k = Vec3{Int}[]
# provide a rough hint so that the arrays don't have to be resized so much
n_guess = div(prod(fft_size), 8)
sizehint!(mapping, n_guess)
sizehint!(Gvecs_k, n_guess)
for (i, G) in enumerate(G_vectors(fft_size))
if !variational || norm2(model.recip_lattice * (G + k)) / 2 Ecut
push!(mapping, i)
push!(Gvecs_k, G)
end
end
Gvecs_k = to_device(architecture, Gvecs_k)

mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
for= 1:model.n_spin_components
push!(kpoints_per_spin[iσ],
Kpoint(iσ, k, mapping, mapping_inv, Gvecs_k))
# Build all k-points for the first spin
kpoints_spin_1 = [Kpoint(1, k, model.recip_lattice, fft_size, Ecut;
variational, architecture)
for k in kcoords]
all_kpoints = similar(kpoints_spin_1, 0)
for= 1:model.n_spin_components
for kpt in kpoints_spin_1
push!(all_kpoints, Kpoint(iσ, kpt.coordinate,
kpt.mapping, kpt.mapping_inv, kpt.G_vectors))
end
end
vcat(kpoints_per_spin...) # put all spin up first, then all spin down
end
function build_kpoints(basis::PlaneWaveBasis, kcoords)
build_kpoints(basis.model, basis.fft_size, kcoords, basis.Ecut;
variational=basis.variational, basis.architecture)
all_kpoints
end

# Lowest-level constructor, should not be called directly.
Expand Down
2 changes: 2 additions & 0 deletions src/common/constants.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Phonons modes are usually given in cm⁻¹.
const hartree_to_cm⁻¹ = ustrip(u"cm^-1", 1u"bohr^-1") ./ austrip(1u"c0") / 2π
19 changes: 19 additions & 0 deletions src/common/threading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,22 @@ function disable_threading()
@assert n_julia == 1 # To exit in non-master MPI nodes
setup_threading(;n_fft=1, n_blas=1)
end

# Parallelization loop breaking range into chunks.
function parallel_loop_over_range(fun, storages::AbstractVector, range)
chunk_length = cld(length(range), Threads.nthreads())

iszero(chunk_length) && return storages

@sync for (ichunk, chunk) in enumerate(Iterators.partition(range, chunk_length))
Threads.@spawn for idc in chunk # spawn a task per chunk
fun(storages[ichunk], idc)
end
end

return storages
end
function parallel_loop_over_range(fun, allocate_local_storage::Function, range)
storages = [allocate_local_storage() for _ = 1:Threads.nthreads()]
parallel_loop_over_range(fun, storages, range)
end
120 changes: 76 additions & 44 deletions src/densities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,45 +14,34 @@ using an optional `occupation_threshold`. By default all occupation numbers are
"""
@views @timing function compute_density(basis::PlaneWaveBasis{T}, ψ, occupation;
occupation_threshold=zero(T)) where {T}
S = promote_type(T, real(eltype(ψ[1])))
# occupation should be on the CPU as we are going to be doing scalar indexing.
occupation = [to_cpu(oc) for oc in occupation]
= promote_type(T, real(eltype(ψ[1])))
n_components = basis.model.n_components

# Occupation should be on the CPU as we are going to be doing scalar indexing.
occupation = [to_cpu(oc) for oc in occupation]
mask_occ = [findall(occnk -> abs(occnk) occupation_threshold, occk)
for occk in occupation]
if all(isempty, mask_occ) # No non-zero occupations => return zero density
ρ = zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
else
# we split the total iteration range (ik, n) in chunks, and parallelize over them
ik_n = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
chunk_length = cld(length(ik_n), Threads.nthreads())

# chunk-local variables
ρ_chunklocal = map(1:Threads.nthreads()) do i
zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
end
ψnk_real_chunklocal = [zeros_like(basis.G_vectors, complex(S),
basis.model.n_components, basis.fft_size...)
for _ = 1:Threads.nthreads()]

@sync for (ichunk, chunk) in enumerate(Iterators.partition(ik_n, chunk_length))
Threads.@spawn for (ik, n) in chunk # spawn a task per chunk
ρ_loc = ρ_chunklocal[ichunk]
ψnk_real = ψnk_real_chunklocal[ichunk]
kpt = basis.kpoints[ik]

ifft!(ψnk_real, basis, kpt, ψ[ik][:, :, n])
for σ = 1:basis.model.n_components
ρ_loc[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
.* abs2.(ψnk_real[σ, :, :, :]))

end
synchronize_device(basis.architecture)
end
function allocate_local_storage()
(; ρ=zeros_like(basis.G_vectors, Tρ, basis.fft_size..., basis.model.n_spin_components),
ψnk_real=zeros_like(basis.G_vectors, complex(Tρ), n_components, basis.fft_size...))
end
# We split the total iteration range (ik, n) in chunks, and parallelize over them.
range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]

storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
(ik, n) = kn
kpt = basis.kpoints[ik]

ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
for σ = 1:n_components
storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
.* abs2.(storage.ψnk_real[σ, :, :, :]))
end

ρ = sum(ρ_chunklocal)
synchronize_device(basis.architecture)
end
ρ = sum(getfield.(storages, ))

mpi_sum!(ρ, basis.comm_kpts)
ρ = symmetrize_ρ(basis, ρ; do_lowpass=false)
Expand All @@ -62,18 +51,61 @@ using an optional `occupation_threshold`. By default all occupation numbers are
negtol = max(sqrt(eps(T)), 10occupation_threshold)
minimum(ρ) < -negtol && @warn("Negative ρ detected", min_ρ=minimum(ρ))

ρ
ρ::AbstractArray{Tρ, 4}
end

# Variation in density corresponding to a variation in the orbitals and occupations.
@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ,
occupation, δoccupation=zero.(occupation);
occupation_threshold=zero(T)) where {T}
ForwardDiff.derivative(zero(T)) do ε
ψ_ε = [ψk .+ ε .* δψk for (ψk, δψk) in zip(ψ, δψ)]
occ_ε = [occk .+ ε .* δocck for (occk, δocck) in zip(occupation, δoccupation)]
compute_density(basis, ψ_ε, occ_ε; occupation_threshold)
@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ, occupation,
δoccupation=zero.(occupation);
occupation_threshold=zero(T), q=zero(Vec3{T})) where {T}
= promote_type(T, eltype(ψ[1]))
# δρ is expected to be real when computations are not phonon-related.
Tδρ = iszero(q) ? real(Tψ) :
real_qzero = iszero(q) ? real : identity
n_components = basis.model.n_components

ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, q)]

# occupation should be on the CPU as we are going to be doing scalar indexing.
occupation = [to_cpu(oc) for oc in occupation]
mask_occ = [findall(occnk -> abs(occnk) occupation_threshold, occk)
for occk in occupation]

function allocate_local_storage()
(; δρ=zeros_like(basis.G_vectors, Tδρ, basis.fft_size..., basis.model.n_spin_components),
ψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...),
δψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...))
end
range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]

storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
(ik, n) = kn

kpt = basis.kpoints[ik]
ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
# We return the δψk in the basis k+q which are associated to a displacement of the ψk.
kpt_plus_q, δψk_plus_q = kpq_equivalent_blochwave_to_kpq(basis, kpt, q,
ordering(δψ)[ik])
# The perturbation of the density
# |ψ_nk|² is 2 ψ_{n,k} * δψ_{n,k+q}.
ifft!(storage.δψnk_real, basis, kpt_plus_q, δψk_plus_q[:, :, n])

for σ = 1:basis.model.n_components
storage.δρ[:, :, :, kpt.spin] .+= real_qzero(
2 .* occupation[ik][n] .* basis.kweights[ik] .* conj(storage.ψnk_real)
.* storage.δψnk_real[σ, :, :, :]
.+ δoccupation[ik][n] .* basis.kweights[ik]
.* abs2.(storage.ψnk_real[σ, :, :, :]))
end

synchronize_device(basis.architecture)
end
δρ = sum(getfield.(storages, :δρ))

mpi_sum!(δρ, basis.comm_kpts)
δρ = symmetrize_ρ(basis, δρ; do_lowpass=false)

δρ
end

@views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis{TT}, ψ,
Expand All @@ -82,12 +114,12 @@ end
T = promote_type(TT, real(eltype(ψ[1])))
τ = similar(ψ[1], T, (basis.fft_size..., basis.model.n_spin_components))
τ .= 0
dαψnk_real = zeros(complex(T), basis.fft_size)
dασψnk_real = zeros(complex(T), basis.fft_size)
for (ik, kpt) in enumerate(basis.kpoints)
G_plus_k = [[Gk[α] for Gk in Gplusk_vectors_cart(basis, kpt)] for α = 1:3]
for n = 1:size(ψ[ik], 3), α = 1:3, σ = 1:basis.model.n_components
ifft!(dαψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
@. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψnk_real)
for n = 1:size(ψ[ik], 3), σ = 1:basis.model.n_components, α = 1:3
ifft!(dασψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
@. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dασψnk_real)
end
end
mpi_sum!(τ, basis.comm_kpts)
Expand Down
2 changes: 1 addition & 1 deletion src/density_methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T},
ρ_iG / sqrt(model.unit_cell_volume)
end
ρ = to_device(basis.architecture, ρ_cpu)
enforce_real!(basis, ρ) # Symmetrize Fourier coeffs to have real iFFT
enforce_real!(ρ, basis) # Symmetrize Fourier coeffs to have real iFFT
irfft(basis, ρ)
end

Expand Down
3 changes: 2 additions & 1 deletion src/eigen/preconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,11 @@ ldiv!(P::PreconditionerTPA, R) = ldiv!(R, P, R)
end
(Base.:*)(P::PreconditionerTPA, R) = mul!(copy(R), P, R)

function precondprep!(P::PreconditionerTPA, X)
function precondprep!(P::PreconditionerTPA, X::AbstractArray)
P.mean_kin = map(eachcol(X)) do x
x = from_composite_σG(P.basis, P.kpoint, x)
sum(real(dot(x[σ, :, :], Diagonal(P.kin), x[σ, :, :]))
for σ = 1:P.basis.model.n_components)
end
end
precondprep!(P::PreconditionerTPA, ::Nothing) = 1 # fallback for edge cases
Loading

0 comments on commit 2a0c5b9

Please sign in to comment.