Skip to content

Commit

Permalink
add settings file and modify ValidationDoc classmethod inputs to use …
Browse files Browse the repository at this point in the history
…this settings file
  • Loading branch information
matthewkuner committed Nov 1, 2023
1 parent 2b654d3 commit f2b51a2
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 39 deletions.
131 changes: 131 additions & 0 deletions pymatgen/io/validation/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# mypy: ignore-errors

"""
Settings for pymatgen-io-validation. Used to be part of EmmetSettings.
"""
import json
from pathlib import Path
from typing import Dict, List, Type, TypeVar, Union

import requests
from monty.json import MontyDecoder
from pydantic import field_validator, model_validator, Field, ImportString
from pydantic_settings import BaseSettings, SettingsConfigDict

# Default location of the JSON file that may override the built-in defaults:
# a ".emmet.json" file in the current user's home directory.
DEFAULT_CONFIG_FILE_PATH = str(Path.home() / ".emmet.json")


# Type variable bound to the settings class, used to annotate the
# ``autoload`` alternate constructor.
S = TypeVar("S", bound="IOValidationSettings")


class IOValidationSettings(BaseSettings):
    """
    Settings for pymatgen-io-validation.

    Values can be supplied explicitly, through environment variables carrying
    the ``pymatgen_io_validation_`` prefix, or through a JSON file (local path
    or HTTP(S) URL) referenced by ``config_file``. Unknown keys are ignored.
    """

    config_file: str = Field(
        DEFAULT_CONFIG_FILE_PATH, description="File to load alternative defaults from"
    )

    VASP_KPTS_TOLERANCE: float = Field(
        0.9,
        description="Relative tolerance for kpt density to still be a valid task document",
    )

    VASP_ALLOW_KPT_SHIFT: bool = Field(
        False,
        description="Whether to consider a task valid if kpoints are shifted by the user",
    )

    VASP_ALLOW_EXPLICIT_KPT_MESH: Union[str, bool] = Field(
        "auto",
        description="Whether to consider a task valid if the user defines an explicit kpoint mesh",
    )

    VASP_FFT_GRID_TOLERANCE: float = Field(
        0.9,
        description="Relative tolerance for FFT grid parameters to still be a valid",
    )

    VASP_DEFAULT_INPUT_SETS: Dict[str, ImportString] = Field(
        {
            "GGA Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
            "GGA+U Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
            "R2SCAN Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
            "SCAN Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
            "PBESol Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
            "GGA Static": "pymatgen.io.vasp.sets.MPStaticSet",
            "GGA+U Static": "pymatgen.io.vasp.sets.MPStaticSet",
            "R2SCAN Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
            "SCAN Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
            "PBESol Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
            "HSE06 Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
            "GGA NSCF Uniform": "pymatgen.io.vasp.sets.MPNonSCFSet",
            "GGA+U NSCF Uniform": "pymatgen.io.vasp.sets.MPNonSCFSet",
            "GGA NSCF Line": "pymatgen.io.vasp.sets.MPNonSCFSet",
            "GGA+U NSCF Line": "pymatgen.io.vasp.sets.MPNonSCFSet",
            "GGA NMR Electric Field Gradient": "pymatgen.io.vasp.sets.MPNMRSet",
            "GGA NMR Nuclear Shielding": "pymatgen.io.vasp.sets.MPNMRSet",
            "GGA+U NMR Electric Field Gradient": "pymatgen.io.vasp.sets.MPNMRSet",
            "GGA+U NMR Nuclear Shielding": "pymatgen.io.vasp.sets.MPNMRSet",
            "GGA Deformation": "pymatgen.io.vasp.sets.MPStaticSet",
            "GGA+U Deformation": "pymatgen.io.vasp.sets.MPStaticSet",
            "GGA DFPT Dielectric": "pymatgen.io.vasp.sets.MPStaticSet",
            "GGA+U DFPT Dielectric": "pymatgen.io.vasp.sets.MPStaticSet",
        },
        description="Default input sets for task validation",
    )

    VASP_MAX_SCF_GRADIENT: float = Field(
        1000,
        description="Maximum upward gradient in the last SCF for any VASP calculation",
    )

    VASP_NUM_IONIC_STEPS_FOR_DRIFT: float = Field(
        3,
        description="Number of ionic steps to average over when validating drift forces",
    )

    model_config = SettingsConfigDict(env_prefix="pymatgen_io_validation_", extra="ignore")

    @model_validator(mode="before")
    @classmethod
    def load_default_settings(cls, values):
        """
        Loads settings from a root file if available and uses that as defaults in
        place of built in defaults

        Args:
            values: the raw values handed to the model before field validation.

        Returns:
            A dict holding the values read from the config file (if any),
            overridden by every key already present in ``values``.

        Raises:
            requests.RequestException: if ``config_file`` is an HTTP(S) URL and
                the download fails, times out, or returns an error status.
        """
        config_file_path: str = values.get("config_file", DEFAULT_CONFIG_FILE_PATH)

        new_values = {}

        # Only genuine URLs go over the network; a bare "http" prefix would also
        # match local file names such as "httpd.json".
        if config_file_path.startswith(("http://", "https://")):
            # A timeout keeps an unresponsive server from blocking forever, and
            # the status check keeps an error payload from being read as settings.
            response = requests.get(config_file_path, timeout=30)
            response.raise_for_status()
            new_values = response.json()
        elif Path(config_file_path).exists():
            with open(config_file_path, encoding="utf-8") as f:
                new_values = json.load(f)

        # Explicitly supplied values take precedence over the file contents.
        new_values.update(values)

        return new_values

    @classmethod
    def autoload(cls: Type[S], settings: Union[None, dict, S]) -> S:
        """
        Build a settings object from a loosely-typed argument.

        Args:
            settings: ``None`` to construct the settings with no explicit
                values, a dict of keyword arguments for the constructor, or
                any other object, which is returned unchanged.

        Returns:
            The resulting settings object.
        """
        if settings is None:
            return cls()
        if isinstance(settings, dict):
            return cls(**settings)
        return settings

    @field_validator("VASP_DEFAULT_INPUT_SETS", mode="before")
    @classmethod
    def convert_input_sets(cls, value):
        """
        Run every entry of a dict of input sets through ``MontyDecoder``
        before field validation; non-dict values are passed through untouched.
        """
        if isinstance(value, dict):
            # One decoder is enough for the whole mapping.
            decoder = MontyDecoder()
            return {k: decoder.process_decoded(v) for k, v in value.items()}
        return value

    def as_dict(self):
        """
        HotPatch to enable serializing IOValidationSettings via Monty

        Only fields that were explicitly set and differ from their defaults
        are included.
        """
        # ``BaseModel.dict`` is deprecated in pydantic v2; ``model_dump`` is its
        # replacement and accepts the same exclusion flags.
        return self.model_dump(exclude_unset=True, exclude_defaults=True)
76 changes: 37 additions & 39 deletions pymatgen/io/validation/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

from pymatgen.io.vasp.sets import VaspInputSet

# TODO: why MPMetalRelaxSet
# TODO: AK: why MPMetalRelaxSet
# TODO: MK: because more kpoints are needed for metals given the more complicated Fermi surfaces, and MPMetalRelaxSet uses more kpoints
from pymatgen.io.vasp.sets import MPMetalRelaxSet
from pymatgen.io.vasp.inputs import Potcar

Expand Down Expand Up @@ -64,33 +65,36 @@ class Config:
def from_task_doc(
cls,
task_doc: TaskDocument,
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE,
kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE, # TODO Usused currently,needed?
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, # TODO Usused currently,needed?
allow_kpoint_shifts: bool = SETTINGS.VASP_ALLOW_KPT_SHIFT,
allow_explicit_kpoint_mesh: Union[str, bool] = SETTINGS.VASP_ALLOW_EXPLICIT_KPT_MESH,
fft_grid_tolerance: float = SETTINGS.VASP_FFT_GRID_TOLERANCE,
num_ionic_steps_to_avg_drift_over: float = SETTINGS.VASP_NUM_IONIC_STEPS_FOR_DRIFT,
max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
) -> "ValidationDoc":
"""
Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
Args:
task_doc: the task document to process
kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
kspacing_tolerance: the tolerance to allow kspacing to lag behind the input set settings
input_sets: a dictionary of task_types -> pymatgen input set for validation
pseudo_dir: directory of pseudopotential directory to ensure correct hashes
LDAU_fields: LDAU fields to check for consistency
potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
allow_kpoint_shifts: Whether to consider a task valid if kpoints are shifted by the user
allow_explicit_kpoint_mesh: Whether to consider a task valid if the user defines an explicit kpoint mesh
fft_grid_tolerance: Relative tolerance for FFT grid parameters to still be valid
num_ionic_steps_to_avg_drift_over: Number of ionic steps to average over when validating drift forces
max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
initial equilibration period. Note this is in eV per atom.
potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
"""

bandgap = task_doc.output.bandgap
calcs_reversed = task_doc.calcs_reversed
calcs_reversed = [
calc.dict() for calc in calcs_reversed
] # convert to dictionary to use built-in `.get()` method ###################################################
] # convert to dictionary to use built-in `.get()` method

parameters = (
task_doc.input.parameters
Expand Down Expand Up @@ -119,13 +123,6 @@ def from_task_doc(
task_type = _get_task_type(calcs_reversed, orig_inputs)
run_type = _get_run_type(calcs_reversed)

# maybe move following to settings -->
num_ionic_steps_to_avg_drift_over = 3
fft_grid_tolerance = 0.9
allow_kpoint_shifts = False
allow_explicit_kpoint_mesh = "auto" # could also be True or False
# <--

if allow_explicit_kpoint_mesh == "auto":
if "NSCF" in calc_type.name:
allow_explicit_kpoint_mesh = True
Expand Down Expand Up @@ -169,15 +166,15 @@ def from_task_doc(
)
elif valid_input_set:
# Get subset of POTCAR summary stats to validate calculation
allowed_potcar_stats = {}
valid_potcar_summary_stats = {}
for valid_potcar in valid_input_set.potcar:
titel_no_spc = valid_potcar.TITEL.replace(" ", "")
allowed_potcar_stats[titel_no_spc] = potcar_summary_stats[
valid_potcar_summary_stats[titel_no_spc] = potcar_summary_stats[
valid_input_set._config_dict["POTCAR_FUNCTIONAL"]
][titel_no_spc].copy()

if potcar_summary_stats:
_check_potcars(reasons, warnings, potcar, calc_type, allowed_potcar_stats)
_check_potcars(reasons, warnings, potcar, valid_potcar_summary_stats)

# TODO: check for surface/slab calculations!!!!!!

Expand Down Expand Up @@ -249,26 +246,29 @@ def from_task_doc(
def from_directory(
cls,
dir_name: Union[Path, str],
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE,
kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE,
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, # TODO Unused
allow_kpoint_shifts: bool = SETTINGS.VASP_ALLOW_KPT_SHIFT,
allow_explicit_kpoint_mesh: Union[str, bool] = SETTINGS.VASP_ALLOW_EXPLICIT_KPT_MESH,
fft_grid_tolerance: float = SETTINGS.VASP_FFT_GRID_TOLERANCE,
num_ionic_steps_to_avg_drift_over: float = SETTINGS.VASP_NUM_IONIC_STEPS_FOR_DRIFT,
max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
) -> "ValidationDoc":
"""
Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
Args:
dir_name: the directory containing the calculation files to process
kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
kspacing_tolerance: the tolerance to allow kspacing to lag behind the input set settings
input_sets: a dictionary of task_types -> pymatgen input set for validation
pseudo_dir: directory of pseudopotential directory to ensure correct hashes
LDAU_fields: LDAU fields to check for consistency
potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
allow_kpoint_shifts: Whether to consider a task valid if kpoints are shifted by the user
allow_explicit_kpoint_mesh: Whether to consider a task valid if the user defines an explicit kpoint mesh
fft_grid_tolerance: Relative tolerance for FFT grid parameters to still be valid
num_ionic_steps_to_avg_drift_over: Number of ionic steps to average over when validating drift forces
max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
initial equilibration period. Note this is in eV per atom.
potcar_summary_stats: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value.
"""
try:
task_doc = TaskDoc.from_directory(
Expand All @@ -278,12 +278,14 @@ def from_directory(

validation_doc = ValidationDoc.from_task_doc(
task_doc=task_doc,
kpts_tolerance=kpts_tolerance,
kspacing_tolerance=kspacing_tolerance,
input_sets=input_sets,
LDAU_fields=LDAU_fields, # TODO Unused
max_allowed_scf_gradient=max_allowed_scf_gradient,
potcar_summary_stats=potcar_summary_stats,
kpts_tolerance=kpts_tolerance,
allow_kpoint_shifts=allow_kpoint_shifts,
allow_explicit_kpoint_mesh=allow_explicit_kpoint_mesh,
fft_grid_tolerance=fft_grid_tolerance,
num_ionic_steps_to_avg_drift_over=num_ionic_steps_to_avg_drift_over,
max_allowed_scf_gradient=max_allowed_scf_gradient,
)

return validation_doc
Expand All @@ -308,7 +310,7 @@ def _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandga
CalcType.PBE_U_Structure_Optimization,
]

# Ensure inputsets get proper additional input values
# Ensure input sets get proper additional input values
if "SCAN" in run_type.value:
valid_input_set: VaspInputSet = input_sets[str(calc_type)](structure, bandgap=bandgap) # type: ignore

Expand Down Expand Up @@ -343,7 +345,6 @@ def _check_potcars(
reasons,
warnings,
potcars: Potcar,
calc_type,
valid_potcar_summary_stats: Dict[str, ImportString],
data_match_tol: float = 1e-6,
):
Expand All @@ -352,9 +353,6 @@ def _check_potcars(
pymatgen input set.
"""

# TODO: Update potcar checks. Whether using hashing or not!
# AK - added summary stats check, removed hash check

if potcars is None:
reasons.append(
"PSEUDOPOTENTIALS --> Missing POTCAR files. "
Expand Down

0 comments on commit f2b51a2

Please sign in to comment.