add settings file and modify ValidationDoc classmethod inputs to use …

…this settings file
materialsproject · Nov 1, 2023 · f2b51a2 · f2b51a2
1 parent 2b654d3
commit f2b51a2
Show file tree

Hide file tree

Showing 2 changed files with 168 additions and 39 deletions.
diff --git a/pymatgen/io/validation/settings.py b/pymatgen/io/validation/settings.py
@@ -0,0 +1,131 @@
+# mypy: ignore-errors
+
+"""
+Settings for pymatgen-io-validation. Used to be part of EmmetSettings.
+"""
+import json
+from pathlib import Path
+from typing import Dict, List, Type, TypeVar, Union
+
+import requests
+from monty.json import MontyDecoder
+from pydantic import field_validator, model_validator, Field, ImportString
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+# Default for the ``config_file`` setting: ``.emmet.json`` in the user's home directory.
+DEFAULT_CONFIG_FILE_PATH = str(Path.home().joinpath(".emmet.json"))
+
+S = TypeVar("S", bound="IOValidationSettings")  # types ``autoload``'s ``cls`` and return value
+
+
+class IOValidationSettings(BaseSettings):
+    """
+    Settings for pymatgen-io-validation. Built-in defaults can be replaced by a JSON config file.
+    """
+
+    config_file: str = Field(DEFAULT_CONFIG_FILE_PATH, description="File to load alternative defaults from")
+
+    VASP_KPTS_TOLERANCE: float = Field(
+        0.9,
+        description="Relative tolerance for kpt density to still be a valid task document",
+    )
+
+    VASP_ALLOW_KPT_SHIFT: bool = Field(
+        False,
+        description="Whether to consider a task valid if kpoints are shifted by the user",
+    )
+
+    VASP_ALLOW_EXPLICIT_KPT_MESH: Union[str, bool] = Field(
+        "auto",
+        description="Whether to consider a task valid if the user defines an explicit kpoint mesh",
+    )
+
+    VASP_FFT_GRID_TOLERANCE: float = Field(
+        0.9,
+        description="Relative tolerance for FFT grid parameters to still be a valid",
+    )
+
+    VASP_DEFAULT_INPUT_SETS: Dict[str, ImportString] = Field(
+        {
+            "GGA Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
+            "GGA+U Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
+            "R2SCAN Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
+            "SCAN Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
+            "PBESol Structure Optimization": "pymatgen.io.vasp.sets.MPScanRelaxSet",
+            "GGA Static": "pymatgen.io.vasp.sets.MPStaticSet",
+            "GGA+U Static": "pymatgen.io.vasp.sets.MPStaticSet",
+            "R2SCAN Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
+            "SCAN Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
+            "PBESol Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
+            "HSE06 Static": "pymatgen.io.vasp.sets.MPScanStaticSet",
+            "GGA NSCF Uniform": "pymatgen.io.vasp.sets.MPNonSCFSet",
+            "GGA+U NSCF Uniform": "pymatgen.io.vasp.sets.MPNonSCFSet",
+            "GGA NSCF Line": "pymatgen.io.vasp.sets.MPNonSCFSet",
+            "GGA+U NSCF Line": "pymatgen.io.vasp.sets.MPNonSCFSet",
+            "GGA NMR Electric Field Gradient": "pymatgen.io.vasp.sets.MPNMRSet",
+            "GGA NMR Nuclear Shielding": "pymatgen.io.vasp.sets.MPNMRSet",
+            "GGA+U NMR Electric Field Gradient": "pymatgen.io.vasp.sets.MPNMRSet",
+            "GGA+U NMR Nuclear Shielding": "pymatgen.io.vasp.sets.MPNMRSet",
+            "GGA Deformation": "pymatgen.io.vasp.sets.MPStaticSet",
+            "GGA+U Deformation": "pymatgen.io.vasp.sets.MPStaticSet",
+            "GGA DFPT Dielectric": "pymatgen.io.vasp.sets.MPStaticSet",
+            "GGA+U DFPT Dielectric": "pymatgen.io.vasp.sets.MPStaticSet",
+        },
+        description="Default input sets for task validation",
+    )
+
+    VASP_MAX_SCF_GRADIENT: float = Field(
+        1000,
+        description="Maximum upward gradient in the last SCF for any VASP calculation",
+    )
+
+    VASP_NUM_IONIC_STEPS_FOR_DRIFT: float = Field(  # NOTE(review): a step count typed as float - confirm
+        3,
+        description="Number of ionic steps to average over when validating drift forces",
+    )
+
+    model_config = SettingsConfigDict(env_prefix="pymatgen_io_validation_", extra="ignore")
+
+    @model_validator(mode="before")
+    @classmethod
+    def load_default_settings(cls, values):
+        """
+        Merge defaults read from the config file into the raw input values.
+
+        The path is ``values["config_file"]`` (default DEFAULT_CONFIG_FILE_PATH): paths starting with
+        "http" are fetched with ``requests``, other paths are read only if they exist. Returns a new dict.
+        """
+        config_file_path: str = values.get("config_file", DEFAULT_CONFIG_FILE_PATH)
+        new_values = {}
+        if config_file_path.startswith("http"):
+            # Bound the request so an unresponsive server cannot hang settings creation.
+            new_values = requests.get(config_file_path, timeout=30).json()
+        elif Path(config_file_path).exists():
+            with open(config_file_path, encoding="utf-8") as f:
+                new_values = json.load(f)
+        # Values passed in take precedence over the ones read from the file.
+        new_values.update(values)
+        return new_values
+
+    @classmethod
+    def autoload(cls: Type[S], settings: Union[None, dict, S]) -> S:
+        """Coerce ``settings`` to an instance: None -> ``cls()``, dict -> ``cls(**settings)``, else unchanged."""
+        if settings is None:
+            return cls()
+        elif isinstance(settings, dict):
+            return cls(**settings)
+        return settings
+
+    @field_validator("VASP_DEFAULT_INPUT_SETS", mode="before")
+    @classmethod
+    def convert_input_sets(cls, value):
+        """Pass each value of a dict input through ``MontyDecoder.process_decoded``; return non-dicts as-is."""
+        if isinstance(value, dict):
+            return {k: MontyDecoder().process_decoded(v) for k, v in value.items()}
+        return value
+
+    def as_dict(self):
+        """HotPatch to enable serializing IOValidationSettings via Monty."""
+        # Dumps only explicitly set, non-default fields. ``model_dump`` replaces ``.dict()``,
+        # which pydantic v2 deprecates.
+        return self.model_dump(exclude_unset=True, exclude_defaults=True)
diff --git a/pymatgen/io/validation/validation.py b/pymatgen/io/validation/validation.py
@@ -10,7 +10,8 @@
 
 from pymatgen.io.vasp.sets import VaspInputSet
 
-# TODO: why MPMetalRelaxSet
+# TODO: AK: why MPMetalRelaxSet 
+# TODO: MK: because more kpoints are needed for metals given the more complicated Fermi surfaces, and MPMetalRelaxSet uses more kpoints
 from pymatgen.io.vasp.sets import MPMetalRelaxSet
 from pymatgen.io.vasp.inputs import Potcar
 
@@ -64,33 +65,36 @@ class Config:
     def from_task_doc(
         cls,
         task_doc: TaskDocument,
+        input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,        
+        potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
         kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE,
-        kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE,  # TODO Usused currently,needed?
-        input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
-        LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS,  # TODO Usused currently,needed?
+        allow_kpoint_shifts: bool = SETTINGS.VASP_ALLOW_KPT_SHIFT,
+        allow_explicit_kpoint_mesh: Union[str, bool] = SETTINGS.VASP_ALLOW_EXPLICIT_KPT_MESH,
+        fft_grid_tolerance: float = SETTINGS.VASP_FFT_GRID_TOLERANCE,
+        num_ionic_steps_to_avg_drift_over: float = SETTINGS.VASP_NUM_IONIC_STEPS_FOR_DRIFT,
         max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
-        potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
     ) -> "ValidationDoc":
         """
         Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
 
         Args:
             task_doc: the task document to process
-            kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
-            kspacing_tolerance:  the tolerance to allow kspacing to lag behind the input set settings
             input_sets: a dictionary of task_types -> pymatgen input set for validation
-            pseudo_dir: directory of pseudopotential directory to ensure correct hashes
-            LDAU_fields: LDAU fields to check for consistency
+            potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
+            kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
+            allow_kpoint_shifts: Whether to consider a task valid if kpoints are shifted by the user
+            allow_explicit_kpoint_mesh: Whether to consider a task valid if the user defines an explicit kpoint mesh
+            fft_grid_tolerance: Relative tolerance for FFT grid parameters to still be valid
+            num_ionic_steps_to_avg_drift_over: Number of ionic steps to average over when validating drift forces
             max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
                 initial equillibriation period. Note this is in eV per atom.
-            potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
         """
 
         bandgap = task_doc.output.bandgap
         calcs_reversed = task_doc.calcs_reversed
         calcs_reversed = [
             calc.dict() for calc in calcs_reversed
-        ]  # convert to dictionary to use built-in `.get()` method       ###################################################
+        ]  # convert to dictionary to use built-in `.get()` method
 
         parameters = (
             task_doc.input.parameters
@@ -119,13 +123,6 @@ def from_task_doc(
         task_type = _get_task_type(calcs_reversed, orig_inputs)
         run_type = _get_run_type(calcs_reversed)
 
-        # maybe move following to settings -->
-        num_ionic_steps_to_avg_drift_over = 3
-        fft_grid_tolerance = 0.9
-        allow_kpoint_shifts = False
-        allow_explicit_kpoint_mesh = "auto"  # could also be True or False
-        # <--
-
         if allow_explicit_kpoint_mesh == "auto":
             if "NSCF" in calc_type.name:
                 allow_explicit_kpoint_mesh = True
@@ -169,15 +166,15 @@ def from_task_doc(
             )
         elif valid_input_set:
             # Get subset of POTCAR summary stats to validate calculation
-            allowed_potcar_stats = {}
+            valid_potcar_summary_stats = {}
             for valid_potcar in valid_input_set.potcar:
                 titel_no_spc = valid_potcar.TITEL.replace(" ", "")
-                allowed_potcar_stats[titel_no_spc] = potcar_summary_stats[
+                valid_potcar_summary_stats[titel_no_spc] = potcar_summary_stats[
                     valid_input_set._config_dict["POTCAR_FUNCTIONAL"]
                 ][titel_no_spc].copy()
 
             if potcar_summary_stats:
-                _check_potcars(reasons, warnings, potcar, calc_type, allowed_potcar_stats)
+                _check_potcars(reasons, warnings, potcar, valid_potcar_summary_stats)
 
             # TODO: check for surface/slab calculations!!!!!!
 
@@ -249,26 +246,29 @@ def from_task_doc(
     def from_directory(
         cls,
         dir_name: Union[Path, str],
+        input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,        
+        potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
         kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE,
-        kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE,
-        input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
-        LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS,  # TODO Unused
+        allow_kpoint_shifts: bool = SETTINGS.VASP_ALLOW_KPT_SHIFT,
+        allow_explicit_kpoint_mesh: Union[str, bool] = SETTINGS.VASP_ALLOW_EXPLICIT_KPT_MESH,
+        fft_grid_tolerance: float = SETTINGS.VASP_FFT_GRID_TOLERANCE,
+        num_ionic_steps_to_avg_drift_over: float = SETTINGS.VASP_NUM_IONIC_STEPS_FOR_DRIFT,
         max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
-        potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats,
     ) -> "ValidationDoc":
         """
         Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
 
         Args:
             dir_name: the directory containing the calculation files to process
-            kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
-            kspacing_tolerance:  the tolerance to allow kspacing to lag behind the input set settings
             input_sets: a dictionary of task_types -> pymatgen input set for validation
-            pseudo_dir: directory of pseudopotential directory to ensure correct hashes
-            LDAU_fields: LDAU fields to check for consistency
+            potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data.
+            kpts_tolerance: the tolerance to allow kpts to lag behind the input set settings
+            allow_kpoint_shifts: Whether to consider a task valid if kpoints are shifted by the user
+            allow_explicit_kpoint_mesh: Whether to consider a task valid if the user defines an explicit kpoint mesh
+            fft_grid_tolerance: Relative tolerance for FFT grid parameters to still be valid
+            num_ionic_steps_to_avg_drift_over: Number of ionic steps to average over when validating drift forces
             max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
                 initial equillibriation period. Note this is in eV per atom.
-            potcar_summary_stats: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value.
         """
         try:
             task_doc = TaskDoc.from_directory(
@@ -278,12 +278,14 @@ def from_directory(
 
             validation_doc = ValidationDoc.from_task_doc(
                 task_doc=task_doc,
-                kpts_tolerance=kpts_tolerance,
-                kspacing_tolerance=kspacing_tolerance,
                 input_sets=input_sets,
-                LDAU_fields=LDAU_fields,  # TODO Unused
-                max_allowed_scf_gradient=max_allowed_scf_gradient,
                 potcar_summary_stats=potcar_summary_stats,
+                kpts_tolerance=kpts_tolerance,
+                allow_kpoint_shifts=allow_kpoint_shifts,
+                allow_explicit_kpoint_mesh=allow_explicit_kpoint_mesh,
+                fft_grid_tolerance=fft_grid_tolerance,
+                num_ionic_steps_to_avg_drift_over=num_ionic_steps_to_avg_drift_over,
+                max_allowed_scf_gradient=max_allowed_scf_gradient,
             )
 
             return validation_doc
@@ -308,7 +310,7 @@ def _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandga
         CalcType.PBE_U_Structure_Optimization,
     ]
 
-    # Ensure inputsets get proper additional input values
+    # Ensure input sets get proper additional input values
     if "SCAN" in run_type.value:
         valid_input_set: VaspInputSet = input_sets[str(calc_type)](structure, bandgap=bandgap)  # type: ignore
 
@@ -343,7 +345,6 @@ def _check_potcars(
     reasons,
     warnings,
     potcars: Potcar,
-    calc_type,
     valid_potcar_summary_stats: Dict[str, ImportString],
     data_match_tol: float = 1e-6,
 ):
@@ -352,9 +353,6 @@ def _check_potcars(
     pymatgen input set.
     """
 
-    # TODO: Update potcar checks. Whether using hashing or not!
-    # AK - added summary stats check, removed hash check
-
     if potcars is None:
         reasons.append(
             "PSEUDOPOTENTIALS --> Missing POTCAR files. "