From 2b654d3c4d2f3e61d47005068726c8e09c8b0555 Mon Sep 17 00:00:00 2001 From: Aaron Kaplan Date: Thu, 26 Oct 2023 17:04:45 -0700 Subject: [PATCH] Made package installable via pip (pymatgen.io.validation) and added potcar validation using potcar_summary_stats from PMG, temporarily disabling mypy on pre-commit due to clash of module / package name in PMG --- .pre-commit-config.yaml | 15 +- pymatgen/io/validation/__init__.py | 4 + pymatgen/io/validation/check_common_errors.py | 83 +-- .../check_for_excess_empty_space.py | 26 +- pymatgen/io/validation/check_incar.py | 583 +++++++++++------- .../io/validation/check_kpoints_kspacing.py | 67 +- pymatgen/io/validation/compare_to_MP_ehull.py | 28 +- pymatgen/io/validation/validation.py | 354 ++++++----- setup.py | 14 +- 9 files changed, 694 insertions(+), 480 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3b387fd..7efa0b4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ exclude: ^(docs|.*test_files|cmd_line|dev_scripts) default_language_version: - python: python3.8 + python: python3.11 ci: autoupdate_schedule: monthly @@ -20,11 +20,6 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/PyCQA/flake8 - rev: 6.1.0 - hooks: - - id: flake8 - - repo: https://github.com/asottile/pyupgrade rev: v3.10.1 hooks: @@ -42,7 +37,7 @@ repos: - --expand-star-imports - --ignore-init-module-imports - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 - hooks: - - id: mypy +# - repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.5.1 +# hooks: +# - id: mypy diff --git a/pymatgen/io/validation/__init__.py b/pymatgen/io/validation/__init__.py index 0ac746c..1b77b97 100644 --- a/pymatgen/io/validation/__init__.py +++ b/pymatgen/io/validation/__init__.py @@ -3,3 +3,7 @@ simulations and calculations. That is, it checks calculation details against a reference to ensure that data is compatible with some standard. """ + +from __future__ import annotations + +from .validation import ValidationDoc diff --git a/pymatgen/io/validation/check_common_errors.py b/pymatgen/io/validation/check_common_errors.py index 20cf5ec..67759f7 100644 --- a/pymatgen/io/validation/check_common_errors.py +++ b/pymatgen/io/validation/check_common_errors.py @@ -1,16 +1,17 @@ import numpy as np + def _check_common_errors( - reasons, - warnings, - task_doc, - calcs_reversed, - parameters, - incar, - structure, - valid_max_allowed_scf_gradient, - ionic_steps, - num_ionic_steps_to_avg_drift_over + reasons, + warnings, + task_doc, + calcs_reversed, + parameters, + incar, + structure, + valid_max_allowed_scf_gradient, + ionic_steps, + num_ionic_steps_to_avg_drift_over, ): # Check for cases where both GGA and METAGGA are set. This should *not* be allowed, as it can erroneously change # the outputted energy significantly. See https://github.com/materialsproject/atomate2/issues/453#issuecomment-1699605867 @@ -22,7 +23,6 @@ def _check_common_errors( "for more information." ) - # check if structure electronically converged final_esteps = ionic_steps[-1]["electronic_steps"] if incar.get("ALGO", "Normal").lower() != "chi" else 0 # In a response function run there is no ionic steps, there is no scf step @@ -31,45 +31,48 @@ def _check_common_errors( to_check = {"e_wo_entrp", "e_fr_energy", "e_0_energy"} while set(final_esteps[i]) == to_check: i += 1 - is_converged = (i + 1 != parameters.get("NELM", 60)) + is_converged = i + 1 != parameters.get("NELM", 60) elif len(final_esteps) < parameters.get("NELM", 60): is_converged = True else: is_converged = False - + if not is_converged: - reasons.append(f"CONVERGENCE --> Did not achieve electronic convergence in the final ionic step. NELM should be increased.") - - + reasons.append( + "CONVERGENCE --> Did not achieve electronic convergence in the final ionic step. NELM should be increased." + ) + # Check if drift force is too large try: all_drift_forces = calcs_reversed[0]["output"]["outcar"]["drift"] if len(all_drift_forces) < num_ionic_steps_to_avg_drift_over: drift_forces_to_avg_over = all_drift_forces else: - drift_forces_to_avg_over = all_drift_forces[::-1][: num_ionic_steps_to_avg_drift_over] + drift_forces_to_avg_over = all_drift_forces[::-1][:num_ionic_steps_to_avg_drift_over] drift_mags_to_avg_over = [np.linalg.norm(drift_forces) for drift_forces in drift_forces_to_avg_over] cur_avg_drift_mag = np.average(drift_mags_to_avg_over) - + valid_max_drift = 0.05 if cur_avg_drift_mag > valid_max_drift: - reasons.append(f"CONVERGENCE --> Excessive drift of {round(cur_avg_drift_mag,4)} eV/A is greater than allowed "\ - f"value of {valid_max_drift} eV/A.") - except Exception as e: + reasons.append( + f"CONVERGENCE --> Excessive drift of {round(cur_avg_drift_mag,4)} eV/A is greater than allowed " + f"value of {valid_max_drift} eV/A." + ) + except Exception: warnings.append("Drift forces not contained in calcs_reversed! Can not check for excessive drift.") - - + # Check for excessively positive final energies (which usually indicates a bad structure) valid_max_energy_per_atom = 50 cur_final_energy_per_atom = task_doc.output.energy_per_atom if cur_final_energy_per_atom > valid_max_energy_per_atom: - reasons.append(f"LARGE POSITIVE FINAL ENERGY --> Final energy is {round(cur_final_energy_per_atom,4)} eV/atom, which is "\ - f"greater than the maximum allowed value of {valid_max_energy_per_atom} eV/atom.") - + reasons.append( + f"LARGE POSITIVE FINAL ENERGY --> Final energy is {round(cur_final_energy_per_atom,4)} eV/atom, which is " + f"greater than the maximum allowed value of {valid_max_energy_per_atom} eV/atom." + ) # Check for excessively large final magnetic moments - ### TODO: make this also work for elements Gd and Eu, which have magmoms >5 in at least one of their pure structures + # TODO: make this also work for elements Gd and Eu, which have magmoms >5 in at least one of their pure structures valid_max_magmoms = {"Gd": 10, "Eu": 10} cur_magmoms = [abs(mag["tot"]) for mag in calcs_reversed[0]["output"]["outcar"]["magnetization"]] bad_site_magmom_msgs = [] @@ -87,36 +90,34 @@ def _check_common_errors( f"at least one {cur_site_ele} site with magmom greater than {cur_site_max_allowed_magmom}" ) if len(bad_site_magmom_msgs) > 0: - msg = f"MAGNETISM --> Final structure contains sites with magnetic moments that are very likely erroneous. This includes: " - msg = msg + "; ".join(set(bad_site_magmom_msgs)) + "." + msg = "MAGNETISM --> Final structure contains sites with magnetic moments that are very likely erroneous. This includes: " + msg = msg + "; ".join(set(bad_site_magmom_msgs)) + "." reasons.append(msg) - # Check for a SCF gradient that is too large (usually indicates unstable calculations) # NOTE: do NOT use `e_0_energy`, as there is a bug in the vasprun.xml when printing that variable # (see https://www.vasp.at/forum/viewtopic.php?t=16942 for more details). skip = abs(parameters.get("NELMDL", -5)) - 1 - energies = [ - d["e_fr_energy"] - for d in ionic_steps[-1]["electronic_steps"] - ] + energies = [d["e_fr_energy"] for d in ionic_steps[-1]["electronic_steps"]] if len(energies) > skip: cur_max_gradient = np.max(np.gradient(energies)[skip:]) cur_max_gradient_per_atom = cur_max_gradient / structure.num_sites if cur_max_gradient_per_atom > valid_max_allowed_scf_gradient: - warnings.append(f"STABILITY --> The max SCF gradient is {round(cur_max_gradient_per_atom,4)} eV/atom, which is larger than the typical max expected value of {valid_max_allowed_scf_gradient} eV/atom. "\ - f"This sometimes indicates an unstable calculation.") + warnings.append( + f"STABILITY --> The max SCF gradient is {round(cur_max_gradient_per_atom,4)} eV/atom, which is larger than the typical max expected value of {valid_max_allowed_scf_gradient} eV/atom. " + f"This sometimes indicates an unstable calculation." + ) else: warnings.append( - "Not enough electronic steps to compute valid gradient" - " and compare with max SCF gradient tolerance." + "Not enough electronic steps to compute valid gradient" " and compare with max SCF gradient tolerance." ) - # Check for Am and Po elements. These currently do not have proper elemental entries # and will not get treated properly by the thermo builder. chemsys = task_doc.chemsys if ("Am" in chemsys) or ("Po" in chemsys): - reasons.append("COMPOSITION --> Your structure contains the elements Am and/or Po, which are not currently being accepted.") + reasons.append( + "COMPOSITION --> Your structure contains the elements Am and/or Po, which are not currently being accepted." + ) - return reasons \ No newline at end of file + return reasons diff --git a/pymatgen/io/validation/check_for_excess_empty_space.py b/pymatgen/io/validation/check_for_excess_empty_space.py index 1ed2dc6..71ca7e1 100644 --- a/pymatgen/io/validation/check_for_excess_empty_space.py +++ b/pymatgen/io/validation/check_for_excess_empty_space.py @@ -1,30 +1,30 @@ from pymatgen.analysis.local_env import VoronoiNN import numpy as np + def check_for_excess_empty_space(structure): - ##### Check 1: find large gaps along one of the defined lattice vectors + # Check 1: find large gaps along one of the defined lattice vectors lattice_vec_lengths = structure.lattice.lengths fcoords = np.array(structure.frac_coords) - lattice_vec_1_frac_coords = list(np.sort(fcoords[:,0])) + lattice_vec_1_frac_coords = list(np.sort(fcoords[:, 0])) lattice_vec_1_frac_coords.append(lattice_vec_1_frac_coords[0] + 1) lattice_vec_1_diffs = np.diff(lattice_vec_1_frac_coords) max_lattice_vec_1_empty_dist = max(lattice_vec_1_diffs) * lattice_vec_lengths[0] - lattice_vec_2_frac_coords = list(np.sort(fcoords[:,1])) + lattice_vec_2_frac_coords = list(np.sort(fcoords[:, 1])) lattice_vec_2_frac_coords.append(lattice_vec_2_frac_coords[0] + 1) lattice_vec_2_diffs = np.diff(lattice_vec_2_frac_coords) max_lattice_vec_2_empty_dist = max(lattice_vec_2_diffs) * lattice_vec_lengths[1] - lattice_vec_3_frac_coords = list(np.sort(fcoords[:,2])) + lattice_vec_3_frac_coords = list(np.sort(fcoords[:, 2])) lattice_vec_3_frac_coords.append(lattice_vec_3_frac_coords[0] + 1) lattice_vec_3_diffs = np.diff(lattice_vec_3_frac_coords) max_lattice_vec_3_empty_dist = max(lattice_vec_3_diffs) * lattice_vec_lengths[2] - - max_empty_distance = max(max_lattice_vec_1_empty_dist, max_lattice_vec_2_empty_dist, max_lattice_vec_3_empty_dist) - - ##### Check 2: get max voronoi polyhedra volume in structure + max_empty_distance = max(max_lattice_vec_1_empty_dist, max_lattice_vec_2_empty_dist, max_lattice_vec_3_empty_dist) + + # Check 2: get max voronoi polyhedra volume in structure def get_max_voronoi_polyhedra_volume(structure): max_voronoi_polyhedra_vol = 0 vnn = VoronoiNN().get_all_voronoi_polyhedra(structure) @@ -39,20 +39,20 @@ def get_max_voronoi_polyhedra_volume(structure): try: max_voronoi_polyhedra_vol = get_max_voronoi_polyhedra_volume(structure) except Exception as e: - if "No Voronoi neighbors found for site - try increasing cutoff".lower() in str(e).lower(): try: - structure.make_supercell(2) # to circumvent weird issue with voronoi class, though this decreases performance significantly. + structure.make_supercell( + 2 + ) # to circumvent weird issue with voronoi class, though this decreases performance significantly. max_voronoi_polyhedra_vol = get_max_voronoi_polyhedra_volume(structure) - except Exception as e: + except Exception: pass if "infinite vertex in the Voronoi construction".lower() in str(e).lower(): print(f"{str(e)} As a result, this structure is marked as having excess empty space.") max_voronoi_polyhedra_vol = np.inf - if (max_voronoi_polyhedra_vol > 25) or (max_voronoi_polyhedra_vol > 5 and max_empty_distance > 7.5): return True else: - return False \ No newline at end of file + return False diff --git a/pymatgen/io/validation/check_incar.py b/pymatgen/io/validation/check_incar.py index 611badd..52d7380 100644 --- a/pymatgen/io/validation/check_incar.py +++ b/pymatgen/io/validation/check_incar.py @@ -9,10 +9,10 @@ def _check_incar( structure, task_doc, calcs_reversed, - ionic_steps, + ionic_steps, nionic_steps, parameters, - incar, + incar, potcar, vasp_major_version, vasp_minor_version, @@ -20,7 +20,7 @@ def _check_incar( task_type, fft_grid_tolerance, ): - # note that all changes to `reasons` and `warnings` can be done in-place (and hence there is no need to return those variables after every functionc all). + # note that all changes to `reasons` and `warnings` can be done in-place (and hence there is no need to return those variables after every functionc all). # # Any cases where that is not done is just to make the code more readable. I didn't think that would be necessary here. _check_chemical_shift_params(reasons, parameters, valid_input_set) _check_dipol_correction_params(reasons, parameters, valid_input_set) @@ -28,19 +28,32 @@ def _check_incar( _check_electronic_projector_params(reasons, parameters, incar, valid_input_set) _check_fft_params(reasons, parameters, incar, valid_input_set, structure, fft_grid_tolerance) _check_hybrid_functional_params(reasons, parameters, valid_input_set) - _check_ionic_params(reasons, warnings, parameters, valid_input_set, task_doc, calcs_reversed, nionic_steps, ionic_steps, structure) + _check_ionic_params( + reasons, warnings, parameters, valid_input_set, task_doc, calcs_reversed, nionic_steps, ionic_steps, structure + ) _check_ismear_and_sigma(reasons, warnings, parameters, task_doc, ionic_steps, nionic_steps, structure) _check_lmaxmix_and_lmaxtau(reasons, warnings, parameters, incar, valid_input_set, structure, task_type) _check_magnetism_params(reasons, parameters, valid_input_set) - _check_misc_params(reasons, warnings, parameters, incar, valid_input_set, calcs_reversed, vasp_major_version, vasp_minor_version, structure) + _check_misc_params( + reasons, + warnings, + parameters, + incar, + valid_input_set, + calcs_reversed, + vasp_major_version, + vasp_minor_version, + structure, + ) _check_precision_params(reasons, parameters, valid_input_set) _check_startup_params(reasons, parameters, incar, valid_input_set) _check_symmetry_params(reasons, parameters, valid_input_set) _check_u_params(reasons, incar, parameters, valid_input_set) _check_write_params(reasons, parameters, valid_input_set) - + return reasons + def _get_default_nbands(structure, parameters, nelect): """ This method is copied from the `estimate_nbands` function in pymatgen.io.vasp.sets.py. @@ -48,62 +61,67 @@ def _get_default_nbands(structure, parameters, nelect): up the psp_resources for pymatgen. """ nions = len(structure.sites) - + if parameters.get("ISPIN", "1") == 1: nmag = 0 else: - nmag = sum(parameters.get("MAGMOM",[0])) - nmag = np.floor((nmag+1)/2) - - possible_val_1 = np.floor((nelect+2)/2) + max(np.floor(nions/2),3) - possible_val_2 = np.floor(nelect*0.6) - + nmag = sum(parameters.get("MAGMOM", [0])) + nmag = np.floor((nmag + 1) / 2) + + possible_val_1 = np.floor((nelect + 2) / 2) + max(np.floor(nions / 2), 3) + possible_val_2 = np.floor(nelect * 0.6) + default_nbands = max(possible_val_1, possible_val_2) + nmag - + if "LNONCOLLINEAR" in parameters.keys(): - if parameters["LNONCOLLINEAR"] == True: + if parameters["LNONCOLLINEAR"]: default_nbands = default_nbands * 2 - + if "NPAR" in parameters.keys(): npar = parameters["NPAR"] - default_nbands = (np.floor((default_nbands+npar-1)/npar))*npar - + default_nbands = (np.floor((default_nbands + npar - 1) / npar)) * npar + return int(default_nbands) -def _get_default_nelect(structure, valid_input_set, potcar = None): +def _get_default_nelect(structure, valid_input_set, potcar=None): # for parsing raw calculation files or for users without the VASP pseudopotentials set up in the pymatgen `psp_resources` directory - if potcar != None: - zval_dict = {p.symbol.split("_")[0]: p.zval for p in potcar} # num of electrons each species should have according to the POTCAR - # change something like "Fe_pv" to just "Fe" for easier matching of species + if potcar is not None: + zval_dict = { + p.symbol.split("_")[0]: p.zval for p in potcar + } # num of electrons each species should have according to the POTCAR + # change something like "Fe_pv" to just "Fe" for easier matching of species default_nelect = 0 for site in structure.sites: default_nelect += zval_dict[site.species_string] # else try using functions that require the `psp_resources` directory to be set up for pymatgen. else: default_nelect = valid_input_set.nelect - + return int(default_nelect) - -def _get_valid_ismears_and_sigma(parameters, bandgap, nionic_steps): - - extra_comments_for_ismear_and_sigma = f"This is flagged as incorrect because this calculation had a bandgap of {round(bandgap,3)}" - if bandgap > 1e-4: # value taken from https://github.com/materialsproject/pymatgen/blob/1f98fa21258837ac174105e00e7ac8563e119ef0/pymatgen/io/vasp/sets.py#L969 - valid_ismears = [-5,0] +def _get_valid_ismears_and_sigma(parameters, bandgap, nionic_steps): + extra_comments_for_ismear_and_sigma = ( + f"This is flagged as incorrect because this calculation had a bandgap of {round(bandgap,3)}" + ) + + if ( + bandgap > 1e-4 + ): # value taken from https://github.com/materialsproject/pymatgen/blob/1f98fa21258837ac174105e00e7ac8563e119ef0/pymatgen/io/vasp/sets.py#L969 + valid_ismears = [-5, 0] valid_sigma = 0.05 else: - valid_ismears = [0,1,2] + valid_ismears = [0, 1, 2] cur_nsw = parameters.get("NSW", 0) if cur_nsw == 0: - valid_ismears.append(-5) # ISMEAR = -5 is valid for metals *only* when doing static calc + valid_ismears.append(-5) # ISMEAR = -5 is valid for metals *only* when doing static calc extra_comments_for_ismear_and_sigma += " and is a static calculation" else: extra_comments_for_ismear_and_sigma += " and is a non-static calculation" valid_sigma = 0.2 extra_comments_for_ismear_and_sigma += "." - + return valid_ismears, valid_sigma, extra_comments_for_ismear_and_sigma @@ -112,29 +130,29 @@ def _check_chemical_shift_params(reasons, parameters, valid_input_set): default_lchimag = False valid_lchimag = valid_input_set.incar.get("LCHIMAG", default_lchimag) _check_required_params(reasons, parameters, "LCHIMAG", default_lchimag, valid_lchimag) - + # LNMR_SYM_RED. default_lnmr_sym_red = False valid_lnmr_sym_red = valid_input_set.incar.get("LNMR_SYM_RED", default_lnmr_sym_red) _check_required_params(reasons, parameters, "LNMR_SYM_RED", default_lnmr_sym_red, valid_lnmr_sym_red) - + def _check_dipol_correction_params(reasons, parameters, valid_input_set): # LDIPOL. default_ldipol = False valid_ldipol = valid_input_set.incar.get("LDIPOL", default_ldipol) _check_required_params(reasons, parameters, "LDIPOL", default_ldipol, valid_ldipol) - + # LMONO. default_lmono = False valid_lmono = valid_input_set.incar.get("LMONO", default_lmono) _check_required_params(reasons, parameters, "LMONO", default_lmono, valid_lmono) - + # IDIPOL. default_idipol = 0 valid_idipol = valid_input_set.incar.get("IDIPOL", default_idipol) _check_required_params(reasons, parameters, "IDIPOL", default_idipol, valid_idipol) - + # EPSILON. default_epsilon = 1.0 valid_epsilon = valid_input_set.incar.get("EPSILON", default_epsilon) @@ -146,47 +164,59 @@ def _check_dipol_correction_params(reasons, parameters, valid_input_set): _check_required_params(reasons, parameters, "EFIELD", default_efield, valid_efield) -def _check_electronic_params(reasons, parameters, valid_input_set, structure, potcar = None): - +def _check_electronic_params(reasons, parameters, valid_input_set, structure, potcar=None): # EDIFF. Should be the same or smaller than in valid_input_set valid_ediff = valid_input_set.incar.get("EDIFF", 1e-4) _check_relative_params(reasons, parameters, "EDIFF", 1e-4, valid_ediff, "less than or equal to") - + # ENCUT. Should be the same or greater than in valid_input_set, as this can affect energies & other results. # *** Note: "ENCUT" is not actually detected by the `Vasprun.parameters` object from pymatgen.io.vasp.outputs. # Rather, the ENMAX tag in the `Vasprun.parameters` object contains the relevant value for ENCUT. - cur_encut = parameters.get("ENMAX", 0) + parameters.get("ENMAX", 0) valid_encut = valid_input_set.incar.get("ENCUT", np.inf) _check_relative_params(reasons, parameters, "ENMAX", 0, valid_encut, "greater than or equal to") - + # ENINI. Only check for IALGO = 48 / ALGO = VeryFast, as this is the only algo that uses this tag. if parameters.get("IALGO", 38) == 48: _check_relative_params(reasons, parameters, "ENINI", 0, valid_encut, "greater than or equal to") - - # ENAUG. Should only be checked for calculations where the relevant MP input set specifies ENAUG. + + # ENAUG. Should only be checked for calculations where the relevant MP input set specifies ENAUG. # In that case, ENAUG should be the same or greater than in valid_input_set. if "ENAUG" in valid_input_set.incar.keys(): - cur_enaug = parameters.get("ENAUG", 0) + parameters.get("ENAUG", 0) valid_enaug = valid_input_set.incar.get("ENAUG", np.inf) _check_relative_params(reasons, parameters, "ENAUG", 0, valid_enaug, "greater than or equal to") - + # IALGO. - valid_ialgos = [38, 58, 68, 90] # TODO: figure out if 'normal' algos every really affect results other than convergence + valid_ialgos = [ + 38, + 58, + 68, + 90, + ] # TODO: figure out if 'normal' algos every really affect results other than convergence _check_allowed_params(reasons, parameters, "IALGO", 38, valid_ialgos) - + # NELECT. - default_nelect = _get_default_nelect(structure, valid_input_set, potcar = potcar) + default_nelect = _get_default_nelect(structure, valid_input_set, potcar=potcar) _check_required_params(reasons, parameters, "NELECT", default_nelect, default_nelect) - + # NBANDS. - min_nbands = int(np.ceil(default_nelect/2) + 1) + min_nbands = int(np.ceil(default_nelect / 2) + 1) default_nbands = _get_default_nbands(structure, parameters, default_nelect) # check for too many bands (can lead to unphysical electronic structures, see https://github.com/materialsproject/custodian/issues/224 for more context too_many_bands_msg = "Too many bands can lead to unphysical electronic structure (see https://github.com/materialsproject/custodian/issues/224 for more context.)" - _check_relative_params(reasons, parameters, "NBANDS", default_nbands, 4*default_nbands, "less than or equal to", extra_comments_upon_failure=too_many_bands_msg) + _check_relative_params( + reasons, + parameters, + "NBANDS", + default_nbands, + 4 * default_nbands, + "less than or equal to", + extra_comments_upon_failure=too_many_bands_msg, + ) # check for too few bands (leads to degenerate energies) _check_relative_params(reasons, parameters, "NBANDS", default_nbands, min_nbands, "greater than or equal to") - + def _check_electronic_projector_params(reasons, parameters, incar, valid_input_set): # LREAL. Should be Auto or False (consistent with MP input sets). @@ -196,7 +226,7 @@ def _check_electronic_projector_params(reasons, parameters, incar, valid_input_s valid_lreals = ["FALSE", "AUTO", "A"] elif str(valid_input_set.incar.get("LREAL")).upper() in ["FALSE"]: valid_lreals = ["FALSE"] - + cur_lreal = str(incar.get("LREAL", "False")).upper() if cur_lreal not in valid_lreals: reasons.append(f"INPUT SETTINGS --> LREAL: is set to {cur_lreal}, but should be one of {valid_lreals}.") @@ -213,30 +243,36 @@ def _check_electronic_projector_params(reasons, parameters, incar, valid_input_s # # cur_lreal = str(incar.get("LREAL", "False")).upper() # # if cur_lreal not in valid_lreals: # # reasons.append(f"INPUT SETTINGS --> LREAL: is set to {cur_lreal}, but should be one of {valid_lreals}.") - - + # LMAXPAW. default_lmaxpaw = -100 valid_lmaxpaw = valid_input_set.incar.get("LMAXPAW", default_lmaxpaw) _check_required_params(reasons, parameters, "LMAXPAW", default_lmaxpaw, valid_lmaxpaw) - + # NLSPLINE. Should be False unless specified by the valid_input_set. default_nlspline = False valid_nlspline = valid_input_set.incar.get("NLSPLINE", default_nlspline) _check_required_params(reasons, parameters, "NLSPLINE", default_nlspline, valid_nlspline) - -def _check_fft_params(reasons, parameters, incar, valid_input_set, structure, fft_grid_tolerance,): +def _check_fft_params( + reasons, + parameters, + incar, + valid_input_set, + structure, + fft_grid_tolerance, +): # NGX/Y/Z and NGXF/YF/ZF. Not checked if not in INCAR file (as this means the VASP default was used). if any(i for i in ["NGX", "NGY", "NGZ", "NGXF", "NGYF", "NGZF"] if i in incar.keys()): - - cur_prec = parameters.get("PREC", "NORMAL").upper() + parameters.get("PREC", "NORMAL").upper() cur_encut = parameters.get("ENMAX", np.inf) - cur_enaug = parameters.get("ENAUG", np.inf) - + parameters.get("ENAUG", np.inf) + valid_encut_for_fft_grid_params = max(cur_encut, valid_input_set.incar.get("ENCUT")) - ([valid_ngx, valid_ngy, valid_ngz], [valid_ngxf, valid_ngyf, valid_ngzf]) = valid_input_set.calculate_ng(custom_encut = valid_encut_for_fft_grid_params) + ([valid_ngx, valid_ngy, valid_ngz], [valid_ngxf, valid_ngyf, valid_ngzf]) = valid_input_set.calculate_ng( + custom_encut=valid_encut_for_fft_grid_params + ) valid_ngx = int(valid_ngx * fft_grid_tolerance) valid_ngy = int(valid_ngy * fft_grid_tolerance) valid_ngz = int(valid_ngz * fft_grid_tolerance) @@ -245,13 +281,61 @@ def _check_fft_params(reasons, parameters, incar, valid_input_set, structure, ff valid_ngzf = int(valid_ngzf * fft_grid_tolerance) extra_comments_for_FFT_grid = "This likely means the number FFT grid points was modified by the user. If not, please create a GitHub issue." - - _check_relative_params(reasons, parameters, "NGX", np.inf, valid_ngx, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) - _check_relative_params(reasons, parameters, "NGY", np.inf, valid_ngy, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) - _check_relative_params(reasons, parameters, "NGZ", np.inf, valid_ngz, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) - _check_relative_params(reasons, parameters, "NGXF", np.inf, valid_ngxf, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) - _check_relative_params(reasons, parameters, "NGYF", np.inf, valid_ngyf, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) - _check_relative_params(reasons, parameters, "NGZF", np.inf, valid_ngzf, "greater than or equal to", extra_comments_upon_failure = extra_comments_for_FFT_grid) + + _check_relative_params( + reasons, + parameters, + "NGX", + np.inf, + valid_ngx, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) + _check_relative_params( + reasons, + parameters, + "NGY", + np.inf, + valid_ngy, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) + _check_relative_params( + reasons, + parameters, + "NGZ", + np.inf, + valid_ngz, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) + _check_relative_params( + reasons, + parameters, + "NGXF", + np.inf, + valid_ngxf, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) + _check_relative_params( + reasons, + parameters, + "NGYF", + np.inf, + valid_ngyf, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) + _check_relative_params( + reasons, + parameters, + "NGZF", + np.inf, + valid_ngzf, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_FFT_grid, + ) # ADDGRID. default_addgrid = False @@ -273,14 +357,14 @@ def _check_hybrid_functional_params(reasons, parameters, valid_input_set): default_aggax = 1.0 default_aldax = 1.0 default_amggax = 1.0 - + if valid_lhfcalc and parameters.get("AEXX", default_aexx) == 1: default_aldac = 0 default_amggac = 0 else: default_aldac = 1.0 default_amggac = 1.0 - + valid_aexx = valid_input_set.incar.get("AEXX", default_aexx) valid_aggac = valid_input_set.incar.get("AGGAC", default_aggac) valid_aggax = valid_input_set.incar.get("AGGAX", default_aggax) @@ -288,7 +372,7 @@ def _check_hybrid_functional_params(reasons, parameters, valid_input_set): valid_aldax = valid_input_set.incar.get("ALDAX", default_aldax) valid_amggac = valid_input_set.incar.get("AMGGAC", default_amggac) valid_amggax = valid_input_set.incar.get("AMGGAX", default_amggax) - + _check_required_params(reasons, parameters, "AEXX", default_aexx, valid_aexx, allow_close=True) _check_required_params(reasons, parameters, "AGGAC", default_aggac, valid_aggac, allow_close=True) _check_required_params(reasons, parameters, "AGGAX", default_aggax, valid_aggax, allow_close=True) @@ -297,9 +381,11 @@ def _check_hybrid_functional_params(reasons, parameters, valid_input_set): _check_required_params(reasons, parameters, "AMGGAC", default_amggac, valid_amggac, allow_close=True) _check_required_params(reasons, parameters, "AMGGAX", default_amggax, valid_amggax, allow_close=True) _check_required_params(reasons, parameters, "LHFCALC", False, valid_lhfcalc) - -def _check_ionic_params(reasons, warnings, parameters, valid_input_set, task_doc, calcs_reversed, nionic_steps, ionic_steps, structure): + +def _check_ionic_params( + reasons, warnings, parameters, valid_input_set, task_doc, calcs_reversed, nionic_steps, ionic_steps, structure +): # IBRION. default_ibrion = 0 if valid_input_set.incar.get("IBRION", default_ibrion) not in [-1, 1, 2]: @@ -308,11 +394,20 @@ def _check_ionic_params(reasons, warnings, parameters, valid_input_set, task_doc else: valid_ibrions = [-1, 1, 2] _check_allowed_params(reasons, parameters, "IBRION", default_ibrion, valid_ibrions) - - # ISIF. + + # ISIF. default_isif = 2 - valid_min_isif = 2 ###################################################################################################################################### - _check_relative_params(reasons, parameters, "ISIF", default_isif, valid_min_isif, "greater than or equal to", extra_comments_upon_failure = "ISIF values < 2 do not output the complete stress tensor.") + # TODO? valid_min_isif was highlighted before + valid_min_isif = 2 + _check_relative_params( + reasons, + parameters, + "ISIF", + default_isif, + valid_min_isif, + "greater than or equal to", + extra_comments_upon_failure="ISIF values < 2 do not output the complete stress tensor.", + ) # PSTRESS. default_pstress = 0.0 @@ -320,22 +415,34 @@ def _check_ionic_params(reasons, warnings, parameters, valid_input_set, task_doc _check_required_params(reasons, parameters, "PSTRESS", default_pstress, valid_pstress, allow_close=True) # POTIM. - if parameters.get("IBRION", 0) in [1,2,3,5,6]: # POTIM is only used for some IBRION values + if parameters.get("IBRION", 0) in [1, 2, 3, 5, 6]: # POTIM is only used for some IBRION values valid_max_potim = 5 - _check_relative_params(reasons, parameters, "POTIM", 0.5, valid_max_potim, "less than or equal to", extra_comments_upon_failure="POTIM being so high will likely lead to erroneous results.") + _check_relative_params( + reasons, + parameters, + "POTIM", + 0.5, + valid_max_potim, + "less than or equal to", + extra_comments_upon_failure="POTIM being so high will likely lead to erroneous results.", + ) # Check for large changes in energy between ionic steps (usually indicates too high POTIM) if nionic_steps > 1: # Do not use `e_0_energy`, as there is a bug in the vasprun.xml when printing that variable # (see https://www.vasp.at/forum/viewtopic.php?t=16942 for more details). - cur_ionic_step_energies = [ionic_step['e_fr_energy'] for ionic_step in ionic_steps] + cur_ionic_step_energies = [ionic_step["e_fr_energy"] for ionic_step in ionic_steps] cur_ionic_step_energy_gradient = np.diff(cur_ionic_step_energies) - cur_max_ionic_step_energy_change_per_atom = max(np.abs(cur_ionic_step_energy_gradient)) / structure.num_sites - valid_max_energy_change_per_atom = 1 + cur_max_ionic_step_energy_change_per_atom = ( + max(np.abs(cur_ionic_step_energy_gradient)) / structure.num_sites + ) + valid_max_energy_change_per_atom = 1 if cur_max_ionic_step_energy_change_per_atom > valid_max_energy_change_per_atom: - reasons.append(f"INPUT SETTINGS --> POTIM: The energy changed by a maximum of {cur_max_ionic_step_energy_change_per_atom} eV/atom "\ - f"between ionic steps, which is greater than the maximum allowed of {valid_max_energy_change_per_atom} eV/atom. "\ - f"This indicates that the POTIM is too high.") - + reasons.append( + f"INPUT SETTINGS --> POTIM: The energy changed by a maximum of {cur_max_ionic_step_energy_change_per_atom} eV/atom " + f"between ionic steps, which is greater than the maximum allowed of {valid_max_energy_change_per_atom} eV/atom. " + f"This indicates that the POTIM is too high." + ) + # SCALEE. default_scalee = 1.0 valid_scalee = valid_input_set.incar.get("SCALEE", default_scalee) @@ -344,84 +451,105 @@ def _check_ionic_params(reasons, warnings, parameters, valid_input_set, task_doc # EDIFFG. # Should be the same or smaller than in valid_input_set. Force-based cutoffs (not in every # every MP-compliant input set, but often have comparable or even better results) will also be accepted - ######## I am **NOT** confident that this should be the final check. Perhaps I need convincing (or perhaps it does indeed need to be changed...) - ######## TODO: -somehow identify if a material is a vdW structure, in which case force-convergence should maybe be more strict? + # I am **NOT** confident that this should be the final check. Perhaps I need convincing (or perhaps it does indeed need to be changed...) + # TODO: -somehow identify if a material is a vdW structure, in which case force-convergence should maybe be more strict? valid_ediff = valid_input_set.incar.get("EDIFF", 1e-4) - ediffg_in_input_set = valid_input_set.incar.get("EDIFFG", 10*valid_ediff) + ediffg_in_input_set = valid_input_set.incar.get("EDIFFG", 10 * valid_ediff) if ediffg_in_input_set > 0: valid_ediffg_energy = ediffg_in_input_set valid_ediffg_force = -0.05 elif ediffg_in_input_set < 0: - valid_ediffg_energy = 10*valid_ediff + valid_ediffg_energy = 10 * valid_ediff valid_ediffg_force = ediffg_in_input_set - if task_doc.output.forces == None: + if task_doc.output.forces is None: is_force_converged = False warnings.append("TaskDoc does not contain output forces!") else: - is_force_converged = all([(np.linalg.norm(force_on_atom) <= abs(valid_ediffg_force)) for force_on_atom in task_doc.output.forces]) + is_force_converged = all( + [(np.linalg.norm(force_on_atom) <= abs(valid_ediffg_force)) for force_on_atom in task_doc.output.forces] + ) - if parameters.get("NSW",0) == 0 or nionic_steps <= 1: - is_converged = is_force_converged ############################################################################################## + if parameters.get("NSW", 0) == 0 or nionic_steps <= 1: + # TODO? why was this highlighted with hashes? + is_converged = is_force_converged else: - energy_of_last_step = calcs_reversed[0]['output']['ionic_steps'][-1]['e_0_energy'] - energy_of_second_to_last_step = calcs_reversed[0]['output']['ionic_steps'][-2]['e_0_energy'] + energy_of_last_step = calcs_reversed[0]["output"]["ionic_steps"][-1]["e_0_energy"] + energy_of_second_to_last_step = calcs_reversed[0]["output"]["ionic_steps"][-2]["e_0_energy"] is_energy_converged = abs(energy_of_last_step - energy_of_second_to_last_step) <= valid_ediffg_energy is_converged = any([is_energy_converged, is_force_converged]) if not is_converged: reasons.append("CONVERGENCE --> Structure is not converged according to the EDIFFG.") - + def _check_ismear_and_sigma(reasons, warnings, parameters, task_doc, ionic_steps, nionic_steps, structure): bandgap = task_doc.output.bandgap - - valid_ismears, valid_sigma, extra_comments_for_ismear_and_sigma = _get_valid_ismears_and_sigma(parameters, bandgap, nionic_steps) + + valid_ismears, valid_sigma, extra_comments_for_ismear_and_sigma = _get_valid_ismears_and_sigma( + parameters, bandgap, nionic_steps + ) # ISMEAR. - _check_allowed_params(reasons, parameters, "ISMEAR", 1, valid_ismears, extra_comments_upon_failure=extra_comments_for_ismear_and_sigma) - + _check_allowed_params( + reasons, parameters, "ISMEAR", 1, valid_ismears, extra_comments_upon_failure=extra_comments_for_ismear_and_sigma + ) + # SIGMA. - ### TODO: improve logic SIGMA reasons given in the case where you have a material that should have been relaxed with ISMEAR in [-5, 0], but used ISMEAR in [1,2]. - ### Because in such cases, the user wouldn't need to update the SIGMA if they use tetrahedron smearing. + # TODO: improve logic SIGMA reasons given in the case where you have a material that should have been relaxed with ISMEAR in [-5, 0], but used ISMEAR in [1,2]. + # Because in such cases, the user wouldn't need to update the SIGMA if they use tetrahedron smearing. cur_ismear = parameters.get("ISMEAR", 1) - if cur_ismear not in [-5, -4, -2]: # SIGMA is not used by the tetrahedron method. - _check_relative_params(reasons, parameters, "SIGMA", 0.2, valid_sigma, "less than or equal to", extra_comments_upon_failure=extra_comments_for_ismear_and_sigma) + if cur_ismear not in [-5, -4, -2]: # SIGMA is not used by the tetrahedron method. + _check_relative_params( + reasons, + parameters, + "SIGMA", + 0.2, + valid_sigma, + "less than or equal to", + extra_comments_upon_failure=extra_comments_for_ismear_and_sigma, + ) else: - warnings.append(f"SIGMA is not being directly checked, as an ISMEAR of {cur_ismear} is being used. However, given the bandgap of {round(bandgap,3)}, the maximum SIGMA used should be {valid_sigma} if using an ISMEAR *not* in [-5, -4, -2].") - + warnings.append( + f"SIGMA is not being directly checked, as an ISMEAR of {cur_ismear} is being used. However, given the bandgap of {round(bandgap,3)}, the maximum SIGMA used should be {valid_sigma} if using an ISMEAR *not* in [-5, -4, -2]." + ) + # Also check if SIGMA is too large according to the VASP wiki, # which occurs when the entropy term in the energy is greater than 1 meV/atom. all_eentropies_per_atom = [] for ionic_step in ionic_steps: - electronic_steps = ionic_step['electronic_steps'] + electronic_steps = ionic_step["electronic_steps"] # print(electronic_steps) for elec_step in electronic_steps: - if 'eentropy' in elec_step.keys(): - if elec_step['eentropy'] != None: - all_eentropies_per_atom.append(elec_step['eentropy'] / structure.num_sites) - + if "eentropy" in elec_step.keys(): + if elec_step["eentropy"] is not None: + all_eentropies_per_atom.append(elec_step["eentropy"] / structure.num_sites) + cur_max_eentropy_per_atom = max(abs(np.array(all_eentropies_per_atom))) valid_max_eentropy_per_atom = 0.001 - + if cur_max_eentropy_per_atom > valid_max_eentropy_per_atom: - reasons.append(f"INPUT SETTINGS --> SIGMA: The entropy term (T*S) in the energy was {round(1000 * cur_max_eentropy_per_atom, 3)} meV/atom, which is "\ - f"greater than the {round(1000 * valid_max_eentropy_per_atom, 1)} meV/atom maximum suggested in the VASP wiki. "\ - f"Thus, SIGMA should be decreased.") + reasons.append( + f"INPUT SETTINGS --> SIGMA: The entropy term (T*S) in the energy was {round(1000 * cur_max_eentropy_per_atom, 3)} meV/atom, which is " + f"greater than the {round(1000 * valid_max_eentropy_per_atom, 1)} meV/atom maximum suggested in the VASP wiki. " + f"Thus, SIGMA should be decreased." + ) def _check_lmaxmix_and_lmaxtau(reasons, warnings, parameters, incar, valid_input_set, structure, task_type): """ - Check that LMAXMIX and LMAXTAU are above the required value. Also ensure that they are not greater than 6, + Check that LMAXMIX and LMAXTAU are above the required value. Also ensure that they are not greater than 6, as that is inadvisable according to the VASP development team (as of writing this in August 2023). """ - + valid_lmaxmix = valid_input_set.incar.get("LMAXMIX", 2) valid_lmaxtau = min(valid_lmaxmix + 2, 6) - lmaxmix_or_lmaxtau_too_high_msg = "From empirical testing, using LMAXMIX and / or LMAXTAU > 6 appears to introduce computational instabilities, " \ + lmaxmix_or_lmaxtau_too_high_msg = ( + "From empirical testing, using LMAXMIX and / or LMAXTAU > 6 appears to introduce computational instabilities, " "and is currently inadvisable according to the VASP development team." - + ) + # LMAXMIX. cur_lmaxmix = parameters.get("LMAXMIX", 2) if (cur_lmaxmix < valid_lmaxmix) or (cur_lmaxmix > 6): @@ -432,7 +560,7 @@ def _check_lmaxmix_and_lmaxtau(reasons, warnings, parameters, incar, valid_input # add additional context for invalidation if user set LMAXMIX > 6 if cur_lmaxmix > 6: lmaxmix_msg += lmaxmix_or_lmaxtau_too_high_msg - + # Either add to reasons or warnings depending on task type (as this affects NSCF calcs the most) # @ Andrew Rosen, is this an adequate check? Or should we somehow also be checking for cases where # a previous SCF calc used the wrong LMAXMIX too? @@ -440,124 +568,132 @@ def _check_lmaxmix_and_lmaxtau(reasons, warnings, parameters, incar, valid_input reasons.append(lmaxmix_msg) else: warnings.append(lmaxmix_msg) - - + # LMAXTAU. Only check for METAGGA calculations if incar.get("METAGGA", None) not in ["--", None, "None"]: - # cannot check LMAXTAU in the `Vasprun.parameters` object, as LMAXTAU is not printed to the parameters. Rather, we must check the INCAR. cur_lmaxtau = incar.get("LMAXTAU", 6) if (cur_lmaxtau < valid_lmaxtau) or (cur_lmaxtau > 6): if valid_lmaxtau < 6: lmaxtau_msg = f"INPUT SETTINGS --> LMAXTAU: value is set to {cur_lmaxtau}, but should be between {valid_lmaxtau} and 6." else: - lmaxtau_msg = f"INPUT SETTINGS --> LMAXTAU: value is set to {cur_lmaxtau}, but should be {valid_lmaxtau}." + lmaxtau_msg = ( + f"INPUT SETTINGS --> LMAXTAU: value is set to {cur_lmaxtau}, but should be {valid_lmaxtau}." + ) # add additional context for invalidation if user set LMAXTAU > 6 if cur_lmaxtau > 6: lmaxtau_msg += lmaxmix_or_lmaxtau_too_high_msg - + reasons.append(lmaxtau_msg) - + def _check_magnetism_params(reasons, parameters, valid_input_set): # LNONCOLLINEAR. default_lnoncollinear = False valid_lnoncollinear = valid_input_set.incar.get("LNONCOLLINEAR", default_lnoncollinear) _check_required_params(reasons, parameters, "LNONCOLLINEAR", default_lnoncollinear, valid_lnoncollinear) - + # LSORBIT. default_lsorbit = False valid_lsorbit = valid_input_set.incar.get("LSORBIT", default_lsorbit) _check_required_params(reasons, parameters, "LSORBIT", default_lsorbit, valid_lsorbit) - -def _check_misc_params(reasons, warnings, parameters, incar, valid_input_set, calcs_reversed, vasp_major_version, vasp_minor_version, structure): - + +def _check_misc_params( + reasons, + warnings, + parameters, + incar, + valid_input_set, + calcs_reversed, + vasp_major_version, + vasp_minor_version, + structure, +): # DEPER. valid_deper = valid_input_set.incar.get("DEPER", 0.3) _check_required_params(reasons, parameters, "DEPER", 0.3, valid_deper, allow_close=True) - + # EBREAK. valid_ebreak = valid_input_set.incar.get("EBREAK", 0.0) _check_required_params(reasons, parameters, "EBREAK", 0.0, valid_ebreak, allow_close=True) - + # GGA_COMPAT. valid_gga_compat = valid_input_set.incar.get("GGA_COMPAT", True) _check_required_params(reasons, parameters, "GGA_COMPAT", True, valid_gga_compat) - + # ICORELEVEL. valid_icorelevel = valid_input_set.incar.get("ICORELEVEL", 0) _check_required_params(reasons, parameters, "ICORELEVEL", 0, valid_icorelevel) - + # IMAGES. valid_images = valid_input_set.incar.get("IMAGES", 0) _check_required_params(reasons, parameters, "IMAGES", 0, valid_images) - + # IVDW. valid_ivdw = valid_input_set.incar.get("IVDW", 0) _check_required_params(reasons, parameters, "IVDW", 0, valid_ivdw) - + # LBERRY. valid_lberry = valid_input_set.incar.get("LBERRY", False) _check_required_params(reasons, parameters, "LBERRY", False, valid_lberry) - + # LCALCEPS. valid_lcalceps = valid_input_set.incar.get("LCALCEPS", False) _check_required_params(reasons, parameters, "LCALCEPS", False, valid_lcalceps) - + # LCALCPOL. valid_lcalcpol = valid_input_set.incar.get("LCALCPOL", False) _check_required_params(reasons, parameters, "LCALCPOL", False, valid_lcalcpol) - + # LEPSILON. valid_lepsilon = valid_input_set.incar.get("LEPSILON", False) _check_required_params(reasons, parameters, "LEPSILON", False, valid_lepsilon) - + # LHYPERFINE. valid_lhyperfine = valid_input_set.incar.get("LHYPERFINE", False) _check_required_params(reasons, parameters, "LHYPERFINE", False, valid_lhyperfine) - + # LKPOINTS_OPT. valid_lkpoints_opt = valid_input_set.incar.get("LKPOINTS_OPT", False) _check_required_params(reasons, parameters, "LKPOINTS_OPT", False, valid_lkpoints_opt) - + # LKPROJ. valid_lkproj = valid_input_set.incar.get("LKPROJ", False) _check_required_params(reasons, parameters, "LKPROJ", False, valid_lkproj) - + # LMP2LT. valid_lmp2lt = valid_input_set.incar.get("LMP2LT", False) _check_required_params(reasons, parameters, "LMP2LT", False, valid_lmp2lt) - + # LOCPROJ. valid_locproj = valid_input_set.incar.get("LOCPROJ", None) _check_required_params(reasons, parameters, "LOCPROJ", None, valid_locproj) - + # LRPA. valid_lrpa = valid_input_set.incar.get("LRPA", False) _check_required_params(reasons, parameters, "LRPA", False, valid_lrpa) - + # LSMP2LT. valid_lsmp2lt = valid_input_set.incar.get("LSMP2LT", False) _check_required_params(reasons, parameters, "LSMP2LT", False, valid_lsmp2lt) - + # LSPECTRAL. valid_lspectral = valid_input_set.incar.get("LSPECTRAL", False) _check_required_params(reasons, parameters, "LSPECTRAL", False, valid_lspectral) - + # LSUBROT. valid_lsubrot = valid_input_set.incar.get("LSUBROT", False) _check_required_params(reasons, parameters, "LSUBROT", False, valid_lsubrot) - + # ML_LMLFF. valid_ml_lmlff = valid_input_set.incar.get("ML_LMLFF", False) _check_required_params(reasons, parameters, "ML_LMLFF", False, valid_ml_lmlff) - + # WEIMIN. valid_weimin = valid_input_set.incar.get("WEIMIN", 0.001) _check_relative_params(reasons, parameters, "WEIMIN", 0.001, valid_weimin, "less than or equal to") - # EFERMI. Only available for VASP >= 6.4. Should not be set to a numerical value, as this may change the number of electrons. if (vasp_major_version >= 6) and (vasp_minor_version >= 4): # must check EFERMI in the *incar*, as it is saved as a numerical value after VASP guesses it in the vasprun.xml `parameters` @@ -566,40 +702,47 @@ def _check_misc_params(reasons, warnings, parameters, incar, valid_input_set, ca allowed_efermis = ["LEGACY", "MIDGAP"] if cur_efermi not in allowed_efermis: reasons.append(f"INPUT SETTINGS --> EFERMI: should be one of {allowed_efermis}.") - + # IWAVPR. if "IWAVPR" in incar.keys(): reasons.append("INPUT SETTINGS --> VASP discourages users from setting the IWAVPR tag (as of July 2023).") - + # LASPH. valid_lasph = valid_input_set.incar.get("LASPH", True) _check_required_params(reasons, parameters, "LASPH", False, valid_lasph) - + # LCORR. cur_ialgo = parameters.get("IALGO", 38) if cur_ialgo != 58: - _check_required_params(reasons, parameters, "LCORR", True, True) - + _check_required_params(reasons, parameters, "LCORR", True, True) + # LORBIT. cur_ispin = parameters.get("ISPIN", 1) # cur_lorbit = incar.get("LORBIT") if "LORBIT" in incar.keys() else parameters.get("LORBIT", None) if (cur_ispin == 2) and (len(calcs_reversed[0]["output"]["outcar"]["magnetization"]) != structure.num_sites): - reasons.append(f"INPUT SETTINGS --> LORBIT: magnetization values were not written to the OUTCAR. This is usually due to LORBIT being set to None or False for calculations with ISPIN=2.") + reasons.append( + "INPUT SETTINGS --> LORBIT: magnetization values were not written to the OUTCAR. This is usually due to LORBIT being set to None or False for calculations with ISPIN=2." + ) if parameters.get("LORBIT", -np.inf) >= 11 and parameters.get("ISYM", 2) and (vasp_major_version < 6): - warnings.append("For LORBIT >= 11 and ISYM = 2 the partial charge densities are not correctly symmetrized and can result "\ - "in different charges for symmetrically equivalent partial charge densities. This issue is fixed as of version "\ - ">=6. See the vasp wiki page for LORBIT for more details.") - - + warnings.append( + "For LORBIT >= 11 and ISYM = 2 the partial charge densities are not correctly symmetrized and can result " + "in different charges for symmetrically equivalent partial charge densities. This issue is fixed as of version " + ">=6. See the vasp wiki page for LORBIT for more details." + ) + # RWIGS. - if True in (x != -1.0 for x in parameters.get("RWIGS", [-1])): # do not allow RWIGS to be changed, as this affects outputs like the magmom on each atom - reasons.append(f"INPUT SETTINGS --> RWIGS: should not be set. This is because it will change some outputs like the magmom on each site.") - + if True in ( + x != -1.0 for x in parameters.get("RWIGS", [-1]) + ): # do not allow RWIGS to be changed, as this affects outputs like the magmom on each atom + reasons.append( + "INPUT SETTINGS --> RWIGS: should not be set. This is because it will change some outputs like the magmom on each site." + ) + # VCA. - if True in (x != 1.0 for x in parameters.get("VCA", [1])): # do not allow VCA calculations - reasons.append(f"INPUT SETTINGS --> VCA: should not be set") - + if True in (x != 1.0 for x in parameters.get("VCA", [1])): # do not allow VCA calculations + reasons.append("INPUT SETTINGS --> VCA: should not be set") + def _check_precision_params(reasons, parameters, valid_input_set): # PREC. @@ -609,10 +752,12 @@ def _check_precision_params(reasons, parameters, valid_input_set): else: raise ValueError("Validation code check for PREC tag needs to be updated to account for a new input set!") _check_allowed_params(reasons, parameters, "PREC", default_prec, valid_precs) - + # ROPT. Should be better than or equal to default for the PREC level. This only matters if projectors are done in real-space. # Note that if the user sets LREAL = Auto in their Incar, it will show up as "True" in the `parameters` object (hence we use the `parameters` object) - if str(parameters.get("LREAL", "FALSE")).upper() == "TRUE": # this only matters if projectors are done in real-space. + if ( + str(parameters.get("LREAL", "FALSE")).upper() == "TRUE" + ): # this only matters if projectors are done in real-space. cur_prec = parameters.get("PREC", "Normal").upper() if cur_prec == "NORMAL": default_ropt = -5e-4 @@ -624,30 +769,32 @@ def _check_precision_params(reasons, parameters, valid_input_set): default_ropt = -0.002 elif cur_prec == "HIGH": default_ropt = -4e-4 - + cur_ropt = parameters.get("ROPT", [default_ropt]) if True in (x < default_ropt for x in cur_ropt): - reasons.append(f"INPUT SETTINGS --> ROPT: value is set to {cur_ropt}, but should be {default_ropt} or stricter.") - + reasons.append( + f"INPUT SETTINGS --> ROPT: value is set to {cur_ropt}, but should be {default_ropt} or stricter." + ) + def _check_startup_params(reasons, parameters, incar, valid_input_set): # ICHARG. if valid_input_set.incar.get("ICHARG", 2) < 10: - valid_icharg = 9 # should be <10 (SCF calcs) + valid_icharg = 9 # should be <10 (SCF calcs) _check_relative_params(reasons, parameters, "ICHARG", 2, valid_icharg, "less than or equal to") else: valid_icharg = valid_input_set.incar.get("ICHARG") _check_required_params(reasons, parameters, "ICHARG", 2, valid_icharg) - + # INIWAV. default_iniwav = 1 valid_iniwav = valid_input_set.incar.get("INIWAV", default_iniwav) _check_required_params(reasons, parameters, "INIWAV", default_iniwav, valid_iniwav) - + # ISTART. valid_istarts = [0, 1, 2] _check_allowed_params(reasons, parameters, "ISTART", 0, valid_istarts) - + def _check_symmetry_params(reasons, parameters, valid_input_set): # ISYM. @@ -655,26 +802,34 @@ def _check_symmetry_params(reasons, parameters, valid_input_set): # allow ISYM as good or better than what is specified in the valid_input_set. if "ISYM" in valid_input_set.incar.keys(): if valid_input_set.incar.get("ISYM") == 3: - valid_isyms = [-1,0,2,3] + valid_isyms = [-1, 0, 2, 3] elif valid_input_set.incar.get("ISYM") == 2: - valid_isyms = [-1,0,2] + valid_isyms = [-1, 0, 2] elif valid_input_set.incar.get("ISYM") == 0: - valid_isyms = [-1,0] + valid_isyms = [-1, 0] elif valid_input_set.incar.get("ISYM") == -1: valid_isyms = [-1] - else: # otherwise let ISYM = -1, 0, or 2 - valid_isyms = [-1,0,2] + else: # otherwise let ISYM = -1, 0, or 2 + valid_isyms = [-1, 0, 2] _check_allowed_params(reasons, parameters, "ISYM", default_isym, valid_isyms) - - # SYMPREC. + + # SYMPREC. default_symprec = 1e-5 - valid_symprec = 1e-3 # custodian will set SYMPREC to a maximum of 1e-3 (as of August 2023) + valid_symprec = 1e-3 # custodian will set SYMPREC to a maximum of 1e-3 (as of August 2023) extra_comments_for_symprec = "If you believe that this SYMPREC value is necessary (perhaps this calculation has a very large cell), please create a GitHub issue and we will consider to admit your calculations." - _check_relative_params(reasons, parameters, "SYMPREC", default_symprec, valid_symprec, "less than or equal to", extra_comments_upon_failure=extra_comments_for_symprec) + _check_relative_params( + reasons, + parameters, + "SYMPREC", + default_symprec, + valid_symprec, + "less than or equal to", + extra_comments_upon_failure=extra_comments_for_symprec, + ) def _check_u_params(reasons, incar, parameters, valid_input_set): - if parameters.get("LDAU", False) == True: + if parameters.get("LDAU", False): valid_ldauu = valid_input_set.incar.get("LDAUU", []) cur_ldauu = incar.get("LDAUU", []) if cur_ldauu != valid_ldauu: @@ -700,24 +855,34 @@ def _check_u_params(reasons, incar, parameters, valid_input_set): def _check_write_params(reasons, parameters, valid_input_set): # NWRITE. - valid_nwrite = valid_input_set.incar.get("NWRITE", 2) # expect this to almost always default to 2 + valid_nwrite = valid_input_set.incar.get("NWRITE", 2) # expect this to almost always default to 2 extra_comments_for_nwrite = f"NWRITE < {valid_nwrite} does not output all of the results we need." - _check_relative_params(reasons, parameters, "NWRITE", 2, valid_nwrite, "greater than or equal to", extra_comments_upon_failure=extra_comments_for_nwrite) - + _check_relative_params( + reasons, + parameters, + "NWRITE", + 2, + valid_nwrite, + "greater than or equal to", + extra_comments_upon_failure=extra_comments_for_nwrite, + ) + # LEFG. valid_lefg = valid_input_set.incar.get("LEFG", False) - _check_required_params(reasons, parameters, "LEFG", False, valid_lefg) - + _check_required_params(reasons, parameters, "LEFG", False, valid_lefg) + # LOPTICS. valid_loptics = valid_input_set.incar.get("LOPTICS", False) _check_required_params(reasons, parameters, "LOPTICS", False, valid_loptics) - -def _check_required_params(reasons, parameters, input_tag, default_val, required_val, allow_close=False, extra_comments_upon_failure=""): + +def _check_required_params( + reasons, parameters, input_tag, default_val, required_val, allow_close=False, extra_comments_upon_failure="" +): cur_val = parameters.get(input_tag, default_val) if allow_close: - if not np.isclose(cur_val, required_val, rtol=1e-05, atol=1e-05): # need to be careful of this + if not np.isclose(cur_val, required_val, rtol=1e-05, atol=1e-05): # need to be careful of this msg = f"INPUT SETTINGS --> {input_tag}: set to {cur_val}, but should be {required_val}." if extra_comments_upon_failure != "": msg += " " + extra_comments_upon_failure @@ -732,7 +897,7 @@ def _check_required_params(reasons, parameters, input_tag, default_val, required def _check_allowed_params(reasons, parameters, input_tag, default_val, allowed_vals, extra_comments_upon_failure=""): # convert to (uppercase) string if allowed vals are also strings - if any(isinstance(item,str) for item in allowed_vals): + if any(isinstance(item, str) for item in allowed_vals): cur_val = str(parameters.get(input_tag, default_val)).upper() else: cur_val = parameters.get(input_tag, default_val) @@ -744,21 +909,27 @@ def _check_allowed_params(reasons, parameters, input_tag, default_val, allowed_v reasons.append(msg) -def _check_relative_params(reasons, parameters, input_tag, default_val, valid_val, should_be, extra_comments_upon_failure=""): +def _check_relative_params( + reasons, parameters, input_tag, default_val, valid_val, should_be, extra_comments_upon_failure="" +): cur_val = parameters.get(input_tag, default_val) - if input_tag == "ENMAX": # change output message for ENMAX / ENCUT to be more clear to users (as they set ENCUT, but this is stored as ENMAX) + if ( + input_tag == "ENMAX" + ): # change output message for ENMAX / ENCUT to be more clear to users (as they set ENCUT, but this is stored as ENMAX) input_tag = "ENCUT" if should_be == "less than or equal to": - if cur_val > valid_val: # check for greater than (opposite of <=) + if cur_val > valid_val: # check for greater than (opposite of <=) msg = f"INPUT SETTINGS --> {input_tag}: set to {cur_val}, but should be less than or equal to {valid_val}." if extra_comments_upon_failure != "": msg += " " + extra_comments_upon_failure reasons.append(msg) if should_be == "greater than or equal to": - if cur_val < valid_val: # check for less than (opposite of >=) - msg = f"INPUT SETTINGS --> {input_tag}: set to {cur_val}, but should be greater than or equal to {valid_val}." + if cur_val < valid_val: # check for less than (opposite of >=) + msg = ( + f"INPUT SETTINGS --> {input_tag}: set to {cur_val}, but should be greater than or equal to {valid_val}." + ) if extra_comments_upon_failure != "": msg += " " + extra_comments_upon_failure - reasons.append(msg) \ No newline at end of file + reasons.append(msg) diff --git a/pymatgen/io/validation/check_kpoints_kspacing.py b/pymatgen/io/validation/check_kpoints_kspacing.py index f879c96..651cfae 100644 --- a/pymatgen/io/validation/check_kpoints_kspacing.py +++ b/pymatgen/io/validation/check_kpoints_kspacing.py @@ -1,66 +1,71 @@ import numpy as np + def _check_kpoints_kspacing( - reasons, + reasons, task_type, parameters, - kpts_tolerance, - valid_input_set, - calcs_reversed, - allow_explicit_kpoint_mesh, + kpts_tolerance, + valid_input_set, + calcs_reversed, + allow_explicit_kpoint_mesh, allow_kpoint_shifts, - structure + structure, ): - # Check number of kpoints used - valid_num_kpts = _get_valid_num_kpts(valid_input_set, structure) - valid_num_kpts = int(np.floor(valid_num_kpts * kpts_tolerance)) - cur_kpoints_obj = calcs_reversed[0]['input']['kpoints'] + valid_num_kpts = _get_valid_num_kpts(valid_input_set, structure) + valid_num_kpts = int(np.floor(valid_num_kpts * kpts_tolerance)) + cur_kpoints_obj = calcs_reversed[0]["input"]["kpoints"] cur_num_kpts = max( - cur_kpoints_obj.get("nkpoints", 0), - np.prod(cur_kpoints_obj.get("kpoints")), - len(cur_kpoints_obj.get("kpoints")) + cur_kpoints_obj.get("nkpoints", 0), np.prod(cur_kpoints_obj.get("kpoints")), len(cur_kpoints_obj.get("kpoints")) ) if cur_num_kpts < valid_num_kpts: - reasons.append(f"INPUT SETTINGS --> KPOINTS or KSPACING: {cur_num_kpts} kpoints were used, but it should have been at least {valid_num_kpts}.") - + reasons.append( + f"INPUT SETTINGS --> KPOINTS or KSPACING: {cur_num_kpts} kpoints were used, but it should have been at least {valid_num_kpts}." + ) + # check for valid kpoint mesh (which depends on symmetry of the structure) cur_kpoint_style = cur_kpoints_obj.get("generation_style").lower() is_hexagonal = structure.lattice.is_hexagonal() - is_face_centered = (structure.get_space_group_info()[0][0] == "F") - monkhorst_mesh_is_invalid = (is_hexagonal or is_face_centered) + is_face_centered = structure.get_space_group_info()[0][0] == "F" + monkhorst_mesh_is_invalid = is_hexagonal or is_face_centered if cur_kpoint_style == "monkhorst" and monkhorst_mesh_is_invalid: - if all([x%2 == 1 for x in cur_kpoints_obj.get("kpoints")[0]]): # allow monkhorst with all odd number of subdivs. + if all( + [x % 2 == 1 for x in cur_kpoints_obj.get("kpoints")[0]] + ): # allow monkhorst with all odd number of subdivs. pass else: - reasons.append(f"INPUT SETTINGS --> KPOINTS or KGAMMA: monkhorst-pack kpoint mesh was used with only even subdivisions, but the structure has symmetry that is incompatible with monkhorst-pack meshes.") - + reasons.append( + "INPUT SETTINGS --> KPOINTS or KGAMMA: monkhorst-pack kpoint mesh was used with only even subdivisions, but the structure has symmetry that is incompatible with monkhorst-pack meshes." + ) # Check for explicit kpoint meshes if not allow_explicit_kpoint_mesh: - if len(cur_kpoints_obj['kpoints']) > 1: - reasons.append(f"INPUT SETTINGS --> KPOINTS: explicitly defining the kpoint mesh is not currently allowed. Automatic kpoint generation is required.") + if len(cur_kpoints_obj["kpoints"]) > 1: + reasons.append( + "INPUT SETTINGS --> KPOINTS: explicitly defining the kpoint mesh is not currently allowed. Automatic kpoint generation is required." + ) # Check for user shifts if not allow_kpoint_shifts: - if any(shift_val != 0 for shift_val in cur_kpoints_obj['usershift']): - reasons.append(f"INPUT SETTINGS --> KPOINTS: shifting the kpoint mesh is not currently allowed.") - + if any(shift_val != 0 for shift_val in cur_kpoints_obj["usershift"]): + reasons.append("INPUT SETTINGS --> KPOINTS: shifting the kpoint mesh is not currently allowed.") + return reasons def _get_valid_num_kpts(valid_input_set, structure): # If MP input set specifies KSPACING in the INCAR - if ("KSPACING" in valid_input_set.incar.keys()) and (valid_input_set.kpoints == None): + if ("KSPACING" in valid_input_set.incar.keys()) and (valid_input_set.kpoints is None): valid_kspacing = valid_input_set.incar.get("KSPACING", 0.5) latt_cur_anorm = structure.lattice.abc # number of kpoints along each of the three lattice vectors - n_1 = max(1, np.ceil( (1/latt_cur_anorm[0]) * 2 * np.pi / valid_kspacing) ) - n_2 = max(1, np.ceil( (1/latt_cur_anorm[1]) * 2 * np.pi / valid_kspacing) ) - n_3 = max(1, np.ceil( (1/latt_cur_anorm[2]) * 2 * np.pi / valid_kspacing) ) + n_1 = max(1, np.ceil((1 / latt_cur_anorm[0]) * 2 * np.pi / valid_kspacing)) + n_2 = max(1, np.ceil((1 / latt_cur_anorm[1]) * 2 * np.pi / valid_kspacing)) + n_3 = max(1, np.ceil((1 / latt_cur_anorm[2]) * 2 * np.pi / valid_kspacing)) valid_num_kpts = n_1 * n_2 * n_3 # If MP input set specifies a KPOINTS file else: valid_num_kpts = valid_input_set.kpoints.num_kpts or np.prod(valid_input_set.kpoints.kpts[0]) - - return int(valid_num_kpts) \ No newline at end of file + + return int(valid_num_kpts) diff --git a/pymatgen/io/validation/compare_to_MP_ehull.py b/pymatgen/io/validation/compare_to_MP_ehull.py index cdbb718..bc74f0e 100644 --- a/pymatgen/io/validation/compare_to_MP_ehull.py +++ b/pymatgen/io/validation/compare_to_MP_ehull.py @@ -1,38 +1,38 @@ from mp_api.client import MPRester -from pymatgen.analysis.phase_diagram import PhaseDiagram, PDEntry +from pymatgen.analysis.phase_diagram import PhaseDiagram from pymatgen.entries.mixing_scheme import MaterialsProjectDFTMixingScheme from pymatgen.entries.computed_entries import ComputedStructureEntry -def compare_to_MP_ehull(mp_api_key = None, task_doc = None): - - if mp_api_key == None: +def compare_to_MP_ehull(mp_api_key=None, task_doc=None): + if mp_api_key is None: raise ValueError("Please input your mp API key") # get ComputedStructureEntry from taskdoc. # The `taskdoc.structure_entry()` does not work directly, as the `entry` field in the taskdoc is None - entry = task_doc.get_entry(task_doc.calcs_reversed, task_id = "-1") + entry = task_doc.get_entry(task_doc.calcs_reversed, task_id="-1") entry_dict = entry.as_dict() - entry_dict['structure'] = task_doc.output.structure + entry_dict["structure"] = task_doc.output.structure cur_structure_entry = ComputedStructureEntry.from_dict(entry_dict) elements = task_doc.output.structure.composition.to_reduced_dict.keys() with MPRester(mp_api_key) as mpr: - # Obtain GGA, GGA+U, and r2SCAN ComputedStructureEntry objects - entries = mpr.get_entries_in_chemsys(elements=elements, - compatible_only = True, - additional_criteria={"thermo_types": ["GGA_GGA+U", "R2SCAN"], "is_stable": True}) - + entries = mpr.get_entries_in_chemsys( + elements=elements, + compatible_only=True, + additional_criteria={"thermo_types": ["GGA_GGA+U", "R2SCAN"], "is_stable": True}, + ) + entries.append(cur_structure_entry) - + # Apply corrections locally with the mixing scheme scheme = MaterialsProjectDFTMixingScheme() corrected_entries = scheme.process_entries(entries) - + # Construct phase diagram pd = PhaseDiagram(corrected_entries) cur_corrected_structure_entry = [entry for entry in corrected_entries if entry.entry_id == "-1"][0] e_above_hull = pd.get_e_above_hull(cur_corrected_structure_entry) - return e_above_hull \ No newline at end of file + return e_above_hull diff --git a/pymatgen/io/validation/validation.py b/pymatgen/io/validation/validation.py index d821665..e50c831 100644 --- a/pymatgen/io/validation/validation.py +++ b/pymatgen/io/validation/validation.py @@ -1,34 +1,37 @@ from datetime import datetime -from typing import Dict, List, Union, Optional +from typing import Dict, List, Union import numpy as np -from pydantic import Field, PyObject +from pkg_resources import resource_filename +from pydantic import Field +from pydantic.types import ImportString # replacement for PyObject from pathlib import Path from monty.os.path import zpath +from monty.serialization import loadfn from pymatgen.io.vasp.sets import VaspInputSet -from pymatgen.io.vasp.sets import MPMetalRelaxSet ######################################################## + +# TODO: why MPMetalRelaxSet +from pymatgen.io.vasp.sets import MPMetalRelaxSet from pymatgen.io.vasp.inputs import Potcar from emmet.core.tasks import TaskDoc from emmet.core.settings import EmmetSettings from emmet.core.base import EmmetBaseModel from emmet.core.mpid import MPID -from emmet.core.utils import DocEnum from emmet.core.vasp.task_valid import TaskDocument from emmet.core.vasp.calc_types.enums import CalcType, TaskType from emmet.core.vasp.calc_types import RunType, calc_type, run_type, task_type -from emmet.core.vasp.validation.check_incar import _check_incar -from emmet.core.vasp.validation.check_common_errors import _check_common_errors -from emmet.core.vasp.validation.check_kpoints_kspacing import _check_kpoints_kspacing +from pymatgen.io.validation.check_incar import _check_incar +from pymatgen.io.validation.check_common_errors import _check_common_errors +from pymatgen.io.validation.check_kpoints_kspacing import _check_kpoints_kspacing SETTINGS = EmmetSettings() +_pmg_potcar_summary_stats = loadfn(resource_filename("pymatgen.io.vasp", "potcar_summary_stats.json.gz")) - -## TODO: check for surface/slab calculations. Especially necessary for external calcs. -## TODO: implement check to make sure calcs are within some amount (e.g. 250 meV) of the convex hull in the MPDB - +# TODO: check for surface/slab calculations. Especially necessary for external calcs. +# TODO: implement check to make sure calcs are within some amount (e.g. 250 meV) of the convex hull in the MPDB class ValidationDoc(EmmetBaseModel): @@ -45,12 +48,9 @@ class ValidationDoc(EmmetBaseModel): default_factory=datetime.utcnow, ) - reasons: List[str] = Field( - None, description="List of deprecation tags detailing why this task isn't valid" - ) - - warnings: List[str] = Field( - [], description="List of potential warnings about this calculation") + reasons: List[str] = Field(None, description="List of deprecation tags detailing why this task isn't valid") + + warnings: List[str] = Field([], description="List of potential warnings about this calculation") # data: Dict = Field( # description="Dictionary of data used to perform validation." @@ -65,11 +65,11 @@ def from_task_doc( cls, task_doc: TaskDocument, kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE, - kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE, - input_sets: Dict[str, PyObject] = SETTINGS.VASP_DEFAULT_INPUT_SETS, - LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, ### Unused + kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE, # TODO Usused currently,needed? + input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS, + LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, # TODO Usused currently,needed? max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT, - potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None, + potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats, ) -> "ValidationDoc": """ Determines if a calculation is valid based on expected input parameters from a pymatgen inputset @@ -83,149 +83,155 @@ def from_task_doc( LDAU_fields: LDAU fields to check for consistency max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the initial equillibriation period. Note this is in eV per atom. - potcar_hashes: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value. - """ - + potcar_summary_stats: Dictionary of potcar summary data. Mapping is calculation type -> potcar symbol -> summary data. + """ + bandgap = task_doc.output.bandgap calcs_reversed = task_doc.calcs_reversed - calcs_reversed = [calc.dict() for calc in calcs_reversed] # convert to dictionary to use built-in `.get()` method ################################################### - - parameters = task_doc.input.parameters # used for most input tag checks (as this is more reliable than examining the INCAR file directly in most cases) - incar = calcs_reversed[0]['input']['incar'] # used for INCAR tag checks where you need to look at the actual INCAR (semi-rare) - if task_doc.orig_inputs == None: + calcs_reversed = [ + calc.dict() for calc in calcs_reversed + ] # convert to dictionary to use built-in `.get()` method ################################################### + + parameters = ( + task_doc.input.parameters + ) # used for most input tag checks (as this is more reliable than examining the INCAR file directly in most cases) + incar = calcs_reversed[0]["input"][ + "incar" + ] # used for INCAR tag checks where you need to look at the actual INCAR (semi-rare) + if task_doc.orig_inputs is None: orig_inputs = {} else: orig_inputs = task_doc.orig_inputs.dict() - if orig_inputs["kpoints"] != None: + if orig_inputs["kpoints"] is not None: orig_inputs["kpoints"] = orig_inputs["kpoints"].as_dict() - - - ionic_steps = calcs_reversed[0]['output']['ionic_steps'] + + ionic_steps = calcs_reversed[0]["output"]["ionic_steps"] nionic_steps = len(ionic_steps) - - try: - potcar = Potcar.from_file(zpath("POTCAR")) ############################################################################# - except: + + potcar_path = zpath("POTCAR") + # for potcar_spec in task_doc.calcs_reversed[0].input.potcar_spec: + if Path.is_file(Path(potcar_path)): + potcar = Potcar.from_file(potcar_path) + else: potcar = None - - + calc_type = _get_calc_type(calcs_reversed, orig_inputs) task_type = _get_task_type(calcs_reversed, orig_inputs) - run_type = _get_run_type(calcs_reversed) - - num_ionic_steps_to_avg_drift_over = 3 ########################################################## maybe move to settings - fft_grid_tolerance = 0.9 ####################################################################### maybe move to settings - allow_kpoint_shifts = False #################################################################### maybe move to settings - allow_explicit_kpoint_mesh = "auto" # or True or False ######################################### maybe move to settings + run_type = _get_run_type(calcs_reversed) + + # maybe move following to settings --> + num_ionic_steps_to_avg_drift_over = 3 + fft_grid_tolerance = 0.9 + allow_kpoint_shifts = False + allow_explicit_kpoint_mesh = "auto" # could also be True or False + # <-- + if allow_explicit_kpoint_mesh == "auto": if "NSCF" in calc_type.name: allow_explicit_kpoint_mesh = True else: allow_explicit_kpoint_mesh = False - - chemsys = task_doc.chemsys - - vasp_version = calcs_reversed[0]['vasp_version'] + + task_doc.chemsys + + vasp_version = calcs_reversed[0]["vasp_version"] vasp_version = vasp_version.split(".") vasp_version = vasp_version[0] + "." + vasp_version[1] + "." + vasp_version[2] vasp_major_version = int(vasp_version.split(".")[0]) vasp_minor_version = int(vasp_version.split(".")[1]) vasp_patch_version = int(vasp_version.split(".")[2]) - if calcs_reversed[0].get("input", {}).get("structure", None): structure = calcs_reversed[0]["input"]["structure"] else: structure = task_doc.input.structure or task_doc.output.structure - reasons = [] # data = {} # type: ignore warnings: List[str] = [] - - + if run_type not in ["GGA", "GGA+U", "PBE", "PBE+U", "R2SCAN"]: reasons.append(f"FUNCTIONAL --> Functional {run_type} not currently accepted.") - + try: - valid_input_set = _get_input_set( - run_type, task_type, calc_type, structure, input_sets, bandgap - ) + valid_input_set = _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandgap) except Exception as e: - reasons.append("NO MATCHING MP INPUT SET --> no matching MP input set was found. If you believe this to be a mistake, please create a GitHub issue.") + reasons.append( + "NO MATCHING MP INPUT SET --> no matching MP input set was found. If you believe this to be a mistake, please create a GitHub issue." + ) valid_input_set = None print(f"Error while finding MP input set: {e}.") - - - if parameters == {} or parameters == None: - reasons.append("CAN NOT PROPERLY PARSE CALCULATION --> Issue parsing input parameters from the vasprun.xml file.") + if parameters == {} or parameters is None: + reasons.append( + "CAN NOT PROPERLY PARSE CALCULATION --> Issue parsing input parameters from the vasprun.xml file." + ) elif valid_input_set: + # Get subset of POTCAR summary stats to validate calculation + allowed_potcar_stats = {} + for valid_potcar in valid_input_set.potcar: + titel_no_spc = valid_potcar.TITEL.replace(" ", "") + allowed_potcar_stats[titel_no_spc] = potcar_summary_stats[ + valid_input_set._config_dict["POTCAR_FUNCTIONAL"] + ][titel_no_spc].copy() + + if potcar_summary_stats: + _check_potcars(reasons, warnings, potcar, calc_type, allowed_potcar_stats) - if potcar_hashes: - _check_potcars(reasons, warnings, calcs_reversed, calc_type, potcar_hashes) - - ## TODO: check for surface/slab calculations!!!!!! + # TODO: check for surface/slab calculations!!!!!! reasons = _check_vasp_version( - reasons, - vasp_version, - vasp_major_version, - vasp_minor_version, - vasp_patch_version, - parameters, - incar - ) - + reasons, vasp_version, vasp_major_version, vasp_minor_version, vasp_patch_version, parameters, incar + ) + reasons = _check_common_errors( - reasons, - warnings, - task_doc, + reasons, + warnings, + task_doc, calcs_reversed, - parameters, - incar, - structure, - max_allowed_scf_gradient, + parameters, + incar, + structure, + max_allowed_scf_gradient, ionic_steps, num_ionic_steps_to_avg_drift_over, ) - + reasons = _check_kpoints_kspacing( reasons, task_type, parameters, - kpts_tolerance, - valid_input_set, - calcs_reversed, - allow_explicit_kpoint_mesh, + kpts_tolerance, + valid_input_set, + calcs_reversed, + allow_explicit_kpoint_mesh, allow_kpoint_shifts, structure, ) - + reasons = _check_incar( reasons, warnings, - valid_input_set, - structure, - task_doc, + valid_input_set, + structure, + task_doc, calcs_reversed, ionic_steps, - nionic_steps, - parameters, - incar, - potcar, - vasp_major_version, - vasp_minor_version, + nionic_steps, + parameters, + incar, + potcar, + vasp_major_version, + vasp_minor_version, vasp_patch_version, task_type, fft_grid_tolerance, ) - # Unsure about what might be a better way to do this... - task_id = task_doc.task_id if task_doc.task_id != None else -1 - + task_id = task_doc.task_id if (task_doc.task_id is not None) else -1 + validation_doc = ValidationDoc( task_id=task_id, calc_type=calc_type, @@ -245,10 +251,10 @@ def from_directory( dir_name: Union[Path, str], kpts_tolerance: float = SETTINGS.VASP_KPTS_TOLERANCE, kspacing_tolerance: float = SETTINGS.VASP_KSPACING_TOLERANCE, - input_sets: Dict[str, PyObject] = SETTINGS.VASP_DEFAULT_INPUT_SETS, - LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, ### Unused + input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS, + LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS, # TODO Unused max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT, - potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None, + potcar_summary_stats: Dict[str, ImportString] = _pmg_potcar_summary_stats, ) -> "ValidationDoc": """ Determines if a calculation is valid based on expected input parameters from a pymatgen inputset @@ -262,21 +268,22 @@ def from_directory( LDAU_fields: LDAU fields to check for consistency max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the initial equillibriation period. Note this is in eV per atom. - potcar_hashes: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value. + potcar_summary_stats: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value. """ - try: + try: task_doc = TaskDoc.from_directory( - dir_name = dir_name, - volumetric_files = (), + dir_name=dir_name, + volumetric_files=(), ) + validation_doc = ValidationDoc.from_task_doc( - task_doc = task_doc, - kpts_tolerance = kpts_tolerance, - kspacing_tolerance = kspacing_tolerance, - input_sets = input_sets, - LDAU_fields = LDAU_fields, ### Unused - max_allowed_scf_gradient = max_allowed_scf_gradient, - potcar_hashes = potcar_hashes, + task_doc=task_doc, + kpts_tolerance=kpts_tolerance, + kspacing_tolerance=kspacing_tolerance, + input_sets=input_sets, + LDAU_fields=LDAU_fields, # TODO Unused + max_allowed_scf_gradient=max_allowed_scf_gradient, + potcar_summary_stats=potcar_summary_stats, ) return validation_doc @@ -285,23 +292,22 @@ def from_directory( if "no vasp files found" in str(e).lower(): raise Exception(f"NO CALCULATION FOUND --> {dir_name} is not a VASP calculation directory.") else: - raise Exception(f"CAN NOT PARSE CALCULATION --> Issue parsing results. This often means your calculation did not complete. The error stack reads: \n {e}") - - + raise Exception( + f"CAN NOT PARSE CALCULATION --> Issue parsing results. This often means your calculation did not complete. The error stack reads: \n {e}" + ) def _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandgap): - - ## TODO: For every input set key in emmet.core.settings.VASP_DEFAULT_INPUT_SETS, - ## with "GGA" in it, create an equivalent dictionary item with "PBE" instead. - ## In the mean time, the below workaround is used. + # TODO: For every input set key in emmet.core.settings.VASP_DEFAULT_INPUT_SETS, + # with "GGA" in it, create an equivalent dictionary item with "PBE" instead. + # In the mean time, the below workaround is used. gga_pbe_structure_opt_calc_types = [ - CalcType.GGA_Structure_Optimization, - CalcType.GGA_U_Structure_Optimization, - CalcType.PBE_Structure_Optimization, + CalcType.GGA_Structure_Optimization, + CalcType.GGA_U_Structure_Optimization, + CalcType.PBE_Structure_Optimization, CalcType.PBE_U_Structure_Optimization, ] - + # Ensure inputsets get proper additional input values if "SCAN" in run_type.value: valid_input_set: VaspInputSet = input_sets[str(calc_type)](structure, bandgap=bandgap) # type: ignore @@ -310,14 +316,16 @@ def _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandga valid_input_set = input_sets[str(calc_type)](structure, mode="uniform") elif task_type == TaskType.NSCF_Line: valid_input_set = input_sets[str(calc_type)](structure, mode="line") - + elif "dielectric" in str(task_type).lower(): - valid_input_set = input_sets[str(calc_type)](structure, lepsilon = True) + valid_input_set = input_sets[str(calc_type)](structure, lepsilon=True) elif task_type == TaskType.NMR_Electric_Field_Gradient: valid_input_set = input_sets[str(calc_type)](structure, mode="efg") elif task_type == TaskType.NMR_Nuclear_Shielding: - valid_input_set = input_sets[str(calc_type)](structure, mode="cs") # Is this correct? Someone more knowledgeable either fix this or remove this comment if it is correct please! + valid_input_set = input_sets[str(calc_type)]( + structure, mode="cs" + ) # Is this correct? Someone more knowledgeable either fix this or remove this comment if it is correct please! elif calc_type in gga_pbe_structure_opt_calc_types: if bandgap == 0: @@ -327,29 +335,58 @@ def _get_input_set(run_type, task_type, calc_type, structure, input_sets, bandga else: valid_input_set = input_sets[str(calc_type)](structure) - + return valid_input_set -def _check_potcars(reasons, warnings, calcs_reversed, calc_type, valid_potcar_hashes): +def _check_potcars( + reasons, + warnings, + potcars: Potcar, + calc_type, + valid_potcar_summary_stats: Dict[str, ImportString], + data_match_tol: float = 1e-6, +): """ Checks to make sure the POTCAR is equivalent to the correct POTCAR from the pymatgen input set. """ - - ### TODO: Update potcar checks. Whether using hashing or not! - try: - potcar_details = calcs_reversed[0]["input"]["potcar_spec"] + # TODO: Update potcar checks. Whether using hashing or not! + # AK - added summary stats check, removed hash check - incorrect_potcars = [] - for entry in potcar_details: - symbol = entry["titel"].split(" ")[1] - hash = valid_potcar_hashes[str(calc_type)].get(symbol, None) - - if not hash or hash != entry["hash"]: - incorrect_potcars.append(symbol) + if potcars is None: + reasons.append( + "PSEUDOPOTENTIALS --> Missing POTCAR files. " + "Alternatively, our potcar checker may have an issue--please create a GitHub issue if you " + "know your POTCAR exists and can be read by Pymatgen." + ) + return + try: + incorrect_potcars = [] + for potcar in potcars: + reference_summary_stats = valid_potcar_summary_stats.get(potcar.TITEL.replace(" ", ""), []) + + key_match = False + data_match = False + for ref_psp in reference_summary_stats: + key_match = all( + set(ref_psp["keywords"][key]) == set(potcar._summary_stats["keywords"][key]) # type: ignore + for key in ["header", "data"] + ) + + data_diff = [ + abs(ref_psp["stats"][key][stat] - potcar._summary_stats["stats"][key][stat]) # type: ignore + for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] + for key in ["header", "data"] + ] + data_match = all(np.array(data_diff) < data_match_tol) + if key_match and data_match: + break + + if (not key_match) or (not data_match): + incorrect_potcars.append(potcar.symbol) if len(incorrect_potcars) > 0: # format error string @@ -359,27 +396,36 @@ def _check_potcars(reasons, warnings, calcs_reversed, calc_type, valid_potcar_ha elif len(incorrect_potcars) >= 3: incorrect_potcars = ", ".join(incorrect_potcars[:-1]) + "," + f" and {incorrect_potcars[-1]}" - reasons.append(f"PSEUDOPOTENTIALS --> Incorrect POTCAR files were used for {incorrect_potcars}. " + reasons.append( + f"PSEUDOPOTENTIALS --> Incorrect POTCAR files were used for {incorrect_potcars}. " "Alternatively, our potcar checker may have an issue--please create a GitHub issue if you " "believe the POTCARs used are correct." ) except KeyError: # Assume it is an old calculation without potcar_spec data and treat it as failing the POTCAR check - reasons.append("Old version of Emmet --> potcar_spec is not saved in TaskDoc and cannot be validated. Hence, it is marked as invalid") + reasons.append( + "Old version of Emmet --> potcar_spec is not saved in TaskDoc and cannot be validated. Hence, it is marked as invalid" + ) -def _check_vasp_version(reasons, vasp_version, vasp_major_version, vasp_minor_version, vasp_patch_version, parameters, incar): +def _check_vasp_version( + reasons, vasp_version, vasp_major_version, vasp_minor_version, vasp_patch_version, parameters, incar +): if vasp_major_version == 6: pass elif (vasp_major_version == 5) and ("METAGGA" in incar.keys()) and (parameters.get("ISPIN", 1) == 2): - reasons.append("POTENTIAL BUG --> We believe that there may be a bug with spin-polarized calculations for METAGGAs " \ - "in some versions of VASP 5. Please create a new GitHub issue if you believe this " \ - "is not the case and we will consider changing this check!") + reasons.append( + "POTENTIAL BUG --> We believe that there may be a bug with spin-polarized calculations for METAGGAs " + "in some versions of VASP 5. Please create a new GitHub issue if you believe this " + "is not the case and we will consider changing this check!" + ) elif (vasp_major_version == 5) and (vasp_minor_version == 4) and (vasp_patch_version == 4): pass else: - reasons.append(f"VASP VERSION --> This calculation is using VASP version {vasp_version}, but we only allow versions 5.4.4 and >=6.0.0 (as of July 2023).") + reasons.append( + f"VASP VERSION --> This calculation is using VASP version {vasp_version}, but we only allow versions 5.4.4 and >=6.0.0 (as of July 2023)." + ) return reasons @@ -390,20 +436,12 @@ def _get_run_type(calcs_reversed) -> RunType: def _get_task_type(calcs_reversed, orig_inputs): - inputs = ( - calcs_reversed[0].get("input", {}) - if len(calcs_reversed) > 0 - else orig_inputs - ) + inputs = calcs_reversed[0].get("input", {}) if len(calcs_reversed) > 0 else orig_inputs return task_type(inputs) def _get_calc_type(calcs_reversed, orig_inputs): - inputs = ( - calcs_reversed[0].get("input", {}) - if len(calcs_reversed) > 0 - else orig_inputs - ) + inputs = calcs_reversed[0].get("input", {}) if len(calcs_reversed) > 0 else orig_inputs params = calcs_reversed[0].get("input", {}).get("parameters", {}) incar = calcs_reversed[0].get("input", {}).get("incar", {}) diff --git a/setup.py b/setup.py index d363342..957ab3b 100644 --- a/setup.py +++ b/setup.py @@ -12,18 +12,18 @@ setup( - name="pymatgen-analysis-myaddon", - packages=find_namespace_packages(include=["pymatgen.analysis.*"]), + name="pymatgen-io-validation", + packages=find_namespace_packages(include=["pymatgen.io.*"]), version="0.0.1", install_requires=["pymatgen>=2022.0.3"], extras_require={}, package_data={}, - author="materials virtual lab", - author_email="ongsp@eng.ucsd.edu", - maintainer="materials virtual lab", - url="https://github.com/materialsproject/pymatgen-addon-template", + author="Matthew Kuner, Janosh Riebesell, Jason Munro, Aaron Kaplan", + author_email="mckuner@lbl.gov", + maintainer="Matthew Kuner", + url="https://github.com/matthewkuner/pymatgen-io-validation", license="BSD", - description="A template for creating add-ons for pymatgen.", + description="A comprehensive I/O validator for electronic structure calculations", long_description=desc, keywords=["pymatgen"], classifiers=[