diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f67b663..1ec5b0d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,40 +6,40 @@ ci:
 repos:
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 23.3.0
     hooks:
       - id: black-jupyter
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
 
   - repo: https://github.com/PyCQA/flake8
-    rev: 5.0.4
+    rev: 6.0.0
     hooks:
       - id: flake8
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.2.0
+    rev: v3.4.0
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
 
   - repo: https://github.com/PyCQA/autoflake
-    rev: v1.7.7
+    rev: v2.1.1
     hooks:
       - id: autoflake
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.982
+    rev: v1.3.0
     hooks:
       - id: mypy
 
   - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.5.3
+    rev: 1.7.0
     hooks:
       - id: nbqa-flake8
         args: [--ignore=E402]
diff --git a/README.md b/README.md
index 5de8771..185ae76 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,7 @@
 
 # M3GNet
 
-M3GNet is a new materials graph neural network architecture that incorporates 3-body interactions. A key difference
-with prior materials graph implementations such as [MEGNet](https://github.com/materialsvirtuallab/megnet) is the
-addition of the coordinates for atoms and the 3×3 lattice matrix in crystals, which are necessary for obtaining
-tensorial quantities such as forces and stresses via auto-differentiation.
+[M3GNet](https://www.nature.com/articles/s43588-022-00349-3) is a new materials graph neural network architecture that incorporates 3-body interactions. A key difference with prior materials graph implementations such as [MEGNet](https://github.com/materialsvirtuallab/megnet) is the addition of the coordinates for atoms and the 3×3 lattice matrix in crystals, which are necessary for obtaining tensorial quantities such as forces and stresses via auto-differentiation.
 
 As a framework, M3GNet has diverse applications, including:
 
@@ -325,6 +322,56 @@ VASP calculations (default unit is kBar) should be multiplied by -0.1 to work di
 We use validation dataset to select the stopping epoch number. The dataset has similar format as the training dataset.
 
+If you want to use the official MPF dataset shared above, the following code examples show how to load the dataset and train your own model.
+
+First, load the MPF dataset, which consists of `block_0.p` and `block_1.p`:
+
+```python
+import pickle as pk
+
+import pandas as pd
+import pymatgen  # pymatgen must be installed to unpickle the Structure objects
+
+print('loading the MPF dataset 2021')
+with open('/yourpath/block_0.p', 'rb') as f:
+    data = pk.load(f)
+
+with open('/yourpath/block_1.p', 'rb') as f:
+    data2 = pk.load(f)
+print('MPF dataset 2021 loaded')
+
+data.update(data2)
+df = pd.DataFrame.from_dict(data)
+```
+
+Then, split the data by material id and convert each total energy to energy per atom (eV/atom):
+
+```python
+import numpy as np
+
+# get_id_train_val_test should return three disjoint index lists (train/val/test)
+# covering range(total_size); use your preferred splitting helper here.
+id_train, id_val, id_test = get_id_train_val_test(
+    total_size=len(data),
+    split_seed=42,
+    train_ratio=0.90,
+    val_ratio=0.05,
+    test_ratio=0.05,
+    keep_data_order=False,
+)
+
+dataset_train, dataset_val, dataset_test = [], [], []
+
+for cnt, (mp_id, item) in enumerate(df.items()):  # each column of df is one material id
+    if cnt in id_train:
+        target = dataset_train
+    elif cnt in id_val:
+        target = dataset_val
+    elif cnt in id_test:
+        target = dataset_test
+    else:
+        continue
+    for iid in range(len(item['energy'])):
+        n_atoms = len(item['force'][iid])
+        target.append(
+            {
+                "atoms": item['structure'][iid],
+                "energy": item['energy'][iid] / n_atoms,  # eV/atom
+                "force": np.array(item['force'][iid]),
+            }
+        )
+
+print('using %d samples to train, %d samples to evaluate, and %d samples to test'
+      % (len(dataset_train), len(dataset_val), len(dataset_test)))
+```
+
+After this, you can use `dataset_train` for training, `dataset_val` for validation, and `dataset_test` for testing.
+
 A minimal example of model training is shown below.
 
 ```python
@@ -376,10 +423,7 @@ The training data used to develop the universal M3GNet IAP is `MPF.2021.2.8` and
 
 Please cite the following work:
 
-```txt
-Chi Chen, and Shyue Ping Ong. "A Universal Graph Deep Learning Interatomic Potential for the Periodic Table."
-arXiv preprint [arXiv:2202.02450](https://arxiv.org/abs/2202.02450) (2022).
-```
+> Chen, C., Ong, S.P. A universal graph deep learning interatomic potential for the periodic table. Nat Comput Sci 2, 718–728 (2022). https://doi.org/10.1038/s43588-022-00349-3.
 
 # Acknowledgements
diff --git a/examples/Cubic Crystal Test.ipynb b/examples/Cubic Crystal Test.ipynb
index 25f0e29..2a9f025 100644
--- a/examples/Cubic Crystal Test.ipynb
+++ b/examples/Cubic Crystal Test.ipynb
@@ -9,9 +9,7 @@
    "\n",
    "This notebook is written to demonstrate the use of M3GNet as a structure relaxer as well as to provide more comprehensive benchmarks for cubic crystals based on exp data on Wikipedia and MP DFT data. This benchmark is limited to cubic crystals for ease of comparison since there is only one lattice parameter. \n",
    "\n",
-    "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first.\n",
-    "\n",
-    "> Note: Because Google Colab's python version is sometimes rather old (v3.7 at the time of writing this notebook (Jul 2022), you will need to install m3gnet without dependencies and install the dependencies manually."
+    "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first."
   ]
  },
  {
@@ -21,9 +19,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# !pip install --no-deps m3gnet\n",
-    "# !pip install pymatgen ase\n",
-    "# !pip install lxml"
+    "# !pip install m3gnet"
   ]
  },
 {
diff --git a/examples/Relaxation of LiFePO4.ipynb b/examples/Relaxation of LiFePO4.ipynb
index 56bfc69..c55b50b 100644
--- a/examples/Relaxation of LiFePO4.ipynb
+++ b/examples/Relaxation of LiFePO4.ipynb
@@ -9,9 +9,7 @@
    "\n",
    "This notebook shows an example of relaxing the LiFePO4 crystal.\n",
    "\n",
-    "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first.\n",
-    "\n",
-    "> Note: Because Google Colab's python version is sometimes rather old (v3.7 at the time of writing this notebook (Jul 2022), you will need to install m3gnet without dependencies and install the dependencies manually."
+    "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first."
   ]
  },
  {
@@ -21,9 +19,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# !pip install --no-deps m3gnet\n",
-    "# !pip install pymatgen ase\n",
-    "# !pip install lxml"
+    "# !pip install m3gnet"
   ]
  },
 {
diff --git a/m3gnet/graph/_compute.py b/m3gnet/graph/_compute.py
index e206dae..8f335b3 100644
--- a/m3gnet/graph/_compute.py
+++ b/m3gnet/graph/_compute.py
@@ -156,7 +156,6 @@ def _list_include_threebody_indices(graph: list, threebody_cutoff: float | None
     ij_reverse_map = None
     original_index = np.arange(n_bond)
     if bond_atom_indices.shape[0] > 0:
-
         bond_indices, n_triple_ij, n_triple_i, n_triple_s = _compute_3body(
             bond_atom_indices,
             graph[Index.N_ATOMS],
diff --git a/m3gnet/models/tests/test_model.py b/m3gnet/models/tests/test_model.py
index 73cbe84..0fd380c 100644
--- a/m3gnet/models/tests/test_model.py
+++ b/m3gnet/models/tests/test_model.py
@@ -17,9 +17,9 @@ def setUpClass(cls) -> None:
         cls.mol = Molecule(["C", "O"], [[0, 0, 0], [1.5, 0, 0]])
         cls.structure = Structure(Lattice.cubic(3.30), ["Mo", "Mo"], [[0, 0, 0], [0.5, 0.5, 0.5]])
         cls.atoms = Atoms(["Mo", "Mo"], [[0, 0, 0], [0.5, 0.5, 0.5]], cell=np.eye(3) * 3.30, pbc=True)
+        cls.single_atoms = Structure(Lattice.cubic(6.0), ["Mo"], [[0, 0, 0]])
 
     def test_m3gnet(self):
-
         g = self.model.graph_converter(self.mol)
         val = self.model.predict_structure(self.mol).numpy().ravel()
@@ -46,6 +46,12 @@ def test_potential(self):
             )
         )
 
+    def test_single_atoms(self):
+        self.potential.get_efs(self.structure)
+        e, f, s = self.potential.get_efs(self.single_atoms)
+        shapes = f.numpy().shape
+        self.assertTupleEqual(shapes, (1, 3))
+
     def test_relaxer(self):
         relaxer = Relaxer()  # this loads the default model
diff --git a/mypy.ini b/mypy.ini
index 7faa1e0..0e4c3cc 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,5 +1,5 @@
 [mypy]
 exclude = ["pretrained"]
 ignore_missing_imports = True
-follow_imports = skip
-follow_imports_for_stubs = True
+#follow_imports = skip
+#follow_imports_for_stubs = True
diff --git a/requirements.txt b/requirements.txt
index 9ca3243..03cfa84 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 pymatgen==2022.11.7
-tensorflow==2.11.0
+tensorflow==2.11.1
 ase==3.22.1
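The MPF loading example added to the README above calls `get_id_train_val_test` without defining or importing it. A minimal sketch of such a helper, assuming only the signature used in that call (the name, defaults, and shuffling behaviour here are illustrative assumptions, not the split used to produce the published MPF models), could look like this:

```python
import random


def get_id_train_val_test(
    total_size: int,
    split_seed: int = 42,
    train_ratio: float = 0.90,
    val_ratio: float = 0.05,
    test_ratio: float = 0.05,
    keep_data_order: bool = False,
):
    """Return disjoint train/val/test index lists covering range(total_size)."""
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6
    ids = list(range(total_size))
    if not keep_data_order:
        random.Random(split_seed).shuffle(ids)  # deterministic shuffle
    n_train = int(train_ratio * total_size)
    n_val = int(val_ratio * total_size)
    return ids[:n_train], ids[n_train : n_train + n_val], ids[n_train + n_val :]
```

Any function that returns three disjoint index lists covering `range(total_size)` would work equally well in that snippet.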