From 5ba60a2b18a3f3b1a29cc7633ad9ef88b1552d9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 13:45:35 +0000 Subject: [PATCH 01/19] Bump tqdm from 4.66.2 to 4.66.4 Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.66.2 to 4.66.4. - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.2...v4.66.4) --- updated-dependencies: - dependency-name: tqdm dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index 546f4ff29..a1143a81f 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -1,7 +1,7 @@ numpy==1.26.4 ruamel.yaml==0.18.6 msgpack==1.0.8 -tqdm==4.66.2 +tqdm==4.66.4 pymongo==4.6.3 pandas==2.2.2 orjson==3.10.0 From 1669ab734132be91aa153054c61a1e02a129b435 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 21:20:56 +0000 Subject: [PATCH 02/19] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.3.5 → v0.4.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.3.5...v0.4.3) - [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v4.6.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v4.6.0) - [github.com/pre-commit/mirrors-mypy: v1.9.0 → v1.10.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.9.0...v1.10.0) - [github.com/MarcoGorelli/cython-lint: v0.16.0 → v0.16.2](https://github.com/MarcoGorelli/cython-lint/compare/v0.16.0...v0.16.2) - [github.com/igorshubovych/markdownlint-cli: v0.39.0 → v0.40.0](https://github.com/igorshubovych/markdownlint-cli/compare/v0.39.0...v0.40.0) --- .pre-commit-config.yaml | 
10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4d3d21fc3..7aa49fec8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,14 +8,14 @@ ci: repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.5 + rev: v0.4.3 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -24,7 +24,7 @@ repos: exclude: ^tests - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.9.0 + rev: v1.10.0 hooks: - id: mypy @@ -37,7 +37,7 @@ repos: additional_dependencies: [tomli] # needed to read pyproject.toml below py3.11 - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.16.0 + rev: v0.16.2 hooks: - id: cython-lint args: [--no-pycodestyle] @@ -49,7 +49,7 @@ repos: - id: blacken-docs - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.39.0 + rev: v0.40.0 hooks: - id: markdownlint # MD013: line too long From 4083c0234df04dae9f8c96263adf78302f4b86ab Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 13:29:44 +0000 Subject: [PATCH 03/19] Bump pymongo from 4.6.3 to 4.7.2 Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.6.3 to 4.7.2. - [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/4.7.2/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.6.3...4.7.2) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements-optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index 546f4ff29..72e6a2d80 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -2,7 +2,7 @@ numpy==1.26.4 ruamel.yaml==0.18.6 msgpack==1.0.8 tqdm==4.66.2 -pymongo==4.6.3 +pymongo==4.7.2 pandas==2.2.2 orjson==3.10.0 types-orjson==3.6.2 From 36687d93aeeff8ee593e1e51728a6c5bebcab413 Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Sun, 12 May 2024 08:58:13 -0400 Subject: [PATCH 04/19] Add save and load support for monty.json (second attempt) --- monty/json.py | 240 ++++++++++++++++++++++++++++++++++++++++----- tests/test_json.py | 114 ++++++++++----------- 2 files changed, 275 insertions(+), 79 deletions(-) diff --git a/monty/json.py b/monty/json.py index 7513fc247..b03e8140b 100644 --- a/monty/json.py +++ b/monty/json.py @@ -8,6 +8,7 @@ import json import os import pathlib +import pickle import traceback import types from collections import OrderedDict, defaultdict @@ -16,7 +17,8 @@ from importlib import import_module from inspect import getfullargspec from pathlib import Path -from uuid import UUID +from typing import Any, Dict +from uuid import UUID, uuid4 try: import numpy as np @@ -94,8 +96,14 @@ def _check_type(obj, type_str) -> bool: Checks whether obj is an instance of the type defined by type_str. This removes the need to explicitly import type_str. Handles subclasses like isinstance does. E.g.:: - class A: pass - class B(A): pass + class A: + pass + + + class B(A): + pass + + a, b = A(), B() assert isinstance(a, A) assert isinstance(b, B) @@ -166,7 +174,10 @@ def as_dict(self) -> dict: """ A JSON serializable dict representation of an object. 
""" - d = {"@module": self.__class__.__module__, "@class": self.__class__.__name__} + d = { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + } try: parent_module = self.__class__.__module__.split(".", maxsplit=1)[0] @@ -357,6 +368,168 @@ def __modify_schema__(cls, field_schema): custom_schema = cls._generic_json_schema() field_schema.update(custom_schema) + def _get_partial_json(self, json_kwargs, pickle_kwargs): + """Used with the save method. Gets the json representation of a class + with the unserializable components sustituted for hash references.""" + + if pickle_kwargs is None: + pickle_kwargs = {} + if json_kwargs is None: + json_kwargs = {} + encoder = MontyEncoder(allow_unserializable_objects=True, **json_kwargs) + encoded = encoder.encode(self) + return encoder, encoded, json_kwargs, pickle_kwargs + + def get_partial_json(self, json_kwargs=None, pickle_kwargs=None): + """ + Parameters + ---------- + json_kwargs : dict + Keyword arguments to pass to the serializer. + pickle_kwargs : dict + Keyword arguments to pass to pickle.dump. + + Returns + ------- + str, dict + The json encoding of the class and the name-object map if one is + required, otherwise None. + """ + + encoder, encoded, json_kwargs, pickle_kwargs = self._get_partial_json( + json_kwargs, pickle_kwargs + ) + name_object_map = encoder._name_object_map + if len(name_object_map) == 0: + name_object_map = None + return encoded, name_object_map, json_kwargs, pickle_kwargs + + def save( + self, + json_path, + mkdir=True, + json_kwargs=None, + pickle_kwargs=None, + strict=True, + ): + """Utility that uses the standard tools of MSONable to convert the + class to json format, but also save it to disk. In addition, this + method intelligently uses pickle to individually pickle class objects + that are not serializable, saving them separately. 
This maximizes the + readability of the saved class information while allowing _any_ + class to be at least partially serializable to disk. + + For a fully MSONable class, only a class.json file will be saved to + the location {save_dir}/class.json. For a partially MSONable class, + additional information will be saved to the save directory at + {save_dir}. This includes a pickled object for each attribute that + cannot be serialized. + + Parameters + ---------- + json_path : os.PathLike + The file to which to save the json object. A pickled object of + the same name but different extension might also be saved if the + class is not entirely MSONable. + mkdir : bool + If True, makes the provided directory, including all parent + directories. + json_kwargs : dict + Keyword arguments to pass to the serializer. + pickle_kwargs : dict + Keyword arguments to pass to pickle.dump. + strict : bool + If True, will not allow you to overwrite existing files. + """ + + json_path = Path(json_path) + save_dir = json_path.parent + + encoded, name_object_map, json_kwargs, pickle_kwargs = self.get_partial_json( + json_kwargs, pickle_kwargs + ) + + if mkdir: + save_dir.mkdir(exist_ok=True, parents=True) + + # Define the pickle path + pickle_path = save_dir / f"{json_path.stem}.pkl" + + # Check if the files exist and the strict parameter is True + if strict and json_path.exists(): + raise FileExistsError(f"strict is true and file {json_path} exists") + if strict and pickle_path.exists(): + raise FileExistsError(f"strict is true and file {pickle_path} exists") + + # Save the json file + with open(json_path, "w") as outfile: + outfile.write(encoded) + + # Save the pickle file if we have anything to save from the name_object_map + if name_object_map is not None: + with open(pickle_path, "wb") as f: + pickle.dump(name_object_map, f, **pickle_kwargs) + + @classmethod + def load(cls, file_path): + """Loads a class from a provided {load_dir}/class.json and + {load_dir}/class.pkl file (if necessary). 
+ + Parameters + ---------- + file_path : os.PathLike + The json file to load from. + + Returns + ------- + MSONable + An instance of the class being reloaded. + """ + + d = _d_from_path(file_path) + return cls.from_dict(d) + + +def load_anything(path): + """Loads a json file into a class, rehydrating from MSONable.""" + + d = _d_from_path(path) + module = d["@module"] + klass = d["@class"] + signature = f"{module}:{klass}" + module, function = signature.split(":") + module = import_module(module) + klass = getattr(module, function) + return klass.from_dict(d) + + +def _d_from_path(file_path): + json_path = Path(file_path) + save_dir = json_path.parent + pickle_path = save_dir / f"{json_path.stem}.pkl" + + with open(json_path, "r") as infile: + d = json.loads(infile.read()) + + if pickle_path.exists(): + name_object_map = pickle.load(open(pickle_path, "rb")) + d = _recursive_name_object_map_replacement(d, name_object_map) + return d + + +def _recursive_name_object_map_replacement(d, name_object_map): + if isinstance(d, dict): + if "@object_reference" in d: + name = d["@object_reference"] + return name_object_map.pop(name) + return { + k: _recursive_name_object_map_replacement(v, name_object_map) + for k, v in d.items() + } + elif isinstance(d, list): + return [_recursive_name_object_map_replacement(x, name_object_map) for x in d] + return d + class MontyEncoder(json.JSONEncoder): """ @@ -367,6 +540,18 @@ class MontyEncoder(json.JSONEncoder): json.dumps(object, cls=MontyEncoder) """ + def __init__(self, *args, allow_unserializable_objects=False, **kwargs): + super().__init__(*args, **kwargs) + self._allow_unserializable_objects = allow_unserializable_objects + self._name_object_map: Dict[str, Any] = {} + self._index = 0 + + def _update_name_object_map(self, o): + name = f"{self._index:012}-{str(uuid4())}" + self._index += 1 + self._name_object_map[name] = o + return {"@object_reference": name} + def default(self, o) -> dict: # pylint: disable=E0202 """ Overriding 
default method for JSON encoding. This method does two @@ -380,7 +565,11 @@ def default(self, o) -> dict: # pylint: disable=E0202 Python dict representation. """ if isinstance(o, datetime.datetime): - return {"@module": "datetime", "@class": "datetime", "string": str(o)} + return { + "@module": "datetime", + "@class": "datetime", + "string": str(o), + } if isinstance(o, UUID): return {"@module": "uuid", "@class": "UUID", "string": str(o)} if isinstance(o, Path): @@ -431,10 +620,20 @@ def default(self, o) -> dict: # pylint: disable=E0202 } if bson is not None and isinstance(o, bson.objectid.ObjectId): - return {"@module": "bson.objectid", "@class": "ObjectId", "oid": str(o)} + return { + "@module": "bson.objectid", + "@class": "ObjectId", + "oid": str(o), + } if callable(o) and not isinstance(o, MSONable): - return _serialize_callable(o) + try: + return _serialize_callable(o) + except AttributeError as e: + # Some callables may not have instance __name__ + if self._allow_unserializable_objects: + return self._update_name_object_map(o) + raise AttributeError(e) try: if pydantic is not None and isinstance(o, pydantic.BaseModel): @@ -450,6 +649,11 @@ def default(self, o) -> dict: # pylint: disable=E0202 d = o.as_dict() elif isinstance(o, Enum): d = {"value": o.value} + elif self._allow_unserializable_objects: + # Last resort logic. 
We keep track of some name of the object + # as a reference, and instead of the object, store that + # name, which of course is json-serializable + d = self._update_name_object_map(o) else: raise TypeError( f"Object of type {o.__class__.__name__} is not JSON serializable" @@ -639,7 +843,11 @@ class MSONError(Exception): def jsanitize( - obj, strict=False, allow_bson=False, enum_values=False, recursive_msonable=False + obj, + strict=False, + allow_bson=False, + enum_values=False, + recursive_msonable=False, ): """ This method cleans an input json-like object, either a list or a dict or @@ -680,24 +888,12 @@ def jsanitize( return obj if isinstance(obj, (list, tuple)): return [ - jsanitize( - i, - strict=strict, - allow_bson=allow_bson, - enum_values=enum_values, - recursive_msonable=recursive_msonable, - ) + jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values) for i in obj ] if np is not None and isinstance(obj, np.ndarray): return [ - jsanitize( - i, - strict=strict, - allow_bson=allow_bson, - enum_values=enum_values, - recursive_msonable=recursive_msonable, - ) + jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values) for i in obj.tolist() ] if np is not None and isinstance(obj, np.generic): diff --git a/tests/test_json.py b/tests/test_json.py index d453baee8..206c5955e 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -390,63 +390,63 @@ def test_enum_serialization_no_msonable(self): f = jsanitize(d, enum_values=True) assert f["123"] == "value_a" - # def test_save_load(self, tmp_path): - # """Tests the save and load serialization methods.""" - # - # test_good_class = GoodMSONClass( - # "Hello", - # "World", - # "Python", - # **{ - # "cant_serialize_me": GoodNOTMSONClass( - # "Hello2", "World2", "Python2", **{"values": []} - # ), - # "cant_serialize_me2": [ - # GoodNOTMSONClass("Hello4", "World4", "Python4", **{"values": []}), - # GoodNOTMSONClass("Hello4", "World4", "Python4", **{"values": []}), - # ], - # 
"cant_serialize_me3": [ - # { - # "tmp": GoodMSONClass( - # "Hello5", "World5", "Python5", **{"values": []} - # ), - # "tmp2": 2, - # "tmp3": [1, 2, 3], - # }, - # { - # "tmp5": GoodNOTMSONClass( - # "aHello5", "aWorld5", "aPython5", **{"values": []} - # ), - # "tmp2": 5, - # "tmp3": {"test": "test123"}, - # }, - # # Gotta check that if I hide an MSONable class somewhere - # # it still gets correctly serialized. - # {"actually_good": GoodMSONClass("1", "2", "3", **{"values": []})}, - # ], - # "values": [], - # }, - # ) - # - # # This will pass - # test_good_class.as_dict() - # - # # This will fail - # with pytest.raises(TypeError): - # test_good_class.to_json() - # - # # This should also pass though - # target = tmp_path / "test_dir123" - # test_good_class.save(target, json_kwargs={"indent": 4, "sort_keys": True}) - # - # # This will fail - # with pytest.raises(FileExistsError): - # test_good_class.save(target, strict=True) - # - # # Now check that reloading this, the classes are equal! - # test_good_class2 = GoodMSONClass.load(target) - # - # assert test_good_class == test_good_class2 + def test_save_load(self, tmp_path): + """Tests the save and load serialization methods.""" + + test_good_class = GoodMSONClass( + "Hello", + "World", + "Python", + **{ + "cant_serialize_me": GoodNOTMSONClass( + "Hello2", "World2", "Python2", **{"values": []} + ), + "cant_serialize_me2": [ + GoodNOTMSONClass("Hello4", "World4", "Python4", **{"values": []}), + GoodNOTMSONClass("Hello4", "World4", "Python4", **{"values": []}), + ], + "cant_serialize_me3": [ + { + "tmp": GoodMSONClass( + "Hello5", "World5", "Python5", **{"values": []} + ), + "tmp2": 2, + "tmp3": [1, 2, 3], + }, + { + "tmp5": GoodNOTMSONClass( + "aHello5", "aWorld5", "aPython5", **{"values": []} + ), + "tmp2": 5, + "tmp3": {"test": "test123"}, + }, + # Gotta check that if I hide an MSONable class somewhere + # it still gets correctly serialized. 
+ {"actually_good": GoodMSONClass("1", "2", "3", **{"values": []})}, + ], + "values": [], + }, + ) + + # This will pass + test_good_class.as_dict() + + # This will fail + with pytest.raises(TypeError): + test_good_class.to_json() + + # This should also pass though + target = tmp_path / "test_dir123" + test_good_class.save(target, json_kwargs={"indent": 4, "sort_keys": True}) + + # This will fail + with pytest.raises(FileExistsError): + test_good_class.save(target, strict=True) + + # Now check that reloading this, the classes are equal! + test_good_class2 = GoodMSONClass.load(target) + + assert test_good_class == test_good_class2 class TestJson: From 5b26904484e2d7f4b0da90fcd0f06b75c202db2f Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Sun, 12 May 2024 09:04:18 -0400 Subject: [PATCH 05/19] Cleanup, add load test --- monty/json.py | 21 ++++++++++++++------- tests/test_json.py | 9 +++++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/monty/json.py b/monty/json.py index b03e8140b..a718bc838 100644 --- a/monty/json.py +++ b/monty/json.py @@ -472,8 +472,7 @@ class is not entirely MSONable. @classmethod def load(cls, file_path): - """Loads a class from a provided {load_dir}/class.json and - {load_dir}/class.pkl file (if necessary). + """Loads a class from a provided json file. Parameters ---------- @@ -490,16 +489,24 @@ def load(cls, file_path): return cls.from_dict(d) -def load_anything(path): - """Loads a json file into a class, rehydrating from MSONable.""" +def load(path): + """Loads a json file that was saved using MSONable.save. + + Parameters + ---------- + path : os.PathLike + Path to the json file to load. 
+ + Returns + ------- + MSONable + """ d = _d_from_path(path) module = d["@module"] klass = d["@class"] - signature = f"{module}:{klass}" - module, function = signature.split(":") module = import_module(module) - klass = getattr(module, function) + klass = getattr(module, klass) return klass.from_dict(d) diff --git a/tests/test_json.py b/tests/test_json.py index 206c5955e..3a27342ca 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -30,7 +30,8 @@ import pytest -from monty.json import MontyDecoder, MontyEncoder, MSONable, _load_redirect, jsanitize +from monty.json import (MontyDecoder, MontyEncoder, MSONable, _load_redirect, + jsanitize, load) from . import __version__ as tests_version @@ -436,7 +437,7 @@ def test_save_load(self, tmp_path): test_good_class.to_json() # This should also pass though - target = tmp_path / "test_dir123" + target = tmp_path / "test.json" test_good_class.save(target, json_kwargs={"indent": 4, "sort_keys": True}) # This will fail @@ -446,7 +447,11 @@ def test_save_load(self, tmp_path): # Now check that reloading this, the classes are equal! 
test_good_class2 = GoodMSONClass.load(target) + # Final check using load + test_good_class3 = load(target) + assert test_good_class == test_good_class2 + assert test_good_class == test_good_class3 class TestJson: From 077159b98739ba44626bdf926a83a8f9a3058564 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 May 2024 13:13:46 +0000 Subject: [PATCH 06/19] pre-commit auto-fixes --- tests/test_json.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_json.py b/tests/test_json.py index 3a27342ca..8e629628c 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -30,8 +30,14 @@ import pytest -from monty.json import (MontyDecoder, MontyEncoder, MSONable, _load_redirect, - jsanitize, load) +from monty.json import ( + MontyDecoder, + MontyEncoder, + MSONable, + _load_redirect, + jsanitize, + load, +) from . import __version__ as tests_version From c69ec5f9e4ed0351b60f52800dad9cfddef5641e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 15:49:24 +0000 Subject: [PATCH 07/19] Bump orjson from 3.10.0 to 3.10.3 Bumps [orjson](https://github.com/ijl/orjson) from 3.10.0 to 3.10.3. - [Release notes](https://github.com/ijl/orjson/releases) - [Changelog](https://github.com/ijl/orjson/blob/master/CHANGELOG.md) - [Commits](https://github.com/ijl/orjson/compare/3.10.0...3.10.3) --- updated-dependencies: - dependency-name: orjson dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index 72e6a2d80..475fe0979 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,6 +4,6 @@ msgpack==1.0.8 tqdm==4.66.2 pymongo==4.7.2 pandas==2.2.2 -orjson==3.10.0 +orjson==3.10.3 types-orjson==3.6.2 types-requests==2.31.0.20240406 From 1041b17b4ac20d0e0391fee85d7aec98d8b08bed Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Mon, 13 May 2024 08:51:31 -0700 Subject: [PATCH 08/19] Fix typing errors. --- monty/functools.py | 2 +- monty/serialization.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/monty/functools.py b/monty/functools.py index d5f538198..bc9b99537 100644 --- a/monty/functools.py +++ b/monty/functools.py @@ -84,7 +84,7 @@ def __init__(self, func: Callable) -> None: func: Function to decorate. """ self.__func = func - wraps(self.__func)(self) + wraps(self.__func)(self) # type: ignore def __get__(self, inst: Any, inst_cls) -> Any: if inst is None: diff --git a/monty/serialization.py b/monty/serialization.py index 128b7ec77..0e646d358 100644 --- a/monty/serialization.py +++ b/monty/serialization.py @@ -12,7 +12,7 @@ try: from ruamel.yaml import YAML except ImportError: - YAML = None + YAML = None # type: ignore from monty.io import zopen from monty.json import MontyDecoder, MontyEncoder From df8ba426efc962d1e13fb84bd0ef25054e33146c Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Mon, 13 May 2024 09:42:54 -0700 Subject: [PATCH 09/19] Fix internal `jsanitize` calls again In #670, the `recursive_msonable` keyword argument was removed from the internal `jsanitize` calls. I have patched it back. 
--- monty/json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monty/json.py b/monty/json.py index a718bc838..e8bca7c50 100644 --- a/monty/json.py +++ b/monty/json.py @@ -895,12 +895,12 @@ def jsanitize( return obj if isinstance(obj, (list, tuple)): return [ - jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values) + jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values, recursive_msonable=recursive_msonable) for i in obj ] if np is not None and isinstance(obj, np.ndarray): return [ - jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values) + jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values, recursive_msonable=recursive_msonable) for i in obj.tolist() ] if np is not None and isinstance(obj, np.generic): From e6fbcbf81d33730e46f00605270a1f0f4d0eef6e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 16:43:12 +0000 Subject: [PATCH 10/19] pre-commit auto-fixes --- monty/json.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/monty/json.py b/monty/json.py index e8bca7c50..ccfb8943f 100644 --- a/monty/json.py +++ b/monty/json.py @@ -895,12 +895,24 @@ def jsanitize( return obj if isinstance(obj, (list, tuple)): return [ - jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values, recursive_msonable=recursive_msonable) + jsanitize( + i, + strict=strict, + allow_bson=allow_bson, + enum_values=enum_values, + recursive_msonable=recursive_msonable, + ) for i in obj ] if np is not None and isinstance(obj, np.ndarray): return [ - jsanitize(i, strict=strict, allow_bson=allow_bson, enum_values=enum_values, recursive_msonable=recursive_msonable) + jsanitize( + i, + strict=strict, + allow_bson=allow_bson, + enum_values=enum_values, + recursive_msonable=recursive_msonable, + ) for i in obj.tolist() ] if np is not None and isinstance(obj, 
np.generic): From 19783c25de5324c99688a7e782aa733e5180740f Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Mon, 13 May 2024 09:49:23 -0700 Subject: [PATCH 11/19] Update json.py --- monty/json.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/monty/json.py b/monty/json.py index ccfb8943f..759758b14 100644 --- a/monty/json.py +++ b/monty/json.py @@ -905,16 +905,19 @@ def jsanitize( for i in obj ] if np is not None and isinstance(obj, np.ndarray): - return [ - jsanitize( - i, - strict=strict, - allow_bson=allow_bson, - enum_values=enum_values, - recursive_msonable=recursive_msonable, - ) - for i in obj.tolist() - ] + try: + return [ + jsanitize( + i, + strict=strict, + allow_bson=allow_bson, + enum_values=enum_values, + recursive_msonable=recursive_msonable, + ) + for i in obj.tolist() + ] + except TypeError: + return obj.tolist() if np is not None and isinstance(obj, np.generic): return obj.item() if _check_type( From bec9f1153e7881d1c47fa858658df97a99d6294c Mon Sep 17 00:00:00 2001 From: "Andrew S. 
Rosen" Date: Mon, 13 May 2024 09:49:55 -0700 Subject: [PATCH 12/19] Update test_json.py --- tests/test_json.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_json.py b/tests/test_json.py index 8e629628c..11cbcab95 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -587,6 +587,10 @@ def test_numpy(self): d = jsanitize(x, strict=True) assert isinstance(d["energies"][0], float) + x = {"energy": np.array(-1.0)} + d = jsanitize(x, strict=True) + assert isinstance(d["energy"], float) + # Test data nested in a class x = np.array([[1 + 1j, 2 + 1j], [3 + 1j, 4 + 1j]], dtype="complex64") cls = ClassContainingNumpyArray(np_a={"a": [{"b": x}]}) From 8dcda56f2fd2e313d21646c5a279c2c7cee4008c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 16:50:01 +0000 Subject: [PATCH 13/19] pre-commit auto-fixes --- tests/test_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_json.py b/tests/test_json.py index 11cbcab95..45fa1bcd7 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -590,7 +590,7 @@ def test_numpy(self): x = {"energy": np.array(-1.0)} d = jsanitize(x, strict=True) assert isinstance(d["energy"], float) - + # Test data nested in a class x = np.array([[1 + 1j, 2 + 1j], [3 + 1j, 4 + 1j]], dtype="complex64") cls = ClassContainingNumpyArray(np_a={"a": [{"b": x}]}) From 3cba40551e6fbea75b833f96c78f20077d962cbc Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Mon, 13 May 2024 10:05:59 -0700 Subject: [PATCH 14/19] Add more regression tests for `recursive_msonable` behavior This PR adds another regression test for `recursive_msonable` behavior in `jsanitize` to prevent breaking of things in the future. I also ensured the test suite runs if pydantic isn't installed. 
--- tests/test_json.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_json.py b/tests/test_json.py index 45fa1bcd7..ca3738dda 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -23,6 +23,11 @@ except ImportError: torch = None +try: + import pydantic +except ImportError: + pydantic = None + try: from bson.objectid import ObjectId except ImportError: @@ -739,6 +744,13 @@ def test_jsanitize(self): assert clean_recursive_msonable["hello"]["b"] == 2 assert clean_recursive_msonable["test"] == "hi" + d = {"hello": [GoodMSONClass(1, 2, 3), "test"], "test": "hi"} + clean_recursive_msonable = jsanitize(d, recursive_msonable=True) + assert clean_recursive_msonable["hello"][0]["a"] == 1 + assert clean_recursive_msonable["hello"][0]["b"] == 2 + assert clean_recursive_msonable["hello"][0]["c"] == 3 + assert clean_recursive_msonable["hello"][1] == "test" + d = {"dt": datetime.datetime.now()} clean = jsanitize(d) assert isinstance(clean["dt"], str) @@ -866,6 +878,7 @@ def test_redirect_settings_file(self): } } + @pytest.mark.skipif(pydantic is None, reason="pydantic not present") def test_pydantic_integrations(self): from pydantic import BaseModel, ValidationError From 6f53b7b7d73b722413631c97824d5a5b522475d3 Mon Sep 17 00:00:00 2001 From: Andrew Rosen Date: Mon, 13 May 2024 10:10:19 -0700 Subject: [PATCH 15/19] Add another test --- tests/test_json.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_json.py b/tests/test_json.py index ca3738dda..0a24a4a70 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -742,6 +742,7 @@ def test_jsanitize(self): clean_recursive_msonable = jsanitize(d, recursive_msonable=True) assert clean_recursive_msonable["hello"]["a"] == 1 assert clean_recursive_msonable["hello"]["b"] == 2 + assert clean_recursive_msonable["hello"]["c"] == 3 assert clean_recursive_msonable["test"] == "hi" d = {"hello": [GoodMSONClass(1, 2, 3), "test"], "test": "hi"} @@ -750,6 +751,14 @@ def 
test_jsanitize(self): assert clean_recursive_msonable["hello"][0]["b"] == 2 assert clean_recursive_msonable["hello"][0]["c"] == 3 assert clean_recursive_msonable["hello"][1] == "test" + assert clean_recursive_msonable["test"] == "hi" + + d = {"hello": (GoodMSONClass(1, 2, 3), "test"), "test": "hi"} + clean_recursive_msonable = jsanitize(d, recursive_msonable=True) + assert clean_recursive_msonable["hello"][0]["a"] == 1 + assert clean_recursive_msonable["hello"][0]["b"] == 2 + assert clean_recursive_msonable["hello"][0]["c"] == 3 + assert clean_recursive_msonable["test"] == "hi" d = {"dt": datetime.datetime.now()} clean = jsanitize(d) From f21e7ee8c171d9befb4348e027ed771069cef50d Mon Sep 17 00:00:00 2001 From: Andrew Rosen Date: Mon, 13 May 2024 10:11:23 -0700 Subject: [PATCH 16/19] add another test --- tests/test_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_json.py b/tests/test_json.py index 0a24a4a70..2653ec4dd 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -758,6 +758,7 @@ def test_jsanitize(self): assert clean_recursive_msonable["hello"][0]["a"] == 1 assert clean_recursive_msonable["hello"][0]["b"] == 2 assert clean_recursive_msonable["hello"][0]["c"] == 3 + assert clean_recursive_msonable["hello"][1] == "test" assert clean_recursive_msonable["test"] == "hi" d = {"dt": datetime.datetime.now()} From 4ba65eca643d6d8b50c7081f757a6a7324f1bc9b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 23:37:36 +0000 Subject: [PATCH 17/19] Bump nokogiri from 1.16.2 to 1.16.5 in /docs Bumps [nokogiri](https://github.com/sparklemotion/nokogiri) from 1.16.2 to 1.16.5. 
- [Release notes](https://github.com/sparklemotion/nokogiri/releases) - [Changelog](https://github.com/sparklemotion/nokogiri/blob/main/CHANGELOG.md) - [Commits](https://github.com/sparklemotion/nokogiri/compare/v1.16.2...v1.16.5) --- updated-dependencies: - dependency-name: nokogiri dependency-type: indirect ... Signed-off-by: dependabot[bot] --- docs/Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 2c17d9262..0a3518d02 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -210,9 +210,9 @@ GEM jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) minitest (5.19.0) - nokogiri (1.16.2-arm64-darwin) + nokogiri (1.16.5-arm64-darwin) racc (~> 1.4) - nokogiri (1.16.2-x86_64-linux) + nokogiri (1.16.5-x86_64-linux) racc (~> 1.4) octokit (4.25.1) faraday (>= 1, < 3) From 6033c2b5f83fa1c43b8173e9680988fb5f9a2d35 Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Wed, 15 May 2024 07:19:27 -0700 Subject: [PATCH 18/19] Update change log. --- docs/changelog.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 243ca7a6d..3dd9abd9d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,8 @@ # Change log +## 2024.5.15 +- Reimplemented support for pickle in MSONable. (@matthewcarbone) + ## 2024.4.17 - Revert changes to json.py for now. 
From 7568d6abcfc8dec14ba3022523656d5f9217835e Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Wed, 15 May 2024 07:19:46 -0700 Subject: [PATCH 19/19] Update dev docs --- docs/monty.functools.md | 3 --- docs/monty.os.md | 1 - docs/monty.re.md | 2 -- monty/__init__.py | 2 +- pyproject.toml | 2 +- tests/test_files/3000_lines.txt.gz | Bin 6496 -> 6496 bytes 6 files changed, 2 insertions(+), 8 deletions(-) diff --git a/docs/monty.functools.md b/docs/monty.functools.md index 15bc0fca9..93fd41b68 100644 --- a/docs/monty.functools.md +++ b/docs/monty.functools.md @@ -73,13 +73,10 @@ becomes The decorated main accepts two new arguments: > prof_file: Name of the output file with profiling data - > ```none > If not given, a temporary file is created. > ``` - > sortby: Profiling data are sorted according to this value. - > ```none > default is “time”. See sort_stats. > ``` diff --git a/docs/monty.os.md b/docs/monty.os.md index 8d85d1dc8..96184b2e2 100644 --- a/docs/monty.os.md +++ b/docs/monty.os.md @@ -15,7 +15,6 @@ performing some tasks, and returns to the original working directory afterwards. E.g., > with cd(“/my/path/”): - > ```none > do_something() > ``` diff --git a/docs/monty.re.md b/docs/monty.re.md index 7082093fb..b7e419ef9 100644 --- a/docs/monty.re.md +++ b/docs/monty.re.md @@ -25,11 +25,9 @@ A powerful regular expression version of grep. * **Returns** > {key1: [[[matches…], lineno], [[matches…], lineno], - > ```none > [[matches…], lineno], …], > ``` - > key2: …} For reverse reads, the lineno is given as a -ve number. 
Please note diff --git a/monty/__init__.py b/monty/__init__.py index 02eb8a816..a96daff45 100644 --- a/monty/__init__.py +++ b/monty/__init__.py @@ -9,7 +9,7 @@ __author__ = "Shyue Ping Ong" __copyright__ = "Copyright 2014, The Materials Virtual Lab" -__version__ = "2024.4.17" +__version__ = "2024.5.15" __maintainer__ = "Shyue Ping Ong" __email__ = "ongsp@ucsd.edu" __date__ = "Oct 12 2020" diff --git a/pyproject.toml b/pyproject.toml index 95ef2d935..4587a74bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ dependencies = [ ] -version = "2024.4.17" +version = "2024.5.15" [tool.setuptools] packages = ["monty"] diff --git a/tests/test_files/3000_lines.txt.gz b/tests/test_files/3000_lines.txt.gz index ca27a30157fc18c468110d6253f29924d6149195..9d95b137359b5a02da35a5a4e63eb53006e38325 100644 GIT binary patch delta 16 XcmaE0^uUN+zMF%i?ug4q_DD$pG-(Bv delta 16 XcmaE0^uUN+zMF$1cq!9H_DD$pF*pSZ