diff --git a/README.md b/README.md index 6195e541ec..abf4b3a69b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub contributors](https://img.shields.io/github/contributors/acl-org/acl-anthology)](https://github.com/acl-org/acl-anthology/graphs/contributors) [![Good first project issues](https://img.shields.io/github/issues/acl-org/acl-anthology/good%20first%20project)](https://github.com/acl-org/acl-anthology/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3A%22good+first+project%22) [![License](https://img.shields.io/github/license/acl-org/acl-anthology)](LICENSE) -[![acl-anthology-py on PyPI](https://img.shields.io/pypi/v/acl-anthology-py?logo=python&label=acl-anthology-py&labelColor=lightgray&color=3776AB)](https://pypi.org/project/acl-anthology-py/) +[![acl-anthology on PyPI](https://img.shields.io/pypi/v/acl-anthology?logo=python&label=acl-anthology&labelColor=lightgray&color=3776AB)](https://pypi.org/project/acl-anthology/) This repository contains: @@ -14,7 +14,7 @@ This repository contains: The official home of this repository is <https://github.com/acl-org/acl-anthology>. -## Using the acl-anthology-py Python package +## Using the acl-anthology Python package Please see the separate [README for the Python package](python/README.md) for detailed information. diff --git a/README_detailed.md b/README_detailed.md index a3ca9deb13..919495743a 100644 --- a/README_detailed.md +++ b/README_detailed.md @@ -122,7 +122,7 @@ comprise: defines which author names should be treated as identical for purposes of generating "author" pages. -The "acl-anthology-py" module under [`python/`](python/) is responsible +The "acl-anthology" module under [`python/`](python/) is responsible for parsing and interpreting all these data files. Some information that is not explicitly stored in any of these files is *derived automatically* by this module during Step 1 of building the website. 
diff --git a/bin/anthology/__init__.py b/bin/anthology/__init__.py index b0e5abfc24..968a9fee52 100644 --- a/bin/anthology/__init__.py +++ b/bin/anthology/__init__.py @@ -7,7 +7,7 @@ import warnings warnings.warn( - """Your code uses the legacy Anthology library. Consider migrating to acl-anthology-py """, + """Your code uses the legacy Anthology library. Consider migrating to acl-anthology """, FutureWarning, stacklevel=2, ) diff --git a/.readthedocs.yaml b/python/.readthedocs.yaml similarity index 86% rename from .readthedocs.yaml rename to python/.readthedocs.yaml index 4fac16136f..fd251f953e 100644 --- a/.readthedocs.yaml +++ b/python/.readthedocs.yaml @@ -17,7 +17,7 @@ build: - pip install poetry - poetry config virtualenvs.create false post_install: - - poetry install --directory=python --with dev + - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --directory=python --with dev # Build documentation with MkDocs mkdocs: diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 37b6fb2086..3f78f2dae6 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +## [0.5.1] — 2025-01-02 + +This release changes the PyPI package name from acl-anthology-py to acl-anthology. + +### Added + +- VenueIndex can now set `no_item_ids=True` to skip reverse-indexing volumes. This avoids parsing all XML files if all you want to access is basic venue information, but means that `Venue.item_ids` will be empty. _You probably don't want to use this unless you know that you are not going to need this information._ + ### Changed - LaTeX encoding now uses [pylatexenc](https://pylatexenc.readthedocs.io/) instead of latexcodec, and wraps all macros in braces. This should address problems with BibTeX handling, see [#4280](https://github.com/acl-org/acl-anthology/issues/4280). 
diff --git a/python/README.md b/python/README.md index 36e907f37b..b4fa62aa12 100644 --- a/python/README.md +++ b/python/README.md @@ -1,25 +1,25 @@ -# acl-anthology-py +# acl-anthology [![License](https://img.shields.io/github/license/acl-org/acl-anthology)](LICENSE) [![Build Status](https://img.shields.io/github/actions/workflow/status/acl-org/acl-anthology/code-quality.yml)](https://github.com/acl-org/acl-anthology/actions/workflows/code-quality.yml) -[![Documentation](https://img.shields.io/readthedocs/acl-anthology-py)](https://acl-anthology-py.readthedocs.io/en/latest/) +[![Documentation](https://img.shields.io/readthedocs/acl-anthology)](https://acl-anthology.readthedocs.io/en/latest/) [![Code Coverage](https://img.shields.io/codecov/c/gh/acl-org/acl-anthology)](https://codecov.io/gh/acl-org/acl-anthology) -![Supported Python Versions](https://img.shields.io/pypi/pyversions/acl-anthology-py) -![Development Status](https://img.shields.io/pypi/status/acl-anthology-py) -[![Package on PyPI](https://img.shields.io/pypi/v/acl-anthology-py)](https://pypi.org/project/acl-anthology-py/) +![Supported Python Versions](https://img.shields.io/pypi/pyversions/acl-anthology) +![Development Status](https://img.shields.io/pypi/status/acl-anthology) +[![Package on PyPI](https://img.shields.io/pypi/v/acl-anthology)](https://pypi.org/project/acl-anthology/) This package accesses data from the [ACL Anthology](https://aclanthology.org). 
-- [**Documentation**](https://acl-anthology-py.readthedocs.io/en/latest/) -- [**Package on PyPI**](https://pypi.org/project/acl-anthology-py/) +- [**Documentation**](https://acl-anthology.readthedocs.io/en/latest/) +- [**Package on PyPI**](https://pypi.org/project/acl-anthology/) ## How to use Install via `pip`: ```bash -$ pip install acl-anthology-py +$ pip install acl-anthology ``` Instantiate the library, automatically fetching data files from the [ACL @@ -53,7 +53,7 @@ Two-Level Morphology with Composition ``` Find more examples and details on the API in the [**official -documentation**](https://acl-anthology-py.readthedocs.io/en/latest/). +documentation**](https://acl-anthology.readthedocs.io/en/latest/). ## Developing diff --git a/python/acl_anthology/config.py b/python/acl_anthology/config.py index 1bc9baa879..545acbaa11 100644 --- a/python/acl_anthology/config.py +++ b/python/acl_anthology/config.py @@ -55,5 +55,5 @@ class DefaultConfig: config = OmegaConf.structured(DefaultConfig) """A [structured configuration instance](https://omegaconf.readthedocs.io/en/latest/structured_config.html) that is used by all `acl_anthology` classes.""" -dirs = PlatformDirs("acl-anthology-py") +dirs = PlatformDirs("acl-anthology") """A [PlatformDirs instance](https://platformdirs.readthedocs.io/en/latest/api.html#platformdirs) that returns platform-specific directories for storing data.""" diff --git a/python/acl_anthology/exceptions.py b/python/acl_anthology/exceptions.py index 74830b36ef..38cc3eb11d 100644 --- a/python/acl_anthology/exceptions.py +++ b/python/acl_anthology/exceptions.py @@ -75,5 +75,5 @@ class SchemaMismatchWarning(UserWarning): def __init__(self) -> None: super().__init__( "Data directory contains a different schema.rnc as this library; " - "you might need to update the data or the acl-anthology-py library." + "you might need to update the data or the acl-anthology library." 
) diff --git a/python/acl_anthology/venues.py b/python/acl_anthology/venues.py index ad01674ac5..43b4ad0833 100644 --- a/python/acl_anthology/venues.py +++ b/python/acl_anthology/venues.py @@ -120,10 +120,12 @@ class VenueIndex(SlottedDict[Venue]): Attributes: parent: The parent Anthology instance to which this index belongs. + no_item_ids: If set to True, skips parsing all XML files, which means the reverse-indexing of Volumes via `Venue.item_ids` will not be available. is_data_loaded: A flag indicating whether the venue YAML files have been loaded and the index has been built. """ parent: Anthology = field(repr=False, eq=False) + no_item_ids: bool = field(repr=False, default=False) is_data_loaded: bool = field(init=False, repr=False, default=False) def load(self) -> None: @@ -153,6 +155,8 @@ def build(self) -> None: Raises: ValueError: If a volume lists a venue ID that doesn't exist (i.e., isn't defined in the venue YAML files). """ + if self.no_item_ids: + return for volume in self.parent.volumes(): for venue_id in volume.venue_ids: if venue_id not in self.data: diff --git a/python/docs/api/index.md b/python/docs/api/index.md index 734bc742fb..fa506ab5d4 100644 --- a/python/docs/api/index.md +++ b/python/docs/api/index.md @@ -1,3 +1,3 @@ # API Documentation -This is the API documentation for acl-anthology-py. +This is the API documentation for the acl-anthology Python package. 
diff --git a/python/docs/guide/getting-started.md b/python/docs/guide/getting-started.md index c567ac78a6..84a851a043 100644 --- a/python/docs/guide/getting-started.md +++ b/python/docs/guide/getting-started.md @@ -7,16 +7,13 @@ fetching data from the main ACL Anthology repository, you will also need to have ## Installation The library is available as a [PyPI -package](https://pypi.org/project/acl-anthology-py/) and can therefore simply be +package](https://pypi.org/project/acl-anthology/) and can therefore simply be installed via pip: ```bash -pip install acl-anthology-py +pip install acl-anthology ``` -Alternatively, you can [download releases from -Github](https://github.com/acl-org/acl-anthology/releases/). - ## Instantiating the Anthology ### From the official repository diff --git a/python/docs/index.md b/python/docs/index.md index 0037d5e024..6b07a28d78 100644 --- a/python/docs/index.md +++ b/python/docs/index.md @@ -18,7 +18,7 @@ can even use this library to programmatically make changes to the metadata. This package requires **Python 3.10 or newer**. 
Install via pip: ```bash -pip install acl-anthology-py +pip install acl-anthology ``` Instantiate the library, automatically fetching data files from the [ACL diff --git a/python/mkdocs.yml b/python/mkdocs.yml index ba6353cfd4..fa2296c01d 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: acl-anthology-py +site_name: acl-anthology site_author: Marcel Bollmann repo_url: https://github.com/acl-org/acl-anthology repo_name: acl-org/acl-anthology @@ -101,7 +101,7 @@ extra: social: - icon: fontawesome/brands/github link: https://github.com/acl-org/acl-anthology - name: acl-anthology-py on Github + name: acl-anthology on Github - icon: material/web link: https://aclanthology.org/ name: ACL Anthology Website diff --git a/python/poetry.lock b/python/poetry.lock index c7601751a6..9aec270d2d 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1474,13 +1474,13 @@ files = [ [[package]] name = "pymdown-extensions" -version = "10.12" +version = "10.13" description = "Extension pack for Python Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.12-py3-none-any.whl", hash = "sha256:49f81412242d3527b8b4967b990df395c89563043bc51a3d2d7d500e52123b77"}, - {file = "pymdown_extensions-10.12.tar.gz", hash = "sha256:b0ee1e0b2bef1071a47891ab17003bfe5bf824a398e13f49f8ed653b699369a7"}, + {file = "pymdown_extensions-10.13-py3-none-any.whl", hash = "sha256:80bc33d715eec68e683e04298946d47d78c7739e79d808203df278ee8ef89428"}, + {file = "pymdown_extensions-10.13.tar.gz", hash = "sha256:e0b351494dc0d8d14a1f52b39b1499a00ef1566b4ba23dc74f1eba75c736f5dd"}, ] [package.dependencies] @@ -2211,4 +2211,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11.0 || >3.11.0,<3.13" -content-hash = "8f4432b2dc6d9963cea60e56e9f2400685944bd221eb57c1bab9789a82d60355" +content-hash = "58f9b390907ffeb4923191a869af7fd9a9b37ffc59c7f9d569df9455a21c8761" diff --git a/python/pyproject.toml b/python/pyproject.toml index 1bd3e4b65e..db08b90a4b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,11 +31,11 @@ target-version = 'py310' ignore = ['E501'] # "Line too long" is black's job [tool.poetry] -name = "acl-anthology-py" +name = "acl-anthology" packages = [ { include = "acl_anthology" }, ] -version = "0.5.0" +version = "0.5.1" description = "A library for accessing the ACL Anthology" authors = ["Marcel Bollmann "] license = "Apache-2.0" @@ -92,6 +92,7 @@ mypy = "^1.3.0" types-python-slugify = "^8.0.0.3" types-pyyaml = "^6.0.12.11" types-lxml = "^2023.3.28" +pymdown-extensions = "^10.13" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/python/tests/venues_test.py b/python/tests/venues_test.py index 7fb9d0a42b..59a9f98b42 100644 --- a/python/tests/venues_test.py +++ b/python/tests/venues_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import pytest from pathlib import Path from acl_anthology.venues import VenueIndex, Venue @@ -80,3 +81,11 @@ def test_venueindex_iter(anthology): index = VenueIndex(anthology) venue_ids = index.keys() assert set(venue_ids) == set(all_toy_venue_ids) + + +def test_venueindex_noindex(anthology, caplog): + """Accessing venues with no_item_ids=True should not load XML files.""" + with caplog.at_level(logging.DEBUG): + index = VenueIndex(anthology, no_item_ids=True) + _ = index.get("cl").name + assert not any("XML data file" in rec.message for rec in caplog.records)