Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache latest version per dataset #528

Merged
merged 6 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions app/crud/versions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Optional

from async_lru import alru_cache
from asyncpg import UniqueViolationError

from ..errors import RecordAlreadyExistsError, RecordNotFoundError
Expand All @@ -11,7 +12,6 @@
from . import datasets, update_data
from .metadata import (
create_version_metadata,
update_all_metadata,
update_version_metadata,
)

Expand Down Expand Up @@ -52,6 +52,7 @@ async def get_version(dataset: str, version: str) -> ORMVersion:
return row


@alru_cache(maxsize=64, ttl=3600.0)
async def get_latest_version(dataset) -> str:
"""Fetch latest version number."""

Expand Down Expand Up @@ -80,9 +81,6 @@ async def create_version(dataset: str, version: str, **data) -> ORMVersion:
if data.get("is_downloadable") is None:
data["is_downloadable"] = d.is_downloadable

if data.get("is_latest"):
await _reset_is_latest(dataset, version)

metadata_data = data.pop("metadata", None)
try:
new_version: ORMVersion = await ORMVersion.create(
Expand All @@ -100,6 +98,13 @@ async def create_version(dataset: str, version: str, **data) -> ORMVersion:
)
new_version.metadata = metadata

# NOTE: The VersionCreateIn model disallows marking a new version as latest
# on creation, so that requests are not temporarily routed to an
# incompletely-imported asset. This function still technically allows it,
# to facilitate testing.
if data.get("is_latest"):
await _reset_is_latest(dataset, version)

return new_version


Expand Down Expand Up @@ -155,8 +160,15 @@ async def _update_is_downloadable(


async def _reset_is_latest(dataset: str, version: str) -> None:
    """Clear the is_latest flag on every version of `dataset` except `version`.

    Afterwards the cached ``get_latest_version`` entry for the dataset is
    invalidated, so the next lookup is not served from the cache.
    """
    # NOTE: Only call this once `version` itself has been marked as latest;
    # otherwise the dataset can end up with no latest version at all.
    # FIXME: One update_version call is issued per other version, so the cost
    # grows with the number of versions of the dataset. A single DB statement
    # could probably do the same job.
    all_versions = await get_versions(dataset)
    async for candidate in list_to_async_generator(all_versions):
        if candidate.version == version:
            # Leave the version that was just promoted untouched.
            continue
        await update_version(dataset, candidate.version, is_latest=False)

    # The boolean result of cache_invalidate is deliberately ignored.
    get_latest_version.cache_invalidate(dataset)
10 changes: 6 additions & 4 deletions tests/crud/test_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,12 @@ async def test_versions():

@pytest.mark.asyncio
async def test_latest_versions():
"""Test if trigger function on versions table work It is suppose to reset
is_latest field to False for all versions of a dataset Once a version's
is_latest field is set to True Get Latest Version function should always
return the latest version number."""
"""Test that the trigger function on the versions table works.

The is_latest field should be set to False for all other versions of a
dataset when a version's is_latest field is set to True.
The get_latest_version function should always return the latest version
number."""

dataset_name = "test"

Expand Down
Loading