Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #3931: Add unversioned flag to Package.set() and Package.set_dir() #3927

Merged
merged 27 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5d31cd3
use list-objects as fallback
drernie Apr 3, 2024
54d072b
test_packages.test_set_dir_cannot_list_object_versions
drernie Apr 3, 2024
983475d
only call list_objects if e is AccessDenied
drernie Apr 3, 2024
43cbc61
Merge branch 'master' into set-dir-without-versions
drernie Apr 11, 2024
254202a
revert changes
drernie Apr 11, 2024
024b1bd
stub test_set_dir_cannot_list_object_versions
drernie Apr 11, 2024
16bf0ae
FAIL test_set_dir_allow_unversioned
drernie Apr 11, 2024
7d4ab0e
catch error
drernie Apr 11, 2024
600c934
pass test
drernie Apr 11, 2024
83ce73c
Update CHANGELOG.md
drernie Apr 11, 2024
56864f7
isort
drernie Apr 11, 2024
e3d4a02
update api doc
drernie Apr 11, 2024
41e83d2
reformat gendoc
drernie Apr 11, 2024
12fa8e7
indent docs more
drernie Apr 11, 2024
117763b
unversioned: bool
drernie Apr 11, 2024
127f4c2
filter objects to IsLatest sooner
drernie Apr 11, 2024
ee26cd7
toggle on 'unversioned' instead of error
drernie Apr 11, 2024
85676fe
fail test_set_package_entry_unversioned
drernie Apr 11, 2024
e070f0e
pass test_set_package_entry_unversioned
drernie Apr 11, 2024
b66dcb1
pycodestyle: shorten long lines
drernie Apr 11, 2024
6bb26e8
Update api/python/quilt3/packages.py
sir-sigurd Apr 12, 2024
0cc1376
rework tests
sir-sigurd Apr 12, 2024
e16e859
Update docs/CHANGELOG.md
sir-sigurd Apr 12, 2024
049b4bd
put the comment back
sir-sigurd Apr 12, 2024
37f03d1
lint
sir-sigurd Apr 12, 2024
ad1b9d9
Merge branch 'master' into set-dir-without-versions
drernie Apr 14, 2024
e845b36
Fix S3 URI reference in CHANGELOG
drernie Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions api/python/quilt3/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
legacy_calculate_checksum,
legacy_calculate_checksum_bytes,
list_object_versions,
list_objects,
list_url,
put_bytes,
)
Expand Down Expand Up @@ -841,7 +842,7 @@ def _load(cls, readable_file):
gc.enable()
return pkg

def set_dir(self, lkey, path=None, meta=None, update_policy="incoming"):
def set_dir(self, lkey, path=None, meta=None, update_policy="incoming", unversioned: bool = False):
"""
Adds all files from `path` to the package.

Expand All @@ -858,6 +859,7 @@ def set_dir(self, lkey, path=None, meta=None, update_policy="incoming"):
If 'incoming', whenever logical keys match, always take the new entry from set_dir.
If 'existing', whenever logical keys match, retain existing entries
and ignore new entries from set_dir.
unversioned(bool): when True, do not retrieve VersionId for S3 physical keys.

Returns:
self
Expand Down Expand Up @@ -910,10 +912,13 @@ def set_dir(self, lkey, path=None, meta=None, update_policy="incoming"):
src_path = src.path
if src.basename() != '':
src_path += '/'
objects, _ = list_object_versions(src.bucket, src_path)
if not unversioned:
objects, _ = list_object_versions(src.bucket, src_path)
objects = filter(lambda obj: obj["IsLatest"], objects)
else:
objects = list_objects(src.bucket, src_path, recursive=True)

for obj in objects:
if not obj['IsLatest']:
continue
# Skip S3 pseduo directory files and Keys that end in /
sir-sigurd marked this conversation as resolved.
Show resolved Hide resolved
if obj['Key'].endswith('/'):
if obj['Size'] != 0:
Expand Down Expand Up @@ -1134,7 +1139,15 @@ def manifest(self):
for logical_key, entry in self.walk():
yield {'logical_key': logical_key, **entry.as_dict()}

def set(self, logical_key, entry=None, meta=None, serialization_location=None, serialization_format_opts=None):
def set(
self,
logical_key,
entry=None,
meta=None,
serialization_location=None,
serialization_format_opts=None,
unversioned: bool = False,
):
"""
Returns self with the object at logical_key set to entry.

Expand All @@ -1154,6 +1167,7 @@ def set(self, logical_key, entry=None, meta=None, serialization_location=None, s
https://github.com/quiltdata/quilt/blob/master/api/python/quilt3/formats.py
serialization_location(string): Optional. If passed in, only used if entry is an object. Where the
serialized object should be written, e.g. "./mydataframe.parquet"
unversioned(bool): when True, do not retrieve VersionId for S3 physical keys.

Returns:
self
Expand All @@ -1162,9 +1176,18 @@ def set(self, logical_key, entry=None, meta=None, serialization_location=None, s
entry=entry,
meta=meta,
serialization_location=serialization_location,
serialization_format_opts=serialization_format_opts)
serialization_format_opts=serialization_format_opts,
unversioned=unversioned)

def _set(self, logical_key, entry=None, meta=None, serialization_location=None, serialization_format_opts=None):
def _set(
self,
logical_key,
entry=None,
meta=None,
serialization_location=None,
serialization_format_opts=None,
unversioned: bool = False,
):
if not logical_key or logical_key.endswith('/'):
raise QuiltException(
f"Invalid logical key {logical_key!r}. "
Expand All @@ -1181,7 +1204,7 @@ def _set(self, logical_key, entry=None, meta=None, serialization_location=None,
size, version_id = get_size_and_version(src)

# Determine if a new version needs to be appended.
if not src.is_local() and src.version_id is None and version_id is not None:
if not src.is_local() and src.version_id is None and version_id is not None and not unversioned:
src.version_id = version_id
entry = PackageEntry(src, size, None, None)
elif isinstance(entry, PackageEntry):
Expand Down
30 changes: 30 additions & 0 deletions api/python/tests/integration/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,25 @@ def test_set_dir_wrong_update_policy(self):
pkg.set_dir("nested", DATA_DIR, update_policy='invalid_policy')
assert expected_err in str(e.value)

@mock.patch("quilt3.packages.list_objects")
@mock.patch("quilt3.packages.list_object_versions")
def test_set_dir_unversioned(self, list_object_versions_mock, list_objects_mock):
list_objects_mock.return_value = [
{
"Key": "foo/bar.txt",
"Size": 123,
},
]

pkg = Package().set_dir(".", "s3://bucket/foo", unversioned=True)

list_object_versions_mock.assert_not_called()
list_objects_mock.assert_called_once_with("bucket", "foo/", recursive=True)
assert [
(lk, e.get())
for lk, e in pkg.walk()
] == [("bar.txt", "s3://bucket/foo/bar.txt")]

def test_package_entry_meta(self):
pkg = (
Package()
Expand Down Expand Up @@ -746,6 +765,17 @@ def test_set_package_entry_as_object(self):
file_path = pkg[lk].physical_key.path
assert not pathlib.Path(file_path).exists(), "These temp files should have been deleted during push()"

@patch("quilt3.packages.get_size_and_version", mock.Mock(return_value=(123, "v1")))
def test_set_package_entry_unversioned_flag(self):
for flag_value, version_id in {
True: None,
False: "v1",
}.items():
with self.subTest(flag_value=flag_value, version_id=version_id):
pkg = Package()
pkg.set("bar", "s3://bucket/bar", unversioned=flag_value)
assert pkg["bar"].physical_key == PhysicalKey("bucket", "bar", version_id)

def test_tophash_changes(self):
test_file = Path('test.txt')
test_file.write_text('asdf', 'utf-8')
Expand Down
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Entries inside each section should be ordered by type:

# unreleased - YYYY-MM-DD
## Python API
* [Added] New 'unversioned' parameter to `Package.set_dir()` and `Package.set()` for use with S3 URIs, such as HealthOmics, that do not support `ListBucketVersions` and/or `GetObjectVersion` ([#3927](https://github.com/quiltdata/quilt/pull/3927))
* [Removed] Drop Python 3.7 support ([#3841](https://github.com/quiltdata/quilt/pull/3841))
* [Changed] Set S3 client `max_pool_connections` to `QUILT_TRANSFER_MAX_CONCURRENCY` ([#3867](https://github.com/quiltdata/quilt/pull/3867))
* [Changed] **BREAKING:** Switch from a regular SHA256 checksum to a hash list (`sha2-256-chunked`) to match S3's built-in checksums ([#2782](https://github.com/quiltdata/quilt/pull/2782))
Expand Down
6 changes: 4 additions & 2 deletions docs/api-reference/Package.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ json decode error
invalid package exception


## Package.set\_dir(self, lkey, path=None, meta=None, update\_policy='incoming') {#Package.set\_dir}
## Package.set\_dir(self, lkey, path=None, meta=None, update\_policy='incoming', unversioned: bool = False) {#Package.set\_dir}

Adds all files from `path` to the package.

Expand All @@ -149,6 +149,7 @@ __Arguments__
If 'incoming', whenever logical keys match, always take the new entry from set_dir.
If 'existing', whenever logical keys match, retain existing entries
and ignore new entries from set_dir.
* __unversioned(bool)__: when True, do not retrieve VersionId for S3 physical keys.

__Returns__

Expand Down Expand Up @@ -230,7 +231,7 @@ fail to create file
fail to finish write


## Package.set(self, logical\_key, entry=None, meta=None, serialization\_location=None, serialization\_format\_opts=None) {#Package.set}
## Package.set(self, logical\_key, entry=None, meta=None, serialization\_location=None, serialization\_format\_opts=None, unversioned: bool = False) {#Package.set}

Returns self with the object at logical_key set to entry.

Expand All @@ -251,6 +252,7 @@ __Arguments__
* __https__: //github.com/quiltdata/quilt/blob/master/api/python/quilt3/formats.py
* __serialization_location(string)__: Optional. If passed in, only used if entry is an object. Where the
serialized object should be written, e.g. "./mydataframe.parquet"
* __unversioned(bool)__: when True, do not retrieve VersionId for S3 physical keys.

__Returns__

Expand Down
Loading