From 4fdd81f942d5a953d4da3baeb936feaf5b9af9aa Mon Sep 17 00:00:00 2001 From: Dan Meliza Date: Fri, 3 Jan 2025 14:43:39 -0500 Subject: [PATCH] default is to not repack; test separately and skip on windows --- .coverage | Bin 53248 -> 53248 bytes arfx/core.py | 55 ++++++++++++++++++++++++++++++++-------------- arfx/io.py | 4 ++-- test/test_core.py | 43 ++++++++++++++++++++++++++---------- test/test_io.py | 10 ++++----- 5 files changed, 76 insertions(+), 36 deletions(-) diff --git a/.coverage b/.coverage index 64da414ee14a9adc8a51b1bee3e0035fd407b9bc..5eddbdedcf8bb3ac09f06c0f61c00849fbd2db19 100644 GIT binary patch delta 34 scmV+-0Nww9paX!Q1F*`E6O0$|@V~ut;Ca~z%=s8PpU#K=v)zv)KspQ(n*aa+ delta 34 scmV+-0Nww9paX!Q1F*`E6Py?D@V~ut;Ca~z%=s8PpU#KAv)zv)KspQ(YXATM diff --git a/arfx/core.py b/arfx/core.py index 1e0be1b..37b6bd4 100644 --- a/arfx/core.py +++ b/arfx/core.py @@ -19,14 +19,13 @@ import argparse import logging import os -import sys import shutil import subprocess from collections.abc import Container, Iterable, Sequence from functools import lru_cache from pathlib import Path, PurePosixPath -from typing import Optional, Union from tempfile import TemporaryDirectory +from typing import Optional, Tuple, Union import arf import h5py as h5 @@ -87,7 +86,7 @@ def entry_repr(entry: h5.Group) -> str: return out -def dataset_properties(dset: h5.Dataset) -> tuple[str, str, int]: +def dataset_properties(dset: h5.Dataset) -> Tuple[str, str, int]: """Infers the type of data and some properties of an hdf5 dataset. Returns tuple: (sampled|event|interval|unknown), (array|table|vlarry), ncol @@ -352,12 +351,14 @@ def extract_entries( log.debug("%s -> %s", dset.name, fname) -def delete_entries(src: Union[Path, str], entries: Iterable[str], *, repack: bool = True, **options): +def delete_entries( + src: Union[Path, str], entries: Iterable[str], *, repack: bool = False, **options +): """ Delete one or more entries from a file. entries: list of the entries to delete - repack: if True (default), repack the file afterward to reclaim space + repack: if True, repack the file afterward to reclaim space """ src = Path(src) if not src.is_file(): @@ -456,7 +457,13 @@ def list_entries( print(entry_repr(arfp[ename])) -def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *, verbose: bool = False, **metadata): +def update_entries( + src: Union[Path, str], + entries: Optional[Container[str]], + *, + verbose: bool = False, + **metadata, +): """ Update metadata on one or more entries. @@ -472,7 +479,7 @@ def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *, arf.check_file_version(arfp) except Warning as e: log.warning("warning: %s", e) - for i, entry_name in enumerate(arfp): + for entry_name in arfp: entry_name = PurePosixPath(entry_name).name if entries is None or entry_name in entries: enode = arfp[entry_name] @@ -486,7 +493,9 @@ def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *, print("^^^^^^^^^^") -def write_toplevel_attribute(tgt: Union[Path, str], files: Iterable[Union[Path, str]], **options) -> None: +def write_toplevel_attribute( + tgt: Union[Path, str], files: Iterable[Union[Path, str]], **options +) -> None: """Store contents of files as text in top-level attribute with basename of each file""" with arf.open_file(tgt, "a") as arfp: try: @@ -500,7 +509,9 @@ def write_toplevel_attribute(tgt: Union[Path, str], files: Iterable[Union[Path, arfp.attrs[attrname] = fname.read_text() -def read_toplevel_attribute(src: Union[Path, str], attrnames: Iterable[str], **options) -> None: +def read_toplevel_attribute( + src: Union[Path, str], attrnames: Iterable[str], **options +) -> None: """Print text data stored in top-level attributes by write_toplevel_attribute()""" with arf.open_file(src, "r") as arfp: try: @@ -516,7 +527,9 @@ def read_toplevel_attribute(src: Union[Path, str], attrnames: Iterable[str], **o print(" - no such attribute") -def repack_file(src: Union[Path, str], *, compress: Optional[int] = None, **options) -> None: +def repack_file( + src: Union[Path, str], *, compress: Optional[int] = None, **options +) -> None: """Call h5repack on a file""" src_path = Path(src) if not src_path.is_file(): @@ -531,16 +544,20 @@ def repack_file(src: Union[Path, str], *, compress: Optional[int] = None, **opti [*cmd, str(src_path), str(tgt_file)], capture_output=True, text=True, - check=False + check=False, ) if result.returncode == 0: log.info("Repacked %s", src_path) shutil.copy2(tgt_file, src_path) else: - log.error("Failed to repack %s, keeping original. Error: %s", src_path, result.stderr.strip()) + log.error( + "Failed to repack %s, keeping original. Error: %s", + src_path, + result.stderr.strip(), + ) except subprocess.SubprocessError as e: log.exception("Error executing h5repack command: %s", e) - + class ParseKeyVal(argparse.Action): def __call__(self, parser, namespace, arg, option_string=None): @@ -560,7 +577,7 @@ def __call__(self, parser, namespace, arg, option_string=None): if arg.isdigit(): setattr(namespace, self.dest, arf.DataTypes(int(arg))) else: - setattr(namespace, self.dest, arf.DataTypes[arg]) + setattr(namespace, self.dest, arf.DataTypes[arg]) def setup_log(log, debug=False): @@ -712,8 +729,8 @@ def arfx(argv=None): ) g.add_argument( "-P", - help="don't repack when deleting entries", - action="store_false", + help="repack when deleting entries", + action="store_true", dest="repack", ) g.add_argument( @@ -723,7 +740,11 @@ def arfx(argv=None): default=1, dest="compress", ) - g.add_argument("--directory", help="when extracting files, store them in this directory", type=Path) + g.add_argument( + "--directory", + help="when extracting files, store them in this directory", + type=Path, + ) args = p.parse_args(argv) setup_log(log, args.verbose) diff --git a/arfx/io.py b/arfx/io.py index ecbd41f..db0ccf2 100644 --- a/arfx/io.py +++ b/arfx/io.py @@ -38,14 +38,14 @@ def open(filename: Union[str, Path], *args, **kwargs): cls = ep.load() return cls(filename, *args, **kwargs) except ValueError: - raise ValueError(f"No handler defined for files of type '{ext}'") + raise ValueError(f"No handler defined for files of type '{ext}'") from None except TypeError: # shim for python < 3.10 for ep in entry_points().get(_entrypoint, []): if ep.name == ext: cls = ep.load() return cls(filename, *args, **kwargs) - raise ValueError(f"No handler defined for files of type '{ext}'") + raise ValueError(f"No handler defined for files of type '{ext}'") from None def list_plugins() -> str: diff --git a/test/test_core.py b/test/test_core.py index e04b241..9d20e90 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # -*- mode: python -*- +import sys import time from pathlib import Path @@ -113,7 +114,12 @@ def test_add_entries(src_wav_files, tmp_path): def test_add_entries_with_metadata(src_wav_files, tmp_path): tgt_file = tmp_path / "output.arf" - core.add_entries(tgt_file, src_wav_files, datatype=arf.DataTypes.ACOUSTIC, attrs= {"my_attr": "test_value"}) + core.add_entries( + tgt_file, + src_wav_files, + datatype=arf.DataTypes.ACOUSTIC, + attrs={"my_attr": "test_value"}, + ) with arf.open_file(tgt_file, "r") as fp: assert len(fp) == 3 for dset, entry in zip(datasets, fp.values()): @@ -121,7 +127,7 @@ def test_add_entries_with_metadata(src_wav_files, tmp_path): assert entry.attrs["my_attr"] == "test_value" d = entry["pcm"] # data always stored as pcm assert d.attrs["datatype"] == arf.DataTypes.ACOUSTIC - + def test_add_entries_with_template(src_wav_files, tmp_path): tgt_file = tmp_path / "output.arf" @@ -138,7 +144,16 @@ def test_add_entries_with_template(src_wav_files, tmp_path): def test_script_add_entries(src_wav_files, tmp_path): tgt_file = tmp_path / "output.arf" src_wav_files = [str(path) for path in src_wav_files] - argv = ["-cvf", str(tgt_file), "-T", "ACOUSTIC", "-k", "this=that", "-z 9", *src_wav_files] + argv = [ + "-cvf", + str(tgt_file), + "-T", + "ACOUSTIC", + "-k", + "this=that", + "-z 9", + *src_wav_files, + ] core.arfx(argv) with arf.open_file(tgt_file, "r") as fp: assert len(fp) == 3 @@ -149,7 +164,7 @@ def test_script_add_entries(src_wav_files, tmp_path): assert d.shape == dset["data"].shape assert np.all(d[:] == dset["data"]) - + def test_extract_entries(src_arf_file, tmp_path): core.extract_entries(src_arf_file, directory=tmp_path) # only the sampled data can be extracted @@ -177,7 +192,7 @@ def test_extract_entries_with_template(src_arf_file, tmp_path): assert data.shape == dset["data"].shape assert np.all(data == dset["data"]) - + def test_extract_entry(src_arf_file, tmp_path): core.extract_entries(src_arf_file, ["entry"], directory=tmp_path) # only the sampled data can be extracted @@ -196,7 +211,7 @@ def test_extract_nonexistent_entry(src_arf_file, tmp_path): for dset in datasets[:3]: tgt_file = tmp_path / f"entry_{dset['name']}.wav" assert not tgt_file.exists() - + def test_script_extract_entries(src_arf_file, tmp_path): argv = ["-xvf", str(src_arf_file), "--directory", str(tmp_path)] @@ -210,7 +225,7 @@ def test_script_extract_entries(src_arf_file, tmp_path): assert data.shape == dset["data"].shape assert np.all(data == dset["data"]) - + def test_delete_entry(src_arf_file): core.delete_entries(src_arf_file, ["entry"]) with arf.open_file(src_arf_file, "r") as fp: @@ -222,13 +237,13 @@ def test_delete_nonexistent_entry(src_arf_file): with arf.open_file(src_arf_file, "r") as fp: assert "entry" in fp - + def test_update_all_entries(src_arf_file): core.update_entries(src_arf_file, None, my_attr="test_value") with arf.open_file(src_arf_file, "r") as fp: assert fp["entry"].attrs["my_attr"] == "test_value" - + def test_update_entry(src_arf_file): core.update_entries(src_arf_file, ["entry"], my_attr="test_value") with arf.open_file(src_arf_file, "r") as fp: @@ -239,7 +254,7 @@ def test_update_nonexistent_entry(src_arf_file): core.update_entries(src_arf_file, ["no_such_entry"], my_attr="test_value") with arf.open_file(src_arf_file, "r") as fp: assert "my_attr" not in fp["entry"].attrs - + def test_copy_file(src_arf_file, tmp_path): tgt_file = tmp_path / "output.arf" @@ -323,8 +338,12 @@ def test_toplevel_attributes(src_arf_file, tmp_path): # just test that the read function works core.read_toplevel_attribute(src_arf_file, ["my_text.txt"]) + +@pytest.mark.skipif(sys.platform == "win32", reason="Test does not run on Windows") +def test_repack(src_arf_file): + core.repack_file(src_arf_file, compress=9) + + def test_repack_nonexistent_file(tmp_path): with pytest.raises(FileNotFoundError): core.repack_file(tmp_path / "no_such_file.arf") - - diff --git a/test/test_io.py b/test/test_io.py index 47cbc35..0550c95 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -12,25 +12,25 @@ def test_included_plugins(): def test_open_mda(tmp_path): tmp_file = tmp_path / "test.mda" - fp = io.open(tmp_file, "w", sampling_rate=20000) + _fp = io.open(tmp_file, "w", sampling_rate=20000) def test_open_pcm(tmp_path): tmp_file = tmp_path / "test.pcm" - fp = io.open(tmp_file, "w", sampling_rate=20000) + _fp = io.open(tmp_file, "w", sampling_rate=20000) def test_open_npy(tmp_path): tmp_file = tmp_path / "test.npy" - fp = io.open(tmp_file, "w", sampling_rate=20000) + _fp = io.open(tmp_file, "w", sampling_rate=20000) def test_open_wav(tmp_path): tmp_file = tmp_path / "test.wav" - fp = io.open(tmp_file, "w", sampling_rate=20000) + _fp = io.open(tmp_file, "w", sampling_rate=20000) def test_unsupported_format(tmp_path): tmp_file = tmp_path / "test.blah" with pytest.raises(ValueError): - fp = io.open(tmp_file, "w") + _fp = io.open(tmp_file, "w")