Skip to content

Commit

Permalink
default is to not repack; test separately and skip on windows
Browse files Browse the repository at this point in the history
  • Loading branch information
dmeliza committed Jan 3, 2025
1 parent 87b81f4 commit 4fdd81f
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 36 deletions.
Binary file modified .coverage
Binary file not shown.
55 changes: 38 additions & 17 deletions arfx/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
import argparse
import logging
import os
import sys
import shutil
import subprocess
from collections.abc import Container, Iterable, Sequence
from functools import lru_cache
from pathlib import Path, PurePosixPath
from typing import Optional, Union
from tempfile import TemporaryDirectory
from typing import Optional, Tuple, Union

import arf
import h5py as h5
Expand Down Expand Up @@ -87,7 +86,7 @@ def entry_repr(entry: h5.Group) -> str:
return out


def dataset_properties(dset: h5.Dataset) -> tuple[str, str, int]:
def dataset_properties(dset: h5.Dataset) -> Tuple[str, str, int]:
"""Infers the type of data and some properties of an hdf5 dataset.
Returns tuple: (sampled|event|interval|unknown), (array|table|vlarray), ncol
Expand Down Expand Up @@ -352,12 +351,14 @@ def extract_entries(
log.debug("%s -> %s", dset.name, fname)


def delete_entries(src: Union[Path, str], entries: Iterable[str], *, repack: bool = True, **options):
def delete_entries(
src: Union[Path, str], entries: Iterable[str], *, repack: bool = False, **options
):
"""
Delete one or more entries from a file.
entries: list of the entries to delete
repack: if True (default), repack the file afterward to reclaim space
repack: if True, repack the file afterward to reclaim space
"""
src = Path(src)
if not src.is_file():
Expand Down Expand Up @@ -456,7 +457,13 @@ def list_entries(
print(entry_repr(arfp[ename]))


def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *, verbose: bool = False, **metadata):
def update_entries(
src: Union[Path, str],
entries: Optional[Container[str]],
*,
verbose: bool = False,
**metadata,
):
"""
Update metadata on one or more entries.
Expand All @@ -472,7 +479,7 @@ def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *,
arf.check_file_version(arfp)
except Warning as e:
log.warning("warning: %s", e)
for i, entry_name in enumerate(arfp):
for entry_name in arfp:
entry_name = PurePosixPath(entry_name).name
if entries is None or entry_name in entries:
enode = arfp[entry_name]
Expand All @@ -486,7 +493,9 @@ def update_entries(src: Union[Path, str], entries: Optional[Container[str]], *,
print("^^^^^^^^^^")


def write_toplevel_attribute(tgt: Union[Path, str], files: Iterable[Union[Path, str]], **options) -> None:
def write_toplevel_attribute(
tgt: Union[Path, str], files: Iterable[Union[Path, str]], **options
) -> None:
"""Store contents of files as text in top-level attribute with basename of each file"""
with arf.open_file(tgt, "a") as arfp:
try:
Expand All @@ -500,7 +509,9 @@ def write_toplevel_attribute(tgt: Union[Path, str], files: Iterable[Union[Path,
arfp.attrs[attrname] = fname.read_text()


def read_toplevel_attribute(src: Union[Path, str], attrnames: Iterable[str], **options) -> None:
def read_toplevel_attribute(
src: Union[Path, str], attrnames: Iterable[str], **options
) -> None:
"""Print text data stored in top-level attributes by write_toplevel_attribute()"""
with arf.open_file(src, "r") as arfp:
try:
Expand All @@ -516,7 +527,9 @@ def read_toplevel_attribute(src: Union[Path, str], attrnames: Iterable[str], **o
print(" - no such attribute")


def repack_file(src: Union[Path, str], *, compress: Optional[int] = None, **options) -> None:
def repack_file(
src: Union[Path, str], *, compress: Optional[int] = None, **options
) -> None:
"""Call h5repack on a file"""
src_path = Path(src)
if not src_path.is_file():
Expand All @@ -531,16 +544,20 @@ def repack_file(src: Union[Path, str], *, compress: Optional[int] = None, **opti
[*cmd, str(src_path), str(tgt_file)],
capture_output=True,
text=True,
check=False
check=False,
)
if result.returncode == 0:
log.info("Repacked %s", src_path)
shutil.copy2(tgt_file, src_path)
else:
log.error("Failed to repack %s, keeping original. Error: %s", src_path, result.stderr.strip())
log.error(
"Failed to repack %s, keeping original. Error: %s",
src_path,
result.stderr.strip(),
)
except subprocess.SubprocessError as e:
log.exception("Error executing h5repack command: %s", e)


class ParseKeyVal(argparse.Action):
def __call__(self, parser, namespace, arg, option_string=None):
Expand All @@ -560,7 +577,7 @@ def __call__(self, parser, namespace, arg, option_string=None):
if arg.isdigit():
setattr(namespace, self.dest, arf.DataTypes(int(arg)))
else:
setattr(namespace, self.dest, arf.DataTypes[arg])
setattr(namespace, self.dest, arf.DataTypes[arg])


def setup_log(log, debug=False):
Expand Down Expand Up @@ -712,8 +729,8 @@ def arfx(argv=None):
)
g.add_argument(
"-P",
help="don't repack when deleting entries",
action="store_false",
help="repack when deleting entries",
action="store_true",
dest="repack",
)
g.add_argument(
Expand All @@ -723,7 +740,11 @@ def arfx(argv=None):
default=1,
dest="compress",
)
g.add_argument("--directory", help="when extracting files, store them in this directory", type=Path)
g.add_argument(
"--directory",
help="when extracting files, store them in this directory",
type=Path,
)

args = p.parse_args(argv)
setup_log(log, args.verbose)
Expand Down
4 changes: 2 additions & 2 deletions arfx/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ def open(filename: Union[str, Path], *args, **kwargs):
cls = ep.load()
return cls(filename, *args, **kwargs)
except ValueError:
raise ValueError(f"No handler defined for files of type '{ext}'")
raise ValueError(f"No handler defined for files of type '{ext}'") from None
except TypeError:
# shim for python < 3.10
for ep in entry_points().get(_entrypoint, []):
if ep.name == ext:
cls = ep.load()
return cls(filename, *args, **kwargs)
raise ValueError(f"No handler defined for files of type '{ext}'")
raise ValueError(f"No handler defined for files of type '{ext}'") from None


def list_plugins() -> str:
Expand Down
43 changes: 31 additions & 12 deletions test/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# -*- mode: python -*-
import sys
import time
from pathlib import Path

Expand Down Expand Up @@ -113,15 +114,20 @@ def test_add_entries(src_wav_files, tmp_path):

def test_add_entries_with_metadata(src_wav_files, tmp_path):
tgt_file = tmp_path / "output.arf"
core.add_entries(tgt_file, src_wav_files, datatype=arf.DataTypes.ACOUSTIC, attrs= {"my_attr": "test_value"})
core.add_entries(
tgt_file,
src_wav_files,
datatype=arf.DataTypes.ACOUSTIC,
attrs={"my_attr": "test_value"},
)
with arf.open_file(tgt_file, "r") as fp:
assert len(fp) == 3
for dset, entry in zip(datasets, fp.values()):
assert Path(entry.name).name == dset["name"]
assert entry.attrs["my_attr"] == "test_value"
d = entry["pcm"] # data always stored as pcm
assert d.attrs["datatype"] == arf.DataTypes.ACOUSTIC


def test_add_entries_with_template(src_wav_files, tmp_path):
tgt_file = tmp_path / "output.arf"
Expand All @@ -138,7 +144,16 @@ def test_add_entries_with_template(src_wav_files, tmp_path):
def test_script_add_entries(src_wav_files, tmp_path):
tgt_file = tmp_path / "output.arf"
src_wav_files = [str(path) for path in src_wav_files]
argv = ["-cvf", str(tgt_file), "-T", "ACOUSTIC", "-k", "this=that", "-z 9", *src_wav_files]
argv = [
"-cvf",
str(tgt_file),
"-T",
"ACOUSTIC",
"-k",
"this=that",
"-z 9",
*src_wav_files,
]
core.arfx(argv)
with arf.open_file(tgt_file, "r") as fp:
assert len(fp) == 3
Expand All @@ -149,7 +164,7 @@ def test_script_add_entries(src_wav_files, tmp_path):
assert d.shape == dset["data"].shape
assert np.all(d[:] == dset["data"])


def test_extract_entries(src_arf_file, tmp_path):
core.extract_entries(src_arf_file, directory=tmp_path)
# only the sampled data can be extracted
Expand Down Expand Up @@ -177,7 +192,7 @@ def test_extract_entries_with_template(src_arf_file, tmp_path):
assert data.shape == dset["data"].shape
assert np.all(data == dset["data"])


def test_extract_entry(src_arf_file, tmp_path):
core.extract_entries(src_arf_file, ["entry"], directory=tmp_path)
# only the sampled data can be extracted
Expand All @@ -196,7 +211,7 @@ def test_extract_nonexistent_entry(src_arf_file, tmp_path):
for dset in datasets[:3]:
tgt_file = tmp_path / f"entry_{dset['name']}.wav"
assert not tgt_file.exists()


def test_script_extract_entries(src_arf_file, tmp_path):
argv = ["-xvf", str(src_arf_file), "--directory", str(tmp_path)]
Expand All @@ -210,7 +225,7 @@ def test_script_extract_entries(src_arf_file, tmp_path):
assert data.shape == dset["data"].shape
assert np.all(data == dset["data"])


def test_delete_entry(src_arf_file):
core.delete_entries(src_arf_file, ["entry"])
with arf.open_file(src_arf_file, "r") as fp:
Expand All @@ -222,13 +237,13 @@ def test_delete_nonexistent_entry(src_arf_file):
with arf.open_file(src_arf_file, "r") as fp:
assert "entry" in fp


def test_update_all_entries(src_arf_file):
    """Passing ``None`` as the entry filter must apply the metadata to the file's entry."""
    expected = "test_value"
    core.update_entries(src_arf_file, None, my_attr=expected)
    with arf.open_file(src_arf_file, "r") as arfp:
        entry_attrs = arfp["entry"].attrs
        assert entry_attrs["my_attr"] == expected


def test_update_entry(src_arf_file):
core.update_entries(src_arf_file, ["entry"], my_attr="test_value")
with arf.open_file(src_arf_file, "r") as fp:
Expand All @@ -239,7 +254,7 @@ def test_update_nonexistent_entry(src_arf_file):
core.update_entries(src_arf_file, ["no_such_entry"], my_attr="test_value")
with arf.open_file(src_arf_file, "r") as fp:
assert "my_attr" not in fp["entry"].attrs


def test_copy_file(src_arf_file, tmp_path):
tgt_file = tmp_path / "output.arf"
Expand Down Expand Up @@ -323,8 +338,12 @@ def test_toplevel_attributes(src_arf_file, tmp_path):
# just test that the read function works
core.read_toplevel_attribute(src_arf_file, ["my_text.txt"])


@pytest.mark.skipif(sys.platform == "win32", reason="Test does not run on Windows")
def test_repack(src_arf_file):
    """Repack the fixture file and check that it is still a readable ARF file.

    ``core.repack_file`` copies the repacked output back over the source
    path, so "did not raise" alone would not notice a clobbered file; the
    test also verifies that the fixture's entry is still present afterward.
    """
    core.repack_file(src_arf_file, compress=9)
    with arf.open_file(src_arf_file, "r") as fp:
        assert "entry" in fp


def test_repack_nonexistent_file(tmp_path):
    """Repacking a path that does not exist must raise ``FileNotFoundError``."""
    missing = tmp_path / "no_such_file.arf"
    with pytest.raises(FileNotFoundError):
        core.repack_file(missing)


10 changes: 5 additions & 5 deletions test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,25 @@ def test_included_plugins():

def test_open_mda(tmp_path):
tmp_file = tmp_path / "test.mda"
fp = io.open(tmp_file, "w", sampling_rate=20000)
_fp = io.open(tmp_file, "w", sampling_rate=20000)


def test_open_pcm(tmp_path):
tmp_file = tmp_path / "test.pcm"
fp = io.open(tmp_file, "w", sampling_rate=20000)
_fp = io.open(tmp_file, "w", sampling_rate=20000)


def test_open_npy(tmp_path):
tmp_file = tmp_path / "test.npy"
fp = io.open(tmp_file, "w", sampling_rate=20000)
_fp = io.open(tmp_file, "w", sampling_rate=20000)


def test_open_wav(tmp_path):
tmp_file = tmp_path / "test.wav"
fp = io.open(tmp_file, "w", sampling_rate=20000)
_fp = io.open(tmp_file, "w", sampling_rate=20000)


def test_unsupported_format(tmp_path):
tmp_file = tmp_path / "test.blah"
with pytest.raises(ValueError):
fp = io.open(tmp_file, "w")
_fp = io.open(tmp_file, "w")

0 comments on commit 4fdd81f

Please sign in to comment.