Skip to content

Commit

Permalink
Merge pull request #472 from European-XFEL/feat/validation-file-access
Browse files Browse the repository at this point in the history
Validation: clearer info on filesystem errors
  • Loading branch information
takluyver authored Dec 13, 2023
2 parents 88b5725 + bab64df commit 6ffb284
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 11 deletions.
13 changes: 11 additions & 2 deletions extra_data/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pytest import fixture, raises
from tempfile import TemporaryDirectory

from extra_data.validation import FileAccess, FileValidator, RunValidator, ValidationError
from extra_data.validation import FileAccess, FileValidator, RunValidator, ValidationError, main
from . import make_examples


Expand Down Expand Up @@ -39,7 +39,7 @@ def test_file_error(mock_fxe_raw_run):

problems = RunValidator(mock_fxe_raw_run).run_checks()
assert len(problems) == 1
assert problems[0]['msg'] == 'Could not open file'
assert problems[0]['msg'] == 'Could not access file'
assert problems[0]['file'] == str(not_readable)


Expand Down Expand Up @@ -170,3 +170,12 @@ def test_control_data_timestamps(data_aggregator_file):
assert problem['msg'] == 'Timestamp is decreasing, e.g. at 10 (5 < 10)'
assert problem['dataset'] == 'CONTROL/SA1_XTD2_XGM/DOOCS/MAIN/pulseEnergy/photonFlux/timestamp'
assert 'RAW-R0450-DA01-S00001.h5' in problem['file']


def test_main_file_non_h5(tmp_path, capsys):
    """Check how ``main`` reports a readable file that is not valid HDF5.

    The file can be read from disk, so the expected outcome is an exit
    status of 1 and the 'Could not open HDF5 file' message on stdout.
    """
    bogus_file = tmp_path / 'notHDF5.h5'
    bogus_file.write_text("Accessible file, not HDF5")

    exit_code = main([str(bogus_file)])
    assert exit_code == 1

    captured = capsys.readouterr()
    assert 'Could not open HDF5 file' in captured.out
35 changes: 26 additions & 9 deletions extra_data/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,18 +211,30 @@ def check_index_contiguous(firsts, counts, record):
))


def _check_file(args):
runpath, filename = args
filepath = osp.join(runpath, filename)
problems = []
def _open_file(filepath):
    """Open *filepath* with ``FileAccess``, classifying the failure if any.

    Returns a ``(file_access, problems)`` pair:

    * ``(fa, [])`` when ``FileAccess(filepath)`` succeeds.
    * ``(None, [problem])`` when it raises. ``problem`` is a dict with the
      keys ``msg``, ``file`` and ``error``.

    When ``FileAccess`` fails, the file is re-opened with plain ``open()``
    and a few bytes are read, to tell the two failure causes apart:

    * the plain read raises ``OSError``: ``msg`` is "Could not access file"
      and ``error`` is that ``OSError``;
    * the plain read works: ``msg`` is "Could not open HDF5 file" and
      ``error`` is the exception raised by ``FileAccess``.
    """
    try:
        fa = FileAccess(filepath)
    except Exception as e:
        try:
            # Reading a few bytes is enough to exercise the filesystem.
            with open(filepath, "rb") as f:
                f.read(16)
        except OSError as e2:
            # Filesystem issue, e.g. dCache node down. HDF5 errors can be
            # confusing, so record the OS error instead.
            pb = dict(msg="Could not access file", file=filepath, error=e2)
        else:
            # HDF5 file corrupted or missing expected information
            pb = dict(msg="Could not open HDF5 file", file=filepath, error=e)
        return None, [pb]
    else:
        return fa, []


def _check_file(args):
runpath, filename = args
filepath = osp.join(runpath, filename)
fa, problems = _open_file(filepath)
if fa is not None:
fv = FileValidator(fa)
problems.extend(fv.run_checks())
fa.close()
Expand Down Expand Up @@ -328,7 +340,12 @@ def main(argv=None):
validator = RunValidator(path, term_progress=True)
else:
print("Checking file:", path)
validator = FileValidator(H5File(path).files[0])
fa, problems = _open_file(path)
if problems:
print(str(ValidationError(problems)))
return 1

validator = FileValidator(fa)

try:
validator.run_checks()
Expand Down

0 comments on commit 6ffb284

Please sign in to comment.