diff --git a/dorado/read_pipeline/ScalerNode.cpp b/dorado/read_pipeline/ScalerNode.cpp index 7deca4505..69922d4d5 100644 --- a/dorado/read_pipeline/ScalerNode.cpp +++ b/dorado/read_pipeline/ScalerNode.cpp @@ -141,7 +141,8 @@ void ScalerNode::input_thread_fn() { int trim_start = 0; if (is_rna_model) { trim_start = determine_rna_adapter_pos(*read, m_model_type); - if (m_trim_rna_adapter) { + if (m_trim_rna_adapter && + size_t(trim_start) < read->read_common.get_raw_data_samples()) { read->read_common.raw_data = read->read_common.raw_data.index({Slice(trim_start, at::indexing::None)}); read->read_common.rna_adapter_end_signal_pos = 0; @@ -241,8 +242,12 @@ void ScalerNode::input_thread_fn() { utils::DEFAULT_TRIM_MIN_ELEMENTS); } - read->read_common.raw_data = - read->read_common.raw_data.index({Slice(trim_start, at::indexing::None)}); + if (size_t(trim_start) < read->read_common.get_raw_data_samples()) { + read->read_common.raw_data = + read->read_common.raw_data.index({Slice(trim_start, at::indexing::None)}); + } else { + trim_start = 0; + } } read->read_common.num_trimmed_samples = trim_start; diff --git a/tests/data/pod5/degenerate/trimming_bomb.pod5 b/tests/data/pod5/degenerate/trimming_bomb.pod5 new file mode 100644 index 000000000..2ca66cb8e Binary files /dev/null and b/tests/data/pod5/degenerate/trimming_bomb.pod5 differ diff --git a/tests/test_simple_basecaller_execution.sh b/tests/test_simple_basecaller_execution.sh index e5f072d7a..4246755cc 100755 --- a/tests/test_simple_basecaller_execution.sh +++ b/tests/test_simple_basecaller_execution.sh @@ -78,6 +78,9 @@ if $dorado_bin basecaller $model_5k_v43 $data_dir/duplex/pod5 --modified-bases 5 fi set -e +# Check that dorado handles degenerate reads without crashing +$dorado_bin basecaller $model_5k_v43 $data_dir/pod5/degenerate --skip-model-compatibility-check > $output_dir/error_condition.fq + echo dorado summary test stage $dorado_bin summary $output_dir/calls.bam