Skip to content

Commit

Permalink
[WIP]: Broken, not compiling. starting to calc RPT.
Browse files Browse the repository at this point in the history
  • Loading branch information
a-frantz committed Dec 6, 2023
1 parent c2fe1d8 commit b4ce76b
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 54 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ git-testament = "0.2.1"
indexmap = "1.9.1"
indicatif = "0.16.2"
itertools = "0.10.5"
lazy_static = "1.4.0"
noodles = { version = "0.34.0", features = [
"async",
"bam",
Expand Down
87 changes: 48 additions & 39 deletions src/derive/command/endedness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use tracing::info;
use tracing::trace;

use crate::derive::endedness::compute;
use crate::derive::endedness::compute::{BOTH, FIRST, LAST, NEITHER, OVERALL, UNKNOWN_READ_GROUP};
use crate::utils::formats::bam::ParsedBAMFile;
use crate::utils::formats::utils::IndexCheck;

Expand All @@ -31,7 +32,7 @@ pub fn deviance_in_range(deviance_raw: &str) -> Result<f64, String> {
/// Clap arguments for the `ngs derive endedness` subcommand.
#[derive(Args)]
pub struct DeriveEndednessArgs {
// Source BAM.
/// Source BAM.
#[arg(value_name = "BAM")]
src: PathBuf,

Expand Down Expand Up @@ -64,32 +65,25 @@ struct ReadGroup {
both: usize,
neither: usize,
}

/// Main function for the `ngs derive endedness` subcommand.
pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
info!("Starting derive endedness subcommand.");

let mut new_ordering_flags: HashMap<String, usize> = HashMap::new();
new_ordering_flags.insert("f+l-".to_string(), 0);
new_ordering_flags.insert("f-l+".to_string(), 0);
new_ordering_flags.insert("f+l+".to_string(), 0);
new_ordering_flags.insert("f-l-".to_string(), 0);
new_ordering_flags.insert(*FIRST, 0);
new_ordering_flags.insert(*LAST, 0);
new_ordering_flags.insert(*BOTH, 0);
new_ordering_flags.insert(*NEITHER, 0);

let mut ordering_flags: HashMap<Rc<String>, HashMap<String, usize>> = HashMap::new();
ordering_flags.insert(Rc::new("overall".to_string()), new_ordering_flags.clone());
ordering_flags.insert(Rc::new("unknown_read_group".to_string()), new_ordering_flags.clone());

new_ordering_flags
.entry("f+l-".to_string())
.and_modify(|e| *e += 1);
new_ordering_flags
.entry("f-l+".to_string())
.and_modify(|e| *e += 1);
new_ordering_flags
.entry("f+l+".to_string())
.and_modify(|e| *e += 1);
new_ordering_flags
.entry("f-l-".to_string())
.and_modify(|e| *e += 1);
ordering_flags.insert(Rc::new(*OVERALL), new_ordering_flags.clone());
ordering_flags.insert(Rc::new(*UNKNOWN_READ_GROUP), new_ordering_flags.clone());

new_ordering_flags.entry(*FIRST).and_modify(|e| *e += 1);
new_ordering_flags.entry(*LAST).and_modify(|e| *e += 1);
new_ordering_flags.entry(*BOTH).and_modify(|e| *e += 1);
new_ordering_flags.entry(*NEITHER).and_modify(|e| *e += 1);

// only used if args.calc_rpt is true
let mut found_rgs = HashSet::new();
Expand Down Expand Up @@ -118,18 +112,28 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
continue;
}

let read_group = record
.data()
.get(Tag::ReadGroup)
.map(|v| Rc::new(v.to_string()))
.unwrap_or(Rc::new(String::from("unknown_read_group")));
let read_group = match record.data().get(Tag::ReadGroup) {
Some(rg) => Rc::new(String::from(rg.as_str().unwrap())),
None => Rc::new(*UNKNOWN_READ_GROUP),
};

if args.calc_rpt {
found_rgs.insert(Rc::clone(&read_group));

match record.read_name() {
Some(rn) => {
read_names.insert(rn.to_string(), vec![Rc::clone(&read_group)]);
let rg_vec = read_names.get_mut(&rn.to_string());

match rg_vec {
Some(rg_vec) => {
rg_vec.push(Rc::clone(&read_group));
}
None => {
let mut rg_vec = Vec::new();
rg_vec.push(Rc::clone(&read_group));
read_names.insert(rn.to_string(), rg_vec);
}
}
}
None => {
trace!("Could not parse a QNAME from a read in the file.");
Expand All @@ -140,43 +144,47 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
}

if record.flags().is_first_segment() && !record.flags().is_last_segment() {
ordering_flags.entry(Rc::new("overall".to_string())).and_modify(|e| {
e.entry("f+l-".to_string()).and_modify(|e| *e += 1);
ordering_flags.entry(Rc::new(*OVERALL)).and_modify(|e| {
e.entry(*FIRST).and_modify(|e| *e += 1);
});

ordering_flags
.entry(read_group)
.and_modify(|e| {
e.entry("f+l-".to_string()).and_modify(|e| *e += 1);
e.entry(*FIRST).and_modify(|e| *e += 1);
})
.or_insert(new_ordering_flags.clone());
} else if !record.flags().is_first_segment() && record.flags().is_last_segment() {
ordering_flags.entry(Rc::new("overall".to_string())).and_modify(|e| {
e.entry("f-l+".to_string()).and_modify(|e| *e += 1);
ordering_flags.entry(Rc::new(*OVERALL)).and_modify(|e| {
e.entry(*LAST).and_modify(|e| *e += 1);
});

ordering_flags
.entry(read_group)
.and_modify(|e| {
e.entry("f-l+".to_string()).and_modify(|e| *e += 1);
e.entry(*LAST).and_modify(|e| *e += 1);
})
.or_insert(new_ordering_flags.clone());
} else if record.flags().is_first_segment() && record.flags().is_last_segment() {
ordering_flags.entry(Rc::new("overall".to_string())).and_modify(|e| {
e.entry("f+l+".to_string()).and_modify(|e| *e += 1);
ordering_flags.entry(Rc::new(*OVERALL)).and_modify(|e| {
e.entry(*BOTH).and_modify(|e| *e += 1);
});

ordering_flags
.entry(read_group)
.and_modify(|e| {
e.entry("f+l+".to_string()).and_modify(|e| *e += 1);
e.entry(*BOTH).and_modify(|e| *e += 1);
})
.or_insert(new_ordering_flags.clone());
} else if !record.flags().is_first_segment() && !record.flags().is_last_segment() {
ordering_flags.entry(Rc::new("overall".to_string())).and_modify(|e| {
e.entry("f-l-".to_string()).and_modify(|e| *e += 1);
ordering_flags.entry(Rc::new(*OVERALL)).and_modify(|e| {
e.entry(*NEITHER).and_modify(|e| *e += 1);
});

ordering_flags
.entry(read_group)
.and_modify(|e| {
e.entry("f-l-".to_string()).and_modify(|e| *e += 1);
e.entry(*NEITHER).and_modify(|e| *e += 1);
})
.or_insert(new_ordering_flags.clone());
} else {
Expand All @@ -192,7 +200,8 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
}

// (2) Derive the consensus endedness based on the ordering flags gathered.
let result = compute::predict(ordering_flags, args.paired_deviance.unwrap()).unwrap();
let result =
compute::predict(ordering_flags, read_names, args.paired_deviance.unwrap()).unwrap();

// (3) Print the output to stdout as JSON (more support for different output
// types may be added in the future, but for now, only JSON).
Expand Down
66 changes: 51 additions & 15 deletions src/derive/endedness/compute.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
//! Module holding the logic for computing the endedness of a BAM.
use anyhow::bail;
use lazy_static::lazy_static;
use radix_trie::iter;
use radix_trie::Trie;
use serde::Serialize;
use std::collections::HashMap;
use std::rc::Rc;

lazy_static! {
// Strings used to index into the HashMaps used to store the Read Group ordering flags.
pub static ref OVERALL: String = String::from("overall");
pub static ref UNKNOWN_READ_GROUP: String = String::from("unknown_read_group");

// Strings used to index into the HashMaps used to store the ordering flag counts.
pub static ref FIRST: String = String::from("f+l-");
pub static ref LAST: String = String::from("f-l+");
pub static ref BOTH: String = String::from("f+l+");
pub static ref NEITHER: String = String::from("f-l-");
}

/// Struct holding the final results for an `ngs derive endedness` subcommand
/// call.
#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -49,60 +64,81 @@ impl DerivedEndednessResult {
}
}

fn calculate_reads_per_template(
read_names: Trie<String, Vec<Rc<String>>>,
) -> HashMap<Rc<String>, f64> {
let mut total_reads: usize = 0;
let mut templates_templates: usize = 0;
let mut reads_per_template: HashMap<Rc<String>, f64> = HashMap::new();

for read_groups in iter::Iter::new(read_names) {}

reads_per_template
}

/// Main method to evaluate the collected ordering flags and
/// return a result for the endedness of the file. This may fail, and the
/// resulting [`DerivedEndednessResult`] should be evaluated accordingly.
pub fn predict(
ordering_flags: HashMap<Rc<String>, HashMap<String, usize>>,
read_names: Trie<String, Vec<Rc<String>>>,
paired_deviance: f64,
) -> Result<DerivedEndednessResult, anyhow::Error> {
let first = ordering_flags[&String::from("overall")]["f+l-"];
let last = ordering_flags[&String::from("overall")]["f-l+"];
let both = ordering_flags[&String::from("overall")]["f+l+"];
let neither = ordering_flags[&String::from("overall")]["f-l-"];
let overall_ordering_flags = ordering_flags.get(&*OVERALL).unwrap();

let overall_first = *overall_ordering_flags.get(&*FIRST).unwrap();
let overall_last = *overall_ordering_flags.get(&*LAST).unwrap();
let overall_both = *overall_ordering_flags.get(&*BOTH).unwrap();
let overall_neither = *overall_ordering_flags.get(&*NEITHER).unwrap();

let mut result =
DerivedEndednessResult::new(false, "Unknown".to_string(), first, last, both, neither);
let mut result = DerivedEndednessResult::new(
false,
"Unknown".to_string(),
overall_first,
overall_last,
overall_both,
overall_neither,
);

// all zeroes
if first == 0 && last == 0 && both == 0 && neither == 0 {
if overall_first == 0 && overall_last == 0 && overall_both == 0 && overall_neither == 0 {
bail!("No reads were detected in the file.");
}

// only first present
if first > 0 && last == 0 && both == 0 && neither == 0 {
if overall_first > 0 && overall_last == 0 && overall_both == 0 && overall_neither == 0 {
return Ok(result);
}
// only last present
if first == 0 && last > 0 && both == 0 && neither == 0 {
if overall_first == 0 && overall_last > 0 && overall_both == 0 && overall_neither == 0 {
return Ok(result);
}
// only both present
if first == 0 && last == 0 && both > 0 && neither == 0 {
if overall_first == 0 && overall_last == 0 && overall_both > 0 && overall_neither == 0 {
result.succeeded = true;
result.endedness = "Single-End".to_string();
return Ok(result);
}
// only neither present
if first == 0 && last == 0 && both == 0 && neither > 0 {
if overall_first == 0 && overall_last == 0 && overall_both == 0 && overall_neither > 0 {
return Ok(result);
}
// first/last mixed with both/neither
if (first > 0 || last > 0) && (both > 0 || neither > 0) {
if (overall_first > 0 || overall_last > 0) && (overall_both > 0 || overall_neither > 0) {
return Ok(result);
}
// any mix of both/neither, regardless of first/last
if both > 0 && neither > 0 {
if overall_both > 0 && overall_neither > 0 {
return Ok(result);
}

// both and neither are now guarenteed to be 0
// We only need to check first and last

let first_frac = first as f64 / (first + last) as f64;
let first_frac = overall_first as f64 / (overall_first + overall_last) as f64;
let lower_limit = 0.5 - paired_deviance;
let upper_limit = 0.5 + paired_deviance;
if (first == last) || (lower_limit <= first_frac && first_frac <= upper_limit) {
if (overall_first == overall_last) || (lower_limit <= first_frac && first_frac <= upper_limit) {
result.succeeded = true;
result.endedness = "Paired-End".to_string();
return Ok(result);
Expand Down

0 comments on commit b4ce76b

Please sign in to comment.