diff --git a/R/import-vdj.R b/R/import-vdj.R index ab26f3f..a904f71 100644 --- a/R/import-vdj.R +++ b/R/import-vdj.R @@ -169,9 +169,9 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", filter_chains <- TRUE cli::cli_warn( - "When `include_mutations` is `TRUE`, `filter_chains` is also + "When `include_mutations` is `TRUE`, `filter_chains` is automatically set `TRUE` since mutation data is only available for - productive chains." + productive chains" ) } @@ -193,7 +193,6 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", len_cols <- paste0(seq_cols, "_length") # Columns containing per-cell info - # cell_cols <- c("barcode", "clonotype_id") cell_cols <- c("barcode", "clonotype_id", "paired") # Optional aggr columns @@ -319,24 +318,23 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", indel_cols <- indel_cols[indel_cols != "contig_id"] # Join indel data - # SHOULD CHECK BARCODE OVERLAP HERE!!! - # IF BARCODES DO NOT OVERLAP HERE, WILL RETURN ALL 0s + # check that barcodes match + # if barcodes do not match, 0s will get added for mutation columns indel_ctigs <- purrr::map2( contigs, indels, dplyr::left_join, by = "contig_id", relationship = "many-to-one" ) purrr::walk2(contigs, indels, ~ { - indel_ovlp <- sum(.y$contig_id %in% .x$contig_id) / nrow(.y) - - if (indel_ovlp < 1) { - cli::cli_abort( - "Barcodes from mutation data do not match, check your input files." + if (any(!.y$contig_id %in% .x$contig_id)) { + .malformed_data_error( + "cell barcodes from concat_ref.bam and + filtered_contig_annotations.csv do not match" ) } }) - # Replace NAs with 0 + # Replace NAs with 0s # contigs that did not have any mutations will have NAs indel_ctigs <- purrr::map( indel_ctigs, @@ -541,9 +539,7 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", .data$clonotype_id != "None", !is.na(.data$clonotype_id) ) - if (nrow(res) == 0) { - .malformed_data_error("no valid clonotypes present") - } + if (nrow(res) == 0) .malformed_data_error("no valid clonotypes present") # Add prefix to V(D)J columns res <- dplyr::rename_with(res, ~ paste0(prefix, .x)) @@ -936,7 +932,9 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", ) if (ncol(res) == 1) { - cli::cli_abort("V(D)J coordinates not found, check {.file airr_file}") + msg <- "columns containing V(D)J coordinates were not found in " + + .malformed_data_error(paste0(msg, basename(airr_file))) } res <- tidyr::pivot_longer(res, -"contig_id") @@ -1469,13 +1467,14 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "", #' Malformed data error #' #' @noRd -.malformed_data_error <- function(msg) { +.malformed_data_error <- function(msg, call = NULL) { cli::cli_abort( "Malformed input data, {msg}. Did you modify the `cellranger` output files? {.fn import_vdj} requires files that are in the format generated by `cellranger`. If you are having trouble loading your data, please file an issue at - {.url https://github.com/rnabioco/djvdj/issues}." + {.url https://github.com/rnabioco/djvdj/issues}.", + call = call ) }