bug fix:

* write_structure: structure doesn't like spaces in id names... add removal of spaces in the file along in the clean function * filter_hwe: ggtern bug with ggplot2 v.3.0.0 remove temporarily the figure
thierrygosselin · Jul 17, 2018 · 5365e6c · 5365e6c
1 parent 7695cb2
commit 5365e6c
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 48 deletions.
diff --git a/R/clean.R b/R/clean.R
@@ -31,8 +31,8 @@ clean_markers_names <- function(x) {
 clean_ind_names <- function(x) {
   x <- stringi::stri_replace_all_fixed(
     str = as.character(x),
-    pattern = c("_", ":"),
-    replacement = c("-", "-"),
+    pattern = c("_", ":", " "),
+    replacement = c("-", "-", ""),
     vectorize_all = FALSE)
 }#End clean_ind_names
 

diff --git a/R/filter_hwe.R b/R/filter_hwe.R
@@ -628,37 +628,39 @@ filter_hwe <- function(
       dplyr::mutate(POP_ID = factor(POP_ID, pop.levels))
     parabola <- sample.size <- NULL
 
-    plot.tern <- ggtern::ggtern(
-      data = data.sum,
-      ggtern::aes(AA, AB, BB, color = GROUPINGS, size = MISSING_PROP)) +
-      ggplot2::scale_color_manual(name = "Exact test mid p-value", values = group_colors) +
-      ggplot2::scale_size_continuous(name = "Missing genotypes proportion") +
-      ggtern::theme_rgbw() +
-      ggplot2::geom_point(alpha = 0.4) +
-      ggplot2::geom_line(data = hw.parabola, ggplot2::aes(x = AA, y = AB),
-                         linetype = 2, size = 0.6, colour = "black") +
-      ggtern::theme_nogrid_minor() +
-      ggtern::theme_nogrid_major() +
-      ggplot2::labs(
-        x = "AA", y = "AB", z = "BB",
-        title = "Hardy-Weinberg Equilibrium ternary plots",
-        subtitle = "genotypes frequencies shown for AA: REF/REF, AB: REF/ALT and BB: ALT/ALT"
-      ) +
-      ggplot2::theme(
-        plot.title = ggplot2::element_text(size = 12, family = "Helvetica", face = "bold", hjust = 0.5),
-        plot.subtitle = ggplot2::element_text(size = 10, family = "Helvetica", hjust = 0.5)
-      ) +
-      ggplot2::facet_wrap(~ POP_ID)
-    # plot.tern
-    ggtern::ggsave(
-      limitsize = FALSE,
-      plot = plot.tern,
-      # filename = file.path(path.folder, "hwe.ternary.plots.read.depth.pdf"),
-      filename = file.path(path.folder, "hwe.ternary.plots.missing.data.pdf"),
-      width = n.pop * 5, height = n.pop * 4,
-      dpi = 300, units = "cm", useDingbats = FALSE)
-    hw.parabola <- NULL
-    if (verbose) message("Plot written: hwe.ternary.plots.missing.data.pdf")
+    # plot.tern <- ggtern::ggtern(
+    #   data = data.sum,
+    #   ggtern::aes(AA, AB, BB, color = GROUPINGS, size = MISSING_PROP)) +
+    #   ggplot2::scale_color_manual(name = "Exact test mid p-value", values = group_colors) +
+    #   ggplot2::scale_size_continuous(name = "Missing genotypes proportion") +
+    #   ggplot2::geom_point(alpha = 0.4) +
+    #   ggplot2::geom_line(data = hw.parabola, ggplot2::aes(x = AA, y = AB),
+    #                      linetype = 2, size = 0.6, colour = "black") +
+    #   ggplot2::labs(
+    #     x = "AA", y = "AB", z = "BB",
+    #     title = "Hardy-Weinberg Equilibrium ternary plots",
+    #     subtitle = "genotypes frequencies shown for AA: REF/REF, AB: REF/ALT and BB: ALT/ALT"
+    #   ) +
+    #   ggplot2::theme(
+    #     plot.title = ggplot2::element_text(size = 12, family = "Helvetica", face = "bold", hjust = 0.5),
+    #     plot.subtitle = ggplot2::element_text(size = 10, family = "Helvetica", hjust = 0.5)
+    #   ) +
+    #   ggtern::theme_rgbw() +
+    #   ggtern::theme_nogrid_minor() +
+    #   ggtern::theme_nogrid_major() +
+    #   ggplot2::facet_wrap(~ POP_ID)
+    # # plot.tern
+    # ggtern::ggsave(
+    #   limitsize = FALSE,
+    #   plot = plot.tern,
+    #   # filename = file.path(path.folder, "hwe.ternary.plots.read.depth.pdf"),
+    #   filename = file.path(path.folder, "hwe.ternary.plots.missing.data.pdf"),
+    #   width = n.pop * 5, height = n.pop * 4,
+    #   dpi = 300, units = "cm", useDingbats = FALSE)
+    # hw.parabola <- NULL
+    # if (verbose) message("Plot written: hwe.ternary.plots.missing.data.pdf")
+
+    plot.tern <- "temporarily out of order"
 
     # Manhattan plot -------------------------------------------------------------
     data.sum.man <- dplyr::mutate(data.sum, X = "x") %>% dplyr::filter(MID_P_VALUE < 0.05)

diff --git a/R/write_structure.R b/R/write_structure.R
@@ -12,7 +12,6 @@
 #' \emph{How to get a tidy data frame ?}
 #' Look into \pkg{radiator} \code{\link{tidy_genomic_data}}.
 
-
 #' @param pop.levels (optional, string) A character string with your populations ordered.
 #' Default: \code{pop.levels = NULL}.
 
@@ -24,7 +23,6 @@
 #' structure file.
 #' Default: \code{markers.line = TRUE}.
 
-
 #' @param filename (optional) The file name prefix for the structure file
 #' written to the working directory. With default: \code{filename = NULL},
 #' the date and time is appended to \code{radiator_structure_}.
@@ -61,39 +59,43 @@ write_structure <- function(
 
   # Import data ---------------------------------------------------------------
   if (is.vector(data)) {
-    input <- radiator::tidy_wide(data = data, import.metadata = FALSE)
+    data <- radiator::tidy_wide(data = data, import.metadata = FALSE)
   } else {
-    input <- data
+    data$INDIVIDUALS <- clean_ind_names(data$INDIVIDUALS)
+    data$POP_ID <- clean_pop_names(data$POP_ID)
+    data$MARKERS <- clean_markers_names(data$MARKERS)
   }
 
+
+
   # necessary steps to make sure we work with unique markers and not duplicated LOCUS
-  if (tibble::has_name(input, "LOCUS") && !tibble::has_name(input, "MARKERS")) {
-    input <- dplyr::rename(.data = input, MARKERS = LOCUS)
+  if (tibble::has_name(data, "LOCUS") && !tibble::has_name(data, "MARKERS")) {
+    data <- dplyr::rename(.data = data, MARKERS = LOCUS)
   }
 
 
-  input <- dplyr::select(.data = input, POP_ID, INDIVIDUALS, MARKERS, GT)
+  data <- dplyr::select(.data = data, POP_ID, INDIVIDUALS, MARKERS, GT)
 
   # pop.levels -----------------------------------------------------------------
   if (!is.null(pop.levels)) {
-    input <- dplyr::mutate(
-      .data = input,
+    data <- dplyr::mutate(
+      .data = data,
         POP_ID = factor(POP_ID, levels = pop.levels, ordered = TRUE),
         POP_ID = droplevels(POP_ID)
       ) %>%
       dplyr::arrange(POP_ID, INDIVIDUALS, MARKERS)
   } else {
-    input <- dplyr::mutate(.data = input, POP_ID = factor(POP_ID)) %>%
+    data <- dplyr::mutate(.data = data, POP_ID = factor(POP_ID)) %>%
       dplyr::arrange(POP_ID, INDIVIDUALS, MARKERS)
   }
 
   # Create a marker vector  ------------------------------------------------
-  markers <- dplyr::distinct(.data = input, MARKERS) %>%
+  markers <- dplyr::distinct(.data = data, MARKERS) %>%
     dplyr::arrange(MARKERS) %>%
     purrr::flatten_chr(.)
 
   # Structure format ----------------------------------------------------------------
-  input <- input %>%
+  data <- data %>%
     tidyr::separate(col = GT, into = c("A1", "A2"), sep = 3, extra = "drop", remove = TRUE) %>%
     tidyr::gather(data = ., key = ALLELES, value = GT, -c(POP_ID, INDIVIDUALS, MARKERS)) %>%
     dplyr::mutate(
@@ -120,5 +122,5 @@ write_structure <- function(
   writeLines(text = stringi::stri_join(markers, sep = "\t", collapse = "\t"),
              con = filename.connection, sep = "\n")
   close(filename.connection) # close the connection
-  readr::write_tsv(x = input, path = filename, append = TRUE, col_names = FALSE)
+  readr::write_tsv(x = data, path = filename, append = TRUE, col_names = FALSE)
 } # end write_structure
diff --git a/README.md b/README.md
@@ -8,8 +8,8 @@ state and is being actively
 developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
 [![DOI](https://zenodo.org/badge/14548/thierrygosselin/radiator.svg)](https://zenodo.org/badge/latestdoi/14548/thierrygosselin/radiator)
 
-[![packageversion](https://img.shields.io/badge/Package%20version-0.0.12-orange.svg)](commits/master)
-[![Last-changedate](https://img.shields.io/badge/last%20change-2018--07--09-brightgreen.svg)](/commits/master)
+[![packageversion](https://img.shields.io/badge/Package%20version-0.0.13-orange.svg)](commits/master)
+[![Last-changedate](https://img.shields.io/badge/last%20change-2018--07--17-brightgreen.svg)](/commits/master)
 
 ------------------------------------------------------------------------