diff --git a/.gitignore b/.gitignore index babe1ead..53568a67 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ *.Rproj *.so *.Rprofile -devel/* -Pedixplorer.Rcheck \ No newline at end of file +*.lintr +Pedixplorer* +devel \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..1a638a48 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,50 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "R-Debugger", + "name": "Launch R-Workspace", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}" + }, + { + "type": "R-Debugger", + "name": "Debug R-File", + "request": "launch", + "debugMode": "file", + "workingDirectory": "${workspaceFolder}", + "file": "${file}" + }, + { + "type": "R-Debugger", + "name": "Debug R-Function", + "request": "launch", + "debugMode": "function", + "workingDirectory": "${workspaceFolder}", + "file": "${file}", + "mainFunction": "main", + "allowGlobalDebugging": false + }, + { + "type": "R-Debugger", + "name": "Debug R-Package", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}", + "includePackageScopes": true, + "loadPackages": [ + "." + ] + }, + { + "type": "R-Debugger", + "request": "attach", + "name": "Attach to R process", + "splitOverwrittenOutput": true + } + ] +} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 12520819..a05ccba7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Pedixplorer -Version: 0.99.2 -Date: 2023-09-19 +Version: 1.1.0 +Date: 2023-11-15 Title: Pedigree Functions Authors@R: c( person("Louis", "Le Nézet", email="louislenezet@gmail.com", @@ -24,7 +24,9 @@ Imports: dplyr, tidyr, quadprog, - Matrix + Matrix, + S4Vectors, + testthat Description: Routines to handle family data with a Pedigree object. The initial purpose was to create correlation structures that describe family relationships such as kinship and identity-by-descent, which can be used to model family data @@ -38,39 +40,39 @@ RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr Suggests: - testthat (>= 3.0.0), diffviewer, - vdiffr, - rmarkdown, + vdiffr, + rmarkdown, BiocStyle, knitr, - withr + withr, + magick Config/testthat/edition: 3 biocViews: Software, DataRepresentation, Genetics, Alignment BugReports: https://github.com/LouisLeLezet/Pedixplorer/issues -url: https://github.com/LouisLeNezet/Pedixplorer +URL: https://github.com/LouisLeNezet/Pedixplorer BiocType: Software Collate: - 'Pedigree.R' + 'AllValidity.R' + 'AllClass.R' + 'kindepth.R' + 'kinship.R' + 'utils.R' + 'AllConstructor.R' + 'AllAccessors.R' + 'AllGeneric.R' 'Pedixplorer-package.R' 'alignped4.R' 'alignped3.R' 'alignped2.R' 'alignped1.R' - 'validity.R' - 'pedigreeClass.R' - 'check_hints.R' - 'kindepth.R' 'auto_hint.R' 'align.R' 'best_hint.R' 'bit_size.R' 'data.R' 'descendants.R' - 'make_famid.R' 'family_check.R' - 'kinship.R' - 'utils.R' 'find_unavailable.R' 'find_avail_affected.R' 'find_avail_noninform.R' @@ -79,6 +81,7 @@ Collate: 'generate_colors.R' 'ibd_matrix.R' 'is_informative.R' + 'make_famid.R' 'min_dist_inf.R' 'norm_data.R' 'num_child.R' @@ -88,6 +91,6 @@ Collate: 'plot_fromdf.R' 'plot.R' 'shrink.R' - 'trim.R' 'unrelated.R' 'useful_inds.R' +LazyData: false diff --git a/NAMESPACE b/NAMESPACE index 0e9efe7a..f1549711 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,17 +1,45 @@ # Generated by roxygen2: do not edit by hand +export("affected<-") +export("avail<-") +export("border<-") +export("dadid<-") +export("famid<-") +export("fill<-") +export("hints<-") +export("horder<-") +export("id<-") +export("isinf<-") +export("kin<-") +export("momid<-") +export("ped<-") +export("rel<-") +export("scales<-") +export("sex<-") +export("spouse<-") +export("status<-") +export("useful<-") +export(Hints) +export(Ped) export(Pedigree) +export(Rel) +export(Scales) +export(affected) export(align) -export(alignped1) -export(alignped2) -export(alignped3) -export(alignped4) +export(ancestors) +export(anchor_to_factor) export(auto_hint) +export(avail) export(best_hint) export(bit_size) -export(check_hints) +export(border) +export(circfun) +export(code) +export(dadid) export(descendants) +export(famid) export(family_check) +export(fill) export(find_avail_affected) export(find_avail_noninform) export(find_unavailable) @@ -21,12 +49,23 @@ export(generate_border) export(generate_colors) export(generate_fill) export(hints) +export(horder) export(ibd_matrix) +export(id) +export(id1) +export(id2) +export(is_disconnected) +export(is_founder) export(is_informative) +export(is_valid_scales) +export(isinf) +export(kin) export(kindepth) export(kinship) export(make_famid) export(min_dist_inf) +export(momid) +export(na_to_length) export(norm_ped) export(norm_rel) export(num_child) @@ -34,34 +73,114 @@ export(ped) export(ped_to_legdf) export(ped_to_plotdf) export(plot_fromdf) +export(polyfun) +export(polygons) export(rel) export(rel_code_to_factor) export(scales) +export(sex) export(sex_to_factor) export(shrink) -export(trim) +export(spouse) +export(status) export(unrelated) +export(upd_famid_id) +export(useful) export(useful_inds) export(vect_to_binary) +exportClasses(Hints) +exportClasses(Ped) exportClasses(Pedigree) +exportClasses(Rel) +exportClasses(Scales) +exportMethods("[") +exportMethods("affected<-") +exportMethods("avail<-") +exportMethods("border<-") +exportMethods("dadid<-") +exportMethods("famid<-") +exportMethods("fill<-") +exportMethods("hints<-") +exportMethods("horder<-") +exportMethods("id<-") +exportMethods("isinf<-") +exportMethods("kin<-") +exportMethods("mcols<-") +exportMethods("momid<-") +exportMethods("ped<-") +exportMethods("rel<-") +exportMethods("scales<-") +exportMethods("sex<-") +exportMethods("spouse<-") +exportMethods("status<-") +exportMethods("useful<-") +exportMethods(Hints) +exportMethods(Ped) exportMethods(Pedigree) +exportMethods(Rel) +exportMethods(Scales) +exportMethods(affected) exportMethods(as.data.frame) exportMethods(as.list) +exportMethods(auto_hint) +exportMethods(avail) +exportMethods(border) +exportMethods(code) +exportMethods(dadid) +exportMethods(famid) exportMethods(family_check) +exportMethods(fill) +exportMethods(find_avail_affected) +exportMethods(find_avail_noninform) +exportMethods(find_unavailable) exportMethods(fix_parents) exportMethods(generate_colors) +exportMethods(hints) +exportMethods(horder) +exportMethods(id) +exportMethods(id1) +exportMethods(id2) exportMethods(is_informative) -exportMethods(kindepth) +exportMethods(is_parent) +exportMethods(isinf) +exportMethods(kin) exportMethods(kinship) exportMethods(length) -exportMethods(make_famid) +exportMethods(mcols) exportMethods(min_dist_inf) +exportMethods(momid) exportMethods(num_child) +exportMethods(ped) +exportMethods(ped_to_legdf) +exportMethods(ped_to_plotdf) exportMethods(plot) +exportMethods(rel) +exportMethods(scales) +exportMethods(sex) +exportMethods(show) +exportMethods(shrink) +exportMethods(spouse) +exportMethods(status) +exportMethods(subset) +exportMethods(summary) +exportMethods(unrelated) +exportMethods(upd_famid_id) +exportMethods(useful) exportMethods(useful_inds) importFrom(Matrix,bdiag) importFrom(Matrix,forceSymmetric) importFrom(Matrix,sparseMatrix) +importFrom(S4Vectors,'mcols') +importFrom(S4Vectors,'mcols<-') +importFrom(S4Vectors,as.data.frame) +importFrom(S4Vectors,as.list) +importFrom(S4Vectors,cbind_mcols_for_display) +importFrom(S4Vectors,classNameForDisplay) +importFrom(S4Vectors,makeClassinfoRowForCompactPrinting) +importFrom(S4Vectors,mcols) +importFrom(S4Vectors,parallel_slot_names) +importFrom(S4Vectors,subset) +importFrom(S4Vectors,summary) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,group_by) @@ -98,10 +217,13 @@ importFrom(graphics,segments) importFrom(graphics,strheight) importFrom(graphics,strwidth) importFrom(graphics,text) -importFrom(methods,"slot<-") +importFrom(methods,'slot<-') importFrom(methods,as) +importFrom(methods,callNextMethod) +importFrom(methods,is) importFrom(methods,new) -importFrom(methods,slot) +importFrom(methods,show) +importFrom(methods,slotNames) importFrom(methods,validObject) importFrom(plyr,rbind.fill) importFrom(plyr,revalue) @@ -116,3 +238,4 @@ importFrom(tidyr,pivot_longer) importFrom(tidyr,unite) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) +importFrom(testthat, test_that) diff --git a/R/AllAccessors.R b/R/AllAccessors.R new file mode 100644 index 00000000..3d43a638 --- /dev/null +++ b/R/AllAccessors.R @@ -0,0 +1,1241 @@ +#### S4 Accessors #### +#' @include AllConstructor.R +#' @include AllClass.R +#' @importFrom S4Vectors mcols +NULL + +#### S4 Ped Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Ped object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. For each getter, you have a setter with the +#' same name, to be use as `slot(x) <- value`. +#' The `value` parameter is a vector of the same length as `x`, except +#' for the `mcols()` accessors where `value` is a list or a data.frame with +#' each elements with the same length as `x`. + +##### Id Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("id", function(x) { + standardGeneric("id") +}) + +#' @section Accessors: +#' - `id(x)` : Individuals identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("id", signature(x = "Ped"), function(x) { + x@id +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("id<-", function(x, value) { + standardGeneric("id<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("id<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("id must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for id should be: ", + "equal to the length of the Ped object" + ) + } + x@id <- as.character(value) + validObject(x) + x + } +) + +##### Dadid Accessors ##### +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("dadid", function(x) { + standardGeneric("dadid") +}) + +#' @section Accessors: +#' - `dadid(x)` : Individuals' father identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("dadid", signature(x = "Ped"), function(x) { + x@dadid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("dadid<-", function(x, value) { + standardGeneric("dadid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("dadid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("dadid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for dadid should be: ", + "equal to the length of the Ped object" + ) + } + x@dadid <- as.character(value) + validObject(x) + x + } +) + +##### Momid Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("momid", function(x) { + standardGeneric("momid") +}) + +#' @section Accessors: +#' - `momid(x)` : Individuals' mother identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("momid", signature(x = "Ped"), function(x) { + x@momid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("momid<-", function(x, value) { + standardGeneric("momid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("momid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("momid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for momid should be: ", + "equal to the length of the Ped object" + ) + } + x@momid <- as.character(value) + validObject(x) + x + } +) + +##### Famid Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("famid", function(x) { + standardGeneric("famid") +}) + +#' @section Accessors: +#' - `famid(x)` : Individuals' family identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Ped"), function(x) { + x@famid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("famid<-", function(x, value) { + standardGeneric("famid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("famid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("famid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for famid should be: ", + "equal to the length of the Ped object" + ) + } + x@famid <- as.character(value) + x <- upd_famid_id(x) + validObject(x) + x + } +) + +##### Sex Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("sex", function(x) { + standardGeneric("sex") +}) + +#' @section Accessors: +#' - `sex(x)` : Individuals' gender +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("sex", signature(x = "Ped"), function(x) { + x@sex +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("sex<-", function(x, value) { + standardGeneric("sex<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("sex<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if ( + ! is.character(value) && + ! is.integer(value) && + ! is.factor(value) + ) { + stop("sex must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for sex should be: ", + "equal to the length of the Ped object" + ) + } + x@sex <- sex_to_factor(value) + validObject(x) + x + } +) + +##### Affected Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("affected", function(x) { + standardGeneric("affected") +}) + +#' @section Accessors: +#' - `affected(x)` : Individuals' affection status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("affected", signature(x = "Ped"), function(x) { + x@affected +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("affected<-", function(x, value) { + standardGeneric("affected<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("affected<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for affected should be: ", + "equal to the length of the Ped object" + ) + } + } + x@affected <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Avail Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("avail", function(x) { + standardGeneric("avail") +}) + +#' @section Accessors: +#' - `avail(x)` : Individuals' availability status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("avail", signature(x = "Ped"), function(x) { + x@avail +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("avail<-", function(x, value) { + standardGeneric("avail<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("avail<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for avail should be: ", + "equal to the length of the Ped object" + ) + } + } + x@avail <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Status Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("status", function(x) { + standardGeneric("status") +}) + +#' @section Accessors: +#' - `status(x)` : Individuals' death status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("status", signature(x = "Ped"), function(x) { + x@status +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("status<-", function(x, value) { + standardGeneric("status<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("status<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for avail should be: ", + "equal to the length of the Ped object" + ) + } + } + x@status <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Isinf Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("isinf", function(x) { + standardGeneric("isinf") +}) + +#' @section Accessors: +#' - `isinf(x)` : Individuals' informativeness status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("isinf", signature(x = "Ped"), function(x) { + x@isinf +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("isinf<-", function(x, value) { + standardGeneric("isinf<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("isinf<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for isinf should be: ", + "equal to the length of the Ped object" + ) + } + } + x@isinf <- value + validObject(x) + x + } +) + +##### Kin Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("kin", function(x) { + standardGeneric("kin") +}) + +#' @section Accessors: +#' - `kin(x)` : Individuals' kinship distance to the +#' informative individuals +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("kin", signature(x = "Ped"), function(x) { + x@kin +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("kin<-", function(x, value) { + standardGeneric("kin<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("kin<-", + signature(x = "Ped", value = "numeric"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for kin should be: ", + "equal to the length of the Ped object" + ) + } + } + x@kin <- value + validObject(x) + x + } +) + +##### Useful Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("useful", function(x) { + standardGeneric("useful") +}) + +#' @section Accessors: +#' - `useful(x)` : Individuals' usefullness status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("useful", signature(x = "Ped"), function(x) { + x@useful +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("useful<-", function(x, value) { + standardGeneric("useful<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("useful<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for useful should be: ", + "equal to the length of the Ped object" + ) + } + } + x@useful <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Mcols Accessors ##### + +#' @section Accessors: +#' - `mcols(x)` : Individuals' metadata +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("mcols<-", + signature(x = "Ped", value = "list"), + function(x, value) { + mcols(x) <- as(value, "DataFrame") + x + } +) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("mcols<-", + signature(x = "Ped", value = "data.frame"), + function(x, value) { + mcols(x) <- as(value, "DataFrame") + x + } +) + +#### S4 Rel Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Rel object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. + +##### Code Accessors ##### +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("code", function(x) { + standardGeneric("code") +}) + +#' @section Accessors: +#' - `code(x)` : Relationships' code +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("code", signature(x = "Rel"), function(x) { + x@code +}) + +##### Id1 Accessors ##### +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("id1", function(x) { + standardGeneric("id1") +}) + +#' @section Accessors: +#' - `id1(x)` : Relationships' first individuals' identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("id1", signature(x = "Rel"), function(x) { + x@id1 +}) + +##### Id2 Accessors ##### + +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("id2", function(x) { + standardGeneric("id2") +}) + +#' @section Accessors: +#' - `id2(x)` : Relationships' second individuals' identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("id2", signature(x = "Rel"), function(x) { + x@id2 +}) + +#' @section Accessors: +#' - `famid(x)` : Relationships' individuals' family identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Rel"), function(x) { + x@famid +}) + +#' @section Accessors: +#' - `famid(x) <- value` : Set the relationships' individuals' family +#' identifier +#' - `value` : A character or integer vector of the same length as x +#' with the family identifiers +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("famid<-", + signature(x = "Rel", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("famid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for famid should be: ", + "equal to the length of the Ped object" + ) + } + x@famid <- as.character(value) + x <- upd_famid_id(x) + validObject(x) + x + } +) + +#### S4 Pedigree Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Pedigree object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. + +#' @section Accessors: +#' - `famid(x)` : Get the family identifiers of a Pedigree object. This +#' function is a wrapper around `famid(ped(x))`. +#' +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Pedigree"), function(x) { + famid(ped(x)) +}) + +##### S4 ped Accessors ##### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("ped", function(object, slot) { + standardGeneric("ped") +}) + +#' @section Accessors: +#' - `ped(x, slot)` : Get the value of a specific slot of the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped", + signature(object = "Pedigree", slot = "ANY"), + function(object, slot) { + slot(object@ped, slot) + } +) + +#' @section Accessors: +#' - `ped(x)` : Get the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped", + signature(object = "Pedigree", slot = "missing"), + function(object) { + object@ped + } +) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("ped<-", function(object, slot, value) { + standardGeneric("ped<-") +}) + +#' @section Accessors: +#' - `ped(x, slot) <- value` : Set the value of a specific slot of +#' the Ped object +#' Wrapper of `slot(ped(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped<-", + signature(object = "Pedigree", slot = "ANY", value = "ANY"), + function(object, slot, value) { + ped_slots <- c( + "id", "dadid", "momid", "sex", "famid", + "steril", "status", "avail", "affected", + "kin", "useful", "isinf", + "num_child_tot", "num_child_dir", "num_child_ind" + ) + if (! slot %in% ped_slots) { + stop("slot selected: ", slot, " is not a Ped slot") + } + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + slot(object@ped, slot) <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `ped(x) <- value` : Set the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped<-", + signature(object = "Pedigree", slot = "missing", value = "Ped"), + function(object, slot, value) { + object@ped <- value + validObject(object) + object + } +) + +##### S4 mcols Accessors ##### + +#' @section Accessors: +#' - `mcols(x)` : Get the metadata of a Pedigree object. +#' This function is a wrapper around `mcols(ped(x))`. +#' @rdname Pedigree-class +#' @usage NULL +#' @importFrom S4Vectors 'mcols' +#' @export +setMethod( + "mcols", + signature(x = "Pedigree"), + function(x) { + mcols(x@ped) + } +) + +#' @section Accessors: +#' - `mcols(x) <- value` : Set the metadata of a Pedigree object. +#' This function is a wrapper around `mcols(ped(x)) <- value`. +#' @rdname Pedigree-class +#' @usage NULL +#' @importFrom S4Vectors 'mcols<-' +#' @export +setMethod( + "mcols<-", + signature(x = "Pedigree", value = "ANY"), + function(x, value) { + mcols(x@ped) <- value + x + } +) + +##### S4 rel Accessors ##### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("rel", function(object, slot) { + standardGeneric("rel") +}) + +#' @section Accessors: +#' - `rel(x, slot)` : Get the value of a specific slot of the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel", + signature(object = "Pedigree", slot = "ANY"), + function(object, slot) { + slot(object@rel, slot) + } +) + +#' @section Accessors: +#' - `rel(x)` : Get the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel", + signature(object = "Pedigree", slot = "missing"), + function(object, slot) { + object@rel + } +) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("rel<-", function(object, slot, value) { + standardGeneric("rel<-") +}) + +#' @section Accessors: +#' - `rel(x, slot) <- value` : Set the value of a specific slot of the +#' Rel object +#' Wrapper of `slot(rel(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel<-", + signature(object = "Pedigree", slot = "ANY", value = "ANY"), + function(object, slot, value) { + rel_cols <- c("id1", "id2", "code", "famid") + if (! slot %in% rel_cols) { + stop("slot selected: ", slot, " is not a relationship column") + } + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + slot(object@rel, slot) <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `rel(x) <- value` : Set the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel<-", + signature(object = "Pedigree", slot = "missing", value = "Rel"), + function(object, slot, value) { + object@rel <- value + validObject(object) + object + } +) + +##### S4 scales Accessors #### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("scales", function(object) { + standardGeneric("scales") +}) + +#' @section Accessors: +#' - `scales(x)` : Get the Scales object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("scales", signature(object = "Pedigree"), function(object) { + object@scales +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("scales<-", function(object, value) { + standardGeneric("scales<-") +}) + +#' @section Accessors: +#' - `scales(x) <- value` : Set the Scales object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "scales<-", signature(object = "Pedigree", value = "Scales"), + function(object, value) { + object@scales <- value + object + } +) + +#### S4 fill Accessors #### + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("fill", function(object) { + standardGeneric("fill") +}) + +#' @section Accessors: +#' - `fill(x)` : Get the fill data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod("fill", + signature(object = "Scales"), + function(object) { + object@fill + } +) + +#' @section Accessors: +#' - `fill(x)` : Get the fill data.frame from the Scales object. +#' Wrapper of `fill(scales(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("fill", + signature(object = "Pedigree"), + function(object) { + fill(scales(object)) + } +) + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("fill<-", function(object, value) { + standardGeneric("fill<-") +}) + +#' @section Accessors: +#' - `fill(x) <- value` : Set the fill data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod( + "fill<-", + signature(object = "Scales", value = "data.frame"), + function(object, value) { + object@fill <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `fill(x) <- value` : Set the fill data.frame from the Scales object. +#' Wrapper of `fill(scales(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "fill<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + fill(scales(object)) <- value + validObject(object) + object + } +) + +#### S4 border Accessors #### + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("border", function(object) { + standardGeneric("border") +}) + +#' @section Accessors: +#' - `border(x)` : Get the border data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod("border", + signature(object = "Scales"), + function(object) { + object@border + } +) + +#' @section Accessors: +#' - `border(x)` : Get the border data.frame from the Scales object. +#' Wrapper of `border(scales(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("border", + signature(object = "Pedigree"), + function(object) { + border(scales(object)) + } +) + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("border<-", function(object, value) { + standardGeneric("border<-") +}) + +#' @section Accessors: +#' - `border(x) <- value` : Set the border data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod( + "border<-", + signature(object = "Scales", value = "data.frame"), + function(object, value) { + object@border <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `border(x) <- value` : Set the border data.frame from the Scales object. +#' Wrapper of `border(scales(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "border<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + border(scales(object)) <- value + validObject(object) + object + } +) + +#### S4 hints Accessors #### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("hints", function(object) { + standardGeneric("hints") +}) + +#' @section Accessors: +#' - `hints(x)` : Get the Hints object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("hints", signature(object = "Pedigree"), function(object) { + object@hints +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("hints<-", function(object, value) { + standardGeneric("hints<-") +}) + +#' @section Accessors: +#' - `hints(x) <- value` : Set the Hints object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("hints<-", signature(object = "Pedigree", value = "Hints"), function( + object, value +) { + object@hints <- value + validObject(object) + object +}) + +#### S4 horder Accessors #### + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("horder", function(object) { + standardGeneric("horder") +}) + +#' @section Accessors: +#' - `horder(x)` : Get the horder vector +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("horder", "Hints", function(object) { + object@horder +}) + +#' @section Accessors: +#' - `horder(x)` : Get the horder vector from the Hints object. +#' Wrapper of `horder(hints(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("horder", "Pedigree", function(object) { + horder(hints(object)) +}) + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("horder<-", function(object, value) { + standardGeneric("horder<-") +}) + +#' @section Accessors: +#' - `horder(x) <- value` : Set the horder vector +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod( + "horder<-", + signature(object = "Hints", value = "ANY"), + function(object, value) { + if (length(value) > 0 && is.null(names(value))) { + stop("horder must be named") + } + object@horder <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `horder(x) <- value` : Set the horder vector from the Hints object. +#' Wrapper of `horder(hints(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "horder<-", + signature(object = "Pedigree", value = "ANY"), + function(object, value) { + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + horder(hints(object)) <- value + validObject(object) + object + } +) + +#### S4 spouse Accessors #### + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("spouse", function(object) { + standardGeneric("spouse") +}) + +#' @section Accessors: +#' - `spouse(x)` : Get the spouse data.frame +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("spouse", signature(object = "Hints"), function(object) { + object@spouse +}) + +#' @section Accessors: +#' - `spouse(x)` : Get the spouse data.frame from the Hints object. +#' Wrapper of `spouse(hints(x))`. +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("spouse", signature(object = "Pedigree"), function(object) { + spouse(hints(object)) +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("spouse<-", function(object, value) { + standardGeneric("spouse<-") +}) + +#' @section Accessors: +#' - `spouse(x) <- value` : Set the spouse data.frame +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod( + "spouse<-", + signature(object = "Hints", value = "data.frame"), + function(object, value) { + df <- check_columns(value, c("idl", "idr", "anchor")) + df$anchor <- anchor_to_factor(df$anchor) + object@spouse <- df + validObject(object) + object + } +) + +#' @section Accessors: +#' - `spouse(x) <- value` : Set the spouse data.frame from the Hints object. +#' Wrapper of `spouse(hints(x)) <- value`. +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "spouse<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + spouse(hints(object)) <- value + validObject(object) + object + } +) diff --git a/R/AllClass.R b/R/AllClass.R new file mode 100644 index 00000000..c79d2569 --- /dev/null +++ b/R/AllClass.R @@ -0,0 +1,253 @@ +#' @importFrom S4Vectors parallel_slot_names +NULL + +#### Class integer / character #### + +setClassUnion("character_OR_integer", c("character", "integer")) +setClassUnion("numeric_OR_logical", c("numeric", "logical")) +setClassUnion("missing_OR_NULL", c("missing", "NULL")) + +#### Ped Class #### + +#' Ped object +#' +#' S4 class to represent the identity informations of the individuals +#' in a pedigree. +#' +#' The minimal needed informations are `id`, `dadid`, `momid` and `sex`. +#' The other slots are used to store recognized informations. +#' Additional columns can be added to the Ped object and will be +#' stored in the `elementMetadata` slot of the Ped object. +#' +#' @slot id A character vector with the id of the individuals. +#' @slot dadid A character vector with the id of the father of the individuals. +#' @slot momid A character vector with the id of the mother of the individuals. +#' @slot sex An ordered factor vector for the sex of the individuals +#' (i.e. `male` < `female` < `unknown` < `terminated`). +#' @slot famid A character vector with the family identifiers of the +#' individuals (optional). +#' @slot steril A logical vector with the sterilisation status of the +#' individuals +#' (i.e. `FALSE` = not sterilised, `TRUE` = sterilised, `NA` = unknown). +#' @slot status A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = alive, `TRUE` = dead, `NA` = unknown). +#' @slot avail A logical vector with the availability status of the +#' individuals +#' (i.e. `FALSE` = not available, `TRUE` = available, `NA` = unknown). +#' @slot affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = not affected, `TRUE` = affected, `NA` = unknown). +#' @slot useful A logical vector with the usefulness status of the +#' individuals +#' (i.e. `FALSE` = not useful, `TRUE` = useful). +#' @slot isinf A logical vector indicating if the individual is informative +#' or not +#' (i.e. `FALSE` = not informative, `TRUE` = informative). +#' @slot kin A numeric vector with minimal kinship value between the +#' individuals and the useful individuals. +#' @slot num_child_tot A numeric vector with the total number of children +#' of the individuals. +#' @slot num_child_dir A numeric vector with the number of children +#' of the individuals. +#' @slot num_child_ind A numeric vector with the number of children +#' of the individuals. +#' @slot elementMetadata A DataFrame with the additional metadata columns +#' of the Ped object. +#' @slot metadata Meta informations about the pedigree. +#' +#' @seealso [Pedigree()] +#' @name Ped-class +#' @export +setClass("Ped", + contains = "Vector", + slots = c( + id = "character", + dadid = "character", + momid = "character", + sex = "factor", + famid = "character", + steril = "logical", + status = "logical", + avail = "logical", + affected = "logical", + useful = "logical", + kin = "numeric", + isinf = "logical", + num_child_tot = "numeric", + num_child_dir = "numeric", + num_child_ind = "numeric" + ) +) + +#' @importFrom S4Vectors parallel_slot_names +setMethod("parallel_slot_names", "Ped", + function(x) { + c( + "id", "dadid", "momid", "sex", "famid", + "steril", "status", "avail", "affected", + "useful", "kin", "isinf", + "num_child_tot", "num_child_dir", "num_child_ind", + callNextMethod() + ) + } +) + +setValidity("Ped", is_valid_ped) + +#### Rel Class #### + +#' Rel object +#' +#' S4 class to represent the special relationships in a Pedigree. +#' +#' A Rel object is a list of special relationships +#' between individuals in the pedigree. +#' It is used to create a Pedigree object. +#' The minimal needed informations are `id1`, `id2` and `code`. +#' +#' If a `famid` is provided, the individuals `id` will be aggregated +#' to the `famid` character to ensure the uniqueness of the `id`. +#' +#' @slot id1 A character vector with the id of the first individual. +#' @slot id2 A character vector with the id of the second individual. +#' @slot code An ordered factor vector with the code of the special +#' relationship. +#' +#' (i.e. `MZ twin` < `DZ twin` < `UZ twin` < `Spouse`). +#' @slot famid A character vector with the famid of the individuals. +#' +#' @seealso [Pedigree()] +#' @name Rel-class +#' @export +setClass("Rel", + contains = "Vector", + slots = c( + id1 = "character", + id2 = "character", + code = "factor", + famid = "character" + ) +) + +setMethod("parallel_slot_names", "Rel", + function(x) { + c( + "id1", "id2", "code", "famid", + callNextMethod() + ) + } +) + +setValidity("Rel", is_valid_rel) + +#### Hints Class #### + +#' Hints object +#' +#' The hints are used to specify the order of the individuals in the pedigree +#' and to specify the order of the spouses. +#' +#' @slot horder A numeric named vector with one element per subject in the +#' Pedigree. It determines the relative horizontal order of subjects within +#' a sibship, as well as the relative order of processing for the founder +#' couples. (For this latter, the female founders are ordered as though they +#' were sisters). +#' @slot spouse A data.frame with one row per hinted marriage, usually +#' only a few marriages in a Pedigree will need an added hint, for +#' instance reverse the plot order of a husband/wife pair. +#' Each row contains the identifiers of the left spouse, the right hand spouse, +#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). +#' +#' @seealso [Pedigree()] +#' @rdname Hints-class +#' @export +setClass("Hints", + representation( + horder = "numeric", + spouse = "data.frame" + ) +) + +setValidity("Hints", is_valid_hints) + +#### Scale Class #### + +#' Scales object +#' +#' A Scales object is a list of two data.frame. +#' The first one is used to represent the affection status of the individuals +#' and therefore the filling of the individuals in the pedigree plot. +#' The second one is used to represent the availability status of the +#' individuals and therefore the border color of the individuals in the +#' pedigree plot. +#' +#' @slot fill A data.frame with the informations for the affection status. +#' The columns needed are: +#' - 'order': the order of the affection to be used +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'affected': a logical value indicating if the mod correspond to an affected +#' individuals +#' - 'fill': the color to use for this mods +#' - 'density': the density of the shading +#' - 'angle': the angle of the shading +#' @slot border A data.frame with the informations for the availability status. +#' The columns needed are: +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'border': the color to use for this mods +#' +#' @seealso [Pedigree()] +#' @docType class +#' @rdname Scales-class +#' @export +setClass("Scales", + representation( + fill = "data.frame", + border = "data.frame" + ) +) + +setValidity("Scales", is_valid_scales) + +#### Pedigree Class #### +#' Pedigree object +#' +#' A pedigree is a ensemble of individuals linked to each other into +#' a family tree. +#' A Pedigree object store the informations of the individuals and the +#' special relationships between them. It also permit to store the +#' informations needed to plot the pedigree (i.e. scales and hints). +#' +#' @slot ped A Ped object for the identity informations. See [Ped()] for +#' more informations. +#' @slot rel A Rel object for the special relationships. See [Rel()] for +#' more informations. +#' @slot scales A Scales object for the filling and bordering +#' colors used in the plot. See [Scales()] for more informations. +#' @slot hints A Hints object for the ordering of the +#' individuals in the plot. See [Hints()] for more informations. +#' +#' @seealso [Pedigree()], [Ped()], [Rel()], [Scales()], [Hints()] +#' @rdname Pedigree-class +#' @include AllValidity.R +#' @export +setClass("Pedigree", + representation( + ped = "Ped", # identity data + rel = "Rel", # special relationships + scales = "Scales", # scales for the plot + hints = "Hints" # hints for the plot + ) +) + +setValidity("Pedigree", is_valid_pedigree) diff --git a/R/AllConstructor.R b/R/AllConstructor.R new file mode 100644 index 00000000..2f95ecb8 --- /dev/null +++ b/R/AllConstructor.R @@ -0,0 +1,889 @@ +#' @importFrom methods callNextMethod is new 'slot<-' validObject +NULL + +#' NA to specific length +#' +#' Check if all value in a vector is `NA`. +#' If so set all of them to a new value matching the length +#' of the template. +#' If not check that the size of the vector is equal to +#' the template. +#' +#' @param x The vector to check. +#' @param temp A template vector to use to determine the length. +#' @param value The value to use to fill the vector. +#' +#' @return A vector with the same length as temp. +#' @keywords internal +#' @examples +#' +#' na_to_length(NA, rep(0, 4), "NewValue") +#' na_to_length(c(1, 2, 3, NA), rep(0, 4), "NewValue") +#' @export +na_to_length <- function(x, temp, value) { + if (length(x) == 1 && all(is.na(x))) { + rep(value, length(temp)) + } else { + if (length(x) != length(temp)) { + stop("The length of the vector should be: ", + "equal to the length of the template vector" + ) + } + x + } +} + +#### S4 Ped constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide a vector of the same size for each slot +#' or a `data.frame` with the corresponding columns. +#' +#' The metadata will correspond to the columns that do not correspond +#' to the Ped slots. +#' +#' @param obj A character vector with the id of the individuals or a +#' `data.frame` with all the informations in corresponding columns. +#' @param dadid A vector containing for each subject, the identifiers of the +#' biologicals fathers. +#' @param momid A vector containing for each subject, the identifiers of the +#' biologicals mothers. +#' @param famid A character vector with the family identifiers of the +#' individuals. If provide, will be aggregated to the individuals +#' identifiers separated by an underscore. +#' @param sex A character, factor or numeric vector corresponding to +#' the gender of the individuals. This will be transformed to an ordered factor +#' with the following levels: `male` < `female` < `unknown` < `terminated +#' The following values are recognized: +#' - character() or factor() : "f", "m", "woman", "man", "male", "female", +#' "unknown", "terminated" +#' - numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +#' @param steril A logical vector with the sterilisation status of the +#' individuals +#' (i.e. `FALSE` = not sterilised, `TRUE` = sterilised, `NA` = unknown). +#' @param status A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = alive, `TRUE` = dead, `NA` = unknown). +#' @param avail A logical vector with the availability status of the +#' individuals +#' (i.e. `FALSE` = not available, `TRUE` = available, `NA` = unknown). +#' @param affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = unaffected, `TRUE` = affected, `NA` = unknown). +#' @param missid A character vector with the missing values identifiers. +#' All the id, dadid and momid corresponding to those values will be set +#' to `NA_character_`. +#' @inheritParams check_columns +#' @return A Ped object. +#' @rdname Ped-class +#' @export +#' @include utils.R +#' @usage NULL +setGeneric("Ped", signature = "obj", function(obj, ...) { + standardGeneric("Ped") +}) + +#' @rdname Ped-class +#' @examples +#' +#' data(sampleped) +#' Ped(sampleped) +#' @export +setMethod("Ped", "data.frame", + function(obj, cols_used_init = FALSE, cols_used_del = FALSE) { + col_need <- c("id", "sex", "dadid", "momid") + col_to_use <- c( + "famid", "steril", "status", "avail", "affected", + "kin", "isinf", "useful" + ) + col_used <- c( + "num_child_tot", "num_child_dir", "num_child_ind", + "elementMetadata" + ) + df <- check_columns( + obj, col_need, col_used, col_to_use, + others_cols = TRUE, cols_to_use_init = TRUE, + cols_used_init = cols_used_init, cols_used_del = cols_used_del + ) + + df$famid[is.na(df$famid)] <- NA_character_ + + df$steril <- vect_to_binary(df$steril, logical = TRUE) + df$status <- vect_to_binary(df$status, logical = TRUE) + df$avail <- vect_to_binary(df$avail, logical = TRUE) + df$affected <- vect_to_binary(df$affected, logical = TRUE) + myped <- with(df, Ped( + obj = id, sex = sex, dadid = dadid, momid = momid, + famid = famid, + steril = steril, status = status, avail = avail, + affected = affected + )) + mcols(myped) <- df[, + colnames(df)[!colnames(df) %in% slotNames(myped)] + ] + rownames(mcols(myped)) <- df$id + myped + } +) + +#' @rdname Ped-class +#' @examples +#' +#' Ped( +#' obj = c("1", "2", "3", "4", "5", "6"), +#' dadid = c("4", "4", "6", "0", "0", "0"), +#' momid = c("5", "5", "5", "0", "0", "0"), +#' sex = c(1, 2, 3, 1, 2, 1), +#' missid = "0" +#' ) +#' @export +setMethod("Ped", "character_OR_integer", + function( + obj, sex, dadid, momid, famid = NA, + steril = NA, status = NA, avail = NA, + affected = NA, missid = NA_character_ + ) { + famid <- na_to_length(famid, obj, NA_character_) + id <- as.character(obj) + dadid <- as.character(dadid) + momid <- as.character(momid) + + id[id %in% missid] <- NA_character_ + dadid[dadid %in% missid] <- NA_character_ + momid[momid %in% missid] <- NA_character_ + + sex <- sex_to_factor(sex) + + steril <- na_to_length(steril, id, NA) + status <- na_to_length(status, id, NA) + avail <- na_to_length(avail, id, NA) + affected <- na_to_length(affected, id, NA) + useful <- na_to_length(NA, id, NA) + isinf <- na_to_length(NA, id, NA) + kin <- na_to_length(NA, id, NA_real_) + + df_child <- num_child(id, dadid, momid, rel_df = NULL) + + new( + "Ped", + id = id, dadid = dadid, momid = momid, famid = famid, + sex = sex, steril = steril, status = status, avail = avail, + affected = affected, + useful = useful, kin = kin, isinf = isinf, + num_child_tot = df_child$num_child_tot, + num_child_dir = df_child$num_child_dir, + num_child_ind = df_child$num_child_ind + ) + } +) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("Ped", "missing", + function(obj) { + new("Ped") + } +) + +#### S4 Rel constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide a vector of the same size for each slot +#' or a `data.frame` with the corresponding columns. +#' +#' @param obj A character vector with the id of the first individuals of each +#' pairs or a `data.frame` with all the informations in corresponding columns. +#' @param id2 A character vector with the id of the second individuals of each +#' pairs +#' @param code A character, factor or numeric vector corresponding to +#' the relation code of the individuals: +#' - MZ twin = Monozygotic twin +#' - DZ twin = Dizygotic twin +#' - UZ twin = twin of unknown zygosity +#' - Spouse = Spouse +#' The following values are recognized: +#' - character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with +#' of without space between the words. The case is not important. +#' - numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +#' @inheritParams Ped +#' +#' @return A Rel object. +#' @rdname Rel-class +#' @export +#' @usage NULL +setGeneric("Rel", signature = "obj", function(obj, ...) { + standardGeneric("Rel") +}) + +#' @rdname Rel-class +#' @export +#' @examples +#' +#' rel_df <- data.frame( +#' id1 = c("1", "2", "3"), +#' id2 = c("2", "3", "4"), +#' code = c(1, 2, 3) +#' ) +#' Rel(rel_df) +setMethod("Rel", "data.frame", + function(obj) { + col_need <- c("id1", "id2", "code") + col_to_use <- c("famid") + df <- check_columns( + obj, col_need, NULL, col_to_use, + cols_to_use_init = TRUE + ) + + with(df, Rel( + obj = id1, id2 = id2, code = code, famid = as.character(famid) + )) + } +) + +#' @rdname Rel-class +#' @export +#' @examples +#' +#' Rel( +#' obj = c("1", "2", "3"), +#' id2 = c("2", "3", "4"), +#' code = c(1, 2, 3) +#' ) +setMethod("Rel", "character_OR_integer", + function( + obj, id2, code, famid = NA_character_ + ) { + famid <- na_to_length(famid, obj, NA_character_) + id1 <- as.character(obj) + id2 <- as.character(id2) + + ## Reorder id1 and id2 + ## id1 is the first in the alphabetic order + ## id2 is the second in the alphabetic order + id1o <- pmin(id1, id2) + id2o <- pmax(id1, id2) + + code <- rel_code_to_factor(code) + + rel <- new( + "Rel", + id1 = id1o, id2 = id2o, code = code, famid = famid + ) + rel + } +) + +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("Rel", "missing", + function(obj) { + new("Rel") + } +) + +#### S4 Hints constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide **horder** or **spouse** in +#' the dedicated parameters (together or separately), or inside a list. +#' +#' @param horder A named numeric vector with one element per subject in the +#' Pedigree. It determines the relative horizontal order of subjects within a +#' sibship, as well as the relative order of processing for the founder couples. +#' (For this latter, the female founders are ordered as though +#' they were sisters). +#' The names of the vector should be the individual identifiers. +#' @param spouse A data.frame with one row per hinted marriage, usually only +#' a few marriages in a pedigree will need an added hint, for instance reverse +#' the plot order of a husband/wife pair. +#' Each row contains the id of the left spouse (i.e. `idl`), the id of the +#' right hand spouse (i.e. `idr`), and the anchor (i.e : `anchor` : +#' `1` = left, `2` = right, `0` = either). +#' Children will preferentially appear under the parents of the anchored spouse. +#' +#' @return A Hints object. +#' @rdname Hints-class +#' @export +setGeneric("Hints", function(horder, spouse) { + standardGeneric("Hints") +}) + +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("Hints", + signature(horder = "Hints", spouse = "missing_OR_NULL"), + function(horder, spouse) { + hints + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' list( +#' horder = c("1" = 1, "2" = 2, "3" = 3), +#' spouse = data.frame( +#' idl = c("1", "2"), +#' idr = c("2", "3"), +#' anchor = c(1, 2) +#' ) +#' ) +#' ) +setMethod("Hints", + signature(horder = "list", spouse = "missing_OR_NULL"), + function(horder, spouse) { + if (all(!c("horder", "spouse") %in% names(horder))) { + stop("hints is a list, ", + "but doesn't contains horder or spouse slot" + ) + } + if ("horder" %in% names(horder)) { + horder <- horder$horder + } else { + horder <- NULL + } + if ("spouse" %in% names(horder)) { + spouse <- horder$spouse + } else { + spouse <- NULL + } + Hints(horder, spouse) + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' horder = c("1" = 1, "2" = 2, "3" = 3), +#' spouse = data.frame( +#' idl = c("1", "2"), +#' idr = c("2", "3"), +#' anchor = c(1, 2) +#' ) +#' ) +setMethod("Hints", + signature(horder = "numeric", spouse = "data.frame"), + function(horder, spouse) { + if (length(horder) > 0 && ( + is.null(names(horder)) || any(!is.numeric(horder)) + )) { + stop("horder must be a named numeric vector") + } + spouse <- check_columns( + spouse, c("idl", "idr", "anchor"), NULL, NULL, + cols_to_use_init = TRUE + ) + spouse$anchor <- anchor_to_factor(spouse$anchor) + new("Hints", horder = horder, spouse = spouse) + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' horder = c("1" = 1, "2" = 2, "3" = 3) +#' ) +setMethod("Hints", + signature(horder = "numeric", spouse = "missing_OR_NULL"), + function(horder, spouse) { + if (length(horder) > 0 && ( + is.null(names(horder)) || any(!is.numeric(horder)) + )) { + stop("horder must be a named numeric vector") + } + dfe <- data.frame("idl" = character(), "idr" = character(), + "anchor" = factor() + ) + new("Hints", horder = horder, spouse = dfe) + } +) + +#' @rdname Hints-class +#' @export +#' @usage NULL +setMethod("Hints", + signature(horder = "missing_OR_NULL", spouse = "missing_OR_NULL"), + function(horder, spouse) { + dfe <- data.frame("idl" = character(), "idr" = character(), + "anchor" = factor() + ) + new("Hints", horder = numeric(), spouse = dfe) + } +) + +#### S4 Scales constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You need to provide both **fill** and **border** in the dedicated parameters. +#' However this is usually done using the [generate_colors()] function with a +#' Pedigree object. +#' +#' @param fill A data.frame with the informations for the affection status. +#' The columns needed are: +#' - 'order': the order of the affection to be used +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'affected': a logical value indicating if the mod correspond to an affected +#' individuals +#' - 'fill': the color to use for this mods +#' - 'density': the density of the shading +#' - 'angle': the angle of the shading +#' @param border A data.frame with the informations for the availability status. +#' The columns needed are: +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'border': the color to use for this mods +#' +#' @return A Scales object. +#' @seealso [generate_colors()] +#' @rdname Scales-class +#' @export +setGeneric("Scales", function(fill, border) { + standardGeneric("Scales") +}) + +#' @rdname Scales-class +#' @export +#' @examples +#' +#' Scales( +#' fill = data.frame( +#' order = 1, +#' column_values = "affected", +#' column_mods = "affected_mods", +#' mods = c(0, 1), +#' labels = c("unaffected", "affected"), +#' affected = c(FALSE, TRUE), +#' fill = c("white", "red"), +#' density = c(NA, 20), +#' angle = c(NA, 45) +#' ), +#' border = data.frame( +#' column_values = "avail", +#' column_mods = "avail_mods", +#' mods = c(0, 1), +#' labels = c("not available", "available"), +#' border = c("black", "blue") +#' ) +#' ) +setMethod("Scales", + signature(fill = "data.frame", border = "data.frame"), + function(fill, border) { + fill <- check_columns( + fill, c( + "order", "column_values", "column_mods", "mods", + "labels", "affected", "fill", "density", "angle" + ), NULL, NULL + ) + border <- check_columns( + border, + c("column_values", "column_mods", "mods", "labels", "border"), + NULL, NULL + ) + new("Scales", fill = fill, border = border) + } +) + +#' @rdname Scales-class +#' @export +#' @usage NULL +setMethod("Scales", + signature(fill = "missing", border = "missing"), + function(fill, border) { + fill <- data.frame( + order = numeric(), + column_values = character(), + column_mods = character(), + mods = numeric(), + labels = character(), + affected = logical(), + fill = character(), + density = numeric(), + angle = numeric() + ) + border <- data.frame( + column_values = character(), + column_mods = character(), + mods = numeric(), + labels = character(), + border = character() + ) + new("Scales", fill = fill, border = border) + } +) + +#### S4 Pedigree constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' Main constructor of the package. +#' This constructor help to create a `Pedigree` object from +#' different `data.frame` or a set of vectors. +#' +#' If any errors are found in the data, the function will return +#' the data.frame with the errors of the Ped object and the +#' Rel object. +#' +#' @details +#' If the normalization is set to `TRUE`, then the data will be +#' standardized using the function `norm_ped()` and `norm_rel()`. +#' +#' If a data.frame is given, the columns names needed will depend if +#' the normalization is selected or not. If the normalization is selected, +#' the columns names needed are as follow and if not the columns names +#' needed are in parenthesis: +#' - `indID`: the individual identifier (`id`) +#' - `fatherId`: the identifier of the biological father (`dadid`) +#' - `motherId`: the identifier of the biological mother (`momid`) +#' - `gender`: the sex of the individual (`sex`) +#' - `family`: the family identifier of the individual (`famid`) +#' - `sterilisation`: the sterilisation status of the individual (`steril`) +#' - `available`: the availability status of the individual (`avail`) +#' - `vitalStatus`: the death status of the individual (`status`) +#' - `affection`: the affection status of the individual (`affected`) +#' - `...`: other columns that will be stored in the `elementMetadata` slot +#' +#' The minimum columns required are : +#' - `indID` / `id` +#' - `fatherId` / `dadid` +#' - `motherId` / `momid` +#' - `gender` / `sex` +#' +#' The `family` / `famid` column can also be used to specify the family of the +#' individuals and will be merge to the `indId` / `id` field separated by an +#' underscore. +#' The columns `sterilisation`, `available`, `vitalStatus`, `affection` +#' will be transformed with the [vect_to_binary()] function when the +#' normalisation is selected. +#' If you do not use the normalisation, the columns will be checked to +#' be `0` or `1`. +#' +#' @param obj A vector of the individuals identifiers or a data.frame +#' with the individuals informations. See [Ped()] for more informations. +#' @param rel_df A data.frame with the special relationships between +#' individuals. See [Rel()] for more informations. +#' The minimum columns required are `id1`, `id2` and `code`. +#' The `famid` column can also be used to specify the family +#' of the individuals. +#' If a matrix is given, the columns needs to be ordered as +#' `id1`, `id2`, `code` and `famid`. +#' The code values are: +#' - `1` = Monozygotic twin +#' - `2` = Dizygotic twin +#' - `3` = twin of unknown zygosity +#' - `4` = Spouse +#' +#' The value relation code recognized by the function are the one defined +#' by the [rel_code_to_factor()] function. +#' +#' @param hints A Hints object or a named list containing `horder` and +#' `spouse`. +#' @param cols_ren_ped A named list with the columns to rename for the +#' pedigree dataframe. This is useful if you want to use a dataframe with +#' different column names. The names of the list should be the new column +#' names and the values should be the old column names. The default values +#' are to be used with `normalize = TRUE`. +#' @param cols_ren_rel A named list with the columns to rename for the +#' relationship matrix. This is useful if you want to use a dataframe with +#' different column names. The names of the list should be the new column +#' names and the values should be the old column names. +#' @param normalize A logical to know if the data should be normalised. +#' @inheritDotParams generate_colors +#' @inheritParams Ped +#' @inheritParams is_informative +#' +#' @return A Pedigree object. +#' @export +#' @rdname Pedigree-class +#' @seealso [Ped()], [Rel()], [Scales()] +setGeneric("Pedigree", signature = "obj", + function(obj, ...) standardGeneric("Pedigree") +) + +#' @export +#' @rdname Pedigree-class +#' @param affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = unaffected, `TRUE` = affected, `NA` = unknown). +#' Can also be a data.frame with the same length as `obj`. If it is a +#' matrix, it will be converted to a data.frame and the columns will be +#' named after the `col_aff` argument. +#' @details +#' If `affected` is a data.frame, **col_aff** will be overwritten by the column +#' names of the data.frame. +#' @inheritParams generate_colors +#' @examples +#' +#' Pedigree( +#' obj = c("1", "2", "3", "4", "5", "6"), +#' dadid = c("4", "4", "6", "0", "0", "0"), +#' momid = c("5", "5", "5", "0", "0", "0"), +#' sex = c(1, 2, 3, 1, 2, 1), +#' avail = c(0, 1, 0, 1, 0, 1), +#' affected = matrix(c( +#' 0, 1, 0, 1, 0, 1, +#' 1, 1, 1, 1, 1, 1 +#' ), ncol = 2), +#' col_aff = c("aff1", "aff2"), +#' missid = "0", +#' rel_df = matrix(c( +#' "1", "2", 2 +#' ), ncol = 3, byrow = TRUE), +#' ) +setMethod("Pedigree", "character_OR_integer", function(obj, dadid, momid, + sex, famid = NA, avail = NULL, affected = NULL, status = NULL, + steril = NULL, rel_df = NULL, + missid = NA_character_, col_aff = "affection", normalize = TRUE, ... +) { + n <- length(obj) + ## Code transferred from noweb to markdown vignette. + ## Sections from the noweb/vignettes are noted here with + ## Doc: Error and Data Checks + ## Doc: Errors1 + if (length(momid) != n) stop("Mismatched lengths, id and momid") + if (length(dadid) != n) stop("Mismatched lengths, id and momid") + if (length(sex) != n) stop("Mismatched lengths, id and sex") + if (length(steril) != n & !is.null(steril)) { + stop("Mismatched lengths, id and steril") + } + + if (length(avail) != n & !is.null(avail)) { + stop("Mismatched lengths, id and avail") + } + if (length(status) != n & !is.null(status)) { + stop("Mismatched lengths, id and status") + } + + ped_df <- data.frame( + family = famid, + indId = obj, + fatherId = dadid, + motherId = momid, + gender = sex + ) + + if (is.null(affected)) { + ped_df[col_aff] <- NA + } else if (any(!is.na(affected))) { + if (is.vector(affected)) { + ped_df[col_aff] <- affected + } else if (is.data.frame(affected)) { + ped_df <- cbind(ped_df, affected) + col_aff <- colnames(affected) + } else if (is.matrix(affected)) { + affected <- as.data.frame(affected) + if (is.null(colnames(affected))) { + if (length(col_aff) != ncol(affected)) { + stop("The length of col_aff should be equal to the number", + "of columns of affected" + ) + } + colnames(affected) <- col_aff + } + ped_df <- cbind(ped_df, affected) + col_aff <- colnames(affected) + } else { + stop("Affected must be a vector or a data.frame, got:", + class(affected) + ) + } + } + if (any(!is.na(avail))) { + ped_df$available <- avail + } + if (any(!is.na(status))) { + ped_df$vitalStatus <- status + } + if (any(!is.na(steril))) { + ped_df$sterilisation <- steril + } + if (is.null(rel_df)) { + rel_df <- data.frame( + id1 = character(), + id2 = character(), + code = numeric(), + family = character() + ) + } + Pedigree(ped_df, rel_df = rel_df, + missid = missid, col_aff = col_aff, ... + ) +}) + +#' @export +#' @rdname Pedigree-class +#' @examples +#' +#' data(sampleped) +#' Pedigree(sampleped) +setMethod("Pedigree", "data.frame", function( + obj = data.frame( + indId = character(), + fatherId = character(), + motherId = character(), + gender = numeric(), + family = character(), + available = numeric(), + vitalStatus = numeric(), + affection = numeric(), + sterilisation = numeric() + ), + rel_df = data.frame( + id1 = character(), + id2 = character(), + code = numeric(), + famid = character() + ), + cols_ren_ped = list( + indId = "id", + fatherId = "dadid", + motherId = "momid", + family = "famid", + gender = "sex", + sterilisation = "steril", + affection = "affected", + available = "avail", + vitalStatus = "status" + ), + cols_ren_rel = list( + id1 = "indId1", + id2 = "indId2", + famid = "family" + ), + hints = list( + horder = NULL, + spouse = NULL + ), + normalize = TRUE, + missid = NA_character_, + col_aff = "affection", + ... +) { + ped_df <- obj + if (!is.data.frame(ped_df)) { + stop("ped_df must be a data.frame") + } + + if (is.matrix(rel_df)) { + rel_mat <- rel_df + rel_df <- data.frame( + id1 = rel_mat[, 1], + id2 = rel_mat[, 2], + code = rel_mat[, 3] + ) + if (dim(rel_mat)[2] > 3) { + rel_df$family <- rel_mat[, 4] + } + } + + if (!is.data.frame(rel_df)) { + stop("relation must be a matrix or a data.frame") + } + + ## Rename columns ped + old_cols <- as.vector(unlist(cols_ren_ped)) + new_cols <- names(cols_ren_ped) + cols_to_ren <- match(old_cols, names(ped_df)) + names(ped_df)[cols_to_ren[!is.na(cols_to_ren)]] <- + new_cols[!is.na(cols_to_ren)] + + ## Rename columns rel + old_cols <- as.vector(unlist(cols_ren_rel)) + new_cols <- names(cols_ren_rel) + cols_to_ren <- match(old_cols, names(rel_df)) + names(rel_df)[cols_to_ren[!is.na(cols_to_ren)]] <- + new_cols[!is.na(cols_to_ren)] + + ## Set family, id, dadid and momid to character + to_char <- c("family", "indId", "fatherId", "motherId") + to_char <- colnames(ped_df)[colnames(ped_df) %in% to_char] + ped_df[to_char] <- lapply(ped_df[to_char], as.character) + + ## Normalise the data before creating the object + if (normalize) { + ped_df <- norm_ped(ped_df, missid = missid) + rel_df <- norm_rel(rel_df, missid = missid) + } else { + cols_need <- c("id", "dadid", "momid", "sex") + cols_to_use <- c("steril", "avail", "famid", "status", "affected") + ped_df <- check_columns( + ped_df, cols_need, "", cols_to_use, + others_cols = TRUE, cols_to_use_init = TRUE + ) + cols_need <- c("id1", "id2", "code") + cols_to_use <- c("famid") + rel_df <- check_columns( + rel_df, cols_need, "", cols_to_use, cols_to_use_init = TRUE + ) + } + if (any(!is.na(ped_df$error))) { + warning("The Pedigree informations are not valid. ", + "Here is the normalised Pedigree informations ", + "with the identified problems" + ) + return(ped_df) + } + + if (any(!is.na(rel_df$error))) { + warning("The relationship informations are not valid. ", + "Here is the normalised relationship informations ", + "with the identified problems" + ) + return(rel_df) + } + + ped <- Ped(ped_df) + rel <- Rel(rel_df) + hints <- Hints(hints) + scales <- Scales() + + ## Create the object + ped <- new("Pedigree", + ped = ped, rel = rel, + hints = hints, scales = scales + ) + generate_colors(ped, col_aff = col_aff, ...) +} +) + +#' @export +#' @rdname Pedigree-class +#' @usage NULL +setMethod("Pedigree", "missing", function(obj) { + ped <- new("Pedigree", + ped = Ped(), rel = Rel(), + hints = Hints(), scales = Scales() + ) + ped +}) diff --git a/R/AllGeneric.R b/R/AllGeneric.R new file mode 100644 index 00000000..1d8c3bd1 --- /dev/null +++ b/R/AllGeneric.R @@ -0,0 +1,402 @@ +#### S4 Ped generics #### + +#' @section Generics: +#' - `summary(x)`: Compute the summary of a Ped object +#' @export +#' @importFrom S4Vectors classNameForDisplay +#' @importFrom S4Vectors summary +#' @rdname Ped-class +#' @usage NULL +setMethod("summary", "Ped", + function(object) { + object_class <- classNameForDisplay(object) + object_len <- length(object) + object_mcols <- mcols(object, use.names = FALSE) + object_nmc <- if (is.null(object_mcols)) 0L else ncol(object_mcols) + paste0(object_class, " object with ", object_len, " ", + ifelse(object_len == 1L, "individual", "individuals"), + " and ", object_nmc, " metadata ", + ifelse(object_nmc == 1L, "column", "columns") + ) + } +) + +#' @section Generics: +#' - `show(x)`: Convert the Ped object to a data.frame +#' and print it with its summary. +#' @export +#' @importFrom S4Vectors cbind_mcols_for_display +#' @importFrom methods show +#' @importFrom S4Vectors makeClassinfoRowForCompactPrinting +#' @rdname Ped-class +#' @usage NULL +setMethod("show", "Ped", + function(object) { + cat(summary(object), ":\n", sep = "") + df <- as.data.frame(object) + df <- df[, !colnames(df) %in% colnames(mcols(object))] + out <- S4Vectors::cbind_mcols_for_display(df, object) + class_df <- lapply(df, class) + classinfo <- S4Vectors::makeClassinfoRowForCompactPrinting( + object, class_df + ) + stopifnot(identical(colnames(classinfo), colnames(out))) + out <- rbind(classinfo, out) + print(out, quote = FALSE, right = TRUE) + } +) + +#' @section Generics: +#' - `as.list(x)`: Convert a Ped object to a list with +#' the metadata columns at the end. +#' @rdname Ped-class +#' @importFrom methods slotNames +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Ped", function(x) { + to <- list() + for (slot in slotNames(x)) { + if (slot %in% c("metadata", "elementMetadata")) { + next + } else { + to[[slot]] <- slot(x, slot) + } + } + # Add the metadata in separate slot + c(to, as.list(mcols(x))) +}) + +#' @section Generics: +#' - `as.data.frame(x)`: Convert a Ped object to a data.frame with +#' the metadata columns at the end. +#' @rdname Ped-class +#' @importFrom S4Vectors as.data.frame +#' @export +#' @usage NULL +setMethod("as.data.frame", "Ped", function(x) { + lst <- as.list(x) + if (length(unique(lapply(lst, length))) != 1) { + stop("All slots should have the same length") + } + ped_df <- data.frame(lst) + rownames(ped_df) <- ped_df$id + ped_df +}) + +#' @section Generics: +#' - `subset(x, i, del_parents = FALSE, keep = TRUE)`: Subset a Ped object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `del_parents` : A logical value indicating if the parents +#' of the individuals should be deleted. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Ped-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Ped", function(x, i, del_parents = FALSE, keep = TRUE) { + if (is.factor(i)) { + i <- as.character(i) + } + if (is.character(i)) { + i <- x@id %in% i + } else if (!is.numeric(i) & !is.logical(i)) { + stop("i must be a character, an integer or a logical vector") + } + if (!keep) { + i <- !i + } + col_computed <- c( + "num_child_tot", "num_child_dir", "num_child_ind" + ) + ped_df <- as.data.frame(x)[i, ] + ped_df <- ped_df[, ! colnames(ped_df) %in% col_computed] + + if (del_parents) { + ped_df$dadid[!ped_df$dadid %in% ped_df$id] <- NA_character_ + ped_df$momid[!ped_df$momid %in% ped_df$id] <- NA_character_ + } + new_ped <- Ped(ped_df) + validObject(new_ped) + new_ped +}) + +#### S4 Rel generics #### + +#' @section Generics: +#' - `summary(x)`: Compute the summary of a Rel object +#' @export +#' @importFrom S4Vectors classNameForDisplay +#' @importFrom S4Vectors summary +#' @rdname Rel-class +#' @usage NULL +setMethod("summary", "Rel", + function(object) { + object_class <- classNameForDisplay(object) + object_len <- length(object) + tbl <- table(object@code) + if (length(tbl) == 0L) { + sum_codes <- 0L + } else { + sum_codes <- paste0( + paste(tbl, levels(object@code)), collapse = ", " + ) + } + paste0(object_class, " object with ", object_len, " ", + ifelse(object_len == 1L, "relationship", "relationships"), + ifelse(sum_codes == 0L, "", paste0("with ", sum_codes)) + ) + } +) + +#' @section Generics: +#' - `show(x)`: Convert the Rel object to a data.frame +#' and print it with its summary. +#' @export +#' @importFrom S4Vectors cbind_mcols_for_display +#' @importFrom S4Vectors makeClassinfoRowForCompactPrinting +#' @importFrom methods show +#' @rdname Rel-class +#' @usage NULL +setMethod("show", signature(object = "Rel"), + function(object) { + cat(summary(object), ":\n", sep = "") + df <- as.data.frame(object) + df <- df[, !colnames(df) %in% colnames(mcols(object))] + out <- S4Vectors::cbind_mcols_for_display(df, object) + class_df <- lapply(df, class) + classinfo <- S4Vectors::makeClassinfoRowForCompactPrinting( + object, class_df + ) + stopifnot(identical(colnames(classinfo), colnames(out))) + out <- rbind(classinfo, out) + print(out, quote = FALSE, right = TRUE) + } +) + +#' @section Generics: +#' - `as.list(x)`: Convert a Rel object to a list +#' @rdname Rel-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Rel", function(x) { + to <- list() + for (slot in slotNames(x)) { + if (slot %in% c("metadata", "elementMetadata")) { + next + } else { + to[[slot]] <- slot(x, slot) + } + } + # Add the metadata in separate slot + c(to, as.list(mcols(x))) +}) + +#' @section Generics: +#' - `as.data.frame(x)`: Convert a Rel object to a data.frame +#' @rdname Rel-class +#' @importFrom S4Vectors as.data.frame +#' @export +#' @usage NULL +setMethod("as.data.frame", "Rel", function(x) { + lst <- as.list(x) + if (length(unique(lapply(lst, length))) != 1) { + stop("All slots should have the same length") + } + data.frame(lst) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Rel object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Rel-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Rel", function(x, idlist, keep = TRUE) { + if (is.factor(idlist)) { + idlist <- as.character(idlist) + } + if (! is.character(idlist)) { + stop("idlist must be a character") + } + + if (! keep) { + id_all <- c(id1(x), id2(x)) + idlist <- setdiff(id_all, idlist) + } + + rel_df <- as.data.frame(x) + + id1 <- rel_df$id1 %in% idlist + id2 <- rel_df$id2 %in% idlist + rel_df <- rel_df[id1 & id2, ] + new_rel <- Rel(rel_df) + validObject(new_rel) + new_rel +}) + +#### S4 Hints generics #### + +#' @section Generics: +#' - `as.list(x)`: Convert a Hints object to a list +#' @rdname Hints-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Hints", function(x) { + list(horder = x@horder, spouse = x@spouse) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Hints object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Hints-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Hints", function(x, idlist, keep = TRUE) { + horder <- horder(x) + spouse <- spouse(x) + + if (is.factor(idlist)) { + idlist <- as.character(idlist) + } + if (! is.character(idlist)) { + stop("idlist must be a character") + } + + if (length(horder) > 0) { + if (! keep) { + idlist <- setdiff(names(horder), idlist) + } + horder <- horder[names(horder) %in% idlist] + } + + if (nrow(spouse) > 0) { + if (! keep) { + id_all <- c(spouse$idl, spouse$idr) + idlist <- setdiff(id_all, idlist) + } + spouse <- spouse[spouse$idl %in% idlist & spouse$idr %in% idlist, ] + } + new_hints <- Hints(horder = horder, spouse = spouse) + validObject(new_hints) + new_hints +}) + +#### S4 Scales generics #### + +#' @section Generics: +#' - `as.list(x)`: Convert a Scales object to a list +#' @rdname Scales-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Scales", function(x) { + list(fill = x@fill, border = x@border) +}) + +#### S4 Pedigree generics #### + +#' @section Generics: +#' - `length(x)`: Get the length of a Pedigree object. +#' Wrapper of `length(ped(x))`. +#' @rdname Pedigree-class +#' @export +#' @usage NULL +setMethod("length", c(x = "Pedigree"), + function(x) { + length(ped(x)) + } +) + +#' @section Generics: +#' - `show(x)`: Print the information of the Ped and Rel +#' object inside the Pedigree object. +#' @export +#' @importFrom methods show +#' @rdname Pedigree-class +#' @usage NULL +setMethod("show", signature(object = "Pedigree"), function(object) { + cat("Pedigree object with: \n") + print(ped(object)) + print(rel(object)) +}) + +#' @section Generics: +#' - `summary(x)`: Compute the summary of the Ped and Rel object +#' inside the Pedigree object. +#' @export +#' @rdname Pedigree-class +#' @usage NULL +setMethod("summary", signature(object = "Pedigree"), function(object) { + cat("Pedigree object with \n") + print(summary(ped(object))) + print(summary(rel(object))) +}) + +#' @section Generics: +#' - `as.list(x)`: Convert a Pedigree object to a list +#' @rdname Pedigree-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Pedigree", function(x) { + list( + ped = as.list(ped(x)), + rel = as.list(rel(x)), + scales = as.list(scales(x)), + hints = as.list(hints(x)) + ) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Pedigree object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `del_parents` : A logical value indicating if the parents +#' of the individuals should be deleted. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Pedigree-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Pedigree", + function(x, i, del_parents = FALSE, keep = TRUE) { + new_ped <- subset(ped(x), i, del_parents = del_parents, keep = keep) + all_id <- id(new_ped) + new_rel <- subset(rel(x), all_id) + new_hints <- subset(hints(x), all_id) + + new_pedi <- new("Pedigree", + ped = new_ped, rel = new_rel, + hints = new_hints, scales = scales(x) + ) + validObject(new_pedi) + new_pedi + } +) + +#' @section Generics: +#' - `x[i, del_parents, keep]`: Subset a Pedigree object +#' based on the individuals identifiers given. +#' @rdname Pedigree-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("[", c(x = "Pedigree", i = "ANY", j = "missing"), + function(x, i, j, del_parents = FALSE, keep = TRUE, drop = TRUE) { + subset(x, i, del_parents, keep) + } +) diff --git a/R/AllValidity.R b/R/AllValidity.R new file mode 100644 index 00000000..ce6b27fe --- /dev/null +++ b/R/AllValidity.R @@ -0,0 +1,577 @@ +#' Print0 to max +#' +#' Print0 the elements inside a vector until a maximum is reached. +#' +#' @param x A vector. +#' @param max The maximum number of elements to print. +#' @param ... Additional arguments passed to print0 +#' +#' @return The character vector aggregated until the maximum is reached. +#' @keywords internal +paste0max <- function(x, max = 5, sep = "", ...) { + lgt <- min(length(x), max) + ext <- ifelse(length(x) > max, "...", "") + paste( + "'", paste0(x[seq_len(lgt)], collapse = "', '"), + "'", ext, sep = sep, ... + ) +} + +#' Check if the fields are present in an object slot +#' +#' @param obj An object. +#' @param slot A slot of object. +#' @param fields A character vector with the fields to check. +#' +#' @return A character vector with the errors if any. +#' @keywords internal +check_slot_fd <- function(obj, slot = NULL, fields = character()) { + if (is.object(obj)) { + obj <- as.list(obj) + } + if (is.null(slot)) { + array_names <- names(obj) + } else if (is.data.frame(obj[[slot]])) { + array_names <- colnames(obj[[slot]]) + } else if (is.list(obj[[slot]])) { + array_names <- names(obj[[slot]]) + } else { + stop( + "Slot ", slot, " is not a data.frame or a list. ", + class(obj[[slot]]), " found." + ) + } + if (length(array_names) == 0) { + paste0( + "No fields in ", slot, + " slot. See Pedigree documentation." + ) + } else if (any(!fields %in% array_names)) { + paste0( + paste0max(fields[!fields %in% array_names]), + " column(s) is not present in slot ", slot, "." + ) + } +} + +#' Check values in a slot +#' +#' Check if the all the values in a slot are in a vector of values. +#' +#' @param val A vector of values to check. +#' @param ref A vector of reference values. +#' @param name A character vector with the name of the values to check. +#' @param present A logical value indicating if the values should be present +#' or not +#' +#' @return A character vector with the errors if any. +#' @keywords internal +check_values <- function(val, ref, name = NULL, present = TRUE) { + if (length(dim(val)) > 1) { + stop("val must be a vector") + } + + if (present) { + val_abs <- !val %in% ref + should <- " should be in " + } else { + val_abs <- val %in% ref + should <- " should not be in " + } + + val_name <- ifelse(is.null(name), "Values ", paste(name, "values ")) + + if (any(val_abs)) { + paste0( + val_name, paste0max(unique(val[val_abs])), should, + paste0max(unique(ref)) + ) + } +} + +#' Check if a Hints object is valid +#' +#' Check if horder and spouse slots are valid: +#' - horder is named numeric vector +#' - spouse is a data.frame +#' - Has the three `idr`, `idl`, `anchor` columns +#' - `idr` and `idl` are different and doesn't contains `NA` +#' - `idr` and `idl` couple are unique +#' - `anchor` column only have `right`, `left`, `either` values +#' - all ids in spouse needs to be in the names of the horder vector +#' @param object A Hints object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' @keywords internal +#' +is_valid_hints <- function(object) { + errors <- c() + + #### Check that the slots are of the right class #### + if (! is.numeric(object@horder)) { + errors <- c(errors, "horder slot must be numeric") + } + if (length(object@horder) > 0 && is.null(names(object@horder))) { + errors <- c(errors, "horder slot should be named") + } + if (! is.data.frame(object@spouse)) { + errors <- c(errors, "spouse slot must be a data.frame") + } + + #### Check that the horder slot is valid #### + errors <- c(errors, check_values( + object@horder, NA_real_, "horder", present = FALSE + )) + + #### Check that the hints spouse data.frame is valid #### + errors <- c(errors, check_slot_fd( + object, "spouse", c("idl", "idr", "anchor") + )) + + if (!is.factor(object@spouse$anchor)) { + errors <- c(errors, "anchor column must be a factor") + } + + errors <- c(errors, check_values( + object@spouse$anchor, c("left", "right", "either"), "anchor" + )) + errors <- c(errors, check_values( + object@spouse$idl, NA_character_, "idl", present = FALSE + )) + errors <- c(errors, check_values( + object@spouse$idr, NA_character_, "idr", present = FALSE + )) + + if (any(object@spouse$idl == object@spouse$idr, na.rm = TRUE)) { + errors <- c(errors, "idl and idr should be different") + } + + idmin <- pmin(object@spouse$idl, object@spouse$idr, na.rm = TRUE) + idmax <- pmax(object@spouse$idl, object@spouse$idr, na.rm = TRUE) + dup <- anyDuplicated(cbind(idmin, idmax)) + if (dup) { + dup <- paste(idmin[dup], idmax[dup], sep = "_") + errors <- c(errors, paste( + "idl and idr couple should be unique:", + paste(dup, collapse = ", "), + "couples are present more than once in the spouse slot." + )) + } + + ## All idl and idr should be in the names of horder + if (length(object@horder) > 0) { + id <- c(object@spouse$idl, object@spouse$idr) + if (any(!id %in% names(object@horder))) { + errors <- c(errors, paste( + "All idl and idr should be in the names of horder" + )) + } + } else { + if (nrow(spouse(object)) > 0) { + errors <- c(errors, paste( + "horder slot should be non empty if spouse slot is non empty" + )) + } + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Scales object is valid +#' +#' Check if the fill and border slots are valid: +#' - fill slot is a data.frame with "order", "column_values", +#' "column_mods", "mods", "labels", "affected", "fill", +#' "density", "angle" columns. +#' - "affected" is logical. +#' - "density", "angle", "order", "mods" are numeric. +#' - "column_values", "column_mods", "labels", "fill" are +#' character. +#' - border slot is a data.frame with "column_values", +#' "column_mods", "mods", "labels", "border" columns. +#' - "column_values", "column_mods", "labels", "border" are +#' character. +#' - "mods" is numeric. +#' +#' @param object A Scales object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' @keywords internal +#' +#' @export +is_valid_scales <- function(object) { + errors <- c() + + fill_cols <- c( + "order", "column_values", "column_mods", "mods", + "labels", "affected", "fill", "density", "angle" + ) + border_cols <- c( + "column_values", "column_mods", "mods", "labels", "border" + ) + errors <- c(errors, check_slot_fd(object, NULL, c("fill", "border"))) + errors <- c(errors, check_slot_fd(object, "fill", fill_cols)) + errors <- c(errors, check_slot_fd(object, "border", border_cols)) + + #### Check that the fill columns have the right values #### + ## Check for logical columns + col_log <- c("affected") + err_log <- col_log[!unlist(lapply(object@fill[col_log], is.logical))] + if (length(err_log) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_log, collapse = ", "), + " column(s) must be logical", sep = "" + )) + } + + ## Check for numeric columns + col_num <- c("density", "angle", "order", "mods") + err_num <- col_num[!unlist(lapply(object@fill[col_num], is.numeric))] + if (length(err_num) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_num, collapse = ", "), + " column(s) must be numeric", sep = "" + )) + } + + ## Check for character columns + col_char <- c( + "column_values", "column_mods", "labels", "fill" + ) + err_char <- col_char[!unlist(lapply( + object@fill[col_char], is.character + ))] + if (length(err_char) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_char, collapse = ", "), + " column(s) must be character", sep = "" + )) + } + + #### Check that the border columns have the right values #### + ## Check for character columns + col_char <- c("column_values", "column_mods", "labels", "border") + err_char <- col_char[!unlist(lapply( + object@border[col_char], is.character + ))] + if (length(err_char) > 0) { + errors <- c(errors, paste("Border slot ", + paste(err_char, collapse = ", "), + " column(s) must be character", sep = "" + )) + } + + ## Check for numeric columns + col_num <- c("mods") + err_num <- col_num[!unlist(lapply( + object@border[col_num], is.numeric + ))] + if (length(err_num) > 0) { + errors <- c(errors, paste("Border slot ", + paste(err_num, collapse = ", "), + " column(s) must be numeric", sep = "" + )) + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Ped object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the ped ids slots have the right values +#' 2. Check that the sex, steril, status, avail and affected slots have the +#' right values +#' 3. Check that dad are male and mom are female +#' 4. Check that individuals have both parents or none +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_ped <- function(object) { + missid <- NA_character_ + errors <- c() + + #### Check that the ped columns have the right values #### + # Check for ped@id uniqueness + if (any(duplicated(object@id))) { + errors <- c(errors, "Id in ped slot must be unique") + } + + # Control values for ids + famid <- unique(object@famid) + errors <- c(errors, check_values( + famid, c(""), "famid", present = FALSE + )) + errors <- c(errors, check_values( + object@id, + c( + missid, "", + paste(as.character(missid), famid, sep = "_"), + paste("", famid, sep = "_") + ), + "id", present = FALSE + )) + errors <- c(errors, check_values( + object@dadid, c(object@id, missid), "dadid" + )) + errors <- c(errors, check_values( + object@momid, c(object@id, missid), "momid" + )) + + # Control values for sex, steril, status, avail and affected + sex_code <- c("male", "female", "unknown", "terminated") + errors <- c(errors, check_values(object@sex, sex_code)) + errors <- c(errors, check_values(object@steril, c(0, 1, NA))) + errors <- c(errors, check_values(object@status, c(0, 1, NA))) + errors <- c(errors, check_values(object@avail, c(0, 1, NA))) + errors <- c(errors, check_values(object@affected, c(0, 1, NA))) + + # Control sex for parents + id <- object@id + momid <- object@momid + dadid <- object@dadid + sex <- object@sex + is_dad <- id %in% dadid + is_mom <- id %in% momid + + if (any(sex[is_dad] != "male")) { + errors <- c(errors, "Some dad are not male") + } + if (any(sex[is_mom] != "female")) { + errors <- c(errors, "Some mom are not female") + } + if (any( + (dadid %in% missid & (! momid %in% missid)) | + ((! dadid %in% missid) & momid %in% missid) + )) { + errors <- c(errors, "Individuals should have both parents or none") + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } + + return(errors) +} + +#' Check if a Rel object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the "id1", "id2", "code", "famid" slots exist +#' 2. Check that the "code" slots have the right values +#' (i.e. "MZ twin", "DZ twin", "UZ twin", "Spouse") +#' 3. Check that all "id1" are different to "id2" +#' 4. Check that all "id1" are smaller than "id2" +#' 5. Check that no duplicate relation are present +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_rel <- function(object) { + errors <- c() + + rel_cols <- c("id1", "id2", "code", "famid") + #### Check that the slots have the right columns #### + errors <- c(errors, check_slot_fd(object, NULL, rel_cols)) + + #### Check that the rel columns have the right values #### + codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") + errors <- c(errors, check_values(object@code, codes)) + + #### Check that id1 is different from id2 #### + if (any(object@id1 == object@id2)) { + id1e <- object@id1[object@id1 == object@id2] + id2e <- object@id2[object@id1 == object@id2] + errors <- c(errors, paste( + "id1 '", paste0(id1e, collapse = "', '"), + "' should be different to id2 '", paste0(id2e, collapse = "', '"), + "'.", sep = "" + )) + } + + #### Check that all id1 is smaller than id2 #### + if (any(object@id1 > object@id2)) { + id1b <- object@id1[object@id1 > object@id2] + id2b <- object@id2[object@id1 > object@id2] + errors <- c(errors, paste( + "id1 '", paste0(id1b, collapse = "', '"), + "' should be smaller than id2 '", paste0(id2b, collapse = "', '"), + "'.", sep = "" + )) + } + + #### Check absence of duplicate #### + idr <- paste(object@id1, object@id2, sep = "_") + if (any(duplicated(idr))) { + idd <- idr[duplicated(idr)] + errors <- c(errors, paste( + "Pairs of individuals should be unique", + " ('", paste0(idd, collapse = "', '"), "').", sep = "" + )) + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Pedigree object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the all Rel id are in the Ped object +#' 2. Check that twins have same parents +#' 3. Check that MZ twins have same sex +#' 4. Check that all columns used in scales are in the Ped object +#' 5. Check that all fill & border modalities are in the Ped object column +#' 6. Check that all id used in Hints object are in the Ped object +#' 7. Check that all spouse in Hints object are male / female +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_pedigree <- function(object) { + errors <- c() + + #### Check Rel Id in Ped Id #### + errors <- c(errors, check_values( + object@rel@famid, c(object@ped@famid, NA), "Rel famid" + )) + errors <- c(errors, check_values(object@rel@id1, object@ped@id, "Rel id1")) + errors <- c(errors, check_values(object@rel@id2, object@ped@id, "Rel id2")) + + #### Check if twins has same parents #### + code <- object@rel@code + ncode <- as.numeric(code) + id1 <- object@rel@id1 + id2 <- object@rel@id2 + id <- object@ped@id + momid <- object@ped@momid + dadid <- object@ped@dadid + sex <- object@ped@sex + temp1 <- match(id1, id, nomatch = 0) + temp2 <- match(id2, id, nomatch = 0) + if (any(ncode < 4)) { + twins <- (ncode < 4) + if (any(momid[temp1[twins]] != momid[temp2[twins]])) { + errors <- c(errors, "twins found with different mothers") + } + if (any(dadid[temp1[twins]] != dadid[temp2[twins]])) { + errors <- c(errors, "twins found with different fathers") + } + } + + #### Check if the monozygote twins has same gender #### + if (any(ncode == 1)) { + mztwins <- (ncode == 1) + if (any(sex[temp1[mztwins]] != sex[temp2[mztwins]])) { + errors <- c(errors, "MZ twins with different genders") + } + } + + #### Check that the scales columns have the right values #### + ped <- as.data.frame(ped(object)) + errors <- c(errors, check_values( + fill(object)$column_values, colnames(ped), + "fill column_values" + )) + errors <- c(errors, check_values( + fill(object)$column_mods, colnames(ped), + "fill column_mods" + )) + errors <- c(errors, check_values( + border(object)$column, colnames(ped), + "border column" + )) + + #### Check that all fill modalities are present in the pedigree data #### + for (col in unique(fill(object)$column)){ + errors <- c(errors, check_values( + ped[[col]], + fill(object)[fill(object)$column_mods == col, "mods"], + paste("fill column", col) + )) + } + #### Check that all borders modalities are present in the pedigree data #### + for (col in unique(border(object)$column)){ + errors <- c(errors, check_values( + ped[[col]], + border(object)[border(object)$column == col, "mods"], + paste("border column", col) + )) + } + + #### Check that the hints are valid #### + if (length(horder(object)) > 0 && + length(horder(object)) != length(object) + ) { + errors <- c(errors, + "Length for horder component should be equal to Pedigree length" + ) + } + idh <- names(horder(object)) + idh_abs <- idh[!idh %in% id(ped(object))] + if (length(idh_abs) > 0) { + errors <- c(errors, + paste( + "Hints horder id", + paste(idh_abs, sep = ","), + "not present in the Ped object" + ) + ) + } + + idl <- spouse(object)$idl + idr <- spouse(object)$idr + + ## Check for presence of spouses in Ped object + ids_abs <- c(idl, idr)[!c(idl, idr) %in% id(ped(object))] + if (length(ids_abs) > 0) { + errors <- c(errors, paste( + "Hints spouse(s)", + paste(ids_abs, sep = ","), + "not present in the Ped object" + )) + } + + ## Check for sex of spouses + idls <- sex(ped(object))[match(idl, id(ped(object)))] + idlr <- sex(ped(object))[match(idr, id(ped(object)))] + sps <- paste(idls, idlr, sep = "_") + sps <- sps %in% c("female_male", "male_female") + if (any(!sps)) { + errors <- c(errors, paste( + "Hints spouse(s)", + paste(paste(idl[!sps], idr[!sps], sep = "_"), sep = ","), + "not female, male" + )) + } + + #### Errors #### + if (length(errors) == 0) { + TRUE + } else { + errors + } +} \ No newline at end of file diff --git a/R/Pedixplorer-package.R b/R/Pedixplorer-package.R index 3326ede4..2931422a 100644 --- a/R/Pedixplorer-package.R +++ b/R/Pedixplorer-package.R @@ -11,7 +11,8 @@ #' additional functionality and bug fixes. #' #' The package download, NEWS, and README are available on CRAN: -#' \\url{https://cran.r-project.org/package=kinship2} +#' \\url{https://cran.r-project.org/package=kinship2} for the +#' previous version of the package. #' #' @section Functions: #' Below are listed some of the most widely used functions available @@ -24,12 +25,14 @@ #' probability having an allele sampled from two individuals #' be the same via IBD. #' -#' [ped_to_plotdf()] : Method to transform a Pedigree -#' object into a dataframe of graphical elements. -#' Allows extra information to be included in the id under the plot symbol -#' -#' [plot_fromdf()] : Method to plot a Pedigree from a -#' dataframe of graphical elements. +#' [Pedixplorer::plot()] : Method to transform a Pedigree +#' object into a graphical plot. +#' Allows extra information to be included in the id under the +#' plot symbol. +#' This method use the [plot_fromdf()] function to transform the Pedigree +#' object into a data frame of graphical elements, the same is done for the +#' legend with the [ped_to_legdf()] function. +#' When done, the data frames are plotted with the [plot_fromdf()] function. #' #' [shrink()]: Shrink a Pedigree to a specific bit size, #' removing non-informative members first. @@ -49,5 +52,4 @@ #' #' @docType package #' @rdname Pedixplorer_package -#' @keywords internal "_PACKAGE" diff --git a/R/align.R b/R/align.R index 6966b58f..88a1f8b5 100644 --- a/R/align.R +++ b/R/align.R @@ -1,11 +1,9 @@ -## Automatically generated from all.nw using noweb - -#' Routine function to get ancestors of a subject +#' Ancestors indexes of a subject #' #' @description Given the index of one or multiple individual(s), this #' function iterate through the mom and dad indexes to #' list out all the ancestors of the said individual(s). -#' This function is use in the `align()` function to +#' This function is use in the [align()] function to #' identify which spouse pairs has a common ancestor and #' therefore if they need to be connected with a double line #' (i.e. inbred). @@ -14,8 +12,13 @@ #' @param dadx Indexes of the fathers #' @param momx Indexes of the mothers #' +#' @examples +#' ancestors(c(1), c(3, 4, 5, 6), c(7, 8, 9, 10)) +#' ancestors(c(1, 2), c(3, 4, 5, 6), c(7, 8, 9, 10)) #' @return A vector of ancestor indexes +#' @keywords internal #' @seealso [align()] +#' @export ancestors <- function(idx, momx, dadx) { alist <- idx repeat { @@ -26,11 +29,11 @@ ancestors <- function(idx, momx, dadx) { } alist <- newlist } - alist[alist != idx] + alist[!alist %in% idx] } -#' Generate plotting information for a Pedigree +#' Align a Pedigree object #' #' @description #' Given a Pedigree, this function creates helper matrices that describe the @@ -38,61 +41,55 @@ ancestors <- function(idx, momx, dadx) { #' #' @details #' This is an internal routine, used almost exclusively by -#' `ped_to_plotdf()`. The subservient functions `auto_hint()`, -#' `alignped1()`, `alignped2()`, -#' `alignped3()`, and `alignped4()` +#' [ped_to_plotdf()]. +#' +#' The subservient functions [auto_hint()], +#' [alignped1()], [alignped2()], +#' [alignped3()], and [alignped4()] #' contain the bulk of the computation. -#' If the **hints** are missing the `auto_hint()` routine is called to +#' +#' If the **hints** are missing the [auto_hint()] routine is called to #' supply an initial guess. -#' If multiple families are present in the Pedigree, this routine is called -#' once for each family, and the results are combined in the list returned. -#' For more information you can read the associated vignette:align -#' `vignette("alignement_details")`. #' -#' @param ped A Pedigree object -#' @param packed Should the Pedigree be compressed, i.e., allow diagonal -#' lines connecting parents to children in order to have a smaller overall -#' width for the plot. -#' @param width for a packed output, the minimum width of the plot, in +#' If multiple families are present in the **obj** Pedigree, this routine +#' is called once for each family, and the results are combined in the +#' list returned. +#' +#' For more information you can read the associated vignette: +#' `vignette("pedigree_alignment")`. +#' +#' @param obj A Pedigree object +#' @param packed Should the Pedigree be compressed. +#' (i.e. allow diagonal lines connecting parents to children in order +#' to have a smaller overall width for the plot.) +#' @param width For a packed output, the minimum width of the plot, in #' inches. -#' @param align for a packed Pedigree, align children under parents `TRUE`, +#' @param align For a packed Pedigree, align children under parents `TRUE`, #' to the extent possible given the page width, or align to to the left #' margin `FALSE`. #' This argument can be a two element vector, giving the alignment #' parameters, or a logical value. -#' If `TRUE`, the default is `c(1.5, 2)`, or numeric the routine +#' If `TRUE`, the default is `c(1.5, 2)`, or if numeric the routine #' `alignped4()` will be called. -#' @param hints Plotting hints for the Pedigree. -#' This is a list with components `order` and `spouse`, the second one -#' is optional. -#' - **order** is a numeric vector with one element per subject in the -#' Pedigree. It determines the relative order of subjects within a sibship, as -#' well as the relative order of processing for the founder couples. (For this -#' latter, the female founders are ordered as though they were sisters). -#' - **spouse** is a matrix with one row per hinted marriage, usually -#' only a few marriages in a pedigree will need an added hint, for instance -#' reverse the plot order of a husband/wife pair. Each row contains the -#' index of the left spouse, the right hand spouse, and the anchor -#' (i.e : `1` = left, `2` = right, `0` = either). -#' Children will preferentially appear under the parents of the anchored -#' spouse. -#' @inheritParams is_parent +#' @param hints A Hints object or a named list containing `horder` and +#' `spouse`. If `NULL` then the Hints stored in **obj** will be used. +#' @inheritParams Ped #' #' @return A list with components -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n`: A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid`: A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos`: A matrix giving the horizontal position of each plot point +#' - `fam`: A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values +#' - `spouse`: A matrix with values #' - `0` = not a spouse #' - `1` = subject plotted to the immediate right is a spouse #' - `2` = subject plotted to the immediate right is an inbred spouse -#' - twins Optional matrix which will only be present if the Pedigree +#' - `twins`: Optional matrix which will only be present if the Pedigree #' contains twins : #' - `0` = not a twin #' - `1` = sibling to the right is a monozygotic twin @@ -112,143 +109,169 @@ ancestors <- function(idx, momx, dadx) { #' @export #' @include auto_hint.R #' @include kindepth.R -#' @include check_hints.R -#' @include pedigreeClass.R +#' @include AllClass.R #' @include alignped1.R #' @include alignped2.R #' @include alignped3.R #' @include alignped4.R -align <- function(ped, packed = TRUE, width = 10, - align = TRUE, hints = ped$hints, missid = "0" -) { - famlist <- unique(ped(ped)$family) - if (length(famlist) > 1) { - nfam <- length(famlist) - alignment <- vector("list", nfam) - for (i_fam in famlist) { - ped_fam <- ped[ped(ped)$family == i_fam] - alignment[[i_fam]] <- align(ped_fam, packed, width, align) +#' @rdname align +#' @usage NULL +setGeneric("align", signature = "obj", + function(obj, ...) standardGeneric("align") +) + +#' @rdname align +#' @docType methods +setMethod("align", "Pedigree", + function( + obj, packed = TRUE, width = 10, + align = TRUE, hints = NULL, missid = "NA_character_" + ) { + famlist <- unique(famid(obj)) + if (length(famlist) > 1) { + nfam <- length(famlist) + alignment <- vector("list", nfam) + for (i_fam in famlist) { + ped_fam <- obj[famid(obj) == i_fam] + alignment[[i_fam]] <- align(ped_fam, packed, width, align) + } + return(alignment) } - return(alignment) - } - if (is.null(hints$order)) { - hints <- try({ - auto_hint(ped) - }, silent = TRUE) - ## sometimes appears dim(ped) is empty (ped is NULL), so try fix here: - ## (JPS 6/6/17 - if ("try-error" %in% class(hints)) { - hints <- list(order = seq_len(max(1, dim(ped)))) + if (is.null(hints)) { + hints <- hints(obj) } - } else { - check_hints(hints, ped(ped)$sex) - } - ## Doc: Setup-align - n <- length(ped(ped)$id) - - level <- 1 + kindepth(ped, align = TRUE) - horder <- hints$order # relative order of siblings within a family + if (!is(hints, "Hints")) { + if (!is.list(hints)) { + stop("hints argument should be a Hints object or a list") + } + hints <- Hints(hints) + } + if (length(horder(hints)) == 0) { + hints <- try({ + auto_hint(obj) + }, silent = TRUE) + if ("try-error" %in% class(hints)) { + hints <- Hints(horder = setNames( + seq_len(length(ped(obj))), id(ped(obj)) + )) + } + } + ## Doc: Setup-align + n <- length(obj) - if (!is.null(hints$spouse)) { - # start with the hints list - tsex <- ped(ped)$sex[hints$spouse[, 1]] # sex of the left member - spouselist <- cbind(0, 0, 1 + (tsex != "male"), hints$spouse[, 3]) - spouselist[, 1] <- ifelse(tsex == "male", hints$spouse[, 1], - hints$spouse[, 2] - ) - spouselist[, 2] <- ifelse(tsex == "male", hints$spouse[, 2], - hints$spouse[, 1] - ) - } else { - spouselist <- matrix(0L, nrow = 0, ncol = 4) - } + level <- 1 + kindepth(obj, align_parents = TRUE) + ## relative order of siblings within a family + horder <- horder(hints) - if (nrow(rel(ped)) > 0 && any(rel(ped)$code == "Spouse")) { - # Add spouses from the relationship matrix - trel <- rel(ped)[ - rel(ped)$code == "Spouse", c("id1", "id2"), drop = FALSE - ] - trel$id1 <- match(trel$id1, ped(ped)$id) - trel$id2 <- match(trel$id2, ped(ped)$id) - tsex <- ped(ped)$sex[trel[, 1]] - trel[tsex != "male", seq_len(2)] <- trel[tsex != "male", 2:1] - spouselist <- rbind(spouselist, cbind(trel[, 1], trel[, 2], 0, 0)) - } - dad <- match(ped(ped)$dadid, ped(ped)$id, nomatch = 0) - mom <- match(ped(ped)$momid, ped(ped)$id, nomatch = 0) - is_child <- dad > 0 & mom > 0 - if (any(is_child)) { - # add parents - who <- which(is_child) - spouselist <- rbind(spouselist, cbind(dad[who], mom[who], 0, 0)) - } - hash <- spouselist[, 1] * n + spouselist[, 2] - spouselist <- spouselist[!duplicated(hash), , drop = FALSE] - ## Doc: Founders -align - noparents <- (dad[spouselist[, 1]] == 0 & dad[spouselist[, 2]] == 0) - ## Take duplicated mothers and fathers, then founder mothers - dupmom <- spouselist[noparents, 2][duplicated(spouselist[noparents, 2])] - ## Founding mothers with multiple marriages - dupdad <- spouselist[noparents, 1][duplicated(spouselist[noparents, 1])] - ## Founding fathers with multiple marriages - foundmom <- spouselist[ - noparents & !(spouselist[, 1] %in% c(dupmom, dupdad)), 2 - ] # founding mothers - founders <- unique(c(dupmom, dupdad, foundmom)) - # use the hints to order them - founders <- founders[order(horder[founders])] - rval <- alignped1(founders[1], dad, mom, level, horder, packed, spouselist) - if (length(founders) > 1) { - spouselist <- rval$spouselist - for (i in 2:length(founders)) { - rval2 <- alignped1(founders[i], dad, mom, level, horder, packed, - spouselist + if (nrow(spouse(hints)) > 0) { + # start with the hints list + idxl <- match(spouse(hints)$idl, id(ped(obj))) + idxr <- match(spouse(hints)$idr, id(ped(obj))) + tsex <- sex(ped(obj))[idxl] # sex of the left member + spouselist <- cbind( + 0, 0, 1 + (tsex != "male"), as.numeric(spouse(hints)$anchor) ) - spouselist <- rval2$spouselist - rval <- alignped3(rval, rval2, packed) + spouselist[, 1] <- ifelse(tsex == "male", idxl, idxr) + spouselist[, 2] <- ifelse(tsex == "male", idxr, idxl) + } else { + spouselist <- matrix(0L, nrow = 0, ncol = 4) + } + if (any(code(rel(obj)) == "Spouse")) { + # Add spouses from the relationship matrix + trel <- as.data.frame(rel(obj)) + trel <- trel[trel$code == "Spouse", c("id1", "id2")] + trel$id1 <- match(trel$id1, id(ped(obj))) + trel$id2 <- match(trel$id2, id(ped(obj))) + tsex <- sex(ped(obj))[trel$id1] + trel[tsex != "male", seq_len(2)] <- trel[tsex != "male", 2:1] + spouselist <- rbind(spouselist, cbind(trel[, 1], trel[, 2], 0, 0)) } - } - ## Doc: finish-align (1) Unhash out the spouse and nid arrays - nid <- matrix(as.integer(floor(rval$nid)), nrow = nrow(rval$nid)) - spouse <- 1L * (rval$nid != nid) - maxdepth <- nrow(nid) - for (i in (seq_along(spouse))[spouse > 0]) { - a1 <- ancestors(nid[i], mom, dad) - # matrices are in column order - a2 <- ancestors(nid[i + maxdepth], mom, dad) - if (any(duplicated(c(a1, a2)))) { - spouse[i] <- 2 + dad <- match(dadid(ped(obj)), id(ped(obj)), nomatch = 0) + mom <- match(momid(ped(obj)), id(ped(obj)), nomatch = 0) + is_child <- dad > 0 & mom > 0 + if (any(is_child)) { + # add parents + who <- which(is_child) + spouselist <- rbind(spouselist, cbind(dad[who], mom[who], 0, 0)) } - } - ## Doc: finish align(2) - if (nrow(rel(ped)) > 0 && any(rel(ped)$code != "Spouse")) { - twins <- 0 * nid - who <- (rel(ped)$code != "Spouse") - ltwin <- match(rel(ped)[who, "id1"], ped(ped)$id, nomatch = 0) - rtwin <- match(rel(ped)[who, "id2"], ped(ped)$id, nomatch = 0) - ttype <- rel(ped)[who, "code"] - # find where each of them is plotted (any twin only appears once with a - # family id, i.e., under their parents) - # matrix of connected-to-parent ids - ntemp <- ifelse(rval$fam > 0, nid, 0) - ltemp <- (seq_along(ntemp))[match(ltwin, ntemp, nomatch = 0)] - rtemp <- (seq_along(ntemp))[match(rtwin, ntemp, nomatch = 0)] - twins[pmin(ltemp, rtemp)] <- ttype - } else { - twins <- NULL - } - ## Doc: finish align(3) - if ((is.numeric(align) || align) && max(level) > 1) { - pos <- alignped4(rval, spouse > 0, level, width, align) - } else { - pos <- rval$pos - } - if (is.null(twins)) { - list(n = rval$n, nid = nid, pos = pos, fam = rval$fam, spouse = spouse) - } else { - list(n = rval$n, nid = nid, pos = pos, fam = rval$fam, spouse = spouse, - twins = twins + hash <- spouselist[, 1] * n + spouselist[, 2] + spouselist <- spouselist[!duplicated(hash), , drop = FALSE] + ## Doc: Founders -align + noparents <- (dad[spouselist[, 1]] == 0 & dad[spouselist[, 2]] == 0) + ## Take duplicated mothers and fathers, then founder mothers + dupmom <- spouselist[noparents, 2][duplicated(spouselist[noparents, 2])] + ## Founding mothers with multiple marriages + dupdad <- spouselist[noparents, 1][duplicated(spouselist[noparents, 1])] + ## Founding fathers with multiple marriages + foundmom <- spouselist[ + noparents & !(spouselist[, 1] %in% c(dupmom, dupdad)), 2 + ] # founding mothers + founders <- unique(c(dupmom, dupdad, foundmom)) + # use the hints to order them + founders <- founders[order(horder[founders])] + rval <- alignped1( + founders[1], dad, mom, level, horder, packed, spouselist ) + if (length(founders) > 1) { + spouselist <- rval$spouselist + for (i in 2:length(founders)) { + rval2 <- alignped1(founders[i], dad, mom, level, horder, packed, + spouselist + ) + spouselist <- rval2$spouselist + rval <- alignped3(rval, rval2, packed) + } + } + ## Doc: finish-align (1) Unhash out the spouse and nid arrays + nid <- matrix(as.integer(floor(rval$nid)), nrow = nrow(rval$nid)) + spouse <- 1L * (rval$nid != nid) + maxdepth <- nrow(nid) + + for (i in (seq_along(spouse))[spouse > 0]) { + a1 <- ancestors(nid[i], mom, dad) + # matrices are in column order + a2 <- ancestors(nid[i + maxdepth], mom, dad) + if (any(duplicated(c(a1, a2)))) { + spouse[i] <- 2 + } + } + + ## Doc: finish align(2) + if (length(rel(obj)) > 0 && any(code(rel(obj)) != "Spouse")) { + twins <- 0 * nid + who <- (code(rel(obj)) != "Spouse") + ltwin <- match(id1(rel(obj))[who], id(ped(obj)), nomatch = 0) + rtwin <- match(id2(rel(obj))[who], id(ped(obj)), nomatch = 0) + ttype <- code(rel(obj))[who] + ## find where each of them is plotted + ## (any twin only appears once with a + ## family id, i.e., under their parents) + ## matrix of connected-to-parent ids + ntemp <- ifelse(rval$fam > 0, nid, 0) + ltemp <- (seq_along(ntemp))[match(ltwin, ntemp, nomatch = 0)] + rtemp <- (seq_along(ntemp))[match(rtwin, ntemp, nomatch = 0)] + twins[pmin(ltemp, rtemp)] <- ttype + } else { + twins <- NULL + } + ## Doc: finish align(3) + if ((is.numeric(align) || align) && max(level) > 1) { + pos <- alignped4(rval, spouse > 0, level, width, align) + } else { + pos <- rval$pos + } + if (is.null(twins)) { + list( + n = rval$n, nid = nid, pos = pos, + fam = rval$fam, spouse = spouse + ) + } else { + list( + n = rval$n, nid = nid, pos = pos, + fam = rval$fam, spouse = spouse, + twins = twins + ) + } } -} +) \ No newline at end of file diff --git a/R/alignped1.R b/R/alignped1.R index e9d6511c..fe50703d 100644 --- a/R/alignped1.R +++ b/R/alignped1.R @@ -1,24 +1,30 @@ # Automatically generated from all.nw using noweb -#' First routine alignement +#' Alignment first routine #' #' @description -#' First alignement routine which create the subtree founded on a single +#' First alignment routine which create the subtree founded on a single #' subject as though it were the only tree. #' #' @details -#' 1. In this routine the **nid** array consists of the final -#' `nid array + 1/2` of the final spouse array. -#' Note that the **spouselist** matrix will only contain spouse pairs -#' that are not yet processed. The logic for anchoring is slightly tricky. -#' First, if row 4 of the spouselist matrix is 0, we anchor at the first +#' In this routine the **nid** array consists of the final +#' `nid array + 1/2` of the final spouse array. +#' Note that the **spouselist** matrix will only contain spouse pairs +#' that are not yet processed. The logic for anchoring is slightly tricky. +#' +#' ## 1. Anchoring: +#' First, if col 4 of the spouselist matrix is 0, we anchor at the first #' opportunity. Also note that if `spouselist[, 3] == spouselist[, 4]` #' it is the husband who is the anchor (just write out the possibilities). -#' 2. Create the set of 3 return structures, which will be matrices +#' +#' ## 2. Return values initialization: +#' Create the set of 3 return structures, which will be matrices #' with `1 + nspouse` columns. #' If there are children then other routines will widen the result. -#' 3. Create the two complimentary lists **lspouse** and **rspouse** -#' to denote those plotted on the left and on the right. +#' +#' ## 3. Create **lspouse** and **rspouse**: +#' This two complimentary lists denote the spouses plotted on the left +#' and on the right. #' For someone with lots of spouses we try to split them evenly. #' If the number of spouses is odd, then men should have more on #' the right than on the left, women more on the right. @@ -31,11 +37,15 @@ #' `length(rspouse) > 1`. This caused `nleft > length(indx)`. #' A fix was to not let **indx** to be indexed beyond its length, #' fix by JPS 5/2013. -#' 4. For each spouse get the list of children. If there are any we +#' +#' ## 4. List the children: +#' For each spouse get the list of children. If there are any we #' call [alignped2()] to generate their tree and #' then mark the connection to their parent. #' If multiple marriages have children we need to join the trees. -#' 5. To finish up we need to splice together the tree made up from +#' +#' ## 5. Splice the tree: +#' To finish up we need to splice together the tree made up from #' all the kids, which only has data from `lev + 1` down, with the data here. #' There are 3 cases: #' @@ -45,31 +55,28 @@ #' 3. The tree below is narrower, for instance an only child. #' #' @param level Vector of the level of each subject -#' @param horder Vector of the horizontal order of each subject -#' @param spouselist Matrix of the spouses with one row per hinted marriage, -#' usually only a few marriages in a pedigree will need an added hint, for -#' instance reverse the plot order of a husband/wife pair. -#' Each row contains the index of the left spouse, the right hand spouse -#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). -#' @inheritParams align +#' @param spouselist Matrix of spouses with 4 columns: +#' - `1`: husband index +#' - `2`: wife index +#' - `3`: husband anchor +#' - `4`: wife anchor #' @inheritParams ancestors +#' @inheritParams Hints +#' @inheritParams align #' #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) @@ -77,7 +84,7 @@ #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped1 <- function(idx, dadx, momx, level, horder, packed, spouselist) { # Set a few constants maxlev <- max(level) diff --git a/R/alignped2.R b/R/alignped2.R index 5ae238db..afb45533 100644 --- a/R/alignped2.R +++ b/R/alignped2.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Second routine alignement +#' Alignment second routine #' #' @description #' Second of the four co-routines which takes a collection of siblings, @@ -16,7 +16,7 @@ #' When the first sib is processed by `alignped1` then both partners #' (and any children) will be added to the rval structure below. #' When the second sib is processed they will come back as a 1 element tree -#' (the marriage will no longer be on the spouselist), which should be added +#' (the marriage will no longer be on the **spouselist**), which should be added #' onto rval. The rule thus is to not add any 1 element tree whose value #' (which must be `idx[i]` is already in the rval structure for this level. #' @@ -26,27 +26,24 @@ #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) #' ped <- Pedigree(sampleped) #' align(ped) #' -#' @seealso [align()], [alignped1()] -#' @export +#' @seealso [align()] +#' @keywords internal, alignment alignped2 <- function(idx, dadx, momx, level, horder, packed, spouselist) { idx <- idx[order(horder[idx])] # Use the hints to order the sibs rval <- alignped1(idx[1], dadx, momx, level, horder, packed, spouselist) diff --git a/R/alignped3.R b/R/alignped3.R index 153c38bb..31b0f0b4 100644 --- a/R/alignped3.R +++ b/R/alignped3.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Third routine alignement +#' Alignment third routine #' #' @description #' Third of the four co-routines to merges two pedigree trees which @@ -12,9 +12,9 @@ #' need not plot two copies of the same person side by side. #' (When initializing the output structures do not worry about this, #' there is no harm if they are a column bigger than finally needed.) -#' Beyond that the work is simple bookkeeping. +#' Beyond that the work is simple book keeping. #' -#' ## Slide +#' ## 1. Slide: #' #' For the unpacked case, which is the traditional way to draw #' a Pedigree when we can assume the paper is infinitely wide, all parents are @@ -22,31 +22,28 @@ #' merged as solid blocks. On input they both have a left margin of 0. #' Compute how far over we have to slide the right tree. #' -#' ## Merge +#' ## 2. Merge: #' #' Now merge the two trees. Start at the top level and work down. #' -#' @param alt1 Alignement of the first tree -#' @param alt2 Alignement of the second tree +#' @param alt1 Alignment of the first tree +#' @param alt2 Alignment of the second tree #' @param space Space between two subjects #' @inheritParams align #' #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) @@ -54,7 +51,7 @@ #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped3 <- function(alt1, alt2, packed, space = 1) { maxcol <- max(alt1$n + alt2$n) maxlev <- length(alt1$n) diff --git a/R/alignped4.R b/R/alignped4.R index 52e6098f..5b6e1166 100644 --- a/R/alignped4.R +++ b/R/alignped4.R @@ -2,7 +2,7 @@ #' @importFrom quadprog solve.QP NULL -#' Fourth and last routine alignement +#' Alignment fourth routine #' #' @description #' Last routines which attempts to line up children under parents and put @@ -23,13 +23,13 @@ NULL #' For each set of siblings `x` with parents at `p_1` and `p_2` #' the alignment penalty is : #' -#' \eqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} +#' \deqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} #' #' where `k` is the number of siblings in the set. #' -#' Using the fact that when `a = 1` : +#'Using the fact that when `a = 1` : #' -#' \eqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} +#' \deqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} #' #' then moving a sibship with `k` sibs one unit to the left or #' right of optimal will incur the same cost as moving one with only 1 or @@ -77,7 +77,7 @@ NULL #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped4 <- function(rval, spouse, level, width, align) { ## Doc: alignped4 -part1, spacing across page if (is.logical(align)) diff --git a/R/auto_hint.R b/R/auto_hint.R index e89240d1..cd144066 100644 --- a/R/auto_hint.R +++ b/R/auto_hint.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Routine to shift set of siblings to the left or right +#' Shift set of siblings to the left or right #' #' @details This routine is used by `auto_hint()`. #' It shifts a set of siblings to the left or right, so that the @@ -19,7 +19,7 @@ #' #' @return The updated hint vector #' @seealso [auto_hint()] -#' @keywords internal +#' @keywords internal, auto_hint shift <- function(id, sibs, goleft, hint, twinrel, twinset) { if (twinset[id] > 0) { # enough to avoid overlap @@ -74,31 +74,21 @@ shift <- function(id, sibs, goleft, hint, twinrel, twinset) { hint } -#' Routine to find the spouse of a subject +#' Find the spouse of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the spouse of a subject. #' #' @param idpos The position of the subject #' @param plist The alignment structure representing the Pedigree layout. -#' For the differents matrices present in the list, each row represents a -#' level of the Pedigree and each column a potential subject. -#' It contains the following components: -#' - n Vector of the number of subjects per level -#' - nid Matrix of the subjects indexes -#' - pos Matrix of the subjects positions -#' - fam Matrix of the siblings family identifiers -#' - spouse Matrix of the left spouses -#' - `0` = not spouse -#' - `1` = spouse -#' - `2` = inbred spouse. +#' See [align()] for details. #' @param lev The generation level of the subject #' @inheritParams align #' #' @return The position of the spouse #' @seealso [auto_hint()] -#' @keywords internal -findspouse <- function(idpos, plist, lev, ped) { +#' @keywords internal, auto_hint +findspouse <- function(idpos, plist, lev, obj) { lpos <- idpos while (lpos > 1 && plist$spouse[lev, lpos - 1]) { lpos <- lpos - 1 @@ -111,8 +101,8 @@ findspouse <- function(idpos, plist, lev, ped) { stop("auto_hint bug 3") } - opposite <- ped(ped)$sex[plist$nid[lev, lpos:rpos]] != - ped(ped)$sex[plist$nid[lev, idpos]] + opposite <- sex(ped(obj))[plist$nid[lev, lpos:rpos]] != + sex(ped(obj))[plist$nid[lev, idpos]] ## Can happen with a triple marriage if (!any(opposite)) { @@ -122,7 +112,7 @@ findspouse <- function(idpos, plist, lev, ped) { spouse } -#' Routine to find the siblings of a subject +#' Find the siblings of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the siblings of a subject. @@ -131,7 +121,7 @@ findspouse <- function(idpos, plist, lev, ped) { #' #' @return The positions of the siblings #' @seealso [auto_hint()] -#' @keywords internal +#' @keywords internal, auto_hint findsibs <- function(idpos, plist, lev) { family <- plist$fam[lev, idpos] if (family == 0) { @@ -140,20 +130,20 @@ findsibs <- function(idpos, plist, lev) { which(plist$fam[lev, ] == family) } -#' Routine to find the duplicate pairs of a subject +#' Find the duplicate pairs of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the duplicate pairs of a subject and returns them in #' the order they should be plotted. #' #' @param idlist List of individuals identifiers to be considered -#' @inheritParams align #' @inheritParams findspouse +#' @inheritParams align #' #' @return A matrix of duplicate pairs #' @seealso [auto_hint()] -#' @keywords internal -duporder <- function(idlist, plist, lev, ped) { +#' @keywords internal, auto_hint +duporder <- function(idlist, plist, lev, obj) { temp <- table(idlist) if (all(temp == 1)) { return(matrix(0L, nrow = 0, ncol = 3)) @@ -183,7 +173,7 @@ duporder <- function(idlist, plist, lev, ped) { if (plist$fam[lev, dmat[i, 1]] > 0) { sib1 <- max(findsibs(dmat[i, 1], plist, lev)) } else { - spouse <- findspouse(dmat[i, 1], plist, lev, ped) + spouse <- findspouse(dmat[i, 1], plist, lev, obj) ## If spouse is marry-in then move on without looking ## for sibs if (plist$fam[lev, spouse] == 0) { @@ -196,7 +186,7 @@ duporder <- function(idlist, plist, lev, ped) { if (plist$fam[lev, dmat[i, 2]] > 0) { sib2 <- min(findsibs(dmat[i, 2], plist, lev)) } else { - spouse <- findspouse(dmat[i, 2], plist, lev, ped) + spouse <- findspouse(dmat[i, 2], plist, lev, obj) ## If spouse is marry-in then move on without looking ## for sibs if (plist$fam[lev, spouse] == 0) { @@ -210,38 +200,38 @@ duporder <- function(idlist, plist, lev, ped) { dmat[order(famtouch, dmat[, 1] - dmat[, 2]), , drop = FALSE] } -#' Routine to get twin relationships +#' Get twin relationships #' #' @details This routine function determine the twin relationships -#' in a Pedigree. It complete the missing twin relationships for -#' triplets, quads, etc. It also determine the order of the twins +#' in a Pedigree. It determine the order of the twins #' in the Pedigree. #' It is used by `auto_hint()`. #' #' @inheritParams align -#' @keywords internal +#' +#' @keywords internal, auto_hint #' @return A list containing components #' 1. `twinset` the set of twins #' 2. `twinrel` the twins relationships #' 3. `twinord` the order of the twins #' @seealso [auto_hint()] -get_twin_rel <- function(ped) { - if (is.null(rel(ped))) { +get_twin_rel <- function(obj) { + if (length(rel(obj)) == 0) { relation <- NULL } else { - relation <- cbind( - as.matrix(rel(ped)[, c("id1", "id2")]), - as.numeric(rel(ped)[, "code"]) - ) + relation <- as.data.frame(rel(obj))[, c("id1", "id2", "code")] + relation$code <- as.numeric(relation$code) } - n <- length(ped(ped)$id) - twinset <- rep(0, n) - twinord <- rep(1, n) + n <- length(obj) + twinset <- setNames(rep(0, n), id(ped(obj))) + twinord <- setNames(rep(1, n), id(ped(obj))) twinrel <- NULL - if (!is.null(relation) && any(relation[, 3] < 4)) { + + if (!is.null(relation) && any(relation$code < 4)) { ## Select only siblings relationships - temp <- (relation[, 3] < 4) - twinlist <- unique(c(relation[temp, seq_len(2)])) # list of twin id's + temp <- (relation$code < 4) + ## list of twin id's + twinlist <- unique(unlist(c(relation[temp, seq_len(2)]))) twinrel <- relation[temp, , drop = FALSE] for (i in 2:length(twinlist)) { # Now, for any pair of twins on a line of twinrel, give both @@ -256,18 +246,19 @@ get_twin_rel <- function(ped) { twinord[twinrel[, 2]], twinord[twinrel[, 1]] + 1 ) + twinrel[, 1] <- twinset[twinrel[, 1]] } } list(twinset = twinset, twinrel = twinrel, twinord = twinord) } -#' First initial guess for the alignment of a Pedigree +#' Initial hint for a Pedigree alignment #' #' @description #' Compute an initial guess for the alignment of a Pedigree #' #' @details -#' A Pedigree structure can contain a `hints` object which helps to +#' A Pedigree structure can contain a [Hints-class] object which helps to #' reorder the Pedigree (e.g. left-to-right order of children within family) so #' as to plot with minimal distortion. This routine is used to create an #' initial version of the hints. They can then be modified if desired. @@ -276,64 +267,68 @@ get_twin_rel <- function(ped) { #' within families, so that marriages are on the "edge" of a set children, #' closest to the spouse. For pedigrees that have only a single connection #' between two families this simple-minded approach works surprisingly well. -#' For more complex structures hand-tuning of the hints matrix may be required. +#' For more complex structures hand-tuning of the hints may be required. #' -#' The Pedigree in the example below is one where rearranging the founders -#' greatly decreases the number of extra connections. When `auto_hint()` is -#' called with a a vector of numbers as the second argument, the values for the -#' founder females are used to order the founder families left to right across -#' the plot. The values within a sibship are used as the preliminary order of +#' When `auto_hint()` is called with a a vector of numbers as the **hints** +#' argument, the values for the founder females are used to order the founder +#' families left to right across the plot. +#' The values within a sibship are used as the preliminary order of #' siblings within a family; this may be changed to move one of them to the #' edge so as to match up with a spouse. The actual values in the vector are #' not important, only their order. #' -#' @param reset If `TRUE`, then even if `ped` object has hints, reset -#' them to the initial values +#' @param reset If `TRUE`, then even if the Ped object has Hints, reset +#' them to the initial values. #' @inheritParams align #' -#' @return The **hints** list containing components `order` and `spouse` +#' @return The initial [Hints-class] object. #' -#' @seealso [align()], [best_hint()] +#' @seealso [align()], [best_hint()], [Hints-class] #' @examples #' data(sampleped) -#' ped <- Pedigree(sampleped[sampleped$family == 1, ]) +#' ped <- Pedigree(sampleped[sampleped$famid == 1, ]) #' auto_hint(ped) #' @export -auto_hint <- function( - ped, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE +#' @keywords internal, alignment, auto_hint +#' @usage NULL +setGeneric("auto_hint", signature = "obj", + function(obj, ...) standardGeneric("auto_hint") +) + +#' @rdname auto_hint +#' @export +setMethod("auto_hint", "Pedigree", function(obj, + hints = NULL, packed = TRUE, align = FALSE, reset = FALSE ) { ## full documentation now in vignette: align_code_details.Rmd ## References to those sections appear here as: ## Doc: auto_hint - if ((!is.null(hints(ped)$order) || - !is.null(hints(ped)$spouse) - ) && !reset - ) { - return(hints(ped)) + if (!is.null(hints) && is(hints, "Hints")) { + if ( + (length(horder(hints)) != 0 || length(spouse(hints)) != 0) && !reset + ) { + return(hints(obj)) + } } # nothing to do - if (length(unique(ped(ped)$family)) > 1) { + if (length(unique(famid(ped(obj)))) > 1) { stop("auto_hint only works on Pedigrees with a single family") } - n <- length(ped(ped)$id) - depth <- kindepth(ped, align_parents = TRUE) + n <- length(obj) + depth <- kindepth(obj, align_parents = TRUE) - ## Doc: init-auto_hint + ## Doc: init-auto_hint horder + horder <- setNames(rep(0, n), id(ped(obj))) if (!is.null(hints)) { - if (is.vector(hints)) { - hints <- list(order = hints) - } - if (is.matrix(hints)) { - hints <- list(spouse = hints) - } - if (is.null(hints$order)) { - horder <- integer(n) - } else { - horder <- hints$order + if (is.list(hints)) { + hints <- Hints(hints) + } else if (!is(hints, "Hints")) { + stop("hints must be a list or a Hints object") } + horder <- horder(hints) } else { - horder <- integer(n) + hints <- Hints(horder = horder) } for (i in unique(depth)) { @@ -344,7 +339,7 @@ auto_hint <- function( } } - twin_rel <- get_twin_rel(ped = ped) + twin_rel <- get_twin_rel(obj) twinset <- twin_rel$twinset twinord <- twin_rel$twinord twinrel <- twin_rel$twinrel @@ -360,32 +355,32 @@ auto_hint <- function( } # Then reset to integers - for (i in unique(ped$depth)) { - who <- (ped$depth == i) + for (i in unique(depth)) { + who <- (depth == i) horder[who] <- rank(horder[who]) # there should be no ties } } - if (!is.null(hints)) { - sptemp <- hints$spouse + if (nrow(spouse(hints)) > 0) { + sptemp <- spouse(hints) } else { sptemp <- NULL } - plist <- align(ped, + plist <- align(obj, packed = packed, align = align, - hints = list(order = horder, spouse = sptemp) + hints = Hints(horder = horder, spouse = sptemp) ) ## Doc: fixup-2 - ## Fix if duplicate individuales present + ## Fix if duplicate individuals present maxlev <- nrow(plist$nid) for (lev in seq_len(maxlev)) { # subjects on this level idlist <- plist$nid[lev, seq_len(plist$n[lev])] # duplicates to be dealt with - dpairs <- duporder(idlist, plist, lev, ped) + dpairs <- duporder(idlist, plist, lev, obj) if (nrow(dpairs) == 0) next for (i in seq_len(nrow(dpairs))) { anchor <- spouse <- rep(0, 2) @@ -404,7 +399,7 @@ auto_hint <- function( } } else { # spouse at this location connected to parents ? - spouse[j] <- findspouse(idpos, plist, lev, ped) + spouse[j] <- findspouse(idpos, plist, lev, obj) if (plist$fam[lev, spouse[j]] > 0) { # Yes they are anchor[j] <- 2 # spousal anchor sibs <- idlist[findsibs(spouse[j], plist, lev)] @@ -439,18 +434,39 @@ auto_hint <- function( warning("Unexpected result in auto_hint,", "please contact developer" ) - return(list(order = seq_len(n))) # punt + return(Hints(horder = seq_len(n))) # punt } else { + if (is.vector(temp)) { + temp <- data.frame( + idl = temp[1], idr = temp[2], anchor = temp[3] + ) + } else if (is.matrix(temp)) { + temp <- data.frame( + idl = temp[, 1], idr = temp[, 2], + anchor = temp[, 3] + ) + } sptemp <- rbind(sptemp, temp) } } # # Recompute, since this shifts things on levels below # - plist <- align(ped, + new_spouse <- data.frame( + idl = id(ped(obj))[sptemp$idl], + idr = id(ped(obj))[sptemp$idr], + anchor = anchor_to_factor(sptemp$anchor) + ) + plist <- align(obj, packed = packed, align = align, - hints = list(order = horder, spouse = sptemp) + hints = Hints(horder = horder, spouse = new_spouse) ) } - list(order = horder, spouse = sptemp) -} + + new_spouse <- data.frame( + idl = id(ped(obj))[sptemp$idl], + idr = id(ped(obj))[sptemp$idr], + anchor = anchor_to_factor(sptemp$anchor) + ) + Hints(horder = horder, spouse = new_spouse) +}) diff --git a/R/best_hint.R b/R/best_hint.R index 31b1ff23..a4632cc9 100644 --- a/R/best_hint.R +++ b/R/best_hint.R @@ -1,14 +1,43 @@ -#' Best hint for alignement +#' Generate all possible permutation +#' +#' Given a vector of length **n**, generate all possible permutations of +#' the numbers 1 to **n**. +#' This is a recursive routine, and is not very efficient. +#' +#' @param x A vector of length **n** +#' @return A matrix with **n** cols and **n!** rows +#' @keywords internal, auto_hint +permute <- function(x) { + n <- length(x) + if (n == 1) { + x + } else if (n == 2) { + rbind(x, x[c(2, 1)]) + } else if (n == 3) { + rbind(x[seq_len(3)], x[c(2, 1, 3)], x[c(3, 1, 2)]) + } else { + temp <- paste( + "cbind(x[", seq_len(n), "], permute(x[-", seq_len(n), "]))", + collapse = "," + ) + temp <- paste("rbind(", temp, ")") + eval(parse(text = temp)) + } +} + +#' Best hint for a Pedigree alignment #' #' @description -#' When computer time is cheap, use this routine to get a 'best' Pedigree. +#' When computer time is cheap, use this routine to get a *best* +#' Pedigree alignment. #' This routine will try all possible founder orders, and return the one -#' with the least 'stress'. +#' with the least **stress**. #' #' @details -#' The auto_hint routine will rearrange sibling order, but not founder order. -#' This calls auto_hint with every possible founder order, and finds that -#' plot with the least 'stress'. +#' The [auto_hint()] routine will rearrange sibling order, but not +#' founder order. +#' This calls [auto_hint()] with every possible founder order, and finds that +#' plot with the least "stress". #' The stress is computed as a weighted sum of three error measures: #' #' - nbArcs The number of duplicate individuals in the plot @@ -17,117 +46,125 @@ #' - lgParentsChilds The sum of the absolute values of the differences between #' the center of the children and the parents #' -#' \eqn{stress = -#' wt[1] \times nbArcs + -#' wt[2] \times lgArcs + -#' wt[3] \times lgParentsChilds +#' \deqn{stress = +#' wt[1] * nbArcs + +#' wt[2] * lgArcs + +#' wt[3] * lgParentsChilds #'} #' #' If during the search, a plot is found with a stress level less than #' **tolerance**, the search is terminated. #' -#' @param wt A vector of three weights for the three error measures -#' - The number of duplicate individuals in the plot -#' - The sum of the absolute values of the differences in the -#' positions of duplicate individuals -#' - The sum of the absolute values of the differences between -#' the center of the children and the parents +#' @param wt A vector of three weights for the three error measures. #' Default is `c(1000, 10, 1)`. -#' @param tolerance The maximum stress level to accept. Default is `0` +#' 1. The number of duplicate individuals in the plot +#' 2. The sum of the absolute values of the differences in the +#' positions of duplicate individuals +#' 3. The sum of the absolute values of the differences between +#' the center of the children and the parents. +#' +#' @param tolerance The maximum stress level to accept. +#' Default is `0` #' @inheritParams align #' -#' @return The best hint object out of all the permutations +#' @return The best Hints object out of all the permutations #' -#' @seealso [auto_hint()] +#' @seealso [auto_hint()], [align()] #' @export #' @examples #' data(sampleped) -#' ped <- Pedigree(sampleped[sampleped$family == 1,]) +#' ped <- Pedigree(sampleped[sampleped$famid == 1,]) #' best_hint(ped) #' @include auto_hint.R #' @include align.R -best_hint <- function(ped, wt = c(1000, 10, 1), tolerance = 0) { - # find founders married to founders the female of such pairs - # determines the plot order of founders - mom <- match(ped(ped)$momid, ped(ped)$id) - dad <- match(ped(ped)$dadid, ped(ped)$id) - # founders and marry-ins - founders <- ped(ped)$id[is.na(mom) & is.na(dad)] - fpair <- !(is.na(match(ped(ped)$momid, founders)) | - is.na(match(ped(ped)$dadid, founders)) - ) - # row num of founding moms - fmom <- unique(match(ped(ped)$momid[fpair], ped(ped)$id)) - - # This function generates the permutations one after the other - permute <- function(x) { - n <- length(x) - if (n == 3) { - rbind(x[seq_len(3)], x[c(2, 1, 3)], x[c(3, 1, 2)]) - } else { - temp <- paste( - "cbind(x[", seq_len(n), "], permute(x[-", seq_len(n), "]))", - collapse = "," - ) - temp <- paste("rbind(", temp, ")") - eval(parse(text = temp)) - } +#' @keywords alignment, auto_hint +#' @usage NULL +setGeneric( + "best_hint", signature = "obj", + function(obj, ...) { + standardGeneric("best_hint") } - pmat <- permute(seq_along(fmom)) - # Put the subsets into a random order For most Pedigrees, - # there are several permutations that will give a tolerance - # or near tolerance plot. - # This way we should hit one of them soon. - pmat <- pmat[order(runif(nrow(pmat))), ] +) + +#' @rdname best_hint +setMethod( + "best_hint", "Pedigree", + function(obj, wt = c(1000, 10, 1), tolerance = 0) { - n <- length(ped(ped)$id) - for (perm in seq_len(nrow(pmat))) { - hint <- cbind(seq_len(n), rep(0, n)) - hint[fmom, 1] <- pmat[perm, ] - # this fixes up marriages and such - newhint <- auto_hint(ped, hints = hint[, 1]) - plist <- align( - ped, packed = TRUE, align = TRUE, width = 8, hints = newhint + # find founders married to founders the female of such pairs + # determines the plot order of founders + momid <- momid(ped(obj)) + dadid <- dadid(ped(obj)) + id <- id(ped(obj)) + mom <- match(momid, id) + dad <- match(dadid, id) + # founders and marry-ins + founders <- id[is.na(mom) & is.na(dad)] + fpair <- !(is.na(match(momid, founders)) | + is.na(match(dadid, founders)) ) + # row num of founding moms + fmom <- unique(match(momid[fpair], id)) + pmat <- permute(seq_along(fmom)) + # Put the subsets into a random order For most Pedigrees, + # there are several permutations that will give a tolerance + # or near tolerance plot. + # This way we should hit one of them soon. + pmat <- pmat[order(runif(nrow(pmat))), ] + + n <- length(obj) + for (perm in seq_len(nrow(pmat))) { + hint <- cbind(seq_len(n), rep(0, n)) + hint[fmom, 1] <- pmat[perm, ] + # this fixes up marriages and such + newhint <- auto_hint( + obj, hints = Hints( + horder = setNames(hint[, 1], id(ped(obj))) + ), reset = TRUE + ) + plist <- align( + obj, packed = TRUE, align = TRUE, width = 8, hints = newhint + ) - # Compute the error measures - err <- rep(0, 3) - maxlev <- nrow(plist$nid) - for (lev in seq_len(maxlev)) { - idlist <- plist$nid[lev, seq_len(plist$n[lev])] - dups <- duplicated(idlist) - if (any(dups)) { - err[1] <- err[1] + sum(dups) - for (i in idlist[dups]) { - who <- (seq_along(idlist))[match( - idlist, i, nomatch = 0 - ) > 0] - err[2] <- err[2] + abs(diff(plist$pos[lev, who])) + # Compute the error measures + err <- rep(0, 3) + maxlev <- nrow(plist$nid) + for (lev in seq_len(maxlev)) { + idlist <- plist$nid[lev, seq_len(plist$n[lev])] + dups <- duplicated(idlist) + if (any(dups)) { + err[1] <- err[1] + sum(dups) + for (i in idlist[dups]) { + who <- (seq_along(idlist))[match( + idlist, i, nomatch = 0 + ) > 0] + err[2] <- err[2] + abs(diff(plist$pos[lev, who])) + } } - } - # get parent-child pulls - fam2 <- plist$fam[lev, ] - if (any(fam2 > 0)) { - # center of kids - centers <- tapply(plist$pos[lev, ], fam2, mean) - if (any(fam2 == 0)) { - centers <- centers[-1] + # get parent-child pulls + fam2 <- plist$fam[lev, ] + if (any(fam2 > 0)) { + # center of kids + centers <- tapply(plist$pos[lev, ], fam2, mean) + if (any(fam2 == 0)) { + centers <- centers[-1] + } + # parents + above <- plist$pos[lev - 1, sort(unique(fam2))] + 0.5 + err[3] <- err[3] + sum(abs(centers - above)) } - # parents - above <- plist$pos[lev - 1, sort(unique(fam2))] + 0.5 - err[3] <- err[3] + sum(abs(centers - above)) } - } - # best one so far? - total <- sum(err * wt) - if (perm == 1 || total < besttot) { - besttot <- total - besthint <- newhint + # best one so far? + total <- sum(err * wt) + if (perm == 1 || total < besttot) { + besttot <- total + besthint <- newhint + } + if (besttot <= tolerance) + break # we needn't do better than this! } - if (besttot <= tolerance) - break # we needn't do better than this! + besthint } - besthint -} +) diff --git a/R/bit_size.R b/R/bit_size.R index d1e9e784..c30a70b8 100644 --- a/R/bit_size.R +++ b/R/bit_size.R @@ -1,63 +1,72 @@ -#' Get Pedigree bit_size +#' Bit size of a Pedigree #' -#' @description -#' Calculate Pedigree bit_size, defined as : +#' Utility function used in the `shrink()` function +#' to calculate the bit size of a Pedigree. #' -#' \eqn{ +#' @details +#' The bit size of a Pedigree is defined as : +#' +#' \deqn{ #' 2 \times NbNonFounders - NbFounders #' } #' -#' @details -#' This is a utility function used in `shrink()` -#' to calculate the bit_size of a Pedigree. +#' Where `NbNonFounders` is the number of non founders in the Pedigree +#' (i.e. individuals with identified parents) and +#' `NbFounders` is the number of founders in the Pedigree +#' (i.e. individuals without identified parents). #' -#' @inheritParams kinship -#' @inheritParams is_parent -#' @param obj A Pedigree object or a vector of fathers identifierss +#' @param obj A Ped or Pedigree object or a vector of fathers identifiers +#' @inheritParams Ped #' #' @return A list with the following components: #' -#' - bit_size The bit_size of input Pedigree +#' - bit_size The bit size of the Pedigree #' - nFounder The number of founders in the Pedigree -#' - nNonFounder The number of nonfounders in the Pedigree +#' - nNonFounder The number of non founders in the Pedigree #' #' @seealso [shrink()] -#' @include pedigreeClass.R -#' @docType methods +#' @include AllClass.R #' @examples #' data(sampleped) #' ped <- Pedigree(sampleped) #' bit_size(ped) #' @export -#' @keywords internal +#' @keywords internal, shrink +#' @usage NULL setGeneric("bit_size", signature = "obj", function(obj, ...) standardGeneric("bit_size") ) -#' @docType methods -#' @aliases bit_size,character #' @rdname bit_size -setMethod("bit_size", "character", function(obj, momid, missid = "0") { - dadid <- obj - if (length(dadid) != length(momid)) { - stop("dadid and momid should have the same length") +setMethod("bit_size", + "character_OR_integer", + function(obj, momid, missid = NA_character_) { + dadid <- obj + if (length(dadid) != length(momid)) { + stop("dadid and momid should have the same length") + } + founder <- dadid %in% missid & momid %in% missid + ped_size <- length(dadid) + n_founder <- sum(founder) + n_non_founder <- ped_size - n_founder + bit_size <- 2 * n_non_founder - n_founder + list( + bit_size = bit_size, nFounder = n_founder, + nNonFounder = n_non_founder + ) } - founder <- dadid == missid & momid == missid - ped_size <- length(dadid) - n_founder <- sum(founder) - n_non_founder <- ped_size - n_founder - bit_size <- 2 * n_non_founder - n_founder - list( - bit_size = bit_size, nFounder = n_founder, - nNonFounder = n_non_founder - ) -}) +) -#' @docType methods -#' @aliases bit_size,Pedigree #' @rdname bit_size setMethod("bit_size", "Pedigree", - function(obj, missid = "0") { - bit_size(obj$ped$dadid, obj$ped$momid, missid) + function(obj) { + bit_size(ped(obj)) + } +) + +#' @rdname bit_size +setMethod("bit_size", "Ped", + function(obj) { + bit_size(dadid(obj), momid(obj), NA_character_) } ) \ No newline at end of file diff --git a/R/check_hints.R b/R/check_hints.R deleted file mode 100644 index 7b428f0a..00000000 --- a/R/check_hints.R +++ /dev/null @@ -1,60 +0,0 @@ -## Extracted from checks.Rnw - -#' Detect hints inconsistencies -#' -#' @description -#' This routine tries to detect inconsistencies in spousal hints. -#' -#' @details -#' These arise in `auto_hint()` with complex Pedigrees. -#' One can have ABA (subject A is on both the left and the right of B), -#' cycles, etc. -#' Users can introduce problems as well if they modify the hints. -#' -#' @inheritParams sex_to_factor -#' @inheritParams align -#' -#' @return Nothing, but will stop if there is a problem. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ht1 <- auto_hint(ped1) -#' check_hints(ht1, ped1$ped$sex) -#' -#' @seealso [auto_hint()], [best_hint()] -#' @keywords internal -#' @export -check_hints <- function(hints, sex) { - if (is.null(hints$order)) { - stop("Order component must be present in hints") - } - if (!is.numeric(hints$order)) { - stop("Order component must be numeric") - } - n <- length(sex) - if (length(hints$order) != n) { - stop("Length for order component should be equal to sex length") - } - sex <- as.character(sex_to_factor(sex)) - spouse <- hints$spouse - if (!is.null(spouse)) { - lspouse <- spouse[, 1] - rspouse <- spouse[, 2] - if (any(lspouse < 1 | lspouse > n | rspouse < 1 | rspouse > n)) { - stop("Invalid spouse value, should be between 1 and", n) - } - - temp1 <- (sex[lspouse] == "female" & sex[rspouse] == "male") - temp2 <- (sex[rspouse] == "female" & sex[lspouse] == "male") - if (!all(temp1 | temp2)) { - stop("A marriage is not male/female") - } - - hash <- n * pmax(lspouse, rspouse) + pmin(lspouse, rspouse) - # Turn off this check for now - is set off if someone is married to two - # siblings if (any(duplicated(hash))) stop('Duplicate marriage') - - # TODO Break any loops: A left of B, B left of C, C left of A. Not yet - # done - } -} diff --git a/R/data.R b/R/data.R index 63c843f1..3e32879c 100644 --- a/R/data.R +++ b/R/data.R @@ -2,7 +2,7 @@ #' #' @description Data from the Minnesota Breast Cancer Family Study. #' This contains extended pedigrees from 426 families, each identified by -#' a single proband in 1945-52, with follow up for incident breast cancer. +#' a single proband in 1945-1952, with follow up for incident breast cancer. #' #' @details The original study was conducted by Dr. Elving Anderson at the #' Dight Institute for Human Genetics at the University of Minnesota. @@ -11,11 +11,13 @@ #' siblings, offspring, aunts / uncles, and grandparents with the goal of #' understanding possible familial aspects of brest cancer. In 1991 the #' study was resurrected by Dr Tom Sellers. +#' #' Of the original 544 he excluded 58 prevalent cases, along with another 19 #' who had less than 2 living relatives at the time of Dr Anderson's survey. #' Of the remaining 462 families 10 had no living members, 23 could not be #' located and 8 refused, leaving 426 families on whom updated pedigrees #' were obtained. +#' #' This gave a study with 13351 males and 12699 females (5183 marry-ins). #' Primary questions were the relationship of early life exposures, breast #' density, and pharmacogenomics on incident breast cancer risk. @@ -29,34 +31,31 @@ #' @format A data frame with 28081 observations, one line per subject, on the #' following 14 variables. #' -#' - `id` subject identifier -#' - `proband` if 1, this subject is one of the original +#' - `id` : Subject identifier +#' - `proband` : If 1, this subject is one of the original #' 426 probands -#' - `fatherid` identifier of the father, if the father is part of +#' - `fatherid` : Identifier of the father, if the father is part of #' the data set; zero otherwise -#' - `motherid` identifier of the mother, if the mother is part of +#' - `motherid` : Identifier of the mother, if the mother is part of #' the data set; zero otherwise -#' - `famid` family identifier -#' - `endage` age at last follow-up or incident cancer -#' - `cancer` 1= breast cancer (females) or prostate cancer (males), -#' 0=censored -#' - `yob` year of birth -#' - `education` amount of education: 1-8 years, 9-12 years, high -#' school graduate, vocational education -#' beyond high school, some college but did not graduate, college graduate, -#' post-graduate education, refused to -#' answer on the questionnaire -#' - `marstat` marital status: married, living with someone in a +#' - `famid` : Family identifier +#' - `endage` : Age at last follow-up or incident cancer +#' - `cancer` : `1` = breast cancer (females) or prostate cancer (males), +#' `0` = censored +#' - `yob` : Year of birth +#' - `education` : Amount of education: 1-8 years, 9-12 years, high +#' school graduate, vocational education beyond high school, +#' some college but did not graduate, college graduate, +#' post-graduate education, refused to answer on the questionnaire +#' - `marstat` : Marital status: married, living with someone in a #' marriage-like relationship, separated #' or divorced, widowed, never married, refused to answer the questionaire -#' - `everpreg` ever pregnant: never pregnant at the time of -#' baseline survey, ever pregnant at the time -#' of baseline survey -#' - `parity` number of births -#' - `nbreast` number of breast biopsies -#' - `sex` M or F -#' - `bcpc` part of one of the families in the breast/prostate -#' cancer substudy: 0=no, 1=yes. +#' - `everpreg` : Ever pregnant at the time of baseline survey +#' - `parity` : Number of births +#' - `nbreast` : Number of breast biopsies +#' - `sex` : `M` or `F` +#' - `bcpc` : Part of one of the families in the breast / prostate +#' cancer substudy: `0` = no, `1` = yes. #' Note that subjects who were recruited to the overall study after the date of #' the BP substudy are coded as zero. #' @@ -89,15 +88,17 @@ #' cols_ren_ped = list( #' "indId" = "id", "fatherId" = "fatherid", #' "motherId" = "motherid", "gender" = "sex", "family" = "famid" -#' ) +#' ), missid = "0", col_aff = "cancer" #' ) -#' print(breastped) -#' #plot(breastped) #plot family 8, proband is solid, slash for cancers +#' summary(breastped) +#' scales(breastped) +#' #plot family 8, proband is solid, slash for cancers +#' #plot(breastped[famid(breastped) == "8"]) "minnbreast" -#' samplepedigree data +#' Sampleped data #' -#' @description Small sample pedigree data set. +#' @description Small sample pedigree data set for testing purposes. #' #' @details This is a small fictive pedigree data set, with 55 #' individuals in 2 families. @@ -105,15 +106,15 @@ #' #' @format A data frame with 55 observations, one line per subject, on the #' following 7 variables. -#' - `family` family identifier -#' - `id` subject identifier -#' - `dadid` identifier of the father, if the father is part of the +#' - `famid` : Family identifier +#' - `id` : Subject identifier +#' - `dadid` : Identifier of the father, if the father is part of the #' data set; zero otherwise -#' - `momid` identifier of the mother, if the mother is part of the +#' - `momid` : Identifier of the mother, if the mother is part of the #' data set; zero otherwise -#' - `sex` 1 for male or 2 for F -#' - `affected` 1 or 0 -#' - `available` 1 or 0 +#' - `sex` : `1` for male or `2` for female +#' - `affected` : `1` or `0` +#' - `avail` : `1` or `0` #' #' @usage #' data("sampleped") diff --git a/R/descendants.R b/R/descendants.R index a46d5008..8b362603 100644 --- a/R/descendants.R +++ b/R/descendants.R @@ -1,11 +1,13 @@ -#' Find all the descendants +#' Descendants of individuals #' #' @description #' Find all the descendants of a particular list of individuals -#' given a Pedigree +#' given a Pedigree object. #' +#' @param obj A Ped or Pedigree object or a vector of the +#' individuals identifiers. #' @inheritParams duporder -#' @inheritParams kinship +#' @inheritParams Ped #' #' @return #' Vector of all descendants of the individuals in idlist. @@ -15,24 +17,34 @@ #' data("sampleped") #' ped <- Pedigree(sampleped) #' descendants(c("1_101", "2_208"), ped) -#' @include pedigreeClass.R +#' @include AllClass.R #' @export #' @keywords internal -#' @docType methods +#' @usage NULL setGeneric("descendants", function(idlist, obj, ...) standardGeneric("descendants") ) #' @rdname descendants -#' @docType methods -#' @aliases descendants,character -setMethod("descendants", signature(idlist = "character", obj = "character"), +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "character_OR_integer"), function(idlist, obj, dadid, momid) { - id <- obj - child <- id[!(is.na(match(dadid, idlist)) & - is.na(match(momid, idlist)) + id <- as.character(obj) + idlist <- as.character(idlist) + + if (any(!idlist %in% id)) { + stop( + "All individuals in idlist should be in id ", + idlist[!idlist %in% id] ) - ] + } + + dadid <- as.character(dadid) + momid <- as.character(momid) + child <- id[!( + is.na(match(dadid, idlist)) & + is.na(match(momid, idlist)) + )] descend <- NULL while (length(child > 0)) { newchild <- id[!(is.na(match(dadid, child)) & @@ -47,10 +59,17 @@ setMethod("descendants", signature(idlist = "character", obj = "character"), ) #' @rdname descendants -#' @docType methods -#' @aliases descendants,Pedigree -setMethod("descendants", signature(idlist = "character", obj = "Pedigree"), +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "Pedigree"), + function(idlist, obj) { + descendants(idlist, ped(obj)) + } +) + +#' @rdname descendants +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "Ped"), function(idlist, obj) { - descendants(idlist, obj$ped$id, obj$ped$dadid, obj$ped$momid) + descendants(as.character(idlist), id(obj), dadid(obj), momid(obj)) } ) diff --git a/R/family_check.R b/R/family_check.R index ea0c6485..91afe72e 100644 --- a/R/family_check.R +++ b/R/family_check.R @@ -9,23 +9,22 @@ #' Given a family id vector, also compute the familial grouping from first #' principles using the parenting data, and compare the results. #' -#' The `make_famid` function is used to create a de novo family id from the +#' The [make_famid()] function is used to create a de novo family id from the #' parentage data, and this is compared to the family id given in the data. #' #' If there are any joins, then an attribute 'join' is attached. #' It will be a matrix with family as row labels, new-family-id as the columns, #' and the number of subjects as entries. #' -#' @inheritParams kinship -#' @param family A vector of family identifiers +#' @inheritParams Ped #' @param newfam The result of a call to `make_famid()`. If this has already #' been computed by the user, adding it as an argument shortens the running #' time somewhat. #' #' @return a data frame with one row for each unique family id in the -#' `family` argument or the one detected in the Pedigree object. +#' `famid` argument or the one detected in the Pedigree object. #' Components of the output are: -#' - `family` : The family id, as entered into the data set +#' - `famid` : The family id, as entered into the data set #' - `n` : Number of subjects in the family #' - `unrelated` : Number of them that appear to be unrelated to #' anyone else in the entire Pedigree. This is usually marry-ins with no @@ -53,45 +52,32 @@ #' rep(1, nrow(sampleped)))) #' fcheck.combined #' -#' # make person 120's father be her son. -#' sampleped[20, 3] <- 131 -#' fcheck1.bad <- try( -#' { -#' with(sampleped, family_check(id, father, mother, family)) -#' }, -#' silent = FALSE -#' ) -#' -#' ## fcheck1.bad is a try-error -#' -#' @seealso [make_famid()], [kinship()] -#' @include pedigreeClass.R +#' @seealso [make_famid()] +#' @include AllClass.R #' @keywords internal -#' @docType methods #' @export +#' @usage NULL setGeneric("family_check", signature = "obj", function(obj, ...) standardGeneric("family_check") ) #' @rdname family_check -#' @include make_famid.R -#' @aliases family_check,character #' @export -setMethod("family_check", "character", - function(obj, dadid, momid, family, newfam) { +setMethod("family_check", "character_OR_integer", + function(obj, dadid, momid, famid, newfam) { id <- obj - if (is.numeric(family) && any(is.na(family))) { + if (is.numeric(famid) && any(is.na(famid))) { stop("Family id of missing not allowed") } - nfam <- length(unique(family)) + nfam <- length(unique(famid)) if (missing(newfam)) { newfam <- make_famid(id, dadid, momid) - } else if (length(newfam) != length(family)) { + } else if (length(newfam) != length(famid)) { stop("Invalid length for newfam") } - xtab <- table(family, newfam) + xtab <- table(famid, newfam) if (any(newfam == 0)) { unrelated <- xtab[, 1] xtab <- xtab[, -1, drop = FALSE] @@ -105,8 +91,8 @@ setMethod("family_check", "character", temp <- apply((xtab > 0) * outer(rep(1, nfam), joins - 1), 1, sum) - out <- data.frame(family = dimnames(xtab)[[1]], - n = as.vector(table(family)), unrelated = as.vector(unrelated), + out <- data.frame(famid = dimnames(xtab)[[1]], + n = as.vector(table(famid)), unrelated = as.vector(unrelated), split = as.vector(splits), join = temp, row.names = seq_len(nfam) ) if (any(joins > 1)) { @@ -121,10 +107,17 @@ setMethod("family_check", "character", ) #' @rdname family_check -#' @docType methods -#' @aliases family_check,Pedigree +#' @export setMethod("family_check", "Pedigree", function(obj) { - family_check(obj$ped$id, obj$ped$dadid, obj$ped$momid, obj$ped$family) + family_check(ped(obj)) + } +) + +#' @rdname family_check +#' @export +setMethod("family_check", "Ped", + function(obj) { + family_check(id(obj), dadid(obj), momid(obj), famid(obj)) } ) diff --git a/R/find_avail_affected.R b/R/find_avail_affected.R index 55c209b2..1b88972e 100644 --- a/R/find_avail_affected.R +++ b/R/find_avail_affected.R @@ -1,26 +1,27 @@ -# Automatically generated from all.nw using noweb - -#' Find a single person to trim from a Pedigree whose is available +#' Find single affected and available individual from a Pedigree #' #' @description #' Finds one subject from among available non-parents with indicated affection #' status. #' #' @details -#' When used within pedigree.shrink, this function is called with the first +#' When used within [shrink()], this function is called with the first #' affected indicator, if the affected item in the Pedigree is a matrix of #' multiple affected indicators. #' +#' If **avail** or **affected** is null, then the function will use the +#' corresponding Ped accessor. +#' #' @param affstatus Affection status to search for. -#' @inheritParams align -#' @inheritParams is_informative +#' @param obj A Ped or Pedigree object. +#' @inheritParams Ped #' #' @return A list is returned with the following components -#' - ped The new Pedigree object +#' - ped The new Ped object #' - newAvail Vector of availability status of trimmed individuals #' - idTrimmed Vector of IDs of trimmed individuals -#' - isTrimmed logical value indicating whether Pedigree has been trimmed -#' - bit_size Bit size of the trimmed Pedigree +#' - isTrimmed logical value indicating whether Ped object has been trimmed +#' - bit_size Bit size of the trimmed Ped #' #' @examples #' data(sampleped) @@ -30,61 +31,83 @@ #' @include bit_size.R #' @include utils.R #' @include find_unavailable.R +#' @keywords internal, shrink #' @export -find_avail_affected <- function(ped, avail = ped(ped)$avail, affstatus = NA) { - ped_df <- ped(ped) - ped_df$avail <- avail - not_parent <- !is_parent(ped_df$id, ped_df$dadid, ped_df$momid) +#' @usage NULL +setGeneric("find_avail_affected", signature = "obj", + function(obj, ...) standardGeneric("find_avail_affected") +) - if (is.na(affstatus)) { - possibl_trim <- ped_df$id[not_parent & avail == 1 & - is.na(ped_df$affected) - ] - } else { - possibl_trim <- ped_df$id[not_parent & avail == 1 & - ped_df$affected == affstatus - ] - } - n_trim <- length(possibl_trim) +#' @rdname find_avail_affected +#' @export +setMethod("find_avail_affected", "Ped", + function(obj, avail = NULL, affected = NULL, affstatus = NA) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + affected <- affected(obj) + } + not_parent <- !is_parent(id(obj), dadid(obj), momid(obj)) - if (n_trim == 0) { - return(list( - ped = ped, id_trimmed = NA, is_trimmed = FALSE, - bit_size = bit_size(ped)$bit_size - )) - } + if (is.na(affstatus)) { + possibl_trim <- id(obj)[not_parent & avail == 1 & + is.na(affected) + ] + } else { + possibl_trim <- id(obj)[not_parent & avail == 1 & + affected == affstatus + ] + } + n_trim <- length(possibl_trim) - trim_dat <- NULL + if (n_trim == 0) { + return(list( + ped = obj, id_trimmed = NA, is_trimmed = FALSE, + bit_size = bit_size(obj)$bit_size + )) + } - for (id_trim in possibl_trim) { - tmp_avail <- avail - tmp_avail[ped_df$id == id_trim] <- FALSE - id_rm <- find_unavailable(ped, tmp_avail) - new_ped <- trim(ped, id_rm) - trim_dat <- rbind(trim_dat, c(id = id_trim, - bit_size = bit_size(new_ped)$bit_size - )) - } + trim_dat <- NULL - bits <- trim_dat[, 2] + for (id_trim in possibl_trim) { + tmp_avail <- avail + tmp_avail[id(obj) == id_trim] <- FALSE + id_rm <- find_unavailable(obj, tmp_avail) + new_ped <- subset(obj, id_rm, keep = FALSE, del_parents = TRUE) + trim_dat <- rbind(trim_dat, c(id = id_trim, + bit_size = bit_size(new_ped)$bit_size + )) + } - # trim by subject with min bits. This trims fewer subject than using - # max(bits). - id_trim <- trim_dat[, 1][bits == min(bits)] + bits <- trim_dat[, 2] - ## break ties by random choice - if (length(id_trim) > 1) { - rord <- order(runif(length(id_trim))) - id_trim <- id_trim[rord][1] - } + # trim by subject with min bits. This trims fewer subject than using + # max(bits). + id_trim <- trim_dat[, 1][bits == min(bits)] + + ## break ties by random choice + if (length(id_trim) > 1) { + rord <- order(runif(length(id_trim))) + id_trim <- id_trim[rord][1] + } + + avail[id(obj) == id_trim] <- FALSE + id_rm <- find_unavailable(obj, avail) + new_ped <- subset(obj, id_rm, keep = FALSE, del_parents = TRUE) + new_size <- bit_size(new_ped)$bit_size + avail <- avail[!(id(obj) %in% id_rm)] - avail[ped_df$id == id_trim] <- FALSE - id_rm <- find_unavailable(ped, avail) - new_ped <- trim(ped, id_rm) - new_size <- bit_size(new_ped)$bit_size - avail <- avail[!(ped_df$id %in% id_rm)] + list(ped = new_ped, new_avail = avail, id_trimmed = id_trim, + is_trimmed = TRUE, bit_size = new_size + ) + } +) - list(ped = new_ped, new_avail = avail, id_trimmed = id_trim, - is_trimmed = TRUE, bit_size = new_size - ) -} +#' @rdname find_avail_affected +#' @export +setMethod("find_avail_affected", "Pedigree", + function(obj, avail = NULL, affected = NULL, affstatus = NA) { + find_avail_affected(ped(obj), avail, affected, affstatus) + } +) diff --git a/R/find_avail_noninform.R b/R/find_avail_noninform.R index bff4c84d..35d5907a 100644 --- a/R/find_avail_noninform.R +++ b/R/find_avail_noninform.R @@ -1,18 +1,17 @@ -# Automatically generated from all.nw using noweb - #' Find uninformative but available subject #' -#' @details -#' Find subjects from a Pedigree who are available and uninformative +#' Finds subjects from among available non-parents with all affection +#' equal to `0`. #' #' @details #' Identify subjects to remove from a Pedigree who are available but -#' non-informative. This is the second step to remove subjects in -#' pedigree.shrink if the Pedigree does not meet the desired bit size. +#' non-informative (unaffected). This is the second step to remove subjects in +#' [shrink()] if the Pedigree does not meet the desired bit size. +#' +#' If **avail** or **affected** is null, then the function will use the +#' corresponding Ped accessor. #' -#' @inheritParams align -#' @inheritParams is_informative -#' @inheritParams is_parent +#' @inheritParams find_avail_affected #' #' @examples #' data(sampleped) @@ -23,27 +22,50 @@ #' informativeness. #' #' @seealso [shrink()] +#' @keywords internal, shrink +#' @export +#' @usage NULL +setGeneric("find_avail_noninform", signature = "obj", + function(obj, ...) standardGeneric("find_avail_noninform") +) + +#' @rdname find_avail_noninform #' @export -find_avail_noninform <- function(ped, avail = ped(ped)$avail, missid = "0") { - ## trim persons who are available but not informative b/c not parent by - ## setting their availability to FALSE, then call find_unavailable() JPS - ## 3/10/14 add strings check in case of char ids - ped_df <- ped(ped) - ped_df$avail <- avail +setMethod("find_avail_noninform", "Ped", + function(obj, avail = NULL, affected = NULL) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + ## TODO affected() may need to give back data.frame + affected <- affected(obj) + } + check_parent <- is_parent(id(obj), dadid(obj), momid(obj)) - check_parent <- is_parent(ped_df$id, ped_df$dadid, ped_df$momid) - for (i in seq_along(nrow(ped_df))) { - if (check_parent[i] == FALSE && avail[i] == 1 && - all(ped_df$affected[i] == 0, na.rm = TRUE)) { - ## could use ped$affected[i,] if keep matrix - fa <- ped_df$dadid[i] - mo <- ped_df$momid[i] - if (avail[ped_df$id == fa] && avail[ped_df$id == mo] || - fa == missid || mo == missid) { - ped_df$avail[i] <- FALSE + # For each individual if not a parent and unaffected + # Set its avail to FALSE if both parent avail + # or if one is absent + for (i in seq_along(length(obj))) { + if (check_parent[i] == FALSE && avail[i] == 1 && + all(affected[i] == 0, na.rm = TRUE)) { + ## could use ped$affected[i,] if keep matrix + fa <- dadid(obj)[i] + mo <- momid(obj)[i] + if (avail[id(obj) == fa] && avail[id(obj) == mo] || + is.na(fa) || is.na(mo)) { + avail[i] <- FALSE + } } } + + find_unavailable(obj, avail) } +) - find_unavailable(ped, ped_df$avail) -} +#' @rdname find_avail_noninform +#' @export +setMethod("find_avail_noninform", "Pedigree", + function(obj, avail = NULL, affected = NULL) { + find_avail_noninform(ped(obj), avail, affected) + } +) diff --git a/R/find_unavailable.R b/R/find_unavailable.R index a8f55101..d2785259 100644 --- a/R/find_unavailable.R +++ b/R/find_unavailable.R @@ -1,13 +1,14 @@ -# Automatically generated from all.nw using noweb - #' Find unavailable subjects in a Pedigree #' #' @description -#' Find the ID of subjects in a Pedigree iteratively, as anyone who is not -#' available and does not have an available descendant by successively removing -#' unavailable terminal nodes. +#' Find the identifiers of subjects in a Pedigree iteratively, +#' as anyone who is not available and does not have an available +#' descendant by successively removing unavailable terminal nodes. #' #' @details +#' If **avail** is null, then the function will use the +#' corresponding Ped accessor. +#' #' Originally written as pedTrim by Steve Iturria, modified by Dan Schaid 2007, #' and now split into the two separate functions: #' `find_unavailable()`, and `trim()` to do the tasks separately. @@ -19,77 +20,99 @@ #' If the subject ids are character, make sure none of the characters in the #' ids is a colon (":"), which is a special character #' used to concatenate and split subjects within the utility. +#' The `trim()` functions is now replaced by the `subset()` function. #' #' @inheritParams find_avail_affected #' #' @return Returns a vector of subject ids for who can be #' removed. #' -#' @section Side Effects: relation matrix from `trim` is trimmed of any +#' @section Side Effects: +#' Relation matrix from subsetting is trimmed of any #' special relations that include the subjects to trim. #' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) #' find_unavailable(ped1) #' #' @seealso [shrink()] +#' @keywords internal, shrink #' @include utils.R #' @export -find_unavailable <- function(ped, avail = ped(ped)$avail) { - ## find id within Pedigree anyone who is not available and - ## does not have an available descendant - - ## avail = TRUE/1 if available, FALSE/0 if not +#' @usage NULL +setGeneric("find_unavailable", signature = "obj", + function(obj, ...) standardGeneric("find_unavailable") +) - ## will do this iteratively by successively removing unavailable - ## terminal nodes - ## Steve Iturria, PhD, modified by Dan Schaid - df <- ped(ped) - df$avail <- avail - cont <- TRUE # flag for whether to keep iterating - - df$is_terminal <- (is_parent(df$id, df$dadid, df$momid) == FALSE) - ## JPS 3/10/14 add strings check in case of char ids - while (cont) { - id_to_remove <- df$id[df$is_terminal & df$avail == 0] - if (length(id_to_remove) > 0) { - idx_to_remove <- match(id_to_remove, df$id) - df <- df[-idx_to_remove, ] - df$is_terminal <- - (is_parent(df$id, df$dadid, df$momid) == FALSE) - } else { - cont <- FALSE +#' @rdname find_unavailable +#' @export +setMethod("find_unavailable", "Ped", + function(obj, avail = NULL) { + if (is.null(avail)) { + avail <- avail(obj) } - } + ## find id within Pedigree anyone who is not available and + ## does not have an available descendant + + ## avail = TRUE/1 if available, FALSE/0 if not + + ## will do this iteratively by successively removing unavailable + ## terminal nodes + ## Steve Iturria, PhD, modified by Dan Schaid + + cont <- TRUE # flag for whether to keep iterating + + is_terminal <- (is_parent(obj) == FALSE) + ## JPS 3/10/14 add strings check in case of char ids + obji <- obj + avail(obji) <- avail + while (cont) { + id_to_remove <- id(obji)[is_terminal & avail(obji) == 0] + if (length(id_to_remove) > 0) { + obji <- subset(obji, id_to_remove, keep = FALSE) + is_terminal <- (is_parent(obji) == FALSE) + } else { + cont <- FALSE + } + } + ## A few more clean up steps - ## A few more clean up steps + ## remove unavailable founders + tmp_ped <- exclude_unavail_founders( + id(obji), dadid(obji), momid(obji), avail(obji) + ) - ## remove unavailable founders - tmp_ped <- exclude_unavail_founders( - df$id, - df$dadid, df$momid, df$avail - ) + ## remove stray marry-ins + tmp_ped <- exclude_stray_marryin( + tmp_ped$id, tmp_ped$dadid, tmp_ped$momid + ) - ## remove stray marry-ins - tmp_ped <- exclude_stray_marryin(tmp_ped$id, tmp_ped$dadid, tmp_ped$momid) + id(obj)[is.na(match(id(obj), tmp_ped$id))] + } +) - ped(ped)$id[is.na(match(ped(ped)$id, tmp_ped$id))] -} +#' @rdname find_unavailable +#' @export +setMethod("find_unavailable", "Pedigree", + function(obj, avail = NULL) { + find_unavailable(ped(obj), avail) + } +) #' Exclude stray marry-ins #' #' @description -#' Exclude from a Pedigree any founders who are not parents. +#' Exclude any founders who are not parents. #' -#' @param id Vector of subject identifiers -#' @inheritParams descendants +#' @inheritParams exclude_unavail_founders #' #' @return #' Returns a data frame of subject identifiers and their parents. #' The data frame is trimmed of any founders who are not parents. #' -#' @keywords internal +#' @keywords internal, shrink +#' @seealso [shrink()] exclude_stray_marryin <- function(id, dadid, momid) { # get rid of founders who are not parents (stray available marryins # who are isolated after trimming their unavailable offspring) @@ -108,14 +131,10 @@ exclude_stray_marryin <- function(id, dadid, momid) { #' Exclude unavailable founders #' #' @description -#' Exclude from a Pedigree any unavailable founders. +#' Exclude any unavailable founders. #' -#' @param id Vector of subject identifiers -#' @inheritParams descendants -#' @inheritParams find_avail_affected -#' @param missid Character defining the missing ids -#' -#' @keywords internal +#' @param id A character vector with the identifiers of each individuals +#' @inheritParams Ped #' #' @return #' Returns a list with the following components: @@ -124,11 +143,16 @@ exclude_stray_marryin <- function(id, dadid, momid) { #' - id Vector of subject identifiers #' - dadid Vector of father identifiers #' - momid Vector of mother identifiers -exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { +#' +#' @keywords internal, shrink +#' @seealso [shrink()] +exclude_unavail_founders <- function( + id, dadid, momid, avail, missid = NA_character_ +) { n_old <- length(id) ## zed = TRUE if both parents are present - zed <- dadid != missid & momid != missid + zed <- (!dadid %in% missid) & (!momid %in% missid) ## concat ids to represent marriages. ## Bug if there is ":" in char subj ids marriage <- paste(dadid[zed], momid[zed], sep = ":") @@ -182,8 +206,10 @@ exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { mom <- mom[zed] for (i in seq_len(n)) { ## check if mom and dad are founders (where their parents = 0) - dad_f <- (dadid[id == dad[i]] == 0) & (momid[id == dad[i]] == 0) - mom_f <- (dadid[id == mom[i]] == 0) & (momid[id == mom[i]] == 0) + dad_f <- (dadid[id == dad[i]] %in% missid) & + (momid[id == dad[i]] %in% missid) + mom_f <- (dadid[id == mom[i]] %in% missid) & + (momid[id == mom[i]] %in% missid) both_f <- dad_f & mom_f ## check if mom and dad have avail @@ -191,7 +217,7 @@ exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { mom_avail <- avail[id == mom[i]] ## define both_unavail = T if both mom & dad not avail - both_unavail <- (dad_avail == FALSE & mom_avail == FALSE) + both_unavail <- (dad_avail == 0 & mom_avail == 0) if (both_f && both_unavail) { ## remove mom and dad from ped, and zero-out parent diff --git a/R/fix_parents.R b/R/fix_parents.R index 32cd2bd5..d2bf0050 100644 --- a/R/fix_parents.R +++ b/R/fix_parents.R @@ -1,10 +1,10 @@ #' @importFrom stringr str_split_i NULL -#' Fix details on the parents for children of the Pedigree +#' Fix parents relationship and gender #' #' @description -#' Fix the sex of parents, add parents that are missing from the Pedigree +#' Fix the sex of parents, add parents that are missing from the data. #' Can be used with a dataframe or a vector of the #' different individuals informations. #' @@ -12,24 +12,21 @@ NULL #' First look to add parents whose ids are given in momid/dadid. Second, fix #' sex of parents. Last look to add second parent for children for whom only #' one parent id is given. -#' If a family vector is given the family id will be added to the ids of all -#' individuals (id, dadid, momid) separated by an underscore befor proceeding. +#' If a **famid** vector is given the family id will be added to the ids of all +#' individuals (`id`, `dadid`, `momid`) +#' separated by an underscore before proceeding. #' #' ## Special case for dataframe -#' Check for presence of both parents id in the `id` field. -#' If not both presence behaviour depend of `delete` parameter -#' - If TRUE then use fix_parents function and merge back the other fields +#' Check for presence of both parents id in the **id** field. +#' If not both presence behaviour depend of **delete** parameter +#' - If `TRUE` then use fix_parents function and merge back the other fields #' in the dataframe then set availability to O for non available parents. -#' - If FALSE then delete the id of missing parents +#' - If `FALSE` then delete the id of missing parents #' -#' @param family Optional family identification set it to NULL to invalidate. -#' If used it will modify the ids of the individuals by pasting it with an _. -#' @inheritParams kinship -#' @inheritParams is_parent -#' @inheritParams sex_to_factor +#' @inheritParams Ped #' @param obj A data.frame or a vector of the individuals identifiers. If a #' dataframe is given it must contain the columns `id`, `dadid`, -#' `momid`, `sex` and `family`. Family is optional. +#' `momid`, `sex` and `famid` (optional). #' #' @return A data.frame with id, dadid, momid, sex as columns with the #' relationships fixed. @@ -50,22 +47,21 @@ NULL #' ) #' test1newmom <- with(test1char, fix_parents(id, father, mother, #' sex, -#' missid = '0' +#' missid = NA_character_ #' )) -#' newped <- Pedigree(test1newmom) -#' as.data.frame(newped) +#' Pedigree(test1newmom) #' #' @author Jason Sinnwell #' @export +#' @usage NULL setGeneric("fix_parents", signature = "obj", function(obj, ...) standardGeneric("fix_parents") ) -#' @export #' @rdname fix_parents -#' @aliases fix_parents,character +#' @export setMethod("fix_parents", "character", function( - obj, dadid, momid, sex, family = NULL, missid = "0" + obj, dadid, momid, sex, famid = NULL, missid = NA_character_ ) { ## fix sex of parents add parents that are missing n <- length(obj) @@ -79,7 +75,8 @@ setMethod("fix_parents", "character", function( if (length(sex) != n) { stop("Mismatched lengths, id and sex") } - if (length(family) != n & length(family) > 0) { + + if (length(famid) != n & length(famid) > 0) { stop("Mismatched lengths, id and sex") } @@ -96,20 +93,23 @@ setMethod("fix_parents", "character", function( } else if (mean(sex == 3) > 0.25) { warning("More than 25% of the gender values are 'unknown'") } - if (any(is.na(id) | id == missid)) { + if (any(is.na(id) | id %in% missid)) { stop("Missing value for the id variable") } - id <- prefix_famid(family, id, missid) - dadid <- prefix_famid(family, dadid, missid) - momid <- prefix_famid(family, momid, missid) + if (!is.null(famid)) { + id <- upd_famid_id(id, famid, missid) + dadid <- upd_famid_id(dadid, famid, missid) + momid <- upd_famid_id(momid, famid, missid) + } + addids <- paste("addin", seq_along(id), sep = "-") if (length(grep("^ *$", id)) > 0) { stop("A blank or empty string is not allowed as the id variable") } - nofather <- (is.na(dadid) | dadid == missid) - nomother <- (is.na(momid) | momid == missid) + nofather <- (is.na(dadid) | dadid %in% missid) + nomother <- (is.na(momid) | momid %in% missid) if (any(duplicated(id))) { duplist <- id[duplicated(id)] msg_nb <- min(length(duplist), 6) @@ -117,20 +117,21 @@ setMethod("fix_parents", "character", function( } findex <- match(dadid, id, nomatch = 0) mindex <- match(momid, id, nomatch = 0) + ## dadid given, not found id, so add if (any(findex == 0 & !nofather)) { dadnotfound <- unique(dadid[which(findex == 0 & !nofather)]) id <- c(id, dadnotfound) sex <- c(sex, rep(1, length(dadnotfound))) - dadid <- c(dadid, rep(0, length(dadnotfound))) - momid <- c(momid, rep(0, length(dadnotfound))) + dadid <- c(dadid, rep(missid, length(dadnotfound))) + momid <- c(momid, rep(missid, length(dadnotfound))) } if (any(mindex == 0 & !nomother)) { momnotfound <- unique(momid[which(mindex == 0 & !nomother)]) id <- c(id, momnotfound) sex <- c(sex, rep(2, length(momnotfound))) - dadid <- c(dadid, rep(0, length(momnotfound))) - momid <- c(momid, rep(0, length(momnotfound))) + dadid <- c(dadid, rep(missid, length(momnotfound))) + momid <- c(momid, rep(missid, length(momnotfound))) } if (any(sex[mindex] != 1)) { dadnotmale <- unique((id[findex])[sex[findex] != 1]) @@ -150,8 +151,8 @@ setMethod("fix_parents", "character", function( dadid[nodad_idx] <- addids[seq_along(nodad_idx)] id <- c(id, addids[seq_along(nodad_idx)]) sex <- c(sex, rep(1, length(nodad_idx))) - dadid <- c(dadid, rep(0, length(nodad_idx))) - momid <- c(momid, rep(0, length(nodad_idx))) + dadid <- c(dadid, rep(missid, length(nodad_idx))) + momid <- c(momid, rep(missid, length(nodad_idx))) } ## children with dad in ped, mom missing addids <- addids[!(addids %in% id)] @@ -160,16 +161,16 @@ setMethod("fix_parents", "character", function( momid[nodad_idx] <- addids[seq_along(nodad_idx)] id <- c(id, addids[seq_along(nodad_idx)]) sex <- c(sex, rep(2, length(nodad_idx))) - dadid <- c(dadid, rep(0, length(nodad_idx))) - momid <- c(momid, rep(0, length(nodad_idx))) + dadid <- c(dadid, rep(missid, length(nodad_idx))) + momid <- c(momid, rep(missid, length(nodad_idx))) } - if (is.null(family)) { + if (is.null(famid)) { data.frame(id = id, momid = momid, dadid = dadid, sex = sex) } else { - family <- stringr::str_split_i(id, "_", i = 1) + famid <- make_famid(id, dadid, momid) data.frame( id = id, momid = momid, dadid = dadid, - sex = sex, family = family + sex = sex, famid = famid ) } }) @@ -179,14 +180,13 @@ setMethod("fix_parents", "character", function( #' - `FALSE` : be deleted #' @param filter Filtering column containing `0` or `1` for the #' rows to kept before proceeding. -#' -#' @export #' @rdname fix_parents +#' @export setMethod("fix_parents", "data.frame", function( - obj, delete = FALSE, filter = NULL, missid = "0" + obj, delete = FALSE, filter = NULL, missid = NA_character_ ) { cols_needed <- c("id", "dadid", "momid", "sex", filter) - df <- check_columns(obj, cols_needed, NULL, "family", others_cols = TRUE, + df <- check_columns(obj, cols_needed, NULL, "famid", others_cols = TRUE, cols_to_use_init = TRUE ) df_old <- df @@ -202,18 +202,18 @@ setMethod("fix_parents", "data.frame", function( # One of the parents doesn't not have a line in id dad_present <- match(df$dadid, df$id, nomatch = missid) mom_present <- match(df$momid, df$id, nomatch = missid) - df[dad_present == missid | - mom_present == missid, c("momid", "dadid") + df[dad_present %in% missid | + mom_present %in% missid, c("momid", "dadid") ] <- missid } df_fix <- fix_parents( df$id, df$dadid, df$momid, - df$sex, missid = missid, family = df$family + df$sex, missid = missid, famid = df$famid ) col_used <- which(names(df_old) == "momid" | names(df_old) == "dadid" | names(df_old) == "sex" | - names(df_old) == "family" + names(df_old) == "famid" ) df <- merge(df_old[, -col_used], df_fix, by = "id", all.y = TRUE, all.x = FALSE diff --git a/R/generate_aff_inds.R b/R/generate_aff_inds.R index e9dd61dc..4ca5c67f 100644 --- a/R/generate_aff_inds.R +++ b/R/generate_aff_inds.R @@ -1,11 +1,25 @@ #' @importFrom plyr revalue NULL -#' Process the information for affection +#' Process the affection informations #' -#' @details Perform transformation uppon a column given as the one -#' containing affection status to get an `affected` column usable for -#' the rest of the script +#' @description Perform transformation uppon a vector given as the one +#' containing the affection status to obtain an `affected` binary state. +#' +#' @details This function helps to configure a binary state from a character or +#' numeric variable. +#' +#' ## If the variable is a `character` or a `factor`: +#' +#' In this case the affected state will depend on the modality provided as +#' an affected status. All individuals with a value corresponding to one of the +#' element in the vector **mods_aff** will be considered as affected. +#' +#' ## If the variable is `numeric`: +#' +#' In this case the affected state will be `TRUE` if the value of the individual +#' is above the **threshold** if **sup_thres_aff** is `TRUE` and `FALSE` +#' otherwise. #' #' @param values Vector containing the values of the column to process. #' @param mods_aff Vector of modality to consider as affected in the case @@ -28,6 +42,8 @@ NULL #' @examples #' generate_aff_inds(c(1, 2, 3, 4, 5), threshold = 3, sup_thres_aff = TRUE) #' generate_aff_inds(c("A", "B", "C", "A", "V", "B"), mods_aff = c("A", "B")) +#' @author Louis Le Nézet +#' @keywords generate_scales #' @export generate_aff_inds <- function(values, mods_aff = NULL, threshold = NULL, sup_thres_aff = NULL @@ -58,7 +74,7 @@ generate_aff_inds <- function(values, mods_aff = NULL, } else { # Separate for factors by levels mods_non_aff <- levels(droplevels(as.factor( - values[!values %in% mods_aff] + values[!values %in% as.character(mods_aff)] ))) if (length(mods_non_aff) == 0) { mods_non_aff <- "None" @@ -79,7 +95,6 @@ generate_aff_inds <- function(values, mods_aff = NULL, names(aff_to_use) <- c(aff_lab, healthy_lab) labels <- revalue(as.character(mods), labels_to_use, warn_missing = FALSE) - affected <- revalue(labels, aff_to_use, warn_missing = FALSE) + affected <- as.logical(revalue(labels, aff_to_use, warn_missing = FALSE)) as.data.frame(list(mods = mods, labels = labels, affected = affected)) } -TRUE diff --git a/R/generate_colors.R b/R/generate_colors.R index fde099c1..ae740034 100644 --- a/R/generate_colors.R +++ b/R/generate_colors.R @@ -1,13 +1,33 @@ #' @importFrom plyr revalue NULL -#' Process the colors based on affection +#' Process the filling colors based on affection #' -#' @details Perform transformation uppon a column given as the one +#' @description Perform transformation uppon a column given as the one #' containing affection status to compute the filling color. #' +#' @details The colors will be set using the +#' [grDevices::colorRampPalette()] function +#' with the colors given as parameters. +#' +#' The colors will be set as follow: +#' +#' - If **keep_full_scale** is `FALSE`: +#' Then the affected individuals will get the first color of the +#' **colors_aff** vector and the unaffected individuals will get the +#' first color of the **colors_unaff** vector. +#' - If **keep_full_scale** is `TRUE`: +#' - If **values** isn't numeric: +#' Each levels of the affected **values** vector will get it's own color from +#' the **colors_aff** vector using the [grDevices::colorRampPalette()] and +#' the same will be done for the unaffected individuals using the +#' **colors_unaff**. +#' - If **values** is numeric: +#' The mean of the affected individuals will be compared to the mean of the +#' unaffected individuals and the colors will be set up such as the color +#' gradient follow the direction of the affection. +#' #' @param values The vector containing the values to process as affection. -#' @param affected The vector containing the affection status TRUE/FALSE. #' @param labels The vector containing the labels to use for the affection. #' @param keep_full_scale Boolean defining if the affection values need to #' be set as a scale. If `values` is numeric the filling scale will be @@ -20,15 +40,20 @@ NULL #' affected individuls. #' @param colors_unaff Set of increasing colors to use for the filling of the #' unaffected individuls. +#' @inheritParams Ped #' -#' @return A list of two elements -#' - The processed values column as a numeric factor -#' - A dataframe containing the description of each modality of the scale +#' @return A list of three elements +#' - `mods` : The processed values column as a numeric factor +#' - `affected` : A logical vector indicating if the individual is affected +#' - `sc_fill` : A dataframe containing the description of each modality of the +#' scale #' #' @examples #' aff <- generate_aff_inds(seq_len(5), threshold = 3, sup_thres_aff = TRUE) #' generate_fill(seq_len(5), aff$affected, aff$labels) #' generate_fill(seq_len(5), aff$affected, aff$labels, keep_full_scale = TRUE) +#' +#' @keywords generate_scales #' @export generate_fill <- function( values, affected, labels, @@ -48,7 +73,6 @@ generate_fill <- function( } mods <- fill <- rep(NA, n) - # Affection modality previously used scale <- unique(as.data.frame( list(mods_aff = labels, affected = affected, fill = fill) @@ -63,8 +87,11 @@ generate_fill <- function( # last of aff fill_to_use <- c(colors_unaff[1], colors_aff[-1], "grey") names(fill_to_use) <- c("FALSE", "TRUE", NA) - fill <- revalue(affected, fill_to_use, warn_missing = FALSE) - mods <- revalue(affected, c("FALSE" = 0, "TRUE" = 1), + fill <- revalue( + as.character(affected), fill_to_use, warn_missing = FALSE + ) + mods <- revalue( + as.character(affected), c("FALSE" = 0, "TRUE" = 1), warn_missing = FALSE ) } else { @@ -130,128 +157,140 @@ generate_fill <- function( fill[is.na(fill)] <- "grey" mods <- as.numeric(mods) - scale <- unique(as.data.frame( + sc_fill <- unique(as.data.frame( list( mods = mods, labels = labels, affected = affected, fill = fill, - density = rep(NA, n), angle = rep(NA, n) + density = rep(NA_integer_, n), angle = rep(NA_integer_, n) ) )) - - list(mods = mods, fill_scale = scale) + list(mods = mods, affected = affected, sc_fill = sc_fill) } -#' Process the colors based on affection and availability +#' Process the border colors based on availability #' -#' @details Perform transformation uppon a column given as the one +#' @description Perform transformation uppon a vector given as the one #' containing the availability status to compute the border color. +#' The vector given will be transformed using the [vect_to_binary()] +#' function. #' -#' @param avail The vector containing the availability status. -#' The values need to be numeric and can only be 0, 1 or NA. +#' @param values The vector containing the values to process as available. #' @param colors_avail Set of 2 colors to use for the box's border of an -#' individual. The first color will be used for available individual (avail -#' == 1) and the second for the unavailable individual (avail == 0). +#' individual. The first color will be used for available individual +#' (`avail == 1`) and the second for the unavailable individual +#' (`avail == 0`). #' -#' @return A dataframe containing the scale to use for the availability -#' status. +#' @return A list of three elements +#' - `mods` : The processed values column as a numeric factor +#' - `avail` : A logical vector indicating if the individual is available +#' - `sc_bord` : A dataframe containing the description of each modality of the +#' scale #' #' @examples #' generate_border(c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA)) #' +#' @keywords generate_scales #' @export -generate_border <- function(avail, colors_avail = c("green", "black")) { - if (length(avail) > 0) { - if (! is.numeric(avail) && ! all(is.na(avail))) { - stop("Available variable need to be numeric") - } - - if (!all(levels(as.factor(avail)) %in% c("0", "1", "NA"))) { - stop("Available variable need to have only 0, 1 or NA") - } - } - +generate_border <- function(values, colors_avail = c("green", "black")) { # Set border colors if (length(colors_avail) != 2) { stop("Variable `colors_avail` need to be a vector of 2 colors") } - as.data.frame(list( + mods <- vect_to_binary(values) + avail <- vect_to_binary(values, logical = TRUE) + + sc_bord <- data.frame( column = "avail", mods = c(NA, 1, 0), border = c("grey", colors_avail[1], colors_avail[2]), labels = c("NA", "Available", "Non Available") - )) + ) + + list(mods = mods, avail = avail, sc_bord = sc_bord) } -#' Process the colors based on affection and availability +#' Process the filling and border colors based on affection and availability #' -#' @details Perform transformation uppon a dataframe given to compute +#' @description Perform transformation uppon a dataframe given to compute #' the colors for the filling and the border of the individuals based #' on the affection and availability status. #' +#' @details The colors will be set using the [generate_fill()] and the +#' [generate_border()] functions respectively for the filling and the border. +#' #' @param obj A Pedigree object or a vector containing the affection status for -#' each individuals. The affection status can be numeric, logical or character. -#' @param col_avail The name of the column containing the availability status. -#' @inheritParams is_informative +#' each individuals. The affection status can be numeric or a character. #' @inheritParams generate_fill #' @inheritParams generate_border #' @inheritParams generate_aff_inds +#' @inheritParams Ped #' #' @return #' ## When used with a vector -#' A list of three elements -#' - A vector containing the transformed filling modalities -#' - A dataframe containing the description of each filling modalities -#' - A dataframe containing the description of the border modalities +#' +#' A list of two elements +#' - The list containing the filling colors processed and their description +#' - The list containing the border colors processed and their description #' #' ## When used with a Pedigree object -#' The Pedigree object with the `affected` and `avail` columns -#' processed accordingly. #' -#' The Pedigree scales slots are updated +#' The Pedigree object with the `affected` and `avail` columns +#' processed accordingly as well as the `scales` slot updated. #' -#' @examples -#' data("sampleped") -#' ped <- Pedigree(sampleped) -#' generate_colors(ped, "affected", add_to_scale=FALSE)$scales +#' @keywords generate_scales #' @export +#' @usage NULL setGeneric("generate_colors", signature = "obj", function(obj, ...) standardGeneric("generate_colors") ) -#' @export -#' @aliases generate_colors,character #' @rdname generate_colors +#' @examples +#' +#' generate_colors( +#' c("A", "B", "A", "B", NA, "A", "B", "A", "B", NA), +#' c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), +#' mods_aff = "A", +#' ) +#' @export setMethod("generate_colors", "character", function( obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, - keep_full_scale = FALSE, breaks = 3, + mods_aff = NULL, + keep_full_scale = FALSE, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), colors_avail = c("green", "black") ) { affected_val <- obj affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff + mods_aff = mods_aff ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( + lst_bord <- generate_border(avail, colors_avail) + lst_aff <- generate_fill( affected_val, affected$affected, affected$labels, - keep_full_scale, breaks, colors_aff, colors_unaff + keep_full_scale, NULL, colors_aff, colors_unaff ) - lst_sc$border_scale <- border - lst_sc + list( + fill = lst_aff, + bord = lst_bord + ) } ) -#' @export -#' @aliases generate_colors,numeric #' @rdname generate_colors +#' @examples +#' +#' generate_colors( +#' c(10, 0, 5, 7, NA, 6, 2, 1, 3, NA), +#' c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), +#' threshold = 3, keep_full_scale = TRUE +#' ) +#' @export setMethod("generate_colors", "numeric", function( - obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, + obj, avail, threshold = 0.5, sup_thres_aff = TRUE, keep_full_scale = FALSE, breaks = 3, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), @@ -259,56 +298,38 @@ setMethod("generate_colors", "numeric", ) { affected_val <- obj affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff + mods_aff = NULL, threshold, sup_thres_aff ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( - affected_val, affected$affected, affected$labels, - keep_full_scale, breaks, colors_aff, colors_unaff - ) - - lst_sc$border_scale <- border - lst_sc - } -) -#' @export -#' @aliases generate_colors,logical -#' @rdname generate_colors -setMethod("generate_colors", "logical", - function( - obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, - keep_full_scale = FALSE, breaks = 3, - colors_aff = c("yellow2", "red"), - colors_unaff = c("white", "steelblue4"), - colors_avail = c("green", "black") - ) { - affected_val <- obj - affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff - ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( + lst_bord <- generate_border(avail, colors_avail) + lst_aff <- generate_fill( affected_val, affected$affected, affected$labels, keep_full_scale, breaks, colors_aff, colors_unaff ) - lst_sc$border_scale <- border - lst_sc + list( + fill = lst_aff, + bord = lst_bord + ) } ) #' @importFrom plyr rbind.fill -#' @include pedigreeClass.R -#' @docType methods -#' @aliases generate_colors,Pedigree #' @param add_to_scale Boolean defining if the scales need to be added to the #' existing scales or if they need to replace the existing scales. #' @param reset If `TRUE` the scale of the specified column will be reset if #' already present. -#' @param ... Other parameters to pass to the `generate_colors` function +#' @param col_avail A character vector with the name of the column to be used +#' for the availability status. +#' @param col_aff A character vector with the name of the column to be used +#' for the affection status. +#' @examples +#' data("sampleped") +#' ped <- Pedigree(sampleped) +#' ped <- generate_colors(ped, "affected", add_to_scale=FALSE) +#' scales(ped) #' @rdname generate_colors +#' @include AllClass.R #' @export setMethod("generate_colors", "Pedigree", function(obj, @@ -321,7 +342,7 @@ setMethod("generate_colors", "Pedigree", colors_avail = c("green", "black"), reset = TRUE ) { - if (nrow(obj$ped) == 0) { + if (length(obj) == 0) { return(obj) } @@ -337,51 +358,74 @@ setMethod("generate_colors", "Pedigree", return(obj) } - new_col <- paste0(col_aff, "_aff") - df <- check_columns(obj$ped, c(col_aff, col_avail), - "", new_col, others_cols = TRUE + new_fill <- paste0(col_aff, "_mods") + new_bord <- paste0(col_avail, "_mods") + df <- check_columns(as.data.frame(ped(obj)), c(col_aff, col_avail), + "", c(new_fill, new_bord), others_cols = TRUE ) - lst_sc <- generate_colors(df[[col_aff]], df[[col_avail]], - mods_aff, threshold, sup_thres_aff, - keep_full_scale, breaks, - colors_aff, colors_unaff, colors_avail + ## Generate affected individuals + lst_inds <- generate_aff_inds(df[[col_aff]], + mods_aff, threshold, sup_thres_aff ) - df[new_col] <- lst_sc$mods - if (nrow(lst_sc$fill_scale) > 0) { - lst_sc$fill_scale$column_mods <- new_col - lst_sc$fill_scale$column_values <- col_aff - } + ## Create border and fill scales + lst_bord <- generate_border( + df[[col_avail]], colors_avail + ) + lst_fill <- generate_fill( + df[[col_aff]], lst_inds$affected, lst_inds$labels, + keep_full_scale, breaks, colors_aff, colors_unaff + ) - scales <- list( - fill = lst_sc$fill_scale, - border = lst_sc$border_scale + lst_sc <- list( + fill = lst_fill$sc_fill, + border = lst_bord$sc_bord ) - obj$ped <- df + ## Add mods to Pedigree object and update slots + mcols(obj)[new_fill] <- lst_fill$mods + mcols(obj)[new_bord] <- lst_bord$mods + affected(ped(obj)) <- lst_fill$affected + avail(ped(obj)) <- lst_bord$avail + + ## Update scales with correct names + if (nrow(lst_sc$fill) > 0) { + lst_sc$fill$column_mods <- new_fill + lst_sc$fill$column_values <- col_aff + } + + if (nrow(lst_sc$bord) > 0) { + lst_sc$border$column_mods <- new_bord + lst_sc$border$column_values <- col_avail + } + + ## Should the affected scales be added to existing one if (add_to_scale) { - if (col_aff %in% obj$scales$fill$column_values & - !reset) { - stop("The column ", col_aff, " is already in the scales") - } else if (col_aff %in% obj$scales$fill$column_values & reset) { - obj$scales$fill <- obj$scales$fill[ - obj$scales$fill$column_values != col_aff, - ] + if (col_aff %in% fill(obj)$column_values) { + if (!reset) { + stop("The column ", col_aff, " is already in the scales") + } else { + new_order <- unique(fill(obj)[ + fill(obj)$column_values == col_aff, + "order" + ]) + fill(obj) <- fill(obj)[ + fill(obj)$column_values != col_aff, + ] + } + } else { + new_order <- ifelse(nrow(fill(obj)) > 0, + max(fill(obj)$order) + 1, 1 + ) } - new_order <- ifelse(nrow(obj$scales$fill) > 0, - max(obj$scales$fill$order) + 1, 1 - ) - scales$fill$order <- new_order - scales$fill <- rbind.fill(obj$scales$fill, - scales$fill - ) + lst_sc$fill$order <- new_order + lst_sc$fill <- rbind.fill(fill(obj), lst_sc$fill) } else { - scales$fill$order <- 1 + lst_sc$fill$order <- 1 } - - obj$scales <- scales + scales(obj) <- Scales(lst_sc$fill, lst_sc$border) validObject(obj) obj } diff --git a/R/ibd_matrix.R b/R/ibd_matrix.R index 769c0e3d..7166ae47 100644 --- a/R/ibd_matrix.R +++ b/R/ibd_matrix.R @@ -2,7 +2,7 @@ #' @importFrom Matrix sparseMatrix NULL -#' Create an IBD matrix +#' IBD matrix #' #' @description #' Transform identity by descent (IBD) matrix data from the form produced by @@ -21,8 +21,10 @@ NULL #' index file contains the mapping between this new id and the original one. #' The final matrix should be labeled with the original identifiers. #' -#' @param id1 First subject identifiers -#' @param id2 Second subject identifiers +#' @param id1 A character vector with the id of the first individuals of each +#' pairs +#' @param id2 A character vector with the id of the second individuals of each +#' pairs #' @param ibd the IBD value for that pair #' @param idmap an optional 2 column matrix or data frame whose first element #' is the internal value (as found in `id1` and `id2`, and whose @@ -133,4 +135,3 @@ ibd_matrix <- function(id1, id2, ibd, idmap, diagonal) { dimnames = list(dimid, dimid) ) } -TRUE diff --git a/R/is_informative.R b/R/is_informative.R index 35c1f291..90891496 100644 --- a/R/is_informative.R +++ b/R/is_informative.R @@ -1,26 +1,17 @@ -#' @title Is informative +#' Find informative individuals #' #' @description Select the ids of the informative individuals. #' -#' @details Depending on the informative parameter, the function will extract -#' the ids of the informative individuals. In the case of a numeric vector, -#' the function will return the same vector. In the case of a boolean, the -#' function will return the ids of the individuals if TRUE, NA otherwise. +#' @details Depending on the **informative** parameter, the function will +#' extract the ids of the informative individuals. In the case of a +#' numeric vector, the function will return the same vector. +#' In the case of a boolean, the function will return the ids of the +#' individuals if TRUE, NA otherwise. #' In the case of a string, the function will return the ids of the #' corresponding informative individuals based on the avail and affected #' columns. #' -#' @param avail A numeric vector of availability status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : unavailable -#' - `1` : available -#' - `NA` : availability not known -#' @param affected A numeric vector of affection status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : unaffected -#' - `1` : affected -#' - `NA` : affection status not known -#' @param col_aff A string with the column name to use for the affection status. +#' @inheritParams Ped #' @param informative Informative individuals selection can take 5 values: #' - 'AvAf' (available and affected), #' - 'AvOrAf' (available or affected), @@ -29,42 +20,60 @@ #' - 'All' (all individuals) #' - A numeric/character vector of individuals id #' - A boolean -#' @inheritParams kinship -#' @inheritParams is_parent +#' @inheritParams generate_colors #' @return #' #' ## When obj is a vector -#' A vector of individuals informative identifiers +#' A vector of individuals informative identifiers. #' #' ## When obj is a Pedigree -#' A list containing the Pedigree object and the vector of individuals -#' identifiers. -#' The Pedigree object will have a new column named 'id_inf' containing 1 for -#' informative individuals and 0 otherwise. -#' -#' @examples -#' data("sampleped") -#' ped <- Pedigree(sampleped) -#' is_informative(ped, col_aff = "affection_aff") +#' The Pedigree object with its `isinf` slot updated. #' #' @export #' @docType methods +#' @usage NULL setGeneric("is_informative", signature = "obj", function(obj, ...) standardGeneric("is_informative") ) -#' @export #' @rdname is_informative -#' @aliases is_informative,character -#' @docType methods -setMethod("is_informative", "character", function( - obj, avail, affected, informative = "AvAf", missid = "0" +#' @examples +#' +#' is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +#' is_informative(c("A", "B", "C", "D", "E"), informative = c(1, 2)) +#' is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +#' is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), +#' affected = c(0, 1, 0, 1, 1), informative = "AvAf") +#' is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), +#' affected = c(0, 1, 0, 1, 1), informative = "AvOrAf") +#' is_informative(c("A", "B", "C", "D", "E"), +#' informative = c(TRUE, FALSE, TRUE, FALSE, TRUE)) +#' @export +setMethod("is_informative", "character_OR_integer", function( + obj, avail, affected, informative = "AvAf" ) { id <- obj # Selection of all informative individuals depending of the informative # parameter - if (is.numeric(informative)) { - id_inf <- id[match(id, informative, nomatch = 0) != 0] + if (length(informative) > 1) { + if (is.character(informative)) { + id_inf <- id[match(id, informative, nomatch = 0) != 0] + } else if (is.numeric(informative)) { + id_inf <- id[informative] + } else if (is.logical(informative)) { + if (length(informative) != length(id)) { + stop("The length of a logical informative parameter must be", + "equal to the length of the id vector" + ) + } + id_inf <- id[informative] + } else { + stop("The informative parameter must be a character, ", + "logical or numeric" + ) + } + } else if (is.numeric(informative)) { + id_inf <- id[informative] } else if (is.logical(informative)) { id_inf <- ifelse(informative, id, NA) id_inf <- id_inf[!is.na(id_inf)] @@ -90,19 +99,32 @@ setMethod("is_informative", "character", function( unique(id_inf) }) -#' @export #' @rdname is_informative -#' @aliases is_informative,Pedigree -#' @docType methods -#' @param reset If `TRUE`, the `id_inf` column is reset +#' @param reset If `TRUE`, the `isinf` slot is reset +#' @examples +#' +#' data("sampleped") +#' ped <- Pedigree(sampleped) +#' ped <- is_informative(ped, col_aff = "affection_mods") +#' isinf(ped(ped)) +#' @export setMethod("is_informative", "Pedigree", function( - obj, col_aff = NULL, informative = "AvAf", missid = "0", reset = FALSE + obj, col_aff = NULL, informative = "AvAf", reset = FALSE ) { - obj$ped$affected <- NA - aff_scl <- obj$scales$fill + if (!reset & any(!is.na(isinf(ped(obj))))) { + warning( + "The isinf slot already has values in the Ped object", + " and reset is set to FALSE" + ) + return(obj) + } + affected(ped(obj)) <- NA + aff_scl <- fill(obj) + ped_df <- as.data.frame(ped(obj)) if (is.null(col_aff)) { stop("The col_aff argument is required") } + # TODO use the affected columns if (col_aff %in% aff_scl$column_mods) { aff <- aff_scl$mods[aff_scl$affected == TRUE & aff_scl$column_mods == col_aff @@ -110,22 +132,20 @@ setMethod("is_informative", "Pedigree", function( unaff <- aff_scl$mods[aff_scl$affected == FALSE & aff_scl$column_mods == col_aff ] - obj$ped$affected[obj$ped[, col_aff] %in% aff] <- 1 - obj$ped$affected[obj$ped[, col_aff] %in% unaff] <- 0 + ped_df$affected[ped_df[, col_aff] %in% aff] <- 1 + ped_df$affected[ped_df[, col_aff] %in% unaff] <- 0 } else { stop("The column ", col_aff, " is not in the scales fill") } cols_needed <- c("id", "avail", "affected") - obj$ped <- check_columns(obj$ped, cols_needed, "", "", others_cols = TRUE) - id_inf <- is_informative(obj$ped$id, obj$ped$avail, obj$ped$affected, - informative, missid + check_columns(ped_df, cols_needed, "", "", others_cols = TRUE) + id_inf <- is_informative(ped_df$id, ped_df$avail, ped_df$affected, + informative = informative ) - if (!reset) { - check_columns(obj$ped, NULL, "id_inf", NULL) - } - - obj$ped$id_inf <- ifelse(obj$ped$id %in% id_inf, 1, 0) + isinf(ped(obj)) <- vect_to_binary( + ifelse(ped_df$id %in% id_inf, 1, 0), logical = TRUE + ) obj }) diff --git a/R/kindepth.R b/R/kindepth.R index e3827410..636d7334 100644 --- a/R/kindepth.R +++ b/R/kindepth.R @@ -1,42 +1,42 @@ -#' Compute the depth of each subject in a Pedigree +#' Individual's depth in a pedigree #' #' @description #' Computes the depth of each subject in the Pedigree. #' #' @details -#' Mark each person as to their depth in a Pedigree; 0 for a founder, otherwise +#' Mark each person as to their depth in a Pedigree; `0` for a founder, +#' otherwise : #' -#' \eqn{depth = 1 + \max(fatherDepth, motherDepth)} +#' \deqn{depth = 1 + \max(fatherDepth, motherDepth)} #' -#' In the case of an inbred Pedigree a perfect alignment obeying -#' `extra=TRUE` may not exist. +#' In the case of an inbred Pedigree a perfect alignment may not exist. #' -#' @inheritParams kinship -#' @param align_parents If `align_parents=T`, go one step further and try to -#' make both parents of each child have the same depth. +#' @param ... Additional arguments +#' @inheritParams Ped +#' @param align_parents If `align_parents = TRUE`, go one step further +#' and try to make both parents of each child have the same depth. #' (This is not always possible). -#' It helps the drawing program by lining up pedigrees that 'join in the middle' -#' via a marriage. +#' It helps the drawing program by lining up pedigrees that +#' 'join in the middle' via a marriage. #' -#' @return an integer vector containing the depth for each subject +#' @return An integer vector containing the depth for each subject #' -#' @author Terry Therneau +#' @author Terry Therneau, updated by Louis Le Nézet #' @seealso [align()] -#' @include pedigreeClass.R -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' kindepth(ped1) +#' @include AllClass.R #' @export setGeneric("kindepth", signature = "obj", function(obj, ...) standardGeneric("kindepth") ) -#' @export #' @rdname kindepth -#' @aliases kindepth,character -#' @docType methods -setMethod("kindepth", "character", function(obj, dadid, momid, +#' @examples +#' kindepth( +#' c("A", "B", "C", "D", "E"), +#' c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0") +#' ) +setMethod("kindepth", "character_OR_integer", function(obj, dadid, momid, align_parents = FALSE ) { id <- obj @@ -62,7 +62,6 @@ setMethod("kindepth", "character", function(obj, dadid, momid, for (i in seq_len(n)) { child <- match(midx, parents, nomatch = 0) + match(didx, parents, nomatch = 0) # Index of parent's childs - ## version 1.8.5 did not have this check with child_old. ## Keeping it here because it was not the issue being fixed in 9/2023. if (all(child == child_old)) { @@ -112,17 +111,6 @@ setMethod("kindepth", "character", function(obj, dadid, momid, ## It may be possible to do better alignment when the Pedigree has loops, ## but it is definitely beyond this program, perhaps in auto_hint one day. - chaseup <- function(x, midx, didx) { - new <- c(midx[x], didx[x]) # mother and father - new <- new[new > 0] - while (length(new) > 1) { - x <- unique(c(x, new)) - new <- c(midx[new], didx[new]) - new <- new[new > 0] - } - x - } - ## First deal with any parents who are founders They all start with depth 0 dads <- didx[midx > 0 & didx > 0] # the father side of all spouse pairs moms <- midx[midx > 0 & didx > 0] @@ -156,46 +144,74 @@ setMethod("kindepth", "character", function(obj, dadid, momid, npair <- length(dads) done <- rep(FALSE, npair) # couples that are taken care of while (TRUE) { - pairs.to.fix <- which((depth[dads] != depth[moms]) & !done) - if (length(pairs.to.fix) == 0) { + ## Select parents pairs to fix + pairs_to_fix <- which((depth[dads] != depth[moms]) & !done) + + if (length(pairs_to_fix) == 0) { break } - temp <- pmax(depth[dads], depth[moms])[pairs.to.fix] - who <- min(pairs.to.fix[temp == min(temp)]) # the chosen couple + ## Get max depth of all pairs to fix + temp <- pmax(depth[dads], depth[moms])[pairs_to_fix] + ## Select the couple to fix + ## that have the minimal depth + who <- min(pairs_to_fix[temp == min(temp)]) + + ## Good is the individuals with the higher depth good <- moms[who] bad <- dads[who] if (depth[dads[who]] > depth[moms[who]]) { good <- dads[who] bad <- moms[who] } - abad <- chaseup(bad, midx, didx) + + ## Move depth of all bad individuals + ## All id linked to bad + abad <- c(bad, ancestors(bad, midx, didx)) + if (length(abad) == 1 && sum(c(dads, moms) == bad) == 1) { - # simple case, a solitary marry-in + ## Simple case, a solitary marry-in + ## Only one in ancestry and is dad or mom of only one depth[bad] <- depth[good] } else { - agood <- chaseup(good, midx, didx) # ancestors of the 'good' side + ## Ancestors of the 'good' side + agood <- c(good, ancestors(good, midx, didx)) + + ## If individual already in agood not bad + abad1 <- abad[!abad %in% agood] + ## For spouse chasing, I need to exclude the given pair tdad <- dads[-who] tmom <- moms[-who] - while (1) { - ## spouses of any on agood list + ## Get all individuals affiliated to agood + while (TRUE) { + ## Add spouse spouse <- c(tmom[!is.na(match(tdad, agood))], tdad[!is.na(match(tmom, agood))] ) temp <- unique(c(agood, spouse)) - temp <- unique(chaseup(temp, midx, didx)) # parents + + ## Add ancestors + temp <- unique(c(temp, ancestors(temp, midx, didx))) + + ## Add kids kids <- (!is.na(match(midx, temp)) | !is.na(match(didx, temp))) temp <- unique(c(temp, (seq_len(n))[ kids & depth <= depth[good] ])) + if (length(temp) == length(agood)) { + ## If no addition to good ancestors break break } else { + ## Else do other iteration agood <- temp } } + ## Update agood but only if not in abad1 + agood <- agood[!agood %in% abad1] + ## Change all depth if (all(match(abad, agood, nomatch = 0) == 0)) { ## shift it down depth[abad] <- depth[abad] + (depth[good] - depth[bad]) @@ -218,21 +234,31 @@ setMethod("kindepth", "character", function(obj, dadid, momid, done[who] <- TRUE ## This snunk into version 1.9.6, which was part of ## bug: done[dads == bad | moms == bad] <- TRUE - - } ## while(TRUE) + } if (all(depth > 0)) { - stop("You found a bug in kindepth's alignment code!") + stop( + "You found a bug in kindepth's alignment code!", + "Depth found:", depth + ) } depth } ) -#' @export #' @rdname kindepth -#' @aliases kindepth,Pedigree -#' @docType methods +#' @examples +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' kindepth(ped1) setMethod("kindepth", "Pedigree", function(obj, align_parents = FALSE) { - kindepth(obj$ped$id, obj$ped$dadid, obj$ped$momid, align_parents) + kindepth(ped(obj), align_parents) + } +) + +#' @rdname kindepth +setMethod("kindepth", "Ped", + function(obj, align_parents = FALSE) { + kindepth(id(obj), dadid(obj), momid(obj), align_parents) } ) \ No newline at end of file diff --git a/R/kinship.R b/R/kinship.R index 0b7dc18d..4bc4cd45 100644 --- a/R/kinship.R +++ b/R/kinship.R @@ -2,21 +2,21 @@ #' @importFrom Matrix forceSymmetric bdiag NULL -#' Compute a kinship matrix +#' Kinship matrix #' #' @description -#' Compute the kinship matrix for a set of related autosomal subjects. The -#' function is generic, and can accept a Pedigree, vector as +#' Compute the kinship matrix for a set of related autosomal subjects. +#' The function is generic, and can accept a Pedigree, a Ped or a vector as #' the first argument. #' #' @details -#' The function will usually be called with a Pedigree the -#' third form is provided for backwards compatibility with an earlier release -#' of the library that was less capable. The first argument is named `id` -#' for the same reason. Note that when using the third form any information on +#' The function will usually be called with a Pedigree. +#' The call with a Ped or a vector is provided for backwards compatibility +#' with an earlier release of the library that was less capable. +#' Note that when using with a Ped or a vector, any information on #' twins is not available to the function. #' -#' When called with a pedigree, the routine +#' When called with a Pedigree, the routine #' will create a block-diagonal-symmetric sparse matrix object of class #' `dsCMatrix`. Since the `[i, j]` value of the result is 0 for any two #' unrelated individuals i and j and a `Matrix` utilizes sparse @@ -37,40 +37,51 @@ NULL #' The computation is based on a recursive algorithm described in Lange, which #' assumes that the founder alleles are all independent. #' -#' @param obj A pedigree object or a vector of subject identifiers. +#' @param obj A Pedigree or Ped object or a vector of subject identifiers. #' @param chrtype chromosome type. The currently supported types are #' 'autosome' and 'X' or 'x'. -#' @param ... Additional arguments passed to methods -#' @inheritParams sex_to_factor -#' @inheritParams is_parent +#' @inheritParams Ped #' #' @return #' ## When obj is a vector #' A matrix of kinship coefficients. -#' ## When obj is a pedigree +#' ## When obj is a Pedigree #' A matrix of kinship coefficients ordered by families present -#' in the pedigree. +#' in the Pedigree object. #' -#' @examples -#' data(sampleped) -#' ped <- Pedigree(sampleped) -#' kinship(ped) -#' -#' @section References: K Lange, Mathematical and Statistical Methods for +#' @section References: +#' K Lange, Mathematical and Statistical Methods for #' Genetic Analysis, Springer-Verlag, New York, 1997. #' @seealso [make_famid()], [kindepth()] -#' @include pedigreeClass.R +#' @include AllClass.R #' @include utils.R #' @export -#' @docType methods +#' @usage NULL setGeneric("kinship", signature = "obj", function(obj, ...) standardGeneric("kinship") ) +#' @rdname kinship #' @export +setMethod("kinship", "Ped", + function(obj, chrtype = "autosome"){ + kinship( + id(obj), dadid(obj), momid(obj), + sex(obj), chrtype = chrtype + ) + } +) + #' @rdname kinship -#' @aliases kinship,character -#' @docType methods +#' @examples +#' +#' kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1)) +#' kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1), +#' chrtype = "x" +#' ) +#' @export setMethod("kinship", "character", function(obj, dadid, momid, sex, chrtype = "autosome") { id <- obj @@ -133,13 +144,16 @@ setMethod("kinship", "character", ) #' @include kindepth.R -#' @export #' @rdname kinship -#' @aliases kinship,Pedigree -#' @docType methods +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' kinship(ped) +#' @export setMethod("kinship", "Pedigree", function(obj, chrtype = "autosome") { - famlist <- unique(obj$ped$family) + famlist <- unique(famid(obj)) nfam <- length(famlist) matlist <- vector("list", nfam) ## The possibly reorderd list of id values @@ -147,26 +161,44 @@ setMethod("kinship", "Pedigree", for (i_fam in seq_along(famlist)) { if (is.na(famlist[i_fam])) { # If no family provided - tped <- obj[is.na(obj$ped$family)] + tped <- obj[is.na(famid(obj))] } else { ## Pedigree for this family - tped <- obj[obj$ped$family == famlist[i_fam]] + tped <- obj[famid(obj) == famlist[i_fam]] } temp <- try({ chrtype <- match.arg(casefold(chrtype), c("autosome", "x")) - n <- length(ped(tped)$id) + n <- length(id(ped(tped))) pdepth <- kindepth(tped) - mom_row <- match(ped(tped)$momid, ped(tped)$id, nomatch = n + 1) - dad_row <- match(ped(tped)$dadid, ped(tped)$id, nomatch = n + 1) + mom_row <- match( + momid(ped(tped)), + id(ped(tped)), + nomatch = n + 1 + ) + dad_row <- match( + dadid(ped(tped)), + id(ped(tped)), + nomatch = n + 1 + ) # Are there any MZ twins to worry about? have_mz <- FALSE - if (!is.null(rel(tped)) && any(rel(tped)$code == "MZ twin")) { + if (length(rel(tped)) > 0 && + any(code(rel(tped)) == "MZ twin") + ) { have_mz <- TRUE ## Doc: MakeMZIndex - temp <- which(rel(tped)$code == "MZ twin") + temp <- which(code(rel(tped)) == "MZ twin") ## drop=FALSE added in case only one MZ twin set - id1x <- match(rel(tped)$id1, ped(tped)$id, nomatch = NA) - id2x <- match(rel(tped)$id2, ped(tped)$id, nomatch = NA) + id1x <- match( + id1(rel(tped)), + id(ped(tped)), + nomatch = NA + ) + id2x <- match( + id2(rel(tped)), + id(ped(tped)), + nomatch = NA + ) if (any(is.na(id1x)) | any(is.na(id2x))) { stop("All individuals in relationship matrix", "should be present in the pedigree informations" @@ -209,7 +241,7 @@ setMethod("kinship", "Pedigree", if (chrtype == "autosome") { if (n == 1) { kmat <- matrix(0.5, 1, 1, - dimnames = list(ped(tped)$id, ped(tped)$id) + dimnames = list(id(ped(tped)), id(ped(tped))) ) } else { kmat <- diag(c(rep(0.5, n), 0)) # founders @@ -232,10 +264,10 @@ setMethod("kinship", "Pedigree", } } } else if (chrtype == "x") { - sex <- as.numeric(ped(tped)$sex) # 1=female, 2=male + sex <- as.numeric(sex(ped(tped))) # 1=female, 2=male if (n == 1) { kmat <- matrix(sex / 2, 1, 1, - dimnames = list(ped(tped)$id, ped(tped)$id) + dimnames = list(id(ped(tped)), id(ped(tped))) ) } else { ## 1 for males, 1/2 for females @@ -265,7 +297,7 @@ setMethod("kinship", "Pedigree", } if (n > 1) { kmat <- kmat[seq_len(n), seq_len(n)] - dimnames(kmat) <- list(ped(tped)$id, ped(tped)$id) + dimnames(kmat) <- list(id(ped(tped)), id(ped(tped))) } kmat }, silent = TRUE) @@ -275,7 +307,7 @@ setMethod("kinship", "Pedigree", matlist[[i_fam]] <- temp } ## deprecated in Matrix: as(forceSymmetric(temp), 'dsCMatrix') - idlist[[i_fam]] <- ped(tped)$id + idlist[[i_fam]] <- id(ped(tped)) } if (length(famlist) == 1) { as(matlist[[1]], diff --git a/R/make_famid.R b/R/make_famid.R index 339ce4a7..0832e671 100644 --- a/R/make_famid.R +++ b/R/make_famid.R @@ -1,9 +1,7 @@ -# $Id: make_famid.s,v 1.7 2003/01/07 15:47:08 therneau Exp - -#' Get family id +#' Compute family id #' #' @description -#' Construct a family id from Pedigree information +#' Construct a family identifier from pedigree information #' #' @details #' Create a vector of length n, giving the family 'tree' number of each @@ -11,7 +9,7 @@ #' tree 1, otherwise the tree numbers represent the disconnected subfamilies. #' Singleton subjects give a zero for family number. #' -#' @inheritParams kinship +#' @inheritParams Ped #' #' @return #' ## When used with a character vector @@ -19,21 +17,23 @@ #' #' ## When used with a Pedigree object #' An updated Pedigree object with the family id added +#' and with all ids updated #' #' @seealso [kinship()] -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[,-1]) -#' make_famid(ped1) #' @export +#' @usage NULL setGeneric("make_famid", signature = "obj", function(obj, ...) standardGeneric("make_famid") ) -#' @export #' @rdname make_famid -#' @aliases make_famid,character -#' @docType methods +#' @examples +#' +#' make_famid( +#' c("A", "B", "C", "D", "E", "F"), +#' c("C", "D", "0", "0", "0", "0"), +#' c("E", "E", "0", "0", "0", "0") +#' ) setMethod("make_famid", "character", function(obj, dadid, momid) { id <- obj @@ -83,9 +83,9 @@ setMethod("make_famid", "character", if (any(xx == 1)) { singles <- as.integer(names(xx[xx == 1])) # famid of singletons famid[!is.na(match(famid, singles))] <- 0 # set singletons to 0 - match(famid, sort(unique(famid))) - 1 # renumber + as.character(match(famid, sort(unique(famid))) - 1) # renumber } else { - match(famid, sort(unique(famid))) + as.character(match(famid, sort(unique(famid)))) } # renumber, no zeros } else { stop("Bug in routine: seem to have found an infinite loop") @@ -93,24 +93,21 @@ setMethod("make_famid", "character", } ) -#' @export #' @rdname make_famid -#' @aliases make_famid,Pedigree -#' @docType methods -#' @include pedigreeClass.R +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' make_famid(ped1) setMethod("make_famid", "Pedigree", function(obj) { - ped <- obj - family <- make_famid(ped(ped)$id, ped(ped)$dadid, ped(ped)$momid) - col_ped_compute <- c("sex", "avail", "id", "dadid", "momid", - "family", "momid", "error", "steril", "status" + famid <- make_famid( + id(ped(obj)), dadid(ped(obj)), momid(ped(obj)) ) - ped_df <- ped(ped)[! colnames(ped(ped)) %in% col_ped_compute] - ped_df$family <- family - col_rel_compute <- c("family", "error") - rel_df <- rel(ped)[! colnames(rel(ped)) %in% col_rel_compute] - fam_id1 <- family[match(rel_df$id1, ped(ped)$id)] - fam_id2 <- family[match(rel_df$id2, ped(ped)$id)] + obj@ped@famid <- famid + + fam_id1 <- famid[match(id1(rel(obj)), id(ped(obj)))] + fam_id2 <- famid[match(id2(rel(obj)), id(ped(obj)))] if (any(fam_id1 != fam_id2)) { stop("The two individuals in the relationship", @@ -118,9 +115,151 @@ setMethod("make_famid", "Pedigree", ) } - rel_df$family <- fam_id1 - Pedigree(ped_df, rel_df, - scales = scales(ped), normalize = TRUE + obj@rel@famid <- fam_id1 + obj <- upd_famid_id(obj) + validObject(obj) + obj + } +) + +#' Update family prefix in individuals id +#' +#' Update the family prefix in the individuals identifiers. +#' Individuals identifiers are constructed as follow **famid**_**id**. +#' Therefore to update their family prefix the ids are split by the +#' first underscore and the first part is overwritten by **famid**. +#' +#' If famid is *missing*, then the `famid()` function will be called on the +#' object. +#' +#' @param obj Ped or Pedigree object or a character vector of individual ids +#' @inheritParams Ped +#' +#' @return A character vector of individual ids with family prefix +#' updated +#' +#' @export +#' @usage NULL +setGeneric("upd_famid_id", + function(obj, famid, ...) standardGeneric("upd_famid_id") +) + +#' @rdname upd_famid_id +#' @examples +#' +#' upd_famid_id(c("1", "2", "B_3"), c("A", "B", "A")) +#' upd_famid_id(c("1", "B_2", "C_3", "4"), c("A", NA, "A", NA)) +#' @export +setMethod("upd_famid_id", "character", + function(obj, famid, missid = NA_character_) { + if (length(obj) != length(famid)) { + stop("id and famid must have the same length") + } + id <- obj[!obj %in% missid] + famid <- famid[!obj %in% missid] + if (! is.character(id)) { + stop("id must be a character vector") + } + id[!str_detect(id, "_")] <- paste0("_", id[!str_detect(id, "_")]) + ids <- str_split_fixed(id, "_", 2) + ids[, 1] <- famid + new_ids <- ifelse( + ids[, 1] %in% missid, + ids[, 2], + paste(ids[, 1], ids[, 2], sep = "_") ) + obj[!obj %in% missid] <- new_ids + obj + } +) + +#' @rdname upd_famid_id +#' @export +setMethod("upd_famid_id", + signature(obj = "Ped", famid = "character_OR_integer"), + function(obj, famid) { + obj@id <- upd_famid_id(id(obj), famid) + obj@dadid <- upd_famid_id(dadid(obj), famid) + obj@momid <- upd_famid_id(momid(obj), famid) + obj@famid <- famid + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Ped", famid = "missing"), + function(obj) { + obj@id <- upd_famid_id(id(obj), famid(obj)) + obj@dadid <- upd_famid_id(dadid(obj), famid(obj)) + obj@momid <- upd_famid_id(momid(obj), famid(obj)) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Rel", famid = "character_OR_integer"), + function(obj, famid) { + obj@id1 <- upd_famid_id(id1(obj), famid) + obj@id2 <- upd_famid_id(id2(obj), famid) + obj@famid <- famid + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Rel", famid = "missing"), + function(obj) { + obj@id1 <- upd_famid_id(id1(obj), famid(obj)) + obj@id2 <- upd_famid_id(id2(obj), famid(obj)) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' id(ped(ped1)) +#' new_fam <- make_famid(id(ped(ped1)), dadid(ped(ped1)), momid(ped(ped1))) +#' id(ped(upd_famid_id(ped1, new_fam))) +setMethod("upd_famid_id", + signature(obj = "Pedigree", famid = "character_OR_integer"), + function(obj, famid) { + old_id <- id(ped(obj)) + obj@ped <- upd_famid_id(ped(obj), famid) + fid1 <- famid[match(id1(rel(obj)), old_id)] + fid2 <- famid[match(id2(rel(obj)), old_id)] + if (any(fid1 != fid2)) { + stop("The two individuals in the relationship", + "are not in the same family" + ) + } + obj@rel <- upd_famid_id(rel(obj), fid1) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' make_famid(ped1) +setMethod("upd_famid_id", + signature(obj = "Pedigree", famid = "missing"), + function(obj) { + obj@ped <- upd_famid_id(ped(obj)) + obj@rel <- upd_famid_id(rel(obj)) + validObject(obj) + obj } ) \ No newline at end of file diff --git a/R/min_dist_inf.R b/R/min_dist_inf.R index 8304b95b..bdd6981b 100644 --- a/R/min_dist_inf.R +++ b/R/min_dist_inf.R @@ -1,7 +1,7 @@ #' @importFrom dplyr %>% NULL -#' @title Minimum distance to the informative individuals +#' Minimum distance to the informative individuals #' #' @description Compute the minimum distance between the informative #' individuals and all the others. @@ -18,11 +18,10 @@ NULL #' by 2, the minimum distance is increased by 1. #' #' -#' -#' @inheritParams sex_to_factor -#' @inheritParams kinship +#' @param ... Additional arguments +#' @param id_inf An identifiers vector of informative individuals. +#' @inheritParams Ped #' @inheritParams is_informative -#' @inheritParams is_parent #' #' @return #' ## When obj is a vector @@ -31,34 +30,36 @@ NULL #' `obj` vector. #' #' ## When obj is a Pedigree -#' The Pedigree object with a new column named 'kin' containing the kinship -#' degree. -#' -#' @examples -#' data(sampleped) -#' ped <- Pedigree(sampleped) -#' min_dist_inf(ped, col_aff = "affection_aff")$ped +#' The Pedigree object with a new slot named 'kin' containing the minimum +#' distance between each individuals and the informative individuals. +#' The `isinf` slot is also updated with the informative individuals. #' #' @seealso [kinship()] #' @include is_informative.R #' @include kinship.R -#' @docType methods #' @export +#' @usage NULL setGeneric("min_dist_inf", signature = "obj", function(obj, ...) standardGeneric("min_dist_inf") ) -#' @export #' @rdname min_dist_inf -#' @aliases min_dist_inf,character -#' @docType methods +#' @examples +#' +#' min_dist_inf( +#' c("A", "B", "C", "D", "E"), +#' c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), +#' sex = c(1, 2, 1, 2, 1), +#' id_inf = c("D", "E") +#' ) +#' @export setMethod("min_dist_inf", "character", function(obj, - dadid, momid, sex, avail, affected, informative = "AvAf" + dadid, momid, sex, id_inf ) { id <- obj # Selection of all informative individuals depending of the informative # parameter - id_inf <- is_informative(id, avail, affected, informative) if (any(is.na(id_inf)) || length(id_inf) == 0) { stop("No informative individuals detected") } @@ -73,30 +74,50 @@ setMethod("min_dist_inf", "character", function(obj, kin }) -#' @export #' @rdname min_dist_inf -#' @aliases min_dist_inf,Pedigree -#' @docType methods -#' @param reset If TRUE, the `kin` and if `id_inf` columns is reset +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' kin(ped(min_dist_inf(ped, col_aff = "affection_mods"))) +#' @export setMethod("min_dist_inf", "Pedigree", function(obj, - col_aff = NULL, informative = "AvAf", missid = "0", reset = FALSE, ... + col_aff = NULL, informative = "AvAf", reset = FALSE, ... ) { - ped <- is_informative(obj, col_aff, informative = informative, - missid, reset + obj_aff <- is_informative(obj, col_aff, informative = informative, + reset = reset ) - cols_needed <- c("avail", "affected") - check_columns(ped(ped), cols_needed, NULL, NULL, others_cols = TRUE) - - kin <- min_dist_inf( - ped(ped)$id, ped(ped)$dadid, ped(ped)$momid, ped(ped)$sex, - ped(ped)$avail, ped(ped)$affected, informative + new_ped <- min_dist_inf( + ped(obj_aff), + informative = informative, reset = reset ) - if (!reset) { - check_columns(ped(ped), NULL, "kin", NULL) + ped(obj_aff) <- new_ped + validObject(obj_aff) + obj_aff +}) + +#' @rdname min_dist_inf +#' @param reset If TRUE, the `kin` and if `isinf` columns is reset +#' @export +setMethod("min_dist_inf", "Ped", function( + obj, informative = "AvAf", reset = FALSE +) { + + if (!reset & any(!is.na(kin(obj)))) { + stop( + "The kin slot already has values in the Ped object", + " and reset is set to FALSE" + ) } - ped(ped)$kin <- kin - ped + id_inf <- id(obj)[isinf(obj)] + kin <- min_dist_inf( + id(obj), dadid(obj), momid(obj), sex(obj), id_inf + ) + + kin(obj) <- kin + validObject(obj) + obj }) diff --git a/R/norm_data.R b/R/norm_data.R index 628c2952..eb471269 100644 --- a/R/norm_data.R +++ b/R/norm_data.R @@ -4,36 +4,15 @@ #' @importFrom stringr str_remove_all NULL -#' Compute id with family id +#' Normalise a Ped object dataframe #' -#' @description Compute id with family id if the family id available -#' -#' @param family_id The family id -#' @param ind_id The individual id -#' @inheritParams is_parent -#' @keywords internal -#' @return The id with the family id merged -prefix_famid <- function(family_id, ind_id, missid = "0") { - if (length(family_id) > 1 && length(family_id) != length(ind_id)) { - stop("family_id and ind_id must have the same length.") - } - - pre_famid <- ifelse( - is.na(family_id) | is.null(family_id), - "", paste0(as.character(family_id), "_") - ) - ifelse(ind_id == missid, missid, paste0(pre_famid, as.character(ind_id))) -} - -#' Normalise dataframe -#' -#' @description Normalise dataframe for Pedigree object +#' @description Normalise dataframe for a Ped object #' #' @details Normalise a dataframe and check for columns correspondance -#' to be able to use it as an input to create Pedigree object. +#' to be able to use it as an input to create a Ped object. #' Multiple test are done and errors are checked. -#' Sex is calculated based in the `gender` column the following notations -#' are accepted: f, woman, female, 2 and m, man, male, 1. +#' Sex is calculated based on the `gender` column. +#' #' The `steril` column need to be a boolean either TRUE, FALSE or 'NA'. #' Will be considered available any individual with no 'NA' values in the #' `available` column. @@ -41,25 +20,49 @@ prefix_famid <- function(family_id, ind_id, missid = "0") { #' All individuals with errors will be remove from the dataframe and will #' be transfered to the error dataframe. #' +#' A number of checks are done to ensure the dataframe is correct: +#' +#' ## On identifiers: +#' - All ids (id, dadid, momid, famid) are not empty (`!= ""`) +#' - All `id` are unique (no duplicated) +#' - All `dadid` and `momid` are unique in the id column (no duplicated) +#' - id is not the same as dadid or momid +#' - Either have both parents or none +#' +#' ## On sex +#' - All sex code are either `male`, `female`, `terminated` or `unknown`. +#' - No parents are steril +#' - All fathers are male +#' - All mothers are female +#' #' @param ped_df A data.frame with the individuals informations. -#' The minimum columns required are `indID`, `fatherId`, `motherId` and -#' `gender`. -#' The `family` column can also be used to specify the family of the -#' individuals and will be merge to the `id` field separated by an -#' underscore. -#' The following columns are also recognize `sterilisation`, `available`, -#' `vitalStatus`, `affection`. The four of them will be transformed with the -#' [vect_to_binary()] function. -#' They respectively correspond to the sterilisation status, -#' the availability status, the death status and the affection status -#' of the individuals. The values recognized for those columns are `1` or -#' `0`. -#' @param na_strings Vector of strings to be considered as NA values +#' The minimum columns required are: +#' +#' - `indID` individual identifiers -> `id` +#' - `fatherId` biological fathers identifiers -> `dadid` +#' - `motherId` biological mothers identifiers -> `momdid` +#' - `gender` sex of the individual -> `sex` +#' - `family` family identifiers -> `famid` +#' +#' The `family` column, if provided, will be merged to the *ids* field +#' separated by an underscore using the [upd_famid_id()] function. +#' +#' The following columns are also recognize and will be transformed with the +#' [vect_to_binary()] function: +#' +#' - `sterilisation` status -> `steril` +#' - `available` status -> `avail` +#' - `vitalStatus`, is the individual dead -> `status` +#' - `affection` status -> `affected` +#' +#' The values recognized for those columns are `1` or `0`, `TRUE` or `FALSE`. +#' @param na_strings Vector of strings to be considered as NA values. #' @param try_num Boolean defining if the function should try to convert #' all the columns to numeric. -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A dataframe with the errors identified in the `error` column +#' @return A dataframe with different variable correctly standardized +#' and with the errors identified in the `error` column #' #' @include utils.R #' @examples @@ -69,59 +72,72 @@ prefix_famid <- function(family_id, ind_id, missid = "0") { #' motherId = c(0, 0, 2, 2, 0, 5, 2, 0, 8, 8), #' gender = c(1, 2, "m", "man", "f", "male", "m", "m", "f", "f"), #' available = c("A", "1", 0, NA, 1, 0, 1, 0, 1, 0), -#' family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), +#' famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), #' sterilisation = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, "TRUE"), #' vitalStatus = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0), #' affection = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0) #' ) -#' norm_ped(df) +#' tryCatch( +#' norm_ped(df), +#' error = function(e) print(e) +#' ) +#' +#' @seealso [Ped()], [Ped-class], [Pedigree()] #' @export norm_ped <- function( - ped_df, na_strings = c("NA", ""), missid = "0", try_num = FALSE + ped_df, na_strings = c("NA", ""), missid = NA_character_, try_num = FALSE ) { err_cols <- c( "sexErrMoFa", "sexErrFa", "sexErrMo", "sexErrTer", "sexNA", - "sexError", "idErrFa", "idErrMo", "idErrSelf", "idErrOwnParent", - "idErrBothParent", "idError", "error" + "sexError", "idErr", "idErrFa", "idErrMo", "idErrSelf", + "idErrOwnParent", "idErrBothParent", "idError", "error" ) err <- data.frame(matrix(NA, nrow = nrow(ped_df), ncol = length(err_cols))) colnames(err) <- err_cols cols_need <- c("indId", "fatherId", "motherId", "gender") - cols_used <- c("sex", "steril", "avail", "id", "dadid", "momid", "error", - "affected", "status" + cols_used <- c( + "sex", "steril", "status", "avail", "id", "dadid", "momid", "famid", + "error", "affected" ) - cols_to_use <- c("sterilisation", "available", "family", - "vitalStatus", "affection" + cols_to_use <- c( + "available", "family", "sterilisation", "vitalStatus", "affection" ) ped_df <- check_columns( ped_df, cols_need, cols_used, cols_to_use, others_cols = TRUE, cols_to_use_init = TRUE, cols_used_init = TRUE ) + + ped_df$family[is.na(ped_df$family)] <- missid + if (nrow(ped_df) > 0) { ped_df <- mutate_if( - ped_df, is.character, ~replace(., . %in% na_strings, NA) + ped_df, is.character, ~replace(., . %in% na_strings, NA_character_) ) #### Id #### Check id type - for (id in c("indId", "fatherId", "motherId", "family")) { - if (!is.numeric(ped_df[[id]])) { - ped_df[[id]] <- as.character(ped_df[[id]]) - if (length(grep("^ *$", ped_df[[id]])) > 0) { - stop( - "A blank or empty string is not allowed as the ", - id, " variable" - ) + for (id in c("indId", "fatherId", "motherId")) { + ped_df[[id]] <- as.character(ped_df[[id]]) + } + err$idErr <- lapply( + as.data.frame(t(ped_df[, c( + "indId", "fatherId", "motherId", "family" + )])), + function(x) { + if (any(x == "" & !is.na(x))) { + "One id is Empty" + } else { + NA_character_ } } - } - + ) ## Make a new id from the family and subject pair - ped_df$id <- prefix_famid(ped_df$family, ped_df$indId, missid) - ped_df$dadid <- prefix_famid(ped_df$family, ped_df$fatherId, missid) - ped_df$momid <- prefix_famid(ped_df$family, ped_df$motherId, missid) + ped_df$famid <- ped_df$family + ped_df$id <- upd_famid_id(ped_df$indId, ped_df$famid, missid) + ped_df$dadid <- upd_famid_id(ped_df$fatherId, ped_df$famid, missid) + ped_df$momid <- upd_famid_id(ped_df$motherId, ped_df$famid, missid) ped_df <- mutate_at(ped_df, c("id", "dadid", "momid"), - ~replace(., . %in% na_strings, missid) + ~replace(., . %in% c(na_strings, missid), NA_character_) ) #### Sex #### @@ -131,12 +147,14 @@ norm_ped <- function( is_mother <- ped_df$id %in% ped_df$momid & !is.na(ped_df$id) ## Add missing sex due to parenthood - ped_df$sex[is.na(ped_df$gender) & is_father] <- "male" - ped_df$sex[is.na(ped_df$gender) & is_mother] <- "female" + ped_df$sex[is_father] <- "male" + ped_df$sex[is_mother] <- "female" ## Add terminated for sterilized individuals that is neither dad nor mom if ("sterilisation" %in% colnames(ped_df)) { - ped_df$steril <- vect_to_binary(ped_df$sterilisation) + ped_df$steril <- vect_to_binary( + ped_df$sterilisation, logical = TRUE + ) ped_df$sex[ ped_df$steril == 1 & !is.na(ped_df$steril) & !is_father & !is_mother @@ -146,7 +164,6 @@ norm_ped <- function( ped_df$steril == 1 & !is.na(ped_df$steril) ) & (is_father | is_mother) ] <- "isSterilButIsParent" - nb_steril_parent <- sum(!is.na(err$sexErrTer)) ped_df$steril[!is.na(err$sexErrTer) & (is_father | is_mother) ] <- FALSE @@ -194,14 +211,14 @@ norm_ped <- function( ] <- "selfIdDuplicated" err$idErrOwnParent[ped_df$id %in% id_own_parent] <- "isItsOwnParent" err$idErrBothParent[ - (ped_df$dadid == missid & ped_df$momid != missid) | - (ped_df$dadid != missid & ped_df$momid == missid) + (ped_df$dadid %in% missid & (!ped_df$momid %in% missid)) | + ((!ped_df$dadid %in% missid) & ped_df$momid %in% missid) ] <- "oneParentMissing" ## Unite all id errors in one column err <- unite( err, "idError", c( - "idErrFa", "idErrMo", "idErrSelf", + "idErr", "idErrFa", "idErrMo", "idErrSelf", "idErrOwnParent", "idErrBothParent" ), na.rm = TRUE, sep = "_", remove = TRUE ) @@ -209,16 +226,15 @@ norm_ped <- function( #### Available #### if ("available" %in% colnames(ped_df)) { - ped_df$avail <- vect_to_binary(ped_df$available) + ped_df$avail <- vect_to_binary(ped_df$available, logical = TRUE) } #### Status #### if ("vitalStatus" %in% colnames(ped_df)) { - ped_df$status <- vect_to_binary(ped_df$vitalStatus) + ped_df$status <- vect_to_binary(ped_df$vitalStatus, logical = TRUE) } - #### Affected #### if ("affection" %in% colnames(ped_df)) { - ped_df$affected <- vect_to_binary(ped_df$affection) + ped_df$affected <- vect_to_binary(ped_df$affection, logical = TRUE) } #### Convert to num #### @@ -244,50 +260,63 @@ norm_ped <- function( ped_df } -#' Normalise relationship dataframe +#' Normalise a Rel object dataframe #' -#' @description Normalise relationship dataframe for Pedigree object +#' @description Normalise a dataframe and check for columns correspondance +#' to be able to use it as an input to create a Ped object. #' -#' @inheritParams norm_ped -#' @param rel_df A data.frame with the special relationships between -#' individuals. -#' The minimum columns required are `id1`, `id2` and `code`. -#' The `family` column can also be used to specify the family -#' of the individuals. -#' The code values are: -#' - `1` = Monozygotic twin -#' - `2` = Dizygotic twin -#' - `3` = twin of unknown zygosity -#' - `4` = Spouse +#' @details +#' The `famid` column, if provided, will be merged to the *ids* field +#' separated by an underscore using the [upd_famid_id()] function. +#' The `code` column will be transformed with the [rel_code_to_factor()]. +#' Multiple test are done and errors are checked. #' -#' The value relation code recognized by the function are the one defined -#' by the [rel_code_to_factor()] function. -#' @inheritParams is_parent +#' A number of checks are done to ensure the dataframe is correct: +#' +#' ## On identifiers: +#' - All ids (id1, id2) are not empty (`!= ""`) +#' - `id1` and `id2` are not the same +#' +#' ## On code +#' - All code are recognised as either "MZ twin", "DZ twin", "UZ twin" or +#' "Spouse" +#' +#' @inheritParams norm_ped +#' @inheritParams Pedigree #' #' @examples #' df <- data.frame( -#' indId1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), -#' indId2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), +#' id1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), +#' id2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), #' code = c("MZ twin", "DZ twin", "UZ twin", "Spouse", 1, 2, #' 3, 4, "MzTwin", "sp oUse"), -#' family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) +#' famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) #' ) #' norm_rel(df) #' #' @return A dataframe with the errors identified #' @export -norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = "0") { +norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = NA_character_) { + + if (is.matrix(rel_df)) { + rel_df <- as.data.frame(rel_df) + colnames(rel_df) <- c( + "id1", "id2", "code", "famid" + )[seq_len(ncol(rel_df))] + } + #### Check columns #### err_cols <- c("codeErr", "sameIdErr", "id1Err", "id2Err", "error") err <- data.frame(matrix(NA, nrow = nrow(rel_df), ncol = length(err_cols))) colnames(err) <- err_cols - cols_needed <- c("indId1", "indId2", "code") - cols_used <- c("id1", "id2", "error") - cols_to_use <- c("family") + cols_needed <- c("id1", "id2", "code") + cols_used <- c("error") + cols_to_use <- c("famid") rel_df <- check_columns( rel_df, cols_needed, cols_used, cols_to_use, others_cols = FALSE, cols_to_use_init = TRUE, cols_used_init = TRUE ) + rel_df$famid[is.na(rel_df$famid)] <- missid if (nrow(rel_df) > 0) { rel_df <- mutate_if( rel_df, is.character, @@ -302,17 +331,21 @@ norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = "0") { #### Check for id errors #### Set ids as characters rel_df <- rel_df %>% - mutate(across(c("indId1", "indId2"), as.character)) + mutate(across(c("id1", "id2", "famid"), as.character)) ## Check for non null ids - len1 <- nchar(rel_df$indId1) - len2 <- nchar(rel_df$indId2) - err$id1Err[is.na(len1) | len1 == missid] <- "indId1length0" - err$id2Err[is.na(len2) | len2 == missid] <- "indId2length0" + len1 <- nchar(rel_df$id1) + len2 <- nchar(rel_df$id2) + err$id1Err[is.na(len1) | len1 %in% missid] <- "indId1length0" + err$id2Err[is.na(len2) | len2 %in% missid] <- "indId2length0" ## Compute id with family id - rel_df$id1 <- prefix_famid(rel_df$family, rel_df$indId1, missid) - rel_df$id2 <- prefix_famid(rel_df$family, rel_df$indId2, missid) + rel_df$id1 <- upd_famid_id(rel_df$id1, rel_df$famid, missid) + rel_df$id2 <- upd_famid_id(rel_df$id2, rel_df$famid, missid) + + rel_df <- mutate_at(rel_df, c("id1", "id2", "famid"), + ~replace(., . %in% c(na_strings, missid), NA_character_) + ) err$sameIdErr[rel_df$id1 == rel_df$id2] <- "SameId" diff --git a/R/num_child.R b/R/num_child.R index 18960d49..3ea91147 100644 --- a/R/num_child.R +++ b/R/num_child.R @@ -2,18 +2,17 @@ #' @importFrom tidyr pivot_longer NULL -#' Number of child +#' Number of childs #' -#' @description Compute the number of child per individual +#' @description Compute the number of childs per individual #' #' @details Compute the number of direct child but also the number #' of indirect child given by the ones related with the linked spouses. -#' If a relation ship matrix is given, then even if no children is present +#' If a relation ship dataframe is given, then even if no children is present #' between 2 spouses, the indirect childs will still be added. #' -#' @inheritParams kinship -#' @inheritParams norm_rel -#' @inheritParams is_parent +#' @inheritParams Ped +#' @inheritParams Pedigree #' #' @return #' ## When obj is a vector @@ -25,26 +24,39 @@ NULL #' An updated Pedigree object with the columns `num_child_dir`, #' `num_child_ind` and `num_child_tot` added to the #' Pedigree `ped` slot. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ped1 <- num_child(ped1) -#' summary(ped1$ped) -#' @include pedigreeClass.R +#' @include AllClass.R #' @export +#' @usage NULL setGeneric("num_child", signature = "obj", function(obj, ...) standardGeneric("num_child") ) -#' @export #' @rdname num_child -#' @aliases num_child,character -#' @docType methods -setMethod("num_child", "character", function(obj, dadid, momid, - rel_df = NULL, missid = "0" +#' @examples +#' +#' num_child( +#' obj = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), +#' dadid = c("3", "3", "6", "8", "0", "0", "0", "0", "0", "0"), +#' momid = c("4", "5", "7", "9", "0", "0", "0", "0", "0", "0"), +#' rel_df = data.frame( +#' id1 = "10", +#' id2 = "3", +#' code = "Spouse" +#' ) +#' ) +#' @export +setMethod("num_child", "character_OR_integer", function(obj, dadid, momid, + rel_df = NULL, missid = NA_character_ ) { id <- obj + if (length(dadid) != length(id)) { + stop("The length of dadid should be equal to the length of id") + } + if (length(momid) != length(id)) { + stop("The length of momid should be equal to the length of id") + } + # Create dummy vectors for the check() function child <- NULL num_child_dir <- NULL @@ -57,18 +69,24 @@ setMethod("num_child", "character", function(obj, dadid, momid, childs_all <- NULL df <- data.frame(id, dadid, momid, stringsAsFactors = FALSE) + if (nrow(df) == 0) { + df <- data.frame(id = character(), + num_child_dir = integer(), + num_child_ind = integer(), + num_child_tot = integer(), + stringsAsFactors = FALSE + ) + return(df) + } - spouse_rel <- unique(df[df$dadid != missid & - df$momid != missid, c("dadid", "momid") + spouse_rel <- unique(df[(!df$dadid %in% missid) & + (!df$momid %in% missid), c("dadid", "momid") ] ) colnames(spouse_rel) <- c("id1", "id2") if (!is.null(rel_df)) { - cols_needed <- c("id1", "id2", "code") - rel_df <- check_columns(rel_df, cols_needed, "", "", - others_cols = FALSE - ) + rel_df <- norm_rel(rel_df, missid = missid) spouse_rel <- rbind(spouse_rel, rel_df[rel_df$code == "Spouse", c("id1", "id2")] ) @@ -77,74 +95,85 @@ setMethod("num_child", "character", function(obj, dadid, momid, spouse_rel$idmax <- pmax(spouse_rel$id1, spouse_rel$id2) spouse_rel <- unique(spouse_rel[c("idmin", "idmax")]) - dad_child <- df[df$dadid != missid, c("dadid", "id")] %>% - group_by(dadid) %>% - summarise(child = list(id)) %>% - mutate(num_child_dir = lengths(child)) %>% - rename(id = dadid) - mom_child <- df[df$momid != missid, c("id", "momid")] %>% - group_by(momid) %>% - summarise(child = list(id)) %>% - mutate(num_child_dir = lengths(child)) %>% - rename(id = momid) - id_child <- rbind(dad_child, mom_child) - - # Number of direct child per individual - df$num_child_dir <- id_child$num_child_dir[match(df$id, id_child$id)] - - # Number of total childs per individual - rel_child <- spouse_rel %>% - left_join(id_child, by = c("idmin" = "id")) %>% - left_join(id_child, by = c("idmax" = "id"), - suffix = c("_min", "_max") - ) %>% - rowwise() %>% - mutate(childs = list(unique(unlist( - list(child_min, child_max) - )))) %>% - select(c(idmin, idmax, childs)) %>% - tidyr::pivot_longer(cols = -childs, names_to = "order", - values_to = "id" - ) %>% - group_by(id) %>% - summarise(childs_all = list(unique(unlist(childs)))) %>% - mutate(num_child_tot = lengths(childs_all)) - - df$num_child_tot <- rel_child$num_child_tot[match(df$id, rel_child$id)] - - df <- df %>% - mutate(across(c(num_child_dir, num_child_tot), - ~replace(., is.na(.), 0) - )) - - df$num_child_ind <- df$num_child_tot - df$num_child_dir - - df + if (nrow(spouse_rel) > 0) { + dad_child <- df[(!df$dadid %in% missid), c("dadid", "id")] %>% + group_by(dadid) %>% + summarise(child = list(id)) %>% + mutate(num_child_dir = lengths(child)) %>% + rename(id = dadid) + mom_child <- df[(!df$momid %in% missid), c("id", "momid")] %>% + group_by(momid) %>% + summarise(child = list(id)) %>% + mutate(num_child_dir = lengths(child)) %>% + rename(id = momid) + id_child <- rbind(dad_child, mom_child) + + # Number of direct child per individual + df$num_child_dir <- id_child$num_child_dir[match(df$id, id_child$id)] + + # Number of total childs per individual + spouse_child <- spouse_rel %>% + left_join(id_child, by = c("idmin" = "id")) %>% + left_join(id_child, by = c("idmax" = "id"), + suffix = c("_min", "_max") + ) + rel_child <- spouse_child %>% + rowwise() %>% + mutate(childs = list(unique(unlist( + list(child_min, child_max) + )))) %>% + select(c(idmin, idmax, childs)) %>% + tidyr::pivot_longer(cols = -childs, names_to = "order", + values_to = "id" + ) %>% + group_by(id) %>% + summarise(childs_all = list(unique(unlist(childs)))) %>% + mutate(num_child_tot = lengths(childs_all)) + + df$num_child_tot <- rel_child$num_child_tot[match(df$id, rel_child$id)] + + df <- df %>% + mutate(across(c(num_child_dir, num_child_tot), + ~replace(., is.na(.), 0) + )) + + df$num_child_ind <- df$num_child_tot - df$num_child_dir + + df + } else { + df$num_child_dir <- 0 + df$num_child_ind <- 0 + df$num_child_tot <- 0 + df + } }) -#' @export #' @rdname num_child -#' @aliases num_child,Pedigree -#' @docType methods #' @param reset If TRUE, the `num_child_tot`, `num_child_ind` and #' the `num_child_dir` columns are reset. +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' ped1 <- num_child(ped1, reset = TRUE) +#' summary(ped(ped1)) +#' @export setMethod("num_child", "Pedigree", function(obj, reset = FALSE) { - df <- num_child(obj$ped$id, obj$ped$dadid, obj$ped$momid, - rel_df = obj$rel + df <- num_child(id(ped(obj)), dadid(ped(obj)), momid(ped(obj)), + rel_df = as.data.frame(rel(obj)) ) if (!reset) { - check_columns(obj$ped, NULL, + check_columns(as.data.frame(ped(obj)), NULL, c("num_child_tot", "num_child_ind", "num_child_dir"), NULL, others_cols = TRUE ) } - obj$ped <- merge(obj$ped, - df[c("id", "num_child_tot", "num_child_ind", "num_child_dir")], - by = "id", sort = FALSE - ) + obj@ped@num_child_tot <- df$num_child_tot + obj@ped@num_child_ind <- df$num_child_ind + obj@ped@num_child_dir <- df$num_child_dir + validObject(obj) obj }) -TRUE diff --git a/R/ped_to_legdf.R b/R/ped_to_legdf.R index 0d084e12..4631218a 100644 --- a/R/ped_to_legdf.R +++ b/R/ped_to_legdf.R @@ -1,11 +1,25 @@ -#' Convert a Pedigree to a legend data frame of element to plot +#' Create plotting legend data frame from a Pedigree #' #' @description #' Convert a Pedigree to a legend data frame for it to -#' be plotted with afterwards with [plot_fromdf()]. +#' be plotted afterwards with [plot_fromdf()]. #' -#' @inheritParams align -#' @inheritParams set_plot_area +#' @details The data frame contains the following columns: +#' - `x0`, `y0`, `x1`, `y1`: coordinates of the elements +#' - `type`: type of the elements +#' - `fill`: fill color of the elements +#' - `border`: border color of the elements +#' - `angle`: angle of the shading of the elements +#' - `density`: density of the shading of the elements +#' - `cex`: size of the elements +#' - `label`: label of the elements +#' - `tips`: tips of the elements (used for the tooltips) +#' - `adjx`: horizontal text adjustment of the labels +#' - `adjy`: vertical text adjustment of the labels +#' +#' All those columns are used by [plot_fromdf()] to plot the graph. +#' @param obj A Pedigree object +#' @param cex Character expansion of the text #' @inheritParams plot_fromdf #' @param adjx default=0. Controls the horizontal text adjustment of #' the labels in the legend. @@ -19,11 +33,21 @@ #' data("sampleped") #' ped <- Pedigree(sampleped) #' leg_df <- ped_to_legdf(ped) -#' summary(leg_df$leg_df) -#' plot_fromdf(leg_df$leg_df) +#' summary(leg_df$df) +#' plot_fromdf(leg_df$df, usr = c(-1,15,0,7)) +#' @keywords internal, Pedigree-plot +#' @export +#' @usage NULL +setGeneric( + "ped_to_legdf", signature = "obj", + function(obj, ...) { + standardGeneric("ped_to_legdf") + } +) + +#' @rdname ped_to_legdf #' @export -#' @docType methods -ped_to_legdf <- function(ped, +setMethod("ped_to_legdf", "Pedigree", function(obj, boxh = 1, boxw = 1, cex = 1, adjx = 0, adjy = 0 ) { @@ -37,9 +61,9 @@ ped_to_legdf <- function(ped, adjy = numeric() ) sex_equiv <- c("Male", "Female", "Terminated", "Unknown") - all_lab <- list(sex_equiv, scales(ped)$border$labels) - all_aff <- lapply(unique(scales(ped)$fill$order), function(x) { - scales(ped)$fill$labels[scales(ped)$fill$order == x] + all_lab <- list(sex_equiv, border(obj)$labels) + all_aff <- lapply(unique(fill(obj)$order), function(x) { + fill(obj)$labels[fill(obj)$order == x] }) all_lab <- c(all_lab, all_aff) @@ -63,7 +87,7 @@ ped_to_legdf <- function(ped, posy <- cumsum(posy) - boxh posy <- posy[seq_along(posy) %% 2 == 0] - all_aff <- scales(ped)$fill + all_aff <- fill(obj) n_aff <- length(unique(all_aff$order)) lab_title <- c("Sex", "Border", unique(all_aff$column_values)) @@ -75,9 +99,11 @@ ped_to_legdf <- function(ped, ) plot_df <- rbind.fill(plot_df, titles) + ## Get ped_df + ped_df <- as.data.frame(ped(obj)) # Sex poly1 <- polygons(1) - all_sex <- unique(as.numeric(ped(ped)$sex)) + all_sex <- unique(as.numeric(ped_df$sex)) sex <- data.frame( x0 = posx[1], y0 = posy[all_sex], type = paste(names(poly1)[all_sex], 1, 1, sep = "_"), @@ -98,16 +124,15 @@ ped_to_legdf <- function(ped, plot_df <- rbind.fill(plot_df, sex, sex_label) # Border - bord_df <- scales(ped)$border - border_mods <- unique(ped(ped)[, unique(bord_df[["column"]])]) + border_mods <- unique(ped_df[, unique(border(obj)$column_mods)]) border <- data.frame( x0 = posx[3], y0 = posy[seq_along(border_mods)], type = rep("square_1_1", length(border_mods)), - border = bord_df$border[match(border_mods, bord_df$mods)], + border = border(obj)$border[match(border_mods, border(obj)$mods)], fill = "white", id = "border" ) - lab <- bord_df$labels[match(border_mods, bord_df$mods)] + lab <- border(obj)$labels[match(border_mods, border(obj)$mods)] lab[is.na(lab)] <- "NA" border_label <- data.frame( x0 = posx[4] + adjx, @@ -123,7 +148,7 @@ ped_to_legdf <- function(ped, ## Affected for (aff in seq_len(n_aff)) { aff_df <- all_aff[all_aff$order == aff, ] - aff_mods <- unique(ped(ped)[, unique(aff_df[["column_mods"]])]) + aff_mods <- unique(ped_df[, unique(aff_df[["column_mods"]])]) aff_bkg <- data.frame( x0 = posx[3 + aff * 2], y0 = posy[seq_along(aff_mods)], type = rep(paste("square", 1, 1, sep = "_"), @@ -173,5 +198,6 @@ ped_to_legdf <- function(ped, min(plot_df$x0), max(plot_df$x0), min(plot_df$y0), max(plot_df$y0) ) - list(leg_df = plot_df, par_usr = par_usr) + list(df = plot_df, par_usr = par_usr) } +) diff --git a/R/ped_to_plotdf.R b/R/ped_to_plotdf.R index 0f7ebd58..6da6e4ed 100644 --- a/R/ped_to_plotdf.R +++ b/R/ped_to_plotdf.R @@ -1,43 +1,82 @@ #' @importFrom plyr rbind.fill NULL -#' Convert a Pedigree to a data frame of element to plot +#' Create plotting data frame from a Pedigree +#' +#' @description +#' Convert a Pedigree to a data frame with all the elements and their +#' characteristic for them to be plotted afterwards with [plot_fromdf()]. +#' +#' @details The data frame contains the following columns: +#' - `x0`, `y0`, `x1`, `y1`: coordinates of the elements +#' - `type`: type of the elements +#' - `fill`: fill color of the elements +#' - `border`: border color of the elements +#' - `angle`: angle of the shading of the elements +#' - `density`: density of the shading of the elements +#' - `cex`: size of the elements +#' - `label`: label of the elements +#' - `tips`: tips of the elements (used for the tooltips) +#' - `adjx`: horizontal text adjustment of the labels +#' - `adjy`: vertical text adjustment of the labels +#' +#' All those columns are used by [plot_fromdf()] to plot the graph. #' #' @inheritParams align -#' @param pconnect when connecting parent to children the program will try to +#' @param pconnect When connecting parent to children the program will try to #' make the connecting line as close to vertical as possible, subject to it #' lying inside the endpoints of the line that connects the children by at #' least `pconnect` people. Setting this option to a large number will #' force the line to connect at the midpoint of the children. #' @param branch defines how much angle is used to connect various levels of #' nuclear families. -#' @param aff_mark if TRUE, add a aff_mark to each box corresponding to the +#' @param aff_mark If `TRUE`, add a aff_mark to each box corresponding to the #' value of the affection column for each filling scale. -#' @param label if not NULL, add a label to each box corresponding to the +#' @param label If not `NULL`, add a label to each box corresponding to the #' value of the column given. -#' @inheritParams set_plot_area +#' @param ... Other arguments passed to [par()] #' @inheritParams subregion +#' @inheritParams set_plot_area #' #' @return A list containing the data frame and the user coordinates. +#' #' @examples +#' #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == 1,]) -#' ped_to_plotdf(ped1) -#' @seealso [plot_fromdf()] +#' ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) +#' plot_df <- ped_to_plotdf(ped1) +#' summary(plot_df$df) +#' plot_fromdf(plot_df$df, usr = plot_df$par_usr$usr, +#' boxh = plot_df$par_usr$boxh, boxw = plot_df$par_usr$boxw +#' ) +#' +#' @seealso +#' [plot_fromdf()] #' [ped_to_legdf()] +#' @keywords internal, Pedigree-plot +#' @export +#' @usage NULL +setGeneric( + "ped_to_plotdf", signature = "obj", + function(obj, ...) { + standardGeneric("ped_to_plotdf") + } +) + +#' @rdname ped_to_plotdf #' @export -ped_to_plotdf <- function( - ped, packed = FALSE, width = 10, align = c(1.5, 2), - subreg = NULL, cex = 0.5, symbolsize = cex, pconnect = 0.5, branch = 0.6, +setMethod("ped_to_plotdf", "Pedigree", function( + obj, packed = TRUE, width = 6, align = c(1.5, 2), + subreg = NULL, cex = 1, symbolsize = cex, pconnect = 0.5, branch = 0.6, aff_mark = TRUE, label = NULL, ... ) { - famlist <- unique(ped(ped)$family) + famlist <- unique(famid(obj)) if (length(famlist) > 1) { nfam <- length(famlist) all_df <- vector("list", nfam) for (i_fam in famlist) { - ped_fam <- ped[ped(ped)$family == i_fam] + ped_fam <- obj[famid(obj) == i_fam] all_df[[i_fam]] <- ped_to_plotdf(ped_fam, packed, width, align, subreg, cex, symbolsize, ... ) @@ -53,7 +92,7 @@ ped_to_plotdf <- function( label = character(), tips = character(), adjx = numeric(), adjy = numeric() ) - plist <- align(ped, packed = packed, width = width, align = align) + plist <- align(obj, packed = packed, width = width, align = align) if (!is.null(subreg)) { plist <- subregion(plist, subreg) @@ -62,7 +101,7 @@ ped_to_plotdf <- function( maxlev <- nrow(plist$pos) params_plot <- set_plot_area( - cex, ped(ped)$id, maxlev, xrange, symbolsize, ... + cex, id(ped(obj)), maxlev, xrange, symbolsize, ... ) boxw <- params_plot$boxw @@ -71,7 +110,7 @@ ped_to_plotdf <- function( legh <- params_plot$legh ## Get all boxes to plot - # idx is the index of the boxes in the alignement + # idx is the index of the boxes in the alignment idx <- which(plist$nid > 0) # index value in the ped of each box id <- plist$nid @@ -80,20 +119,20 @@ ped_to_plotdf <- function( # y position i <- (seq_len(length(plist$nid)) - 1) %% length(plist$n) + 1 # sex of each box - sex <- as.numeric(ped(ped)$sex)[id[idx]] + sex <- as.numeric(sex(ped(obj)))[id[idx]] - all_aff <- scales(ped)$fill - bord_df <- scales(ped)$border - n_aff <- length(unique(all_aff$order)) + all_aff <- fill(obj) + n_aff <- length(unique(fill(obj)$order)) polylist <- polygons(max(1, n_aff)) + ped_df <- as.data.frame(ped(obj)) # border mods of each box - border_mods <- ped(ped)[id[idx], unique(bord_df[["column"]])] - border_idx <- match(border_mods, bord_df[["mods"]]) + border_mods <- ped_df[id[idx], unique(border(obj)$column_mods)] + border_idx <- match(border_mods, border(obj)$mods) for (aff in seq_len(n_aff)) { aff_df <- all_aff[all_aff$order == aff, ] - aff_mods <- ped(ped)[id[idx], unique(aff_df[["column_mods"]])] + aff_mods <- ped_df[id[idx], unique(aff_df[["column_mods"]])] aff_idx <- match(aff_mods, aff_df[["mods"]]) @@ -116,7 +155,7 @@ ped_to_plotdf <- function( fill = aff_df[aff_idx, "fill"], density = aff_df[aff_idx, "density"], angle = aff_df[aff_idx, "angle"], - border = bord_df[border_idx, "border"], + border = border(obj)$border[border_idx], id = "polygon" ) plot_df <- rbind.fill(plot_df, ind) @@ -124,7 +163,7 @@ ped_to_plotdf <- function( aff_mark_df <- data.frame( x0 = pos[idx] + poly_aff_x_mr[sex], y0 = i[idx] + boxh / 2, - label = ped(ped)[id[idx], unique(aff_df[["column_values"]])], + label = ped_df[id[idx], unique(aff_df[["column_values"]])], fill = "black", type = "text", cex = cex, id = "aff_mark" @@ -134,7 +173,7 @@ ped_to_plotdf <- function( } ## Add status - status <- ped(ped)[id[idx], "status"] + status <- ped_df[id[idx], "status"] idx_dead <- idx[status == 1 & !is.na(status)] if (length(idx_dead) > 0) { @@ -151,7 +190,7 @@ ped_to_plotdf <- function( ## Add ids id_df <- data.frame( x0 = pos[idx], y0 = i[idx] + boxh + labh * 1.2, - label = ped(ped)[id[idx], "id"], fill = "black", + label = ped_df[id[idx], "id"], fill = "black", type = "text", cex = cex, id = "id" ) @@ -160,10 +199,10 @@ ped_to_plotdf <- function( ## Add a label if given if (!is.null(label)) { - check_columns(ped(ped), label) + check_columns(ped_df, label) label <- data.frame( x0 = pos[idx], y0 = i[idx] + boxh + labh * 2.8, - label = ped(ped)[id[idx], label], + label = ped_df[id[idx], label], fill = "black", type = "text", cex = cex, id = "label" @@ -333,4 +372,4 @@ ped_to_plotdf <- function( } } list(df = plot_df, par_usr = params_plot) -} +}) diff --git a/R/pedigree.R b/R/pedigree.R deleted file mode 100644 index a062d56b..00000000 --- a/R/pedigree.R +++ /dev/null @@ -1,288 +0,0 @@ -#' Create a Pedigree object -#' -#' This constructor help to create a `Pedigree` object from -#' different `data.frame` or a set of vectors. -#' -#' If any errors are found in the data, the function will return -#' the data.frame with the errors for the Pedigree and the relationship -#' data.frame. -#' -#' @inheritParams align -#' @param obj A vector of the individuals identifiers or a data.frame -#' with the individuals informations. -#' The minimum columns required are `indID`, `fatherId`, `motherId` and -#' `gender`. -#' The `family` column can also be used to specify the family of the -#' individuals and will be merge to the `id` field separated by an -#' underscore. -#' The following columns are also recognize `sterilisation`, `available`, -#' `vitalStatus`, `affection`. The four of them will be transformed with the -#' [vect_to_binary()] function when the normalisation is selected and will -#' be set respectively to `steril`, `avail`, -#' `status` and `affected`. -#' If you do not use the normalisation, the columns will be checked to -#' be `0` or `1`. -#' They respectively correspond to the sterilisation status, -#' the availability status, the death status and the affection status -#' of the individuals. The values recognized for those columns are `1` or -#' `0`. -#' @param relation A matrix or a data.frame with 3 required columns -#' (i.e. id1, id2, code) specifying special relationship between pairs -#' of individuals. -#' #' The code values are: -#' - `1` = Monozygotic twin -#' - `2` = Dizygotic twin -#' - `3` = twin of unknown zygosity -#' - `4` = Spouse -#' -#' If `famid` is given in the call to create Pedigrees, then -#' `famid` needs to be in the last column of `relation`. -#' @param cols_ren_ped A named list with the columns to rename for the -#' pedigree dataframe. -#' @param cols_ren_rel A named list with the columns to rename for the -#' relationship matrix. -#' @param scales A list of two data.frame with the scales to use for the -#' affection status and the other one for the border color (e.g availability). -#' @param normalize A logical to know if the data should be normalised. -#' @param ... Other arguments to pass to the function `generate_colors`. -#' @return A Pedigree object. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' @export -setGeneric("Pedigree", signature = "obj", - function(obj, ...) standardGeneric("Pedigree") -) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,numeric -#' @docType methods -setMethod("Pedigree", "numeric", function(obj, ... -) { - Pedigree(as.character(obj), ...) -}) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,character -#' @docType methods -#' @inheritParams is_parent -#' @inheritParams sex_to_factor -#' @inheritParams family_check -#' @inheritParams is_informative -#' @param status A numeric vector of vital status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : alive -#' - `1` : dead -#' - `NA` : vital status not known -#' @param steril A numeric vector of sterilisation status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : not sterilised -#' - `1` : sterilised -#' - `NA` : sterilisation status not known -setMethod("Pedigree", "character", function(obj, dadid, momid, - sex, family = NA, avail = NULL, affected = NULL, status = NULL, - steril = NULL, relation = NULL, - missid = "0", col_aff = "affection", normalize = TRUE, ... -) { - n <- length(obj) - ## Code transferred from noweb to markdown vignette. - ## Sections from the noweb/vignettes are noted here with - ## Doc: Error and Data Checks - ## Doc: Errors1 - if (length(momid) != n) stop("Mismatched lengths, id and momid") - if (length(dadid) != n) stop("Mismatched lengths, id and momid") - if (length(sex) != n) stop("Mismatched lengths, id and sex") - if (length(steril) != n & !is.null(steril)) { - stop("Mismatched lengths, id and steril") - } - - if (length(avail) != n & !is.null(avail)) { - stop("Mismatched lengths, id and avail") - } - if (length(status) != n & !is.null(status)) { - stop("Mismatched lengths, id and status") - } - - - ped_df <- data.frame( - family = family, - id = obj, - dadid = dadid, - momid = momid, - sex = sex - ) - if (any(!is.na(affected))) { - if (is.vector(affected)) { - ped_df$affection <- affected - } else { - ped_df <- cbind(ped_df, affected) - col_aff <- colnames(affected) - } - } - if (any(!is.na(avail))) { - ped_df$available <- avail - } - if (any(!is.na(status))) { - ped_df$vitalStatus <- status - } - if (any(!is.na(steril))) { - ped_df$sterilisation <- steril - } - if (is.null(relation)) { - relation <- data.frame( - id1 = character(), - id2 = character(), - code = numeric(), - family = character() - ) - } - Pedigree(ped_df, relation = relation, - missid = missid, col_aff = col_aff, ... - ) -}) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,data.frame -#' @docType methods -setMethod("Pedigree", "data.frame", function( - obj = data.frame( - id = character(), - dadid = character(), - momid = character(), - sex = numeric(), - family = character(), - available = numeric(), - affection = numeric() - ), - relation = data.frame( - id1 = character(), - id2 = character(), - code = numeric(), - family = character() - ), - cols_ren_ped = list( - "indId" = "id", - "fatherId" = "dadid", - "motherId" = "momid", - "gender" = "sex", - "family" = "family", - "sterilisation" = "steril", - "vitalStatus" = "status", - "affection" = "affected" - ), - cols_ren_rel = list( - "indId1" = "id1", - "indId2" = "id2" - ), - scales = list( - fill = data.frame( - order = numeric(), - column_values = character(), - column_mods = character(), - mods = numeric(), - labels = character(), - affected = logical(), - fill = character(), - density = numeric(), - angle = numeric() - ), - border = data.frame( - column = character(), - mods = numeric(), - labels = character(), - border = character() - ) - ), - hints = list( - order = NULL, - spouse = NULL - ), - normalize = TRUE, - missid = "0", - col_aff = "affection", - ... -) { - ped_df <- obj - if (!is.data.frame(ped_df)) { - stop("ped_df must be a data.frame") - } - - if (is.matrix(relation)) { - rel_df <- data.frame( - id1 = relation[, 1], - id2 = relation[, 2], - code = relation[, 3] - ) - if (dim(relation)[2] > 3) { - rel_df$family <- relation[, 4] - } - } else if (is.data.frame(relation)) { - rel_df <- relation - } else { - stop("relation must be a matrix or a data.frame") - } - - ## Rename columns ped - old_cols <- as.vector(unlist(cols_ren_ped)) - new_cols <- names(cols_ren_ped) - cols_to_ren <- match(old_cols, names(ped_df)) - names(ped_df)[cols_to_ren[!is.na(cols_to_ren)]] <- - new_cols[!is.na(cols_to_ren)] - - ## Rename columns rel - old_cols <- as.vector(unlist(cols_ren_rel)) - new_cols <- names(cols_ren_rel) - cols_to_ren <- match(old_cols, names(rel_df)) - names(rel_df)[cols_to_ren[!is.na(cols_to_ren)]] <- - new_cols[!is.na(cols_to_ren)] - - ## Set family, id, dadid and momid to character - to_char <- c("family", "indId", "fatherId", "motherId") - to_char <- colnames(ped_df)[colnames(ped_df) %in% to_char] - ped_df[to_char] <- lapply(ped_df[to_char], as.character) - - ## Normalise the data before creating the object - if (normalize) { - ped_df <- norm_ped(ped_df, missid = missid) - rel_df <- norm_rel(rel_df, missid = missid) - } else { - cols_need <- c("id", "dadid", "momid", "sex") - cols_to_use <- c("steril", "avail", "family", "status", "affected") - ped_df <- check_columns( - ped_df, cols_need, "", cols_to_use, - others_cols = TRUE, cols_to_use_init = TRUE - ) - cols_need <- c("id1", "id2", "code") - cols_to_use <- c("family") - rel_df <- check_columns( - rel_df, cols_need, "", cols_to_use, cols_to_use_init = TRUE - ) - } - if (any(!is.na(ped_df$error))) { - warning("The Pedigree informations are not valid.", - "Here is the normalised Pedigree informations", - "with the identified problems" - ) - return(ped_df) - } - - if (any(!is.na(rel_df$error))) { - warning("The relationship informations are not valid.", - "Here is the normalised relationship informations", - "with the identified problems" - ) - return(rel_df) - } - - rownames(ped_df) <- ped_df$id - ## Create the object - ped <- new("Pedigree", ped = ped_df, rel = rel_df, - scales = scales, hints = hints - ) - - generate_colors(ped, col_aff = col_aff, ...) -} -) diff --git a/R/pedigreeClass.R b/R/pedigreeClass.R deleted file mode 100644 index d7a039d7..00000000 --- a/R/pedigreeClass.R +++ /dev/null @@ -1,377 +0,0 @@ -#' @importFrom methods as new slot slot<- validObject -NULL - -#' S4 class to represent a pedigree. -#' -#' A pedigree is a ensemble of individuals linked to each other into -#' a family tree. -#' -#' They are created from a data.frame containing the individuals informations -#' and a relation ship data.frame for the special links between individuals. -#' A list of scales can be provided to create a legend. -#' To create a Pedigree object, use the function -#' [Pedigree()]. -#' -#' @slot ped A data.frame with the individuals informations. The minimum -#' columns required are 'id', 'dadid', 'momid' and 'sex'. Other columns can be -#' added to the data.frame and will be recognised by the functions. Some -#' errors can be detected by the validity function and some of them can be -#' corrected and others will be added to a dedicated column. -#' @slot rel A data.frame for the special relationship between -#' individuals. -#' The minimum columns required are 'id1', 'id2' and 'code'. -#' @slot scales A data.frame to use for the affection status. -#' This data.frame is generated by the function -#' [generate_aff_inds()] followed by -#' [generate_colors()]. -#' @slot hints List of two elements. -#' - **order** is a numeric vector with one element per subject in the -#' Pedigree. It determines the relative order of subjects within a sibship, as -#' well as the relative order of processing for the founder couples. (For this -#' latter, the female founders are ordered as though they were sisters). -#' - **spouse** is a matrix with one row per hinted marriage, usually -#' only a few marriages in a Pedigree will need an added hint, for -#' instance reverse the plot order of a husband/wife pair. -#' Each row contains the index of the left spouse, the right hand spouse, -#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). -#' -#' @return A Pedigree object. -#' @seealso [Pedigree()] -#' @docType class -#' @name Pedigree-class -#' @rdname Pedigree-class -#' @aliases Pedigree-class -#' @include validity.R -#' @include Pedigree.R -#' @export -setClass( - "Pedigree", - slots = c( - ped = "data.frame", - rel = "data.frame", - scales = "list", - hints = "list" - ) -) - -setValidity("Pedigree", is_valid) - -#### S4 Accessors #### - -#' @title Pedigree ped accessors -#' @param object A Pedigree object. -#' @return The slot `ped` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases ped,Pedigree-method -#' @export -setGeneric("ped", function(object){ - standardGeneric("ped") -}) - -setMethod("ped", signature(object = "Pedigree"), function(object) { - object@ped -}) - -setGeneric("ped<-", function(object, value) { - standardGeneric("ped<-") -}) - -setMethod("ped<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@ped <- value - validObject(object) - object -}) - -#' @description Pedigree rel accessors -#' @param object A Pedigree object. -#' @return The slot `rel` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases rel,Pedigree-method -#' @export -setGeneric("rel", function(object){ - standardGeneric("rel") -}) - -setMethod("rel", signature(object = "Pedigree"), function(object) { - object@rel -}) - -setGeneric("rel<-", function(object, value) { - standardGeneric("rel<-") -}) - -setMethod("rel<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@rel <- value - validObject(object) - object -}) - -#' @description Pedigree scales accessors -#' @param object A Pedigree object. -#' @return The slot `scales` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases scales,Pedigree-method -#' @export -setGeneric("scales", function(object){ - standardGeneric("scales") -}) - -setMethod("scales", signature(object = "Pedigree"), function(object) { - object@scales -}) - -setGeneric("scales<-", function(object, value) { - standardGeneric("scales<-") -}) - -setMethod("scales<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@scales <- value - validObject(object) - object -}) - -#' @description Pedigree hints accessors -#' @param object A Pedigree object. -#' @return The slot `hints` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases hints,Pedigree-method -#' @export -setGeneric("hints", function(object){ - standardGeneric("hints") -}) - -setMethod("hints", signature(object = "Pedigree"), function(object) { - object@hints -}) - -setGeneric("hints<-", function(object, value) { - standardGeneric("hints<-") -}) - -setMethod("hints<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@hints <- value - validObject(object) - object -}) - - -#### S4 methods #### - -#' @title Pedigree methods -#' @description Pedigree show method -#' @param object A Pedigree object. -#' @return A character vector with the informations about the object. -#' @rdname extract-methods -#' @aliases show,Pedigree-method -setMethod("show", signature(object = "Pedigree"), function(object) { - nb_fam <- length(levels(as.factor(object@ped$family))) - cat("Pedigree object with", nrow(object@ped), "individuals and", - nrow(object@rel), "special relationships across", nb_fam, "families", - fill = TRUE) -}) - -#' @description Pedigree summary method. -#' @param object A Pedigree object. -#' @return A character vector with the summary of the object. -#' @rdname extract-methods -#' @aliases summary,Pedigree-method -setMethod("summary", signature(object = "Pedigree"), function(object) { - cat("Pedigree object with", nrow(object@ped), "individuals", fill = TRUE) - print(summary(object@ped, maxsum = 5)) - cat("and", nrow(object@rel), "special relationships.", fill = TRUE) - print(summary(object@rel)) - cat("The filling scales columns are:", - levels(as.factor(object@scales$fill$column_values)), fill = TRUE - ) - cat("The border scale column are:", - levels(as.factor(object@scales$border$column)), fill = TRUE - ) -}) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @param ... Other arguments. -#' @rdname extract-methods -#' @return The slot `i` present in the Pedigree object. -setMethod("[[", c(x = "Pedigree", i = "ANY", j = "missing"), - function(x, i, j, ..., drop = TRUE) { - slot(x, i) - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param name A slot name. -#' @rdname extract-methods -#' @return The slot `name` present in the Pedigree object. -#' -setMethod("$", c(x = "Pedigree"), - function(x, name) { - slot(x, name) - } -) - -#' @description Replace parts of a Pedigree object -#' @param x A Pedigree object. -#' @param name A slot name. -#' @param value A vector of values to replace. -#' @rdname extract-methods -#' @return The Pedigree object with the slot `name` replaced by `value`. -setMethod("$<-", c(x = "Pedigree"), - function(x, name, value) { - slot(x, name) <- value - validObject(x) - x - } -) - -#' @description Subset the hints list based on the index given -#' @param hints A list of hints -#' @param index A vector of index -#' @return A list of hints subsetted -#' @rdname extract-methods -#' @aliases sub_sel_hints,Pedigree-method -#' @keywords internal -sub_sel_hints <- function(hints, index) { - if (!is.null(hints$order)) { - temp <- list(order = hints$order[index]) - } else { - temp <- list(order = NULL) - } - - if (!is.null(hints$spouse)) { - indx1 <- match(hints$spouse[, 1], index, nomatch = 0) - indx2 <- match(hints$spouse[, 2], index, nomatch = 0) - keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept - if (any(keep)) { - temp$spouse <- cbind(indx1[keep], indx2[keep], - hints$spouse[keep, 3] - ) - } - } else { - temp$spouse <- NULL - } - temp -} - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @rdname extract-methods -#' @return A Pedigree object subsetted. -setMethod("[", c(x = "Pedigree", i = "ANY", j = "ANY"), - function(x, i, j, drop = TRUE) { - if (is.factor(i)) { - i <- as.character(i) - } - if (is.character(i)) { - i <- which(x$ped$id %in% i) - } - ped_df <- x$ped[i, j, drop = drop] - allId <- unique(c(ped_df$id, ped_df$dadid, ped_df$momid)) - rel_df <- x$rel[x$rel$id1 %in% allId | x$rel$id2 %in% allId, ] - idx <- match(allId, ped_df$id, nomatch = 0) - sub_hints <- sub_sel_hints(x$hints, idx) - fill <- x$scales$fill[x$scales$fill$column_values %in% names(ped_df),] - border <- x$scales$border[x$scales$border$column %in% names(ped_df),] - scales <- list(fill = fill, border = border) - - new_ped <- Pedigree(ped_df, rel_df, scales, - hints = sub_hints, cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @return A Pedigree object subsetted. -#' @rdname extract-methods -setMethod("[", c(x = "Pedigree", i = "missing", j = "ANY"), - function(x, i, j, drop = TRUE) { - ped_df <- x$ped[, j, drop = drop] - new_ped <- Pedigree(ped_df, x$rel, x$scales, - cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @return A Pedigree object subsetted. -#' @rdname extract-methods -setMethod("[", c(x = "Pedigree", i = "ANY", j = "missing"), - function(x, i, j, drop = TRUE) { - if (is.factor(i)) { - i <- as.character(i) - } - if (is.character(i)) { - i <- which(x$ped$id %in% i) - } - ped_df <- x$ped[i,] - allId <- unique(c(ped_df$id, ped_df$dadid, ped_df$momid)) - rel_df <- x$rel[x$rel$id1 %in% allId & x$rel$id2 %in% allId, ] - idx <- match(allId, ped_df$id, nomatch = 0) - sub_hints <- sub_sel_hints(x$hints, idx) - new_ped <- Pedigree(ped_df, rel_df, x$scales, - hints = sub_hints, cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' Convert a Pedigree object to a data.frame -#' @param x A Pedigree object. -#' @return A data.frame with the individuals informations. -#' @docType methods -#' @aliases as.data.frame,Pedigree-method -#' @export -setMethod("as.data.frame", c(x = "Pedigree"), - function(x) { - x$ped - } -) - -#' Convert a Pedigree object to a list -#' @param x A Pedigree object. -#' @return A list with all the slots of the Pedigree object. -#' @docType methods -#' @aliases as.list,Pedigree-method -#' @export -setMethod("as.list", c(x = "Pedigree"), - function(x) { - list( - ped = x$ped, - rel = x$rel, - scales = x$scales, - hints = x$hints - ) - } -) - -#' Compute the length of a Pedigree object -#' @param x A Pedigree object. -#' @return The number of individuals in the Pedigree object. -#' @docType methods -#' @aliases length,Pedigree-method -#' @export -setMethod("length", c(x = "Pedigree"), - function(x) { - nrow(x$ped) - } -) \ No newline at end of file diff --git a/R/plot.R b/R/plot.R index e6879791..3bc23e65 100644 --- a/R/plot.R +++ b/R/plot.R @@ -8,33 +8,43 @@ NULL #' Plot Pedigrees #' #' @description -#' plot objects created with the Pedigree function +#' This function is used to plot a Pedigree object. +#' +#' It is a wrapper for [plot_fromdf()] and [ped_to_plotdf()] as well as +#' [ped_to_legdf()] if `legend = TRUE`. #' #' @details #' Two important parameters control the looks of the result. One is the user #' specified maximum width. The smallest possible width is the maximum number #' of subjects on a line, if the user's suggestion is too low it is -#' increased to 1 + that amount (to give just a little wiggle room). To make a -#' Pedigree where all children are centered under parents simply make the width -#' large enough, however, the symbols may get very small. +#' increased to 1 + that amount (to give just a little wiggle room). +#' +#' To make a Pedigree where all children are centered under parents simply +#' make the width large enough, however, the symbols may get very small. #' #' The second is `align`, a vector of 2 alignment parameters `a` and #' `b`. #' For each set of siblings at a set of locations `x` and with parents at -#' `p=c(p1,p2)` the alignment penalty is \deqn{(1/k^a)\sum{i=1}{k} [(x_i - -#' (p1+p2)/2)]^2} sum(x- mean(p))^2/(k^a) where k is the number of siblings in -#' the set. +#' `p=c(p1,p2)` the alignment penalty is +#' +#' \deqn{(1/k^a)\sum{i=1}{k} [(x_i - (p1+p2)/2)]^2} +#' +#' \deqn{\sum(x- \overline(p))^2/(k^a)} +#' +#' Where k is the number of siblings in the set. +#' #' When `a = 1` moving a sibship with `k` sibs one unit to the #' left or right of optimal will incur the same cost as moving one with #' only 1 or two sibs out of place. -#' If `a=0` then large sibships are harder to move than small ones, +#' +#' If `a = 0` then large sibships are harder to move than small ones, #' with the default value `a = 1.5` they are slightly easier to move #' than small ones. The rationale for the default is as long as the parents #' are somewhere between the first and last siblings the result looks fairly -#' good, so we are more flexible with the spacing of a large family. By -#' tethering all the sibs to a single spot they are kept close to each other. +#' good, so we are more flexible with the spacing of a large family. +#' By tethering all the sibs to a single spot they are kept close to each other. #' The alignment penalty for spouses is \eqn{b(x_1 - x_2)^2}{b *(x1-x2)^2}, -#' which tends to keep them together. The size of `b` controls the relative +#' which tends to keep them together. The size of `b` controls the relative #' importance of sib-parent and spouse-spouse closeness. #' #' @param x A Pedigree object. @@ -46,7 +56,7 @@ NULL #' @param fam_to_plot default=1. If the Pedigree contains multiple families, #' this parameter can be used to select which family to plot. #' It can be a numeric value or a character value. If numeric, it is the -#' index of the family to plot returned by `unique(x$ped$family)`. +#' index of the family to plot returned by `unique(x$ped$famid)`. #' If character, it is the family id to plot. #' @param legend default=FALSE. If TRUE, a legend will be added to the plot. #' @param leg_cex default=0.8. Controls the size of the legend text. @@ -79,6 +89,7 @@ NULL #' @include plot_fromdf.R #' @aliases plot.Pedigree #' @aliases plot,Pedigree +#' @keywords Pedigree-plot #' @export #' @docType methods setMethod("plot", c(x = "Pedigree", y = "missing"), @@ -89,10 +100,7 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), legend = FALSE, leg_cex = 0.8, leg_symbolsize = 0.5, leg_loc = NULL, leg_adjx = 0, leg_adjy = 0, ... ) { - lst <- ped_to_plotdf(x, packed, width, align, subreg, - cex, symbolsize, pconnect, branch, aff_mark, label, ... - ) - famlist <- unique(x$ped$family) + famlist <- unique(famid(x)) if (length(famlist) > 1) { message("Multiple families present, only plotting family ", fam_to_plot @@ -100,9 +108,13 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), if (is.numeric(fam_to_plot)) { fam_to_plot <- famlist[fam_to_plot] } - lst <- lst[[fam_to_plot]] + x <- x[famid(x) == fam_to_plot] } + lst <- ped_to_plotdf(x, packed, width, align, subreg, + cex, symbolsize, pconnect, branch, aff_mark, label, ... + ) + p <- plot_fromdf(lst$df, usr = lst$par_usr$usr, title = title, ggplot_gen = ggplot_gen, boxw = lst$par_usr$boxw, boxh = lst$par_usr$boxh @@ -121,14 +133,14 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), wh_fr[3] + 0.1, wh_fr[3] + 0.4 ) } - leg$leg_df$x0 <- scales::rescale(leg$leg_df$x0, + leg$df$x0 <- scales::rescale(leg$df$x0, c(leg_loc[1], leg_loc[2]) ) - leg$leg_df$y0 <- scales::rescale(leg$leg_df$y0, + leg$df$y0 <- scales::rescale(leg$df$y0, c(leg_loc[3], leg_loc[4]) ) clip(leg_loc[1] - 1, leg_loc[2] + 1, leg_loc[3] - 1, leg_loc[4] + 1) - plot_fromdf(leg$leg_df, add_to_existing = TRUE, + plot_fromdf(leg$df, add_to_existing = TRUE, boxw = lst$par_usr$boxw * leg_symbolsize, boxh = lst$par_usr$boxh * leg_symbolsize ) diff --git a/R/plot_fct.R b/R/plot_fct.R index 547ec5ac..96729369 100644 --- a/R/plot_fct.R +++ b/R/plot_fct.R @@ -2,16 +2,16 @@ #' @importFrom graphics polygon frame segments NULL -#' Routine to subset a Pedigree +#' Subset a region of a Pedigree #' -#' @param subreg 4-element vector for (min x, max x, min depth, max depth), +#' @param subreg A 4-element vector for (min x, max x, min depth, max depth), #' used to edit away portions of the plot coordinates returned by #' [align()]. #' This is useful for zooming in on a particular region of the Pedigree. #' @inheritParams findspouse #' -#' @return a Pedigree -#' @keywords internal +#' @return A Pedigree structure with the specified region +#' @keywords internal, Pedigree-plot subregion <- function(plist, subreg) { if (subreg[3] < 1 || subreg[4] > length(plist$n)) { stop("Invalid depth indices in subreg") @@ -55,7 +55,9 @@ subregion <- function(plist, subreg) { } n <- max(n2) - out <- list(n = n2[seq_len(n)], nid = nid2[, seq_len(n), drop = FALSE], + out <- list( + n = n2[seq_len(n)], + nid = nid2[, seq_len(n), drop = FALSE], pos = pos2[, seq_len(n), drop = FALSE], spouse = spouse2[, seq_len(n), drop = FALSE], fam = fam2[, seq_len(n), drop = FALSE] @@ -66,14 +68,23 @@ subregion <- function(plist, subreg) { out } # end subregion() -## Plotting function -#' Generate a circular element + +#' Circular element +#' +#' Create a list of x and y coordinates for a circle +#' with a given number of slices. #' -#' @param nslice number of slices in the circle +#' @param nslice Number of slices in the circle #' @param n Total number of points in the circle #' -#' @return a list of x and y coordinates -#' @keywords internal +#' @return A list of x and y coordinates per slice. +#' @keywords internal, Pedigree-plot +#' @examples +#' +#' circfun(1) +#' circfun(1, 10) +#' circfun(4, 50) +#' @export circfun <- function(nslice, n = 50) { nseg <- ceiling(n / nslice) # segments of arc per slice @@ -89,15 +100,25 @@ circfun <- function(nslice, n = 50) { } -## Doc: polyfun -#' Generate a polygonal element +#' Polygonal element #' -#' @param nslice number of slices in the polygon +#' Create a list of x and y coordinates for a polygon +#' with a given number of slices and a list of coordinates +#' for the polygon. +#' +#' @param nslice Number of slices in the polygon #' @param coor Element form which to generate the polygon #' containing x and y coordinates and theta #' #' @return a list of x and y coordinates -#' @keywords internal +#' @keywords internal, Pedigree-plot +#' @examples +#' polyfun(2, list( +#' x = c(-0.5, -0.5, 0.5, 0.5), +#' y = c(-0.5, 0.5, 0.5, -0.5), +#' theta = -c(3, 5, 7, 9) * pi / 4 +#' )) +#' @export polyfun <- function(nslice, coor) { # make the indirect segments view zmat <- matrix(0, ncol = 4, nrow = length(coor$x)) @@ -140,13 +161,22 @@ polyfun <- function(nslice, coor) { out } -#' Create a list of the different polygonal elements +#' List of polygonal elements +#' +#' Create a list of polygonal elements with x, y coordinates +#' and theta for the square, circle, diamond and triangle. +#' The number of slices in each element can be specified. #' -#' @param nslice number of slices in each element +#' @param nslice Number of slices in each element +#' If nslice > 1, the elements are created with [polyfun()]. #' #' @return a list of polygonal elements with x, y coordinates -#' and theta -#' @keywords internal +#' and theta by slice. +#' @keywords internal, Pedigree-plot +#' @examples +#' polygons() +#' polygons(4) +#' @export polygons <- function(nslice = 1) { if (nslice == 1) { polylist <- list( @@ -195,20 +225,21 @@ polygons <- function(nslice = 1) { #'@importFrom ggplot2 geom_polygon aes annotate NULL -#' Draw segments for a Pedigree +#' Draw segments #' #' @param x0 x coordinate of the first point #' @param y0 y coordinate of the first point #' @param x1 x coordinate of the second point #' @param y1 y coordinate of the second point #' @param p ggplot object -#' @param ggplot_gen logical, if TRUE add the segments to the ggplot object -#' @param col line color -#' @param lwd line width -#' @param lty line type +#' @param ggplot_gen If TRUE add the segments to the ggplot object +#' @param col Line color +#' @param lwd Line width +#' @param lty Line type #' -#' @return Plot the segments or add it to a ggplot object -#' @keywords internal +#' @return Plot the segments to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_segment <- function( x0, y0, x1, y1, p, ggplot_gen, @@ -223,62 +254,71 @@ draw_segment <- function( p } -#' Draw a polygon for a Pedigree +#' Draw a polygon #' #' @param x x coordinates #' @param y y coordinates -#' @param fill fill color -#' @param border border color -#' @param density density of shading -#' @param angle angle of shading +#' @param fill Fill color +#' @param border Border color +#' @param density Density of shading +#' @param angle Angle of shading #' @inheritParams draw_segment #' -#' @return Plot the polygon or add it to a ggplot object -#' @keywords internal +#' @return Plot the polygon to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_polygon <- function( x, y, p, ggplot_gen = FALSE, fill = "grey", border = NULL, density = NULL, angle = 45 ) { - polygon(x, y, col = fill, border = border, density = density, angle = angle) + polygon( + x, y, col = fill, border = border, + density = density, angle = angle + ) if (ggplot_gen) { - p <- p + geom_polygon(aes(x = x, y = y), fill = fill, color = border) + p <- p + + geom_polygon( + aes(x = x, y = y), fill = fill, color = border + ) # To add pattern stripes use ggpattern::geom_polygon_pattern # pattern_density = density[i], pattern_angle = angle[i])) } p } -#' Draw text for a Pedigree +#' Draw texts #' -#' @param label text to be displayed -#' @param cex character expansion of the text -#' @param col text color +#' @param label Text to be displayed +#' @param cex Character expansion of the text +#' @param col Text color #' @param adjx x adjustment #' @param adjy y adjustment #' @inheritParams draw_segment #' @inheritParams draw_polygon #' -#' @return Plot the text or add it to a ggplot object -#' @keywords internal +#' @return Plot the text to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_text <- function(x, y, label, p, ggplot_gen = FALSE, cex = 1, col = NULL, adjx = 0, adjy = 0 ) { text(x, y, label, cex = cex, col = col, adj = c(adjx, adjy)) if (ggplot_gen) { p <- p + annotate( - "text", x = x, y = y, label = label, size = cex / 0.3, color = col + "text", x = x, y = y, label = label, + size = cex / 0.3, color = col ) } p } -## Doc: 4 arcs for multiple instances of subj -#' Draw arcs for multiple instances of a subject +#' Draw arcs #' #' @inheritParams draw_segment #' -#' @return Plot the arcs or add it to a ggplot object -#' @keywords internal +#' @return Plot the arcs to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_arc <- function(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black" ) { @@ -293,16 +333,17 @@ draw_arc <- function(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, #' Set plotting area #' -#' @param cex character expansion of the text -#' @param maxlev maximum level -#' @param xrange range of x values -#' @param symbolsize size of the symbols -#' @param ... other arguments passed to [par()] -#' @inheritParams is_parent +#' @param id A character vector with the identifiers of each individuals +#' @param cex Character expansion of the text +#' @param maxlev Maximum level +#' @param xrange Range of x values +#' @param symbolsize Size of the symbols +#' @param ... Other arguments passed to [par()] #' -#' @return a list of user coordinates, old par, box width, box height, +#' @return List of user coordinates, old par, box width, box height, #' label height and leg height -#' @keywords internal +#' +#' @keywords internal, Pedigree-plot set_plot_area <- function(cex, id, maxlev, xrange, symbolsize, ...) { old_par <- par(xpd = TRUE, ...) ## took out mar=mar psize <- par("pin") # plot region in inches diff --git a/R/plot_fromdf.R b/R/plot_fromdf.R index a72fa416..3774d7c9 100644 --- a/R/plot_fromdf.R +++ b/R/plot_fromdf.R @@ -6,9 +6,10 @@ NULL #' Create a plot from a data.frame #' #' @description -#' This function is used to create a plot from a data.frame. If ggplot_gen is -#' set to TRUE, the plot will be generated with ggplot2 and will be returned -#' invisibly. +#' This function is used to create a plot from a data.frame. +#' +#' If `ggplot_gen = TRUE`, the plot will be generated with ggplot2 and +#' will be returned invisibly. #' #' @param df A data.frame with the following columns: #' - `type`: The type of element to plot. Can be `text`, @@ -36,17 +37,22 @@ NULL #' @param title The title of the plot. #' @param add_to_existing If `TRUE`, the plot will be added to the current #' plot. -#' @param boxh Height of the legend boxes -#' @param boxw Width of the legend boxes +#' @param boxh Height of the polygons elements +#' @param boxw Width of the polygons elements #' @inheritParams draw_segment #' @inheritParams ped_to_plotdf #' @include plot_fct.R +#' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == 1,]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) #' lst <- ped_to_plotdf(ped1) -#' #plot_fromdf(lst$df, lst$usr) +#' #plot_fromdf(lst$df, lst$par_usr$usr, +#' # boxw = lst$par_usr$boxw, boxh = lst$par_usr$boxh +#' #) +#' #' @return an invisible ggplot object and a plot on the current plotting device +#' @keywords internal, Pedigree-plot #' @export plot_fromdf <- function( df, usr = NULL, title = NULL, ggplot_gen = FALSE, boxw = 1, diff --git a/R/shrink.R b/R/shrink.R index 86aadeae..432b4b29 100644 --- a/R/shrink.R +++ b/R/shrink.R @@ -6,6 +6,9 @@ #' Pedigree condensed to a minimally informative size for algorithms or testing #' that are limited by size of the Pedigree. #' +#' If **avail** or **affected** are `NULL`, they are extracted with their +#' corresponding accessors from the Ped object. +#' #' @details #' Iteratively remove subjects from the Pedigree. The random removal of members #' was previously controlled by a seed argument, but we remove this, forcing @@ -14,11 +17,13 @@ #' Next, available terminal subjects with unknown phenotype if both parents #' available. Last, iteratively shrinks Pedigrees by preferentially removing #' individuals (chosen at random if there are multiple of the same status): +#' #' 1. Subjects with unknown affected status #' 2. Subjects with unaffected affected status #' 3. Affected subjects. #' -#' @inheritParams align +#' @param obj A Pedigree or Ped object. +#' @inheritParams Ped #' @inheritParams is_informative #' @param max_bits Optional, the bit size for which to shrink the Pedigree #' @@ -34,119 +39,150 @@ #' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == '1',]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == '1',]) #' shrink(ped1, max_bits = 12) #' -#' @author Original by Dan Schaid, updated by Jason Sinnwell +#' @author Original by Dan Schaid, +#' updated by Jason Sinnwell and Louis Le Nézet +#' @keywords shrink #' @seealso [Pedigree()], [bit_size()] #' @export -shrink <- function( - ped, avail = ped(ped)$avail, affected = ped(ped)$affected, max_bits = 16 -) { - if (any(is.na(avail))) { - stop("NA values not allowed in avail vector.") +#' @usage NULL +setGeneric("shrink", signature = "obj", + function(obj, ...) standardGeneric("shrink") +) + +#' @rdname shrink +#' @export +setMethod("shrink", "Pedigree", + function(obj, avail = NULL, affected = NULL, max_bits = 16) { + lst_trim <- shrink(ped(obj), + avail = avail, + affected = affected, + max_bits = max_bits + ) + all_ids <- id(lst_trim$pedObj) + lst_trim$pedObj <- subset( + obj, id(ped(obj)) %in% all_ids, del_parents = TRUE + ) + lst_trim } +) - id_trim <- numeric() - id_lst <- list() - n_origin <- length(ped(ped)$id) +#' @rdname shrink +#' @export +setMethod("shrink", "Ped", + function(obj, avail = NULL, affected = NULL, max_bits = 16) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + affected <- affected(obj) + } + if (any(is.na(avail))) { + stop("NA values not allowed in avail vector.") + } - bitsize_old <- bit_size(ped)$bit_size + id_trim <- numeric() + id_lst <- list() + n_origin <- length(obj) - ## first find unavailable subjects to remove anyone who is not available - ## and does not have an available descendant + bitsize_old <- bit_size(obj)$bit_size - id_trim_unav <- find_unavailable(ped, avail) + ## first find unavailable subjects to remove anyone who is not available + ## and does not have an available descendant + id_trim_unav <- find_unavailable(obj, avail) - if (length(id_trim_unav)) { - ped_trim <- trim(ped, id_trim_unav) - avail <- avail[match(ped_trim$ped$id, ped(ped)$id)] - id_trim <- c(id_trim, id_trim_unav) - id_lst$unavail <- id_trim_unav + if (length(id_trim_unav)) { + ped_trim <- subset( + obj, id_trim_unav, keep = FALSE, del_parents = TRUE + ) + avail <- avail[match(id(ped_trim), id(obj))] + id_trim <- c(id_trim, id_trim_unav) + id_lst$unavail <- id_trim_unav - } else { - ## no trimming, reset to original ped - ped_trim <- ped - } + } else { + ## no trimming, reset to original ped + ped_trim <- obj + } - ## Next trim any available terminal subjects with unknown phenotype but - ## only if both parents are available + ## Next trim any available terminal subjects with unknown phenotype but + ## only if both parents are available - ## added n_new>0 check because no need to trim anymore if empty ped + ## added n_new>0 check because no need to trim anymore if empty ped - n_chg <- 1 - id_lst$noninform <- NULL - n_new <- length(ped_trim$ped$id) + n_chg <- 1 + id_lst$noninform <- NULL + n_new <- length(ped_trim) - while (n_chg > 0 && n_new > 0) { - n_old <- length(ped_trim$ped$id) + while (n_chg > 0 && n_new > 0) { + n_old <- length(ped_trim) - ## find_avail_noninform finds non-informative, - ## but after suggesting their removal, - ## checks for more unavailable subjects before returning - id_trim_noninf <- find_avail_noninform(ped_trim, avail) + ## find_avail_noninform finds non-informative, + ## but after suggesting their removal, + ## checks for more unavailable subjects before returning + id_trim_noninf <- find_avail_noninform(ped_trim, avail) - if (length(id_trim_noninf)) { - ped_new <- trim(ped_trim, id_trim_noninf) - avail <- avail[match(ped_new$ped$id, ped_trim$ped$id)] - id_trim <- c(id_trim, id_trim_noninf) - id_lst$noninform <- c(id_lst$noninform, id_trim_noninf) - ped_trim <- ped_new + if (length(id_trim_noninf)) { + ped_new <- subset(ped_trim, id_trim_noninf, keep = FALSE) + avail <- avail[match(id(ped_new), id(ped_trim))] + id_trim <- c(id_trim, id_trim_noninf) + id_lst$noninform <- c(id_lst$noninform, id_trim_noninf) + ped_trim <- ped_new + } + n_new <- length(ped_trim) + n_chg <- n_old - n_new } - n_new <- length(ped_trim$ped$id) - n_chg <- n_old - n_new - } - - ## Determine number of subjects & bit_size after initial trimming - n_inter <- length(ped_trim$ped$id) - - bit_size <- bit_size(ped_trim)$bit_size - ## Now sequentially shrink to fit bit_size <= max_bits + ## Determine number of subjects & bit_size after initial trimming + n_inter <- length(ped_trim) - bitsize_vec <- c(bitsize_old, bit_size) + bit_size <- bit_size(ped_trim)$bit_size - is_trim <- TRUE - id_lst$affect <- NULL + ## Now sequentially shrink to fit bit_size <= max_bits - while (is_trim && (bit_size > max_bits)) { - ## First, try trimming by unknown status - save <- find_avail_affected(ped_trim, avail, affstatus = NA) - is_trim <- save$is_trim - - ## Second, try trimming by unaffected status if no unknowns to trim - if (!is_trim) { - save <- find_avail_affected(ped_trim, avail, affstatus = 0) - is_trim <- save$is_trim - } + bitsize_vec <- c(bitsize_old, bit_size) + is_trim <- TRUE + id_lst$affect <- NULL - ## Third, try trimming by affected status if no unknowns & no - ## unaffecteds to trim - if (!is_trim) { - save <- find_avail_affected(ped_trim, avail, affstatus = 1) + while (is_trim && (bit_size > max_bits)) { + ## First, try trimming by unknown status + save <- find_avail_affected(ped_trim, avail, affstatus = NA) is_trim <- save$is_trim - } - if (is_trim) { - ped_trim <- save$ped - avail <- save$new_avail - bit_size <- save$bit_size - bitsize_vec <- c(bitsize_vec, bit_size) - id_trim <- c(id_trim, save$id_trim) - id_lst$affect <- c(id_lst$affect, save$id_trim) + ## Second, try trimming by unaffected status if no unknowns to trim + if (!is_trim) { + save <- find_avail_affected(ped_trim, avail, affstatus = 0) + is_trim <- save$is_trim + } + + ## Third, try trimming by affected status if no unknowns & no + ## unaffecteds to trim + if (!is_trim) { + save <- find_avail_affected(ped_trim, avail, affstatus = 1) + is_trim <- save$is_trim + } + + if (is_trim) { + ped_trim <- save$ped + avail <- save$new_avail + bit_size <- save$bit_size + bitsize_vec <- c(bitsize_vec, bit_size) + id_trim <- c(id_trim, save$id_trim) + id_lst$affect <- c(id_lst$affect, save$id_trim) + } } - } - ## end while (is_trim) & (bit_size > max_bits) + ## end while (is_trim) & (bit_size > max_bits) - n_final <- length(ped_trim$ped$id) + n_final <- length(ped_trim) - obj <- list(pedObj = ped_trim, id_trim = id_trim, id_lst = id_lst, - bit_size = bitsize_vec, avail = avail, pedSizeOriginal = n_origin, - pedSizeIntermed = n_inter, pedSizeFinal = n_final - ) + obj <- list( + pedObj = ped_trim, id_trim = id_trim, id_lst = id_lst, + bit_size = bitsize_vec, avail = avail, pedSizeOriginal = n_origin, + pedSizeIntermed = n_inter, pedSizeFinal = n_final + ) - obj -} - -TRUE \ No newline at end of file + obj + } +) diff --git a/R/trim.R b/R/trim.R deleted file mode 100644 index 7f62322e..00000000 --- a/R/trim.R +++ /dev/null @@ -1,30 +0,0 @@ -# Automatically generated from all.nw using noweb TODO add documentation -#' Trim a Pedigree -#' -#' Carries out the removal of the subjects identified from a Pedigree object. -#' -#' @inheritParams align -#' @inheritParams is_parent -#' @param id_rm Vector of ids to remove -#' -#' @return A Pedigree object with the subjects removed -#' -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' trim(ped1, "1_101") -#' @export -trim <- function(ped, id_rm, missid = "0") { - ## trim subjects from a Pedigree who match the removeID trim relation - ## matrix as well - rmidx <- match(id_rm, ped(ped)$id) - if (length(rmidx) > 0) { - ped(ped)[ped(ped)$dadid %in% id_rm | - ped(ped)$momid %in% id_rm, - c("dadid", "momid")] <- missid - ped[-rmidx, ] - } else { - ped - } -} -TRUE diff --git a/R/unrelated.R b/R/unrelated.R index c6e315d4..e631bf57 100644 --- a/R/unrelated.R +++ b/R/unrelated.R @@ -1,10 +1,7 @@ #' @importFrom stats runif NULL -# Automatically generated from all.nw using noweb Authors: Dan Schaid, Shannon -# McDonnell Updated by Jason Sinnwell - -#' Get unrelated subjects +#' Find Unrelated subjects #' #' @description #' Determine set of maximum number of unrelated available subjects from a @@ -13,7 +10,8 @@ NULL #' @details #' Determine set of maximum number of unrelated available subjects from a #' Pedigree, given vectors id, father, and mother for a Pedigree structure, and -#' status vector of T/F for whether each subject is available (e.g. has DNA) +#' status vector of `TRUE` / `FALSE` for whether each subject is +#' available (e.g. has DNA). #' #' This is a greedy algorithm that uses the kinship matrix, sequentially #' removing rows/cols that are non-zero for subjects that have the most number @@ -23,79 +21,101 @@ NULL #' zeros for rows, a random choice is made. Hence, running this function #' multiple times can return different sets of unrelated subjects. #' -#' @inheritParams align -#' @inheritParams is_informative +#' If **avail** is `NULL`, it is extracted with its +#' corresponding accessor from the Ped object. +#' +#' @inheritParams shrink #' #' @return A vector of the ids of subjects that are unrelated. #' @examples -#' data(sampleped) -#' fam1 <- sampleped[sampleped$family == 1, ] -#' #' +#' data(sampleped) +#' fam1 <- sampleped[sampleped$famid == 1, ] #' ped1 <- Pedigree(fam1) -#' -#' ## to see plot: -#' id1 <- unrelated(ped1) -#' -#' id1 +#' unrelated(ped1) #' ## some possible vectors #' ## [1] '110' '113' '133' '109' #' ## [1] '113' '118' '141' '109' #' ## [1] '113' '118' '140' '109' #' ## [1] '110' '113' '116' '109' #' ## [1] '113' '133' '141' '109' +#' +#' @author Dan Schaid and Shannon McDonnell updated by Jason Sinnwell +#' @export +#' @usage NULL +setGeneric("unrelated", signature = "obj", + function(obj, ...) standardGeneric("unrelated") +) + +#' @rdname unrelated #' @export -unrelated <- function(ped, avail = ped(ped)$avail) { - # Requires: kinship function +setMethod("unrelated", "Ped", + function(obj, avail = NULL) { + if (is.null(avail)) { + avail <- avail(obj) + } + # Requires: kinship function - # Given vectors id, father, and mother for a Pedigree structure, and avail - # = vector of T/F or 1/0 for whether each subject (corresponding to id - # vector) is available (e.g., has DNA available), determine set of maximum - # number of unrelated available subjects from a Pedigree. + # Given vectors id, father, and mother for a Pedigree structure, + # and avail = vector of T/F or 1/0 for whether each subject + # (corresponding to id vector) is available + # (e.g., has DNA available), determine set of maximum + # number of unrelated available subjects from a Pedigree. - # This is a greedy algorithm that uses the kinship matrix, sequentially - # removing rows/cols that are non-zero for subjects that have the most - # number of zero kinship coefficients (greedy by choosing a row of kinship - # matrix that has the most number of zeros, and then remove any cols and - # their corresponding rows that are non-zero. To account for ties of the - # count of zeros for rows, a random choice is made. Hence, running this - # function multiple times can return different sets of unrelated subjects. + # This is a greedy algorithm that uses the kinship matrix, sequentially + # removing rows/cols that are non-zero for subjects that have the most + # number of zero kinship coefficients + # (greedy by choosing a row of kinship matrix that has the most number + # of zeros, and then remove any cols and their corresponding rows that + # are non-zero). + # To account for ties of the count of zeros for rows, a random choice + # is made. + # Hence, running this function multiple times can return different + # sets of unrelated subjects. - id <- ped(ped)$id + id <- id(obj) - kin <- kinship(ped) + kin <- kinship(obj) - ord <- order(id) - id <- id[ord] - avail <- as.logical(avail[ord]) - kin <- kin[ord, ][, ord] + ord <- order(id) + id <- id[ord] + avail <- as.logical(avail[ord]) + kin <- kin[ord, ][, ord] - rord <- order(runif(nrow(kin))) + rord <- order(runif(nrow(kin))) - id <- id[rord] - avail <- avail[rord] - kin <- kin[rord, ][, rord] + id <- id[rord] + avail <- avail[rord] + kin <- kin[rord, ][, rord] - kin_avail <- kin[avail, , drop = FALSE][, avail, drop = FALSE] + kin_avail <- kin[avail, , drop = FALSE][, avail, drop = FALSE] - diag(kin_avail) <- 0 + diag(kin_avail) <- 0 - while (any(kin_avail > 0)) { - nr <- nrow(kin_avail) - indx <- seq_len(nrow(kin_avail)) - zero_count <- apply(kin_avail == 0, 1, sum) + while (any(kin_avail > 0)) { + nr <- nrow(kin_avail) + indx <- seq_len(nrow(kin_avail)) + zero_count <- apply(kin_avail == 0, 1, sum) - mx <- max(zero_count[zero_count < nr]) - zero_mx <- indx[zero_count == mx][1] + mx <- max(zero_count[zero_count < nr]) + zero_mx <- indx[zero_count == mx][1] - exclude <- indx[kin_avail[, zero_mx] > 0] + exclude <- indx[kin_avail[, zero_mx] > 0] - kin_avail <- kin_avail[-exclude, , drop = FALSE][ - , -exclude, drop = FALSE - ] + kin_avail <- kin_avail[-exclude, , drop = FALSE][ + , -exclude, drop = FALSE + ] + } + + sort(dimnames(kin_avail)[[1]]) } +) - sort(dimnames(kin_avail)[[1]]) -} -TRUE +#' @rdname unrelated +#' @export +setMethod("unrelated", "Pedigree", + function(obj, avail = NULL) { + unrelated(ped(obj), avail = avail) + } +) diff --git a/R/useful_inds.R b/R/useful_inds.R index aea76f38..948bcc25 100644 --- a/R/useful_inds.R +++ b/R/useful_inds.R @@ -1,65 +1,58 @@ -#' Compute the usefulness of individuals +#' Usefulness of individuals #' -#' @description Check for usefulness of individuals +#' @description Compute the usefulness of individuals #' #' @details Check for the informativeness of the individuals based on the #' informative parameter given, the number of children and the usefulness -#' of their parents. A `useful` column is added to the dataframe with the +#' of their parents. A `useful` slot is added to the Ped object with the #' usefulness of the individual. This boolean is hereditary. #' #' @param num_child_tot A numeric vector of the number of children of each #' individuals #' @param keep_infos Boolean to indicate if individuals with unknown status #' but available or reverse should be kept +#' @inheritParams Ped #' @inheritParams is_informative -#' @inheritParams num_child -#' @inheritParams kinship -#' @inheritParams is_parent #' #' @return #' ## When obj is a vector #' A vector of useful individuals identifiers #' -#' ## When obj is a Pedigree -#' The Pedigree object with a new column named 'useful' containing 1 for -#' useful individuals and 0 otherwise. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ped1 <- num_child(ped1) -#' useful_inds(ped1, informative = "AvAf")$ped +#' ## When obj is a Pedigree or Ped object +#' The Pedigree or Ped object with the slot 'useful' containing `TRUE` for +#' useful individuals and `FALSE` otherwise. +#' @keywords shrink #' @export +#' @usage NULL setGeneric("useful_inds", signature = "obj", function(obj, ...) standardGeneric("useful_inds") ) #' @include is_informative.R -#' @export #' @rdname useful_inds -#' @docType methods -#' @aliases useful_inds,character +#' @export setMethod("useful_inds", "character", function(obj, dadid, momid, avail, affected, num_child_tot, - informative = "AvAf", keep_infos = FALSE, missid = "0" + informative = "AvAf", keep_infos = FALSE ) { id <- obj # Get informative individuals id_inf <- is_informative(id, avail, affected, - informative, missid + informative ) - is_inf <- id %in% id_inf + isinf <- id %in% id_inf # Keep individual affected or available if (keep_infos) { - is_inf <- is_inf | + isinf <- isinf | (!is.na(affected) & affected == 1) | (!is.na(avail) & avail == 1) } # Check if parents participate to the Pedigree structure ped_part <- num_child_tot > 1 - to_kept <- is_inf | ped_part + to_kept <- isinf | ped_part num_ind_old <- 0 num_ind_new <- length(id[to_kept]) @@ -80,26 +73,45 @@ setMethod("useful_inds", "character", } ) -#' @docType methods -#' @aliases useful_inds,Pedigree -#' @export #' @rdname useful_inds #' @param reset Boolean to indicate if the `useful` column should be reset +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' ped(useful_inds(ped1, informative = "AvAf")) +#' @export setMethod("useful_inds", "Pedigree", function(obj, - informative = "AvAf", keep_infos = FALSE, - missid = "0", reset = FALSE + informative = "AvAf", keep_infos = FALSE, reset = FALSE ) { - cols_needed <- c( - "avail", "affected", "num_child_tot" + new_ped <- useful_inds(ped(obj), + informative, keep_infos, reset ) - check_columns(obj$ped, cols_needed, "", "", others_cols = TRUE) - useful <- useful_inds(obj$ped$id, obj$ped$dadid, obj$ped$momid, - obj$ped$avail, obj$ped$affected, obj$ped$num_child_tot, - informative, keep_infos, missid + + obj@ped <- new_ped + validObject(obj) + obj +}) + +#' @rdname useful_inds +#' @export +setMethod("useful_inds", "Ped", function(obj, + informative = "AvAf", keep_infos = FALSE, reset = FALSE +) { + useful <- useful_inds(id(obj), dadid(obj), momid(obj), + avail(obj), affected(obj), obj@num_child_tot, + informative, keep_infos ) - if (!reset) { - check_columns(obj$ped, NULL, "useful", NULL, others_cols = TRUE) + + if (!reset & any(!is.na(useful(obj)))) { + stop( + "The useful slot already has values in the Ped object", + " and reset is set to FALSE" + ) } - obj$ped$useful <- ifelse(obj$ped$id %in% useful, 1, 0) + obj@useful <- vect_to_binary( + ifelse(id(obj) %in% useful, 1, 0), logical = TRUE + ) + validObject(obj) obj }) diff --git a/R/utils.R b/R/utils.R index e26e2659..9cb63e56 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,7 +1,7 @@ #' @importFrom dplyr select one_of %>% NULL -#' Check for columns name usage +#' Check columns presence in a dataframe #' #' @description Check for presence / absence of columns names #' depending on their need @@ -38,8 +38,11 @@ NULL #' ColU1 = 'B', ColU2 = '1', #' ColTU1 = 'A', ColTU2 = 3, #' ColNR1 = 4, ColNR2 = 5) -#' tryCatch(check_columns(df, c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), -#' c('ColTU1', 'ColTU2')), error = function(e) print(e)) +#' tryCatch( +#' check_columns(df, +#' c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), +#' c('ColTU1', 'ColTU2') +#' ), error = function(e) print(e)) #' #' @keywords internal check_columns <- function( @@ -125,18 +128,18 @@ check_columns <- function( #'@importFrom stringr str_detect NULL -#' Check is numeric +#' Is numeric or NA #' #' @description Check if a variable given is numeric or NA #' -#' @details Check if the values in `var` are numeric or if they are -#' NA in the case that `na_as_num` is set to TRUE. +#' @details Check if the values in **var** are numeric or if they are +#' `NA` in the case that `na_as_num` is set to TRUE. #' #' @param var Vector of value to test #' @param na_as_num Boolean defining if the `NA` string should be #' considered as numerical values #' -#' @return A vector of boolean of the same size as `var` +#' @return A vector of boolean of the same size as **var** #' @keywords internal check_num_na <- function(var, na_as_num = TRUE) { # Should the NA value considered as numeric values @@ -150,66 +153,95 @@ check_num_na <- function(var, na_as_num = TRUE) { } -#' Check wich individuals are parents +#' Are individuals parents #' #' @description Check which individuals are parents. #' -#' @param id A vector of each subjects identifiers -#' @param dadid A vector containing for each subject, the identifiers of the -#' biologicals fathers. -#' @param momid vector containing for each subject, the identifiers of the -#' biologicals mothers. -#' @param missid The missing identifier value. Founders are the individuals with -#' no father and no mother in the Pedigree -#' (i.e. `dadid` and `momid` equal to the value of this variable). -#' The default for `missid` is `"0"`. -#' -#' @return A vector of boolean of the same size as `id` -#' with TRUE if the individual is a parent and FALSE otherwise +#' @param obj A vector of each subjects identifiers or a Ped object +#' @inheritParams Ped #' +#' @return A vector of boolean of the same size as **obj** +#' with TRUE if the individual is a parent and FALSE otherwise +#' @inheritParams Ped #' @keywords internal -is_parent <- function(id, dadid, momid, missid = "0") { - # determine subjects who are parents assume input of dadid/momid indices, - # not ids +#' @usage NULL +setGeneric("is_parent", signature = "obj", + function(obj, ...) standardGeneric("is_parent") +) - if (length(id) != length(dadid) | length(id) != length(momid)) { - stop("The length of the vectors are not the same") +#' @rdname is_parent +#' @examples +#' +#' is_parent(c("1", "2", "3", "4"), c("3", "3", NA, NA), c("4", "4", NA, NA)) +#' @export +setMethod("is_parent", "character_OR_integer", + function(obj, dadid, momid, missid = NA_character_) { + # determine subjects who are parents assume input of + # dadid/momid indices, not ids + + if (length(obj) != length(dadid) | length(obj) != length(momid)) { + stop("The length of the vectors are not the same") + } + + is_father <- !is.na(match(obj, unique(dadid[!dadid %in% missid]))) + is_mother <- !is.na(match(obj, unique(momid[!momid %in% missid]))) + is_father | is_mother } +) - is_father <- !is.na(match(id, unique(dadid[dadid != missid]))) - is_mother <- !is.na(match(id, unique(momid[momid != missid]))) - is_father | is_mother -} +#' @rdname is_parent +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' is_parent(ped(ped)) +#' @export +setMethod("is_parent", "Ped", + function(obj, missid = NA_character_) { + is_parent(id(obj), dadid(obj), momid(obj), missid) + } +) -#' Check wich individuals are founders +#' Are individuals founders #' #' @description Check which individuals are founders. #' -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A vector of boolean of the same size as `dadid` and `momid` -#' with TRUE if the individual has no parents (i.e is a founder) and FALSE -#' otherwise. +#' @return A vector of boolean of the same size as **dadid** and **momid** +#' with `TRUE` if the individual has no parents (i.e is a founder) and +#' `FALSE` otherwise. #' +#' @examples +#' is_founder(c("3", "3", NA, NA), c("4", "4", NA, NA)) #' @keywords internal -is_founder <- function(momid, dadid, missid = "0") { - (dadid == missid) & (momid == missid) +#' @export +is_founder <- function(momid, dadid, missid = NA_character_) { + (dadid %in% missid) & (momid %in% missid) } -#' Check wich individuals are disconnected +#' Are individuals disconnected #' #' @description Check which individuals are disconnected. #' #' @details An individuals is considered disconnected if the kinship with -#' all the other individuals is 0. +#' all the other individuals is `0`. #' -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A vector of boolean of the same size as `id` -#' with TRUE if the individual is disconnected and FALSE otherwise +#' @return A vector of boolean of the same size as **id** +#' with `TRUE` if the individual is disconnected and +#' `FALSE` otherwise #' #' @include kinship.R #' @keywords internal +#' @examples +#' is_disconnected( +#' c("1", "2", "3", "4", "5"), +#' c("3", "3", NA, NA, NA), +#' c("4", "4", NA, NA, NA) +#' ) +#' @export is_disconnected <- function(id, dadid, momid) { # check to see if any subjects are disconnected in Pedigree by checking for # kinship = 0 for all subjects excluding self @@ -221,13 +253,9 @@ is_disconnected <- function(id, dadid, momid) { #' @importFrom plyr revalue NULL -#' Transform a gender variable to an ordered factor +#' Gender variable to ordered factor #' -#' @param sex A character, factor or numeric vector corresponding to -#' the gender of the individuals. The following values are recognized: -#' - character() or factor() : "f", "m", "woman", "man", "male", "female", -#' "unknown", "terminated" -#' - numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +#' @inheritParams Ped #' #' @return an ordered factor vector containing the transformed variable #' "male" < "female" < "unknown" < "terminated" @@ -257,65 +285,61 @@ sex_to_factor <- function(sex) { #' @importFrom stringr str_remove_all NULL -#' Transform a relationship code variable to an ordered factor -#' -#' @param rel_code A character, factor or numeric vector corresponding to -#' the relation code of the individuals: -#' - MZ twin = Monozygotic twin -#' - DZ twin = Dizygotic twin -#' - UZ twin = twin of unknown zygosity -#' - Spouse = Spouse -#' The following values are recognized: -#' - character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with -#' of without space between the words. The case is not important. -#' - numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +#' Relationship code variable to ordered factor +#' +#' @inheritParams Rel #' #' @return an ordered factor vector containing the transformed variable #' "MZ twin" < "DZ twin" < "UZ twin" < "Spouse" #' @examples #' rel_code_to_factor(c(1, 2, 3, 4, "MZ twin", "DZ twin", "UZ twin", "Spouse")) #' @export -rel_code_to_factor <- function(rel_code) { - if (is.factor(rel_code) || is.numeric(rel_code)) { - rel_code <- as.character(rel_code) +rel_code_to_factor <- function(code) { + if (is.factor(code) || is.numeric(code)) { + code <- as.character(code) } ## Normalized difference notations for code code_equiv <- c( mztwin = "MZ twin", dztwin = "DZ twin", uztwin = "UZ twin", spouse = "Spouse", - `1` = "MZ twin", `2` = "DZ twin", `3` = "UZ twin", `4` = "Spouse" + "1" = "MZ twin", "2" = "DZ twin", "3" = "UZ twin", "4" = "Spouse" ) codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") - rel_code <- as.character(revalue(as.factor( + code <- as.character(revalue(as.factor( str_remove_all( - casefold(as.character(rel_code), upper = FALSE), + casefold(as.character(code), upper = FALSE), " " ) ), code_equiv, warn_missing = FALSE)) - rel_code <- factor(rel_code, codes, ordered = TRUE) - rel_code + code <- factor(code, codes, ordered = TRUE) + code } -TRUE -#' Transform a vector variable to binary vector +#' Vector variable to binary vector +#' +#' @description Transform a vector to a binary vector. +#' All values that are not `0`, `1`, `TRUE`, `FALSE`, or `NA` +#' are transformed to `NA`. #' #' @param vect A character, factor, logical or numeric vector corresponding to -#' a binary variable (i.e. 0 or 1). +#' a binary variable (i.e. `0` or `1`). #' The following values are recognized: #' - character() or factor() : "TRUE", "FALSE", "0", "1", "NA" will be -#' respectively transformed to 1, 0, 0, 1, NA. +#' respectively transformed to `1`, `0`, `0`, `1`, `NA`. #' Spaces and case are ignored. #' All other values will be transformed to NA. -#' - numeric() : 0 and 1 are kept, all other values are transformed to NA. -#' - logical() : TRUE and FALSE are tansformed to 1 and 0. -#' -#' @return numeric binary vector of the same size as `vect` with 0 and 1 +#' - numeric() : `0` and `1` are kept, all other values are transformed to NA. +#' - logical() : `TRUE` and `FALSE` are tansformed to `1` and `0`. +#' @param logical Boolean defining if the output should be a logical vector +#' instead of a numeric vector (i.e. `0` and `1` becomes `FALSE` and `TRUE). +#' @return numeric binary vector of the same size as **vect** +#' with `0` and `1` #' @examples #' vect_to_binary( #' c(0, 1, 2, 3.6, "TRUE", "FALSE", "0", "1", "NA", "B", TRUE, FALSE, NA) #' ) #' @export -vect_to_binary <- function(vect) { +vect_to_binary <- function(vect, logical = FALSE) { if (is.factor(vect) || is.numeric(vect) || is.logical(vect)) { vect <- as.character(vect) } @@ -330,6 +354,45 @@ vect_to_binary <- function(vect) { ), code_equiv, warn_missing = FALSE )) vect[!vect %in% c(0, 1)] <- NA - vect + if (logical) { + as.logical(vect) + } else { + vect + } +} + +#' Anchor variable to ordered factor +#' +#' @param anchor A character, factor or numeric vector corresponding to +#' the anchor of the individuals. The following values are recognized: +#' - character() or factor() : "0", "1", "2", "left", "right", "either" +#' - numeric() : 1 = "left", 2 = "right", 0 = "either" +#' +#' @return An ordered factor vector containing the transformed variable +#' "either" < "left" < "right" +#' @examples +#' anchor_to_factor(c(1, 2, 0, "left", "right", "either")) +#' @export +anchor_to_factor <- function(anchor) { + if (is.factor(anchor) || is.numeric(anchor)) { + anchor <- as.character(anchor) + } + ## Normalized difference notations for anchor + anchor_equiv <- c( + "0" = "either", "1" = "left", "2" = "right", + "left" = "left", "right" = "right", "either" = "either" + ) + anchor <- as.character(revalue(as.factor( + casefold(anchor, upper = FALSE) + ), anchor_equiv, warn_missing = FALSE)) + anchor_codes <- c("left", "right", "either") + if (any(!anchor %in% anchor_codes)) { + stop(paste( + "The following values are not recognized :", + paste0(unique(anchor[!anchor %in% anchor_codes]), collapse = ", "), + ".\n" + )) + } + + factor(anchor, anchor_codes, ordered = TRUE) } -TRUE diff --git a/R/validity.R b/R/validity.R deleted file mode 100644 index 19709715..00000000 --- a/R/validity.R +++ /dev/null @@ -1,219 +0,0 @@ -#' Print0 to max -#' -#' Print0 the elements inside a vector until a maximum is reached. -#' -#' @param x A vector. -#' @param max The maximum number of elements to print. -#' @param ... Additional arguments passed to print0 -#' -#' @return The character vector aggregated until the maximum is reached. -#' @keywords internal -paste0max <- function(x, max = 5, ...) { - if (length(x) > max) { - paste(paste0(unique(x[seq_len(max)]), collapse = ", ", ...), "...") - } else { - paste0(unique(x), collapse = ", ", ...) - } -} - -#' Check if the fields are present in an object slot -#' -#' @param obj An object. -#' @param slot A slot of object. -#' @param fields A character vector with the fields to check. -#' -#' @return A character vector with the errors if any. -#' @keywords internal -check_slot_fd <- function(obj, slot = NULL, fields = character()) { - if (is.object(obj)) { - obj <- as.list(obj) - } - if (is.data.frame(obj[[slot]])) { - array_names <- colnames(obj[[slot]]) - } else if (is.list(obj[[slot]])) { - array_names <- names(obj[[slot]]) - } else { - stop( - "Slot ", slot, " is not a data.frame or a list.", - class(obj[[slot]]), " found." - ) - } - if (length(array_names) == 0) { - paste0( - "Missing fields in ", slot, - " slot. See Pedigree documentation." - ) - } else if (any(!fields %in% array_names)) { - paste0( - "`", paste0max(fields[!fields %in% array_names]), - "`", " column(s) is not present in slot ", slot, "." - ) - } -} - -#' Check values in a slot -#' -#' Check if the all the values in a slot are in a vector of values. -#' -#' @param obj An object. -#' @param slot A slot of the object. -#' @param column A column of the slot. -#' @param values A vector of values to check. -#' @param present A logical value indicating if the values should be present -#' or not -#' -#' @return A character vector with the errors if any. -#' @keywords internal -check_values <- function(val, ref, present = TRUE) { - if (length(dim(val)) > 1) { - stop("val must be a vector") - } - - if (present) { - val_abs <- !val %in% ref - should <- " should be in " - } else { - val_abs <- val %in% ref - should <- " should not be in " - } - - if (any(val_abs)) { - paste0( - "Values ", paste0max(val[val_abs]), should, - paste0max(ref), "." - ) - } -} - -#' Pedigree validity method. -#' -#' Check if the Pedigree object is valid. -#' -#' It will check : -#' the fields of the slots -#' the values in the columns of the ped, rel and scale slot -#' @param object A Pedigree object. -#' @return A logical value or a character vector with the errors. -#' @keywords internal -is_valid <- function(object) { - missid <- "0" - errors <- c() - - #### Check that the slots have the right columns #### - ped_cols <- c( - "id", "dadid", "momid", "family", - "sex", "steril", "status", "avail", "affected" - ) - rel_cols <- c("id1", "id2", "code", "family") - fill_cols <- c( - "order", "column_values", "column_mods", "mods", - "labels", "affected", "fill", "density", "angle" - ) - border_cols <- c("column", "mods", "labels", "border") - errors <- c(errors, check_slot_fd(object, "ped", ped_cols)) - errors <- c(errors, check_slot_fd(object, "rel", rel_cols)) - errors <- c(errors, check_slot_fd(object, "scales", c("fill", "border"))) - errors <- c(errors, check_slot_fd(object$scales, "fill", fill_cols)) - errors <- c(errors, check_slot_fd(object$scales, "border", border_cols)) - - - #### Check that the ped columns have the right values #### - # Check for ped$id uniqueness - if (any(duplicated(object$ped$id))) { - errors <- c(errors, "Id in ped slot must be unique") - } - - # Control values for ped - errors <- c(errors, check_values(object$ped$id, missid, present = FALSE)) - errors <- c(errors, check_values( - object$ped$dadid, c(object$ped$id, missid) - )) - errors <- c(errors, check_values( - object$ped$momid, c(object$ped$id, missid) - )) - sex_code <- c("male", "female", "unknown", "terminated") - errors <- c(errors, check_values(object$ped$sex, sex_code)) - errors <- c(errors, check_values(object$ped$steril, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$status, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$avail, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$affected, c(0, 1, NA))) - - # Control sex for parents - id <- object$ped$id - momid <- object$ped$momid - dadid <- object$ped$dadid - sex <- object$ped$sex - is_dad <- id %in% dadid - is_mom <- id %in% momid - if (any(sex[is_dad] != "male")) { - errors <- c(errors, "Some dad are not male") - } - if (any(sex[is_mom] != "female")) { - errors <- c(errors, "Some mom are not female") - } - if (any( - (dadid %in% missid & (! momid %in% missid)) | - ((! dadid %in% missid) & momid %in% missid) - )) { - errors <- c(errors, "Individuals should have both parents or none") - } - - #### Check that the rel columns have the right values #### - codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") - errors <- c(errors, check_values(object$rel$code, codes)) - errors <- c(errors, check_values( - object$rel$family, c(object$ped$family, NA) - )) - errors <- c(errors, check_values(object$rel$id1, object$ped$id)) - errors <- c(errors, check_values(object$rel$id2, object$ped$id)) - - # Check if twins has same parents - code <- object$rel$code - ncode <- as.numeric(code) - id1 <- object$rel$id1 - id2 <- object$rel$id2 - temp1 <- match(id1, id, nomatch = 0) - temp2 <- match(id2, id, nomatch = 0) - if (any(ncode < 3)) { - twins <- (ncode < 3) - if (any(momid[temp1[twins]] != momid[temp2[twins]])) { - errors <- c(errors, "twins found with different mothers") - } - if (any(dadid[temp1[twins]] != dadid[temp2[twins]])) { - errors <- c(errors, "twins found with different fathers") - } - } - - # Check if the monozygote twins has same gender - if (any(ncode == 1)) { - mztwins <- (ncode == 1) - if (any(sex[temp1[mztwins]] != sex[temp2[mztwins]])) { - errors <- c(errors, "MZ twins with different genders") - } - } - - # Check that the scales columns have the right values - errors <- c(errors, check_values( - object$scales$fill$column_values, colnames(object$ped) - )) - errors <- c(errors, check_values( - object$scales$fill$column_mods, colnames(object$ped) - )) - errors <- c(errors, check_values( - object$scales$border$column, colnames(object$ped) - )) - - # Check that all modalities are present in the scales - for (col in unique(object$scales$column)){ - errors <- c(errors, check_values( - object$ped$col, - object$scales$fill[object$scales$fill$column_mods == col, "mods"] - )) - } - - if (length(errors) == 0) { - TRUE - } else { - errors - } -} diff --git a/data/sampleped.rda b/data/sampleped.rda index 56622175..d090493d 100644 Binary files a/data/sampleped.rda and b/data/sampleped.rda differ diff --git a/inst/extdata/sampleped.tab b/inst/extdata/sampleped.tab index 043253e7..d2c91583 100644 --- a/inst/extdata/sampleped.tab +++ b/inst/extdata/sampleped.tab @@ -1,4 +1,4 @@ -"family" "id" "dadid" "momid" "sex" "affected" "available" +"famid" "id" "dadid" "momid" "sex" "affection" "avail" "1" 1 101 0 0 1 0 0 "2" 1 102 0 0 2 1 0 "3" 1 103 135 136 1 1 0 diff --git a/inst/script/dataset.R b/inst/script/dataset.R index 3eacf151..e025ea23 100644 --- a/inst/script/dataset.R +++ b/inst/script/dataset.R @@ -7,8 +7,12 @@ sampleped <- read.delim("inst/extdata/sampleped.tab", header = TRUE, sep = " ", stringsAsFactors = FALSE ) -sampleped[c("family", "id", "dadid", "momid")] <- as.data.frame( - lapply(sampleped[c("family", "id", "dadid", "momid")], as.character) +sampleped[c("famid", "id", "dadid", "momid")] <- as.data.frame( + lapply(sampleped[c("famid", "id", "dadid", "momid")], as.character) +) +sampleped <- mutate_if( + sampleped, is.character, + ~replace(., . %in% "0", NA) ) summary(sampleped) usethis::use_data(sampleped, overwrite = TRUE) diff --git a/man/Hints-class.Rd b/man/Hints-class.Rd new file mode 100644 index 00000000..136d404a --- /dev/null +++ b/man/Hints-class.Rd @@ -0,0 +1,145 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Hints-class} +\alias{Hints-class} +\alias{Hints} +\alias{Hints,Hints,missing_OR_NULL-method} +\alias{Hints,list,missing_OR_NULL-method} +\alias{Hints,numeric,data.frame-method} +\alias{Hints,numeric,missing_OR_NULL-method} +\alias{Hints,missing_OR_NULL,missing_OR_NULL-method} +\alias{horder} +\alias{horder,Hints-method} +\alias{horder<-} +\alias{horder<-,Hints-method} +\alias{spouse} +\alias{spouse,Hints-method} +\alias{spouse<-,Hints,data.frame-method} +\alias{as.list,Hints-method} +\alias{subset,Hints-method} +\title{Hints object} +\usage{ +Hints(horder, spouse) + +\S4method{Hints}{list,missing_OR_NULL}(horder, spouse) + +\S4method{Hints}{numeric,data.frame}(horder, spouse) + +\S4method{Hints}{numeric,missing_OR_NULL}(horder, spouse) +} +\arguments{ +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} + +\item{spouse}{A data.frame with one row per hinted marriage, usually only +a few marriages in a pedigree will need an added hint, for instance reverse +the plot order of a husband/wife pair. +Each row contains the id of the left spouse (i.e. \code{idl}), the id of the +right hand spouse (i.e. \code{idr}), and the anchor (i.e : \code{anchor} : +\code{1} = left, \code{2} = right, \code{0} = either). +Children will preferentially appear under the parents of the anchored spouse.} +} +\value{ +A Hints object. +} +\description{ +The hints are used to specify the order of the individuals in the pedigree +and to specify the order of the spouses. + +\subsection{Constructor :}{ + +You either need to provide \strong{horder} or \strong{spouse} in +the dedicated parameters (together or separately), or inside a list. +} +} +\section{Slots}{ + +\describe{ +\item{\code{horder}}{A numeric named vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within +a sibship, as well as the relative order of processing for the founder +couples. (For this latter, the female founders are ordered as though they +were sisters).} + +\item{\code{spouse}}{A data.frame with one row per hinted marriage, usually +only a few marriages in a Pedigree will need an added hint, for +instance reverse the plot order of a husband/wife pair. +Each row contains the identifiers of the left spouse, the right hand spouse, +and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +}} + +\section{Accessors}{ + +\itemize{ +\item \code{horder(x)} : Get the horder vector +} + + +\itemize{ +\item \code{horder(x) <- value} : Set the horder vector +} + + +\itemize{ +\item \code{spouse(x)} : Get the spouse data.frame +} + + +\itemize{ +\item \code{spouse(x) <- value} : Set the spouse data.frame +} +} + +\section{Generics}{ + +\itemize{ +\item \code{as.list(x)}: Convert a Hints object to a list +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Hints object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +Hints( + list( + horder = c("1" = 1, "2" = 2, "3" = 3), + spouse = data.frame( + idl = c("1", "2"), + idr = c("2", "3"), + anchor = c(1, 2) + ) + ) +) + +Hints( + horder = c("1" = 1, "2" = 2, "3" = 3), + spouse = data.frame( + idl = c("1", "2"), + idr = c("2", "3"), + anchor = c(1, 2) + ) +) + +Hints( + horder = c("1" = 1, "2" = 2, "3" = 3) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Ped-class.Rd b/man/Ped-class.Rd new file mode 100644 index 00000000..1d2a91be --- /dev/null +++ b/man/Ped-class.Rd @@ -0,0 +1,333 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Ped-class} +\alias{Ped-class} +\alias{Ped} +\alias{Ped,data.frame-method} +\alias{Ped,character_OR_integer-method} +\alias{Ped,missing-method} +\alias{id} +\alias{id,Ped-method} +\alias{id<-} +\alias{id<-,Ped,character_OR_integer-method} +\alias{dadid} +\alias{dadid,Ped-method} +\alias{dadid<-} +\alias{dadid<-,Ped,character_OR_integer-method} +\alias{momid} +\alias{momid,Ped-method} +\alias{momid<-} +\alias{momid<-,Ped,character_OR_integer-method} +\alias{famid} +\alias{famid,Ped-method} +\alias{famid<-} +\alias{famid<-,Ped,character_OR_integer-method} +\alias{sex} +\alias{sex,Ped-method} +\alias{sex<-} +\alias{sex<-,Ped,character_OR_integer-method} +\alias{affected} +\alias{affected,Ped-method} +\alias{affected<-} +\alias{affected<-,Ped,numeric_OR_logical-method} +\alias{avail} +\alias{avail,Ped-method} +\alias{avail<-} +\alias{avail<-,Ped,numeric_OR_logical-method} +\alias{status} +\alias{status,Ped-method} +\alias{status<-} +\alias{status<-,Ped,numeric_OR_logical-method} +\alias{isinf} +\alias{isinf,Ped-method} +\alias{isinf<-} +\alias{isinf<-,Ped,numeric_OR_logical-method} +\alias{kin} +\alias{kin,Ped-method} +\alias{kin<-} +\alias{kin<-,Ped,numeric-method} +\alias{useful} +\alias{useful,Ped-method} +\alias{useful<-} +\alias{useful<-,Ped,numeric_OR_logical-method} +\alias{mcols<-,Ped,list-method} +\alias{mcols<-,Ped,data.frame-method} +\alias{summary,Ped-method} +\alias{show,Ped-method} +\alias{as.list,Ped-method} +\alias{as.data.frame,Ped-method} +\alias{subset,Ped-method} +\title{Ped object} +\usage{ +\S4method{Ped}{data.frame}(obj, cols_used_init = FALSE, cols_used_del = FALSE) + +\S4method{Ped}{character_OR_integer}( + obj, + sex, + dadid, + momid, + famid = NA, + steril = NA, + status = NA, + avail = NA, + affected = NA, + missid = NA_character_ +) +} +\arguments{ +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} + +\item{cols_used_init}{Boolean defining if the columns that will be used +should be initialised to NA.} + +\item{cols_used_del}{Boolean defining if the columns that will be used +should be deleted.} + +\item{sex}{A character, factor or numeric vector corresponding to +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: +\itemize{ +\item character() or factor() : "f", "m", "woman", "man", "male", "female", +"unknown", "terminated" +\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +}} + +\item{dadid}{A vector containing for each subject, the identifiers of the +biologicals fathers.} + +\item{momid}{A vector containing for each subject, the identifiers of the +biologicals mothers.} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} + +\item{steril}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{status}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} +} +\value{ +A Ped object. +} +\description{ +S4 class to represent the identity informations of the individuals +in a pedigree. + +\subsection{Constructor :}{ + +You either need to provide a vector of the same size for each slot +or a \code{data.frame} with the corresponding columns. + +The metadata will correspond to the columns that do not correspond +to the Ped slots. +} +} +\details{ +The minimal needed informations are \code{id}, \code{dadid}, \code{momid} and \code{sex}. +The other slots are used to store recognized informations. +Additional columns can be added to the Ped object and will be +stored in the \code{elementMetadata} slot of the Ped object. +} +\section{Slots}{ + +\describe{ +\item{\code{id}}{A character vector with the id of the individuals.} + +\item{\code{dadid}}{A character vector with the id of the father of the individuals.} + +\item{\code{momid}}{A character vector with the id of the mother of the individuals.} + +\item{\code{sex}}{An ordered factor vector for the sex of the individuals +(i.e. \code{male} < \code{female} < \code{unknown} < \code{terminated}).} + +\item{\code{famid}}{A character vector with the family identifiers of the +individuals (optional).} + +\item{\code{steril}}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{\code{status}}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{\code{avail}}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{\code{affected}}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = not affected, \code{TRUE} = affected, \code{NA} = unknown).} + +\item{\code{useful}}{A logical vector with the usefulness status of the +individuals +(i.e. \code{FALSE} = not useful, \code{TRUE} = useful).} + +\item{\code{isinf}}{A logical vector indicating if the individual is informative +or not +(i.e. \code{FALSE} = not informative, \code{TRUE} = informative).} + +\item{\code{kin}}{A numeric vector with minimal kinship value between the +individuals and the useful individuals.} + +\item{\code{num_child_tot}}{A numeric vector with the total number of children +of the individuals.} + +\item{\code{num_child_dir}}{A numeric vector with the number of children +of the individuals.} + +\item{\code{num_child_ind}}{A numeric vector with the number of children +of the individuals.} + +\item{\code{elementMetadata}}{A DataFrame with the additional metadata columns +of the Ped object.} + +\item{\code{metadata}}{Meta informations about the pedigree.} +}} + +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Ped object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. For each getter, you have a setter with the +same name, to be use as \code{slot(x) <- value}. +The \code{value} parameter is a vector of the same length as \code{x}, except +for the \code{mcols()} accessors where \code{value} is a list or a data.frame with +each elements with the same length as \code{x}. + + +\itemize{ +\item \code{id(x)} : Individuals identifiers +} + + +\itemize{ +\item \code{dadid(x)} : Individuals' father identifiers +} + + +\itemize{ +\item \code{momid(x)} : Individuals' mother identifiers +} + + +\itemize{ +\item \code{famid(x)} : Individuals' family identifiers +} + + +\itemize{ +\item \code{sex(x)} : Individuals' gender +} + + +\itemize{ +\item \code{affected(x)} : Individuals' affection status +} + + +\itemize{ +\item \code{avail(x)} : Individuals' availability status +} + + +\itemize{ +\item \code{status(x)} : Individuals' death status +} + + +\itemize{ +\item \code{isinf(x)} : Individuals' informativeness status +} + + +\itemize{ +\item \code{kin(x)} : Individuals' kinship distance to the +informative individuals +} + + +\itemize{ +\item \code{useful(x)} : Individuals' usefullness status +} + + +\itemize{ +\item \code{mcols(x)} : Individuals' metadata +} +} + +\section{Generics}{ + +\itemize{ +\item \code{summary(x)}: Compute the summary of a Ped object +} + + +\itemize{ +\item \code{show(x)}: Convert the Ped object to a data.frame +and print it with its summary. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Ped object to a list with +the metadata columns at the end. +} + + +\itemize{ +\item \code{as.data.frame(x)}: Convert a Ped object to a data.frame with +the metadata columns at the end. +} + + +\itemize{ +\item \code{subset(x, i, del_parents = FALSE, keep = TRUE)}: Subset a Ped object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{del_parents} : A logical value indicating if the parents +of the individuals should be deleted. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +data(sampleped) +Ped(sampleped) + +Ped( + obj = c("1", "2", "3", "4", "5", "6"), + dadid = c("4", "4", "6", "0", "0", "0"), + momid = c("5", "5", "5", "0", "0", "0"), + sex = c(1, 2, 3, 1, 2, 1), + missid = "0" +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Pedigree-class.Rd b/man/Pedigree-class.Rd index 3790495f..215b6b92 100644 --- a/man/Pedigree-class.Rd +++ b/man/Pedigree-class.Rd @@ -1,55 +1,462 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R \docType{class} \name{Pedigree-class} \alias{Pedigree-class} -\title{S4 class to represent a pedigree.} +\alias{Pedigree} +\alias{Pedigree,character_OR_integer-method} +\alias{Pedigree,data.frame-method} +\alias{Pedigree,missing-method} +\alias{famid,Pedigree-method} +\alias{ped} +\alias{ped,Pedigree,ANY-method} +\alias{ped,Pedigree,missing-method} +\alias{ped<-} +\alias{ped<-,Pedigree,ANY,ANY-method} +\alias{ped<-,Pedigree,missing,Ped-method} +\alias{mcols,Pedigree-method} +\alias{mcols<-,Pedigree,ANY-method} +\alias{rel} +\alias{rel,Pedigree,ANY-method} +\alias{rel,Pedigree,missing-method} +\alias{rel<-} +\alias{rel<-,Pedigree,ANY,ANY-method} +\alias{rel<-,Pedigree,missing,Rel-method} +\alias{scales} +\alias{scales,Pedigree-method} +\alias{scales<-} +\alias{scales<-,Pedigree,Scales-method} +\alias{fill,Pedigree-method} +\alias{fill<-,Pedigree,data.frame-method} +\alias{border,Pedigree-method} +\alias{border<-,Pedigree,data.frame-method} +\alias{hints} +\alias{hints,Pedigree-method} +\alias{hints<-} +\alias{hints<-,Pedigree,Hints-method} +\alias{horder,Pedigree-method} +\alias{horder<-,Pedigree-method} +\alias{spouse,Pedigree-method} +\alias{spouse<-} +\alias{spouse<-,Pedigree,data.frame-method} +\alias{length,Pedigree-method} +\alias{show,Pedigree-method} +\alias{summary,Pedigree-method} +\alias{as.list,Pedigree-method} +\alias{subset,Pedigree-method} +\alias{[,Pedigree,ANY,missing,ANY-method} +\title{Pedigree object} +\usage{ +Pedigree(obj, ...) + +\S4method{Pedigree}{character_OR_integer}( + obj, + dadid, + momid, + sex, + famid = NA, + avail = NULL, + affected = NULL, + status = NULL, + steril = NULL, + rel_df = NULL, + missid = NA_character_, + col_aff = "affection", + normalize = TRUE, + ... +) + +\S4method{Pedigree}{data.frame}( + obj = data.frame(indId = character(), fatherId = character(), motherId = character(), + gender = numeric(), family = character(), available = numeric(), vitalStatus = + numeric(), affection = numeric(), sterilisation = numeric()), + rel_df = data.frame(id1 = character(), id2 = character(), code = numeric(), famid = + character()), + cols_ren_ped = list(indId = "id", fatherId = "dadid", motherId = "momid", family = + "famid", gender = "sex", sterilisation = "steril", affection = "affected", available + = "avail", vitalStatus = "status"), + cols_ren_rel = list(id1 = "indId1", id2 = "indId2", famid = "family"), + hints = list(horder = NULL, spouse = NULL), + normalize = TRUE, + missid = NA_character_, + col_aff = "affection", + ... +) +} +\arguments{ +\item{obj}{A vector of the individuals identifiers or a data.frame +with the individuals informations. See \code{\link[=Ped]{Ped()}} for more informations.} + +\item{...}{ + Arguments passed on to \code{\link[=generate_colors]{generate_colors}} + \describe{ + \item{\code{}}{} + }} + +\item{dadid}{A vector containing for each subject, the identifiers of the +biologicals fathers.} + +\item{momid}{A vector containing for each subject, the identifiers of the +biologicals mothers.} + +\item{sex}{A character, factor or numeric vector corresponding to +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: +\itemize{ +\item character() or factor() : "f", "m", "woman", "man", "male", "female", +"unknown", "terminated" +\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +}} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} + +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown). +Can also be a data.frame with the same length as \code{obj}. If it is a +matrix, it will be converted to a data.frame and the columns will be +named after the \code{col_aff} argument.} + +\item{status}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{steril}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{rel_df}{A data.frame with the special relationships between +individuals. See \code{\link[=Rel]{Rel()}} for more informations. +The minimum columns required are \code{id1}, \code{id2} and \code{code}. +The \code{famid} column can also be used to specify the family +of the individuals. +If a matrix is given, the columns needs to be ordered as +\code{id1}, \code{id2}, \code{code} and \code{famid}. +The code values are: +\itemize{ +\item \code{1} = Monozygotic twin +\item \code{2} = Dizygotic twin +\item \code{3} = twin of unknown zygosity +\item \code{4} = Spouse +} + +The value relation code recognized by the function are the one defined +by the \code{\link[=rel_code_to_factor]{rel_code_to_factor()}} function.} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} + +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} + +\item{normalize}{A logical to know if the data should be normalised.} + +\item{cols_ren_ped}{A named list with the columns to rename for the +pedigree dataframe. This is useful if you want to use a dataframe with +different column names. The names of the list should be the new column +names and the values should be the old column names. The default values +are to be used with \code{normalize = TRUE}.} + +\item{cols_ren_rel}{A named list with the columns to rename for the +relationship matrix. This is useful if you want to use a dataframe with +different column names. The names of the list should be the new column +names and the values should be the old column names.} + +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}.} +} \value{ A Pedigree object. } \description{ A pedigree is a ensemble of individuals linked to each other into a family tree. +A Pedigree object store the informations of the individuals and the +special relationships between them. It also permit to store the +informations needed to plot the pedigree (i.e. scales and hints). + +\subsection{Constructor :}{ + +Main constructor of the package. +This constructor help to create a \code{Pedigree} object from +different \code{data.frame} or a set of vectors. + +If any errors are found in the data, the function will return +the data.frame with the errors of the Ped object and the +Rel object. +} } \details{ -They are created from a data.frame containing the individuals informations -and a relation ship data.frame for the special links between individuals. -A list of scales can be provided to create a legend. -To create a Pedigree object, use the function -\code{\link[=Pedigree]{Pedigree()}}. +If the normalization is set to \code{TRUE}, then the data will be +standardized using the function \code{norm_ped()} and \code{norm_rel()}. + +If a data.frame is given, the columns names needed will depend if +the normalization is selected or not. If the normalization is selected, +the columns names needed are as follow and if not the columns names +needed are in parenthesis: +\itemize{ +\item \code{indID}: the individual identifier (\code{id}) +\item \code{fatherId}: the identifier of the biological father (\code{dadid}) +\item \code{motherId}: the identifier of the biological mother (\code{momid}) +\item \code{gender}: the sex of the individual (\code{sex}) +\item \code{family}: the family identifier of the individual (\code{famid}) +\item \code{sterilisation}: the sterilisation status of the individual (\code{steril}) +\item \code{available}: the availability status of the individual (\code{avail}) +\item \code{vitalStatus}: the death status of the individual (\code{status}) +\item \code{affection}: the affection status of the individual (\code{affected}) +\item \code{...}: other columns that will be stored in the \code{elementMetadata} slot +} + +The minimum columns required are : +\itemize{ +\item \code{indID} / \code{id} +\item \code{fatherId} / \code{dadid} +\item \code{motherId} / \code{momid} +\item \code{gender} / \code{sex} +} + +The \code{family} / \code{famid} column can also be used to specify the family of the +individuals and will be merge to the \code{indId} / \code{id} field separated by an +underscore. +The columns \code{sterilisation}, \code{available}, \code{vitalStatus}, \code{affection} +will be transformed with the \code{\link[=vect_to_binary]{vect_to_binary()}} function when the +normalisation is selected. +If you do not use the normalisation, the columns will be checked to +be \code{0} or \code{1}. + +If \code{affected} is a data.frame, \strong{col_aff} will be overwritten by the column +names of the data.frame. } \section{Slots}{ \describe{ -\item{\code{ped}}{A data.frame with the individuals informations. The minimum -columns required are 'id', 'dadid', 'momid' and 'sex'. Other columns can be -added to the data.frame and will be recognised by the functions. Some -errors can be detected by the validity function and some of them can be -corrected and others will be added to a dedicated column.} - -\item{\code{rel}}{A data.frame for the special relationship between -individuals. -The minimum columns required are 'id1', 'id2' and 'code'.} - -\item{\code{scales}}{A data.frame to use for the affection status. -This data.frame is generated by the function -\code{\link[=generate_aff_inds]{generate_aff_inds()}} followed by -\code{\link[=generate_colors]{generate_colors()}}.} - -\item{\code{hints}}{List of two elements. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a Pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse, -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either). -}} +\item{\code{ped}}{A Ped object for the identity informations. See \code{\link[=Ped]{Ped()}} for +more informations.} + +\item{\code{rel}}{A Rel object for the special relationships. See \code{\link[=Rel]{Rel()}} for +more informations.} + +\item{\code{scales}}{A Scales object for the filling and bordering +colors used in the plot. See \code{\link[=Scales]{Scales()}} for more informations.} + +\item{\code{hints}}{A Hints object for the ordering of the +individuals in the plot. See \code{\link[=Hints]{Hints()}} for more informations.} }} +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Pedigree object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. + + +\itemize{ +\item \code{famid(x)} : Get the family identifiers of a Pedigree object. This +function is a wrapper around \code{famid(ped(x))}. +} + + +\itemize{ +\item \code{ped(x, slot)} : Get the value of a specific slot of the Ped object +} + + +\itemize{ +\item \code{ped(x)} : Get the Ped object +} + + +\itemize{ +\item \code{ped(x, slot) <- value} : Set the value of a specific slot of +the Ped object +Wrapper of \code{slot(ped(x)) <- value} +} + + +\itemize{ +\item \code{ped(x) <- value} : Set the Ped object +} + + +\itemize{ +\item \code{mcols(x)} : Get the metadata of a Pedigree object. +This function is a wrapper around \code{mcols(ped(x))}. +} + + +\itemize{ +\item \code{mcols(x) <- value} : Set the metadata of a Pedigree object. +This function is a wrapper around \code{mcols(ped(x)) <- value}. +} + + +\itemize{ +\item \code{rel(x, slot)} : Get the value of a specific slot of the Rel object +} + + +\itemize{ +\item \code{rel(x)} : Get the Rel object +} + + +\itemize{ +\item \code{rel(x, slot) <- value} : Set the value of a specific slot of the +Rel object +Wrapper of \code{slot(rel(x)) <- value} +} + + +\itemize{ +\item \code{rel(x) <- value} : Set the Rel object +} + + +\itemize{ +\item \code{scales(x)} : Get the Scales object +} + + +\itemize{ +\item \code{scales(x) <- value} : Set the Scales object +} + + +\itemize{ +\item \code{fill(x)} : Get the fill data.frame from the Scales object. +Wrapper of \code{fill(scales(x))} +} + + +\itemize{ +\item \code{fill(x) <- value} : Set the fill data.frame from the Scales object. +Wrapper of \code{fill(scales(x)) <- value} +} + + +\itemize{ +\item \code{border(x)} : Get the border data.frame from the Scales object. +Wrapper of \code{border(scales(x))} +} + + +\itemize{ +\item \code{border(x) <- value} : Set the border data.frame from the Scales object. +Wrapper of \code{border(scales(x)) <- value} +} + + +\itemize{ +\item \code{hints(x)} : Get the Hints object +} + + +\itemize{ +\item \code{hints(x) <- value} : Set the Hints object +} + + +\itemize{ +\item \code{horder(x)} : Get the horder vector from the Hints object. +Wrapper of \code{horder(hints(x))} +} + + +\itemize{ +\item \code{horder(x) <- value} : Set the horder vector from the Hints object. +Wrapper of \code{horder(hints(x)) <- value} +} + + +\itemize{ +\item \code{spouse(x)} : Get the spouse data.frame from the Hints object. +Wrapper of \code{spouse(hints(x))}. +} + + +\itemize{ +\item \code{spouse(x) <- value} : Set the spouse data.frame from the Hints object. +Wrapper of \code{spouse(hints(x)) <- value}. +} +} + +\section{Generics}{ + +\itemize{ +\item \code{length(x)}: Get the length of a Pedigree object. +Wrapper of \code{length(ped(x))}. +} + + +\itemize{ +\item \code{show(x)}: Print the information of the Ped and Rel +object inside the Pedigree object. +} + + +\itemize{ +\item \code{summary(x)}: Compute the summary of the Ped and Rel object +inside the Pedigree object. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Pedigree object to a list +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Pedigree object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{del_parents} : A logical value indicating if the parents +of the individuals should be deleted. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} + + +\itemize{ +\item \code{x[i, del_parents, keep]}: Subset a Pedigree object +based on the individuals identifiers given. +} +} + +\examples{ + +Pedigree( + obj = c("1", "2", "3", "4", "5", "6"), + dadid = c("4", "4", "6", "0", "0", "0"), + momid = c("5", "5", "5", "0", "0", "0"), + sex = c(1, 2, 3, 1, 2, 1), + avail = c(0, 1, 0, 1, 0, 1), + affected = matrix(c( + 0, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1 + ), ncol = 2), + col_aff = c("aff1", "aff2"), + missid = "0", + rel_df = matrix(c( + "1", "2", 2 + ), ncol = 3, byrow = TRUE), +) + +data(sampleped) +Pedigree(sampleped) +} \seealso{ -\code{\link[=Pedigree]{Pedigree()}} +\code{\link[=Pedigree]{Pedigree()}}, \code{\link[=Ped]{Ped()}}, \code{\link[=Rel]{Rel()}}, \code{\link[=Scales]{Scales()}}, \code{\link[=Hints]{Hints()}} + +\code{\link[=Ped]{Ped()}}, \code{\link[=Rel]{Rel()}}, \code{\link[=Scales]{Scales()}} } diff --git a/man/Pedigree.Rd b/man/Pedigree.Rd deleted file mode 100644 index 2828092c..00000000 --- a/man/Pedigree.Rd +++ /dev/null @@ -1,190 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Pedigree.R -\docType{methods} -\name{Pedigree} -\alias{Pedigree} -\alias{Pedigree,numeric-method} -\alias{Pedigree,numeric} -\alias{Pedigree,character-method} -\alias{Pedigree,character} -\alias{Pedigree,data.frame-method} -\alias{Pedigree,data.frame} -\title{Create a Pedigree object} -\usage{ -Pedigree(obj, ...) - -\S4method{Pedigree}{numeric}(obj, ...) - -\S4method{Pedigree}{character}( - obj, - dadid, - momid, - sex, - family = NA, - avail = NULL, - affected = NULL, - status = NULL, - steril = NULL, - relation = NULL, - missid = "0", - col_aff = "affection", - normalize = TRUE, - ... -) - -\S4method{Pedigree}{data.frame}( - obj = data.frame(id = character(), dadid = character(), momid = character(), sex = - numeric(), family = character(), available = numeric(), affection = numeric()), - relation = data.frame(id1 = character(), id2 = character(), code = numeric(), family = - character()), - cols_ren_ped = list(indId = "id", fatherId = "dadid", motherId = "momid", gender = - "sex", family = "family", sterilisation = "steril", vitalStatus = "status", affection - = "affected"), - cols_ren_rel = list(indId1 = "id1", indId2 = "id2"), - scales = list(fill = data.frame(order = numeric(), column_values = character(), - column_mods = character(), mods = numeric(), labels = character(), affected = - logical(), fill = character(), density = numeric(), angle = numeric()), border = - data.frame(column = character(), mods = numeric(), labels = character(), border = - character())), - hints = list(order = NULL, spouse = NULL), - normalize = TRUE, - missid = "0", - col_aff = "affection", - ... -) -} -\arguments{ -\item{obj}{A vector of the individuals identifiers or a data.frame -with the individuals informations. -The minimum columns required are \code{indID}, \code{fatherId}, \code{motherId} and -\code{gender}. -The \code{family} column can also be used to specify the family of the -individuals and will be merge to the \code{id} field separated by an -underscore. -The following columns are also recognize \code{sterilisation}, \code{available}, -\code{vitalStatus}, \code{affection}. The four of them will be transformed with the -\code{\link[=vect_to_binary]{vect_to_binary()}} function when the normalisation is selected and will -be set respectively to \code{steril}, \code{avail}, -\code{status} and \code{affected}. -If you do not use the normalisation, the columns will be checked to -be \code{0} or \code{1}. -They respectively correspond to the sterilisation status, -the availability status, the death status and the affection status -of the individuals. The values recognized for those columns are \code{1} or -\code{0}.} - -\item{...}{Other arguments to pass to the function \code{generate_colors}.} - -\item{dadid}{A vector containing for each subject, the identifiers of the -biologicals fathers.} - -\item{momid}{vector containing for each subject, the identifiers of the -biologicals mothers.} - -\item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: -\itemize{ -\item character() or factor() : "f", "m", "woman", "man", "male", "female", -"unknown", "terminated" -\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" -}} - -\item{family}{A vector of family identifiers} - -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} - -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} - -\item{status}{A numeric vector of vital status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : alive -\item \code{1} : dead -\item \code{NA} : vital status not known -}} - -\item{steril}{A numeric vector of sterilisation status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : not sterilised -\item \code{1} : sterilised -\item \code{NA} : sterilisation status not known -}} - -\item{relation}{A matrix or a data.frame with 3 required columns -(i.e. id1, id2, code) specifying special relationship between pairs -of individuals. -#' The code values are: -\itemize{ -\item \code{1} = Monozygotic twin -\item \code{2} = Dizygotic twin -\item \code{3} = twin of unknown zygosity -\item \code{4} = Spouse -} - -If \code{famid} is given in the call to create Pedigrees, then -\code{famid} needs to be in the last column of \code{relation}.} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{col_aff}{A string with the column name to use for the affection status.} - -\item{normalize}{A logical to know if the data should be normalised.} - -\item{cols_ren_ped}{A named list with the columns to rename for the -pedigree dataframe.} - -\item{cols_ren_rel}{A named list with the columns to rename for the -relationship matrix.} - -\item{scales}{A list of two data.frame with the scales to use for the -affection status and the other one for the border color (e.g availability).} - -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} -} -\value{ -A Pedigree object. -} -\description{ -This constructor help to create a \code{Pedigree} object from -different \code{data.frame} or a set of vectors. -} -\details{ -If any errors are found in the data, the function will return -the data.frame with the errors for the Pedigree and the relationship -data.frame. -} -\examples{ -data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -} diff --git a/man/Pedixplorer_package.Rd b/man/Pedixplorer_package.Rd index c138c605..4b3a607d 100644 --- a/man/Pedixplorer_package.Rd +++ b/man/Pedixplorer_package.Rd @@ -16,7 +16,8 @@ additional functionality and bug fixes. } \details{ The package download, NEWS, and README are available on CRAN: -\\url{https://cran.r-project.org/package=kinship2} +\\url{https://cran.r-project.org/package=kinship2} for the +previous version of the package. } \section{Functions}{ @@ -30,12 +31,14 @@ given identifiers, sex, affection status(es), and special relationships probability having an allele sampled from two individuals be the same via IBD. -\code{\link[=ped_to_plotdf]{ped_to_plotdf()}} : Method to transform a Pedigree -object into a dataframe of graphical elements. -Allows extra information to be included in the id under the plot symbol - -\code{\link[=plot_fromdf]{plot_fromdf()}} : Method to plot a Pedigree from a -dataframe of graphical elements. +\code{\link[=plot]{plot()}} : Method to transform a Pedigree +object into a graphical plot. +Allows extra information to be included in the id under the +plot symbol. +This method use the \code{\link[=plot_fromdf]{plot_fromdf()}} function to transform the Pedigree +object into a data frame of graphical elements, the same is done for the +legend with the \code{\link[=ped_to_legdf]{ped_to_legdf()}} function. +When done, the data frames are plotted with the \code{\link[=plot_fromdf]{plot_fromdf()}} function. \code{\link[=shrink]{shrink()}}: Shrink a Pedigree to a specific bit size, removing non-informative members first. @@ -62,6 +65,7 @@ library(Pedixplorer) \seealso{ Useful links: \itemize{ + \item \url{https://github.com/LouisLeNezet/Pedixplorer} \item Report bugs at \url{https://github.com/LouisLeLezet/Pedixplorer/issues} } @@ -83,4 +87,3 @@ Other contributors: } } -\keyword{internal} diff --git a/man/Rel-class.Rd b/man/Rel-class.Rd new file mode 100644 index 00000000..dea6d06a --- /dev/null +++ b/man/Rel-class.Rd @@ -0,0 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Rel-class} +\alias{Rel-class} +\alias{Rel} +\alias{Rel,data.frame-method} +\alias{Rel,character_OR_integer-method} +\alias{Rel,missing-method} +\alias{code} +\alias{code,Rel-method} +\alias{id1} +\alias{id1,Rel-method} +\alias{id2} +\alias{id2,Rel-method} +\alias{famid,Rel-method} +\alias{famid<-,Rel,character_OR_integer-method} +\alias{summary,Rel-method} +\alias{show,Rel-method} +\alias{as.list,Rel-method} +\alias{as.data.frame,Rel-method} +\alias{subset,Rel-method} +\title{Rel object} +\usage{ +\S4method{Rel}{data.frame}(obj) + +\S4method{Rel}{character_OR_integer}(obj, id2, code, famid = NA_character_) +} +\arguments{ +\item{obj}{A character vector with the id of the first individuals of each +pairs or a \code{data.frame} with all the informations in corresponding columns.} + +\item{id2}{A character vector with the id of the second individuals of each +pairs} + +\item{code}{A character, factor or numeric vector corresponding to +the relation code of the individuals: +\itemize{ +\item MZ twin = Monozygotic twin +\item DZ twin = Dizygotic twin +\item UZ twin = twin of unknown zygosity +\item Spouse = Spouse +The following values are recognized: +\item character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with +of without space between the words. The case is not important. +\item numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +}} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} +} +\value{ +A Rel object. +} +\description{ +S4 class to represent the special relationships in a Pedigree. + +\subsection{Constructor :}{ + +You either need to provide a vector of the same size for each slot +or a \code{data.frame} with the corresponding columns. +} +} +\details{ +A Rel object is a list of special relationships +between individuals in the pedigree. +It is used to create a Pedigree object. +The minimal needed informations are \code{id1}, \code{id2} and \code{code}. + +If a \code{famid} is provided, the individuals \code{id} will be aggregated +to the \code{famid} character to ensure the uniqueness of the \code{id}. +} +\section{Slots}{ + +\describe{ +\item{\code{id1}}{A character vector with the id of the first individual.} + +\item{\code{id2}}{A character vector with the id of the second individual.} + +\item{\code{code}}{An ordered factor vector with the code of the special +relationship. + +(i.e. \verb{MZ twin} < \verb{DZ twin} < \verb{UZ twin} < \code{Spouse}).} + +\item{\code{famid}}{A character vector with the famid of the individuals.} +}} + +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Rel object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. + + +\itemize{ +\item \code{code(x)} : Relationships' code +} + + +\itemize{ +\item \code{id1(x)} : Relationships' first individuals' identifier +} + + +\itemize{ +\item \code{id2(x)} : Relationships' second individuals' identifier +} + + +\itemize{ +\item \code{famid(x)} : Relationships' individuals' family identifier +} + + +\itemize{ +\item \code{famid(x) <- value} : Set the relationships' individuals' family +identifier +\itemize{ +\item \code{value} : A character or integer vector of the same length as x +with the family identifiers +} +} +} + +\section{Generics}{ + +\itemize{ +\item \code{summary(x)}: Compute the summary of a Rel object +} + + +\itemize{ +\item \code{show(x)}: Convert the Rel object to a data.frame +and print it with its summary. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Rel object to a list +} + + +\itemize{ +\item \code{as.data.frame(x)}: Convert a Rel object to a data.frame +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Rel object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +rel_df <- data.frame( + id1 = c("1", "2", "3"), + id2 = c("2", "3", "4"), + code = c(1, 2, 3) +) +Rel(rel_df) + +Rel( + obj = c("1", "2", "3"), + id2 = c("2", "3", "4"), + code = c(1, 2, 3) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Scales-class.Rd b/man/Scales-class.Rd new file mode 100644 index 00000000..13cc3d70 --- /dev/null +++ b/man/Scales-class.Rd @@ -0,0 +1,162 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Scales-class} +\alias{Scales-class} +\alias{Scales} +\alias{Scales,data.frame,data.frame-method} +\alias{Scales,missing,missing-method} +\alias{fill} +\alias{fill,Scales-method} +\alias{fill<-} +\alias{fill<-,Scales,data.frame-method} +\alias{border} +\alias{border,Scales-method} +\alias{border<-} +\alias{border<-,Scales,data.frame-method} +\alias{as.list,Scales-method} +\title{Scales object} +\usage{ +Scales(fill, border) + +\S4method{Scales}{data.frame,data.frame}(fill, border) +} +\arguments{ +\item{fill}{A data.frame with the informations for the affection status. +The columns needed are: +\itemize{ +\item 'order': the order of the affection to be used +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'affected': a logical value indicating if the mod correspond to an affected +individuals +\item 'fill': the color to use for this mods +\item 'density': the density of the shading +\item 'angle': the angle of the shading +}} + +\item{border}{A data.frame with the informations for the availability status. +The columns needed are: +\itemize{ +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'border': the color to use for this mods +}} +} +\value{ +A Scales object. +} +\description{ +A Scales object is a list of two data.frame. +The first one is used to represent the affection status of the individuals +and therefore the filling of the individuals in the pedigree plot. +The second one is used to represent the availability status of the +individuals and therefore the border color of the individuals in the +pedigree plot. + +\subsection{Constructor :}{ + +You need to provide both \strong{fill} and \strong{border} in the dedicated parameters. +However this is usually done using the \code{\link[=generate_colors]{generate_colors()}} function with a +Pedigree object. +} +} +\section{Slots}{ + +\describe{ +\item{\code{fill}}{A data.frame with the informations for the affection status. +The columns needed are: +\itemize{ +\item 'order': the order of the affection to be used +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'affected': a logical value indicating if the mod correspond to an affected +individuals +\item 'fill': the color to use for this mods +\item 'density': the density of the shading +\item 'angle': the angle of the shading +}} + +\item{\code{border}}{A data.frame with the informations for the availability status. +The columns needed are: +\itemize{ +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'border': the color to use for this mods +}} +}} + +\section{Accessors}{ + +\itemize{ +\item \code{fill(x)} : Get the fill data.frame +} + + +\itemize{ +\item \code{fill(x) <- value} : Set the fill data.frame +} + + +\itemize{ +\item \code{border(x)} : Get the border data.frame +} + + +\itemize{ +\item \code{border(x) <- value} : Set the border data.frame +} +} + +\section{Generics}{ + +\itemize{ +\item \code{as.list(x)}: Convert a Scales object to a list +} +} + +\examples{ + +Scales( + fill = data.frame( + order = 1, + column_values = "affected", + column_mods = "affected_mods", + mods = c(0, 1), + labels = c("unaffected", "affected"), + affected = c(FALSE, TRUE), + fill = c("white", "red"), + density = c(NA, 20), + angle = c(NA, 45) + ), + border = data.frame( + column_values = "avail", + column_mods = "avail_mods", + mods = c(0, 1), + labels = c("not available", "available"), + border = c("black", "blue") + ) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} + +\code{\link[=generate_colors]{generate_colors()}} +} diff --git a/man/align.Rd b/man/align.Rd index af9c6199..a5494a3c 100644 --- a/man/align.Rd +++ b/man/align.Rd @@ -1,77 +1,64 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/align.R +\docType{methods} \name{align} \alias{align} -\title{Generate plotting information for a Pedigree} +\alias{align,Pedigree-method} +\title{Align a Pedigree object} \usage{ -align( - ped, +\S4method{align}{Pedigree}( + obj, packed = TRUE, width = 10, align = TRUE, - hints = ped$hints, - missid = "0" + hints = NULL, + missid = "NA_character_" ) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{width}{for a packed output, the minimum width of the plot, in +\item{width}{For a packed output, the minimum width of the plot, in inches.} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}. If \code{NULL} then the Hints stored in \strong{obj} will be used.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ A list with components \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n}: A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid}: A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos}: A matrix giving the horizontal position of each plot point +\item \code{fam}: A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values +\item \code{spouse}: A matrix with values \itemize{ \item \code{0} = not a spouse \item \code{1} = subject plotted to the immediate right is a spouse \item \code{2} = subject plotted to the immediate right is an inbred spouse } -\item twins Optional matrix which will only be present if the Pedigree +\item \code{twins}: Optional matrix which will only be present if the Pedigree contains twins : \itemize{ \item \code{0} = not a twin @@ -87,16 +74,22 @@ layout of a plot of the Pedigree. } \details{ This is an internal routine, used almost exclusively by -\code{ped_to_plotdf()}. The subservient functions \code{auto_hint()}, -\code{alignped1()}, \code{alignped2()}, -\code{alignped3()}, and \code{alignped4()} +\code{\link[=ped_to_plotdf]{ped_to_plotdf()}}. + +The subservient functions \code{\link[=auto_hint]{auto_hint()}}, +\code{\link[=alignped1]{alignped1()}}, \code{\link[=alignped2]{alignped2()}}, +\code{\link[=alignped3]{alignped3()}}, and \code{\link[=alignped4]{alignped4()}} contain the bulk of the computation. -If the \strong{hints} are missing the \code{auto_hint()} routine is called to + +If the \strong{hints} are missing the \code{\link[=auto_hint]{auto_hint()}} routine is called to supply an initial guess. -If multiple families are present in the Pedigree, this routine is called -once for each family, and the results are combined in the list returned. -For more information you can read the associated vignette:align -\code{vignette("alignement_details")}. + +If multiple families are present in the \strong{obj} Pedigree, this routine +is called once for each family, and the results are combined in the +list returned. + +For more information you can read the associated vignette: +\code{vignette("pedigree_alignment")}. } \examples{ data(sampleped) diff --git a/man/alignped1.Rd b/man/alignped1.Rd index e9ae73cd..161e3b62 100644 --- a/man/alignped1.Rd +++ b/man/alignped1.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped1.R \name{alignped1} \alias{alignped1} -\title{First routine alignement} +\title{Alignment first routine} \usage{ alignped1(idx, dadx, momx, level, horder, packed, spouselist) } @@ -15,58 +15,69 @@ alignped1(idx, dadx, momx, level, horder, packed, spouselist) \item{level}{Vector of the level of each subject} -\item{horder}{Vector of the horizontal order of each subject} +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{spouselist}{Matrix of the spouses with one row per hinted marriage, -usually only a few marriages in a pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +\item{spouselist}{Matrix of spouses with 4 columns: +\itemize{ +\item \code{1}: husband index +\item \code{2}: wife index +\item \code{3}: husband anchor +\item \code{4}: wife anchor +}} } \value{ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ -First alignement routine which create the subtree founded on a single +First alignment routine which create the subtree founded on a single subject as though it were the only tree. } \details{ -\enumerate{ -\item In this routine the \strong{nid} array consists of the final +In this routine the \strong{nid} array consists of the final \verb{nid array + 1/2} of the final spouse array. Note that the \strong{spouselist} matrix will only contain spouse pairs that are not yet processed. The logic for anchoring is slightly tricky. -First, if row 4 of the spouselist matrix is 0, we anchor at the first +\subsection{1. Anchoring:}{ + +First, if col 4 of the spouselist matrix is 0, we anchor at the first opportunity. Also note that if \code{spouselist[, 3] == spouselist[, 4]} it is the husband who is the anchor (just write out the possibilities). -\item Create the set of 3 return structures, which will be matrices +} + +\subsection{2. Return values initialization:}{ + +Create the set of 3 return structures, which will be matrices with \code{1 + nspouse} columns. If there are children then other routines will widen the result. -\item Create the two complimentary lists \strong{lspouse} and \strong{rspouse} -to denote those plotted on the left and on the right. +} + +\subsection{3. Create \strong{lspouse} and \strong{rspouse}:}{ + +This two complimentary lists denote the spouses plotted on the left +and on the right. For someone with lots of spouses we try to split them evenly. If the number of spouses is odd, then men should have more on the right than on the left, women more on the right. @@ -79,11 +90,19 @@ by plotting canine data, lspouse could initially be empty but \code{length(rspouse) > 1}. This caused \code{nleft > length(indx)}. A fix was to not let \strong{indx} to be indexed beyond its length, fix by JPS 5/2013. -\item For each spouse get the list of children. If there are any we +} + +\subsection{4. List the children:}{ + +For each spouse get the list of children. If there are any we call \code{\link[=alignped2]{alignped2()}} to generate their tree and then mark the connection to their parent. If multiple marriages have children we need to join the trees. -\item To finish up we need to splice together the tree made up from +} + +\subsection{5. Splice the tree:}{ + +To finish up we need to splice together the tree made up from all the kids, which only has data from \code{lev + 1} down, with the data here. There are 3 cases: \enumerate{ @@ -103,3 +122,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped2.Rd b/man/alignped2.Rd index ade26d80..45dddde9 100644 --- a/man/alignped2.Rd +++ b/man/alignped2.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped2.R \name{alignped2} \alias{alignped2} -\title{Second routine alignement} +\title{Alignment second routine} \usage{ alignped2(idx, dadx, momx, level, horder, packed, spouselist) } @@ -15,38 +15,40 @@ alignped2(idx, dadx, momx, level, horder, packed, spouselist) \item{level}{Vector of the level of each subject} -\item{horder}{Vector of the horizontal order of each subject} +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{spouselist}{Matrix of the spouses with one row per hinted marriage, -usually only a few marriages in a pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +\item{spouselist}{Matrix of spouses with 4 columns: +\itemize{ +\item \code{1}: husband index +\item \code{2}: wife index +\item \code{3}: husband anchor +\item \code{4}: wife anchor +}} } \value{ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ @@ -63,7 +65,7 @@ The code below has one non-obvious special case. Suppose that two sibs marry. When the first sib is processed by \code{alignped1} then both partners (and any children) will be added to the rval structure below. When the second sib is processed they will come back as a 1 element tree -(the marriage will no longer be on the spouselist), which should be added +(the marriage will no longer be on the \strong{spouselist}), which should be added onto rval. The rule thus is to not add any 1 element tree whose value (which must be \code{idx[i]} is already in the rval structure for this level. } @@ -74,5 +76,7 @@ align(ped) } \seealso{ -\code{\link[=align]{align()}}, \code{\link[=alignped1]{alignped1()}} +\code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped3.Rd b/man/alignped3.Rd index f399eb32..f1da616c 100644 --- a/man/alignped3.Rd +++ b/man/alignped3.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/alignped3.R \name{alignped3} \alias{alignped3} -\title{Third routine alignement} +\title{Alignment third routine} \usage{ alignped3(alt1, alt2, packed, space = 1) } \arguments{ -\item{alt1}{Alignement of the first tree} +\item{alt1}{Alignment of the first tree} -\item{alt2}{Alignement of the second tree} +\item{alt2}{Alignment of the second tree} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} \item{space}{Space between two subjects} } @@ -22,21 +22,16 @@ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ @@ -49,8 +44,8 @@ the left tree is the same as the leftmost person in the right tree; we need not plot two copies of the same person side by side. (When initializing the output structures do not worry about this, there is no harm if they are a column bigger than finally needed.) -Beyond that the work is simple bookkeeping. -\subsection{Slide}{ +Beyond that the work is simple book keeping. +\subsection{1. Slide:}{ For the unpacked case, which is the traditional way to draw a Pedigree when we can assume the paper is infinitely wide, all parents are @@ -59,7 +54,7 @@ merged as solid blocks. On input they both have a left margin of 0. Compute how far over we have to slide the right tree. } -\subsection{Merge}{ +\subsection{2. Merge:}{ Now merge the two trees. Start at the top level and work down. } @@ -73,3 +68,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped4.Rd b/man/alignped4.Rd index b9c797bb..bfa9365d 100644 --- a/man/alignped4.Rd +++ b/man/alignped4.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped4.R \name{alignped4} \alias{alignped4} -\title{Fourth and last routine alignement} +\title{Alignment fourth routine} \usage{ alignped4(rval, spouse, level, width, align) } @@ -15,15 +15,15 @@ the subject is a spouse or not.} \item{level}{Vector of the level of each subject} -\item{width}{for a packed output, the minimum width of the plot, in +\item{width}{For a packed output, the minimum width of the plot, in inches.} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} } \value{ @@ -49,14 +49,14 @@ line, if the user suggestion is too low it is increased to that For each set of siblings \code{x} with parents at \code{p_1} and \code{p_2} the alignment penalty is : -\eqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} +\deqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} where \code{k} is the number of siblings in the set. } Using the fact that when \code{a = 1} : -\eqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} +\deqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} then moving a sibship with \code{k} sibs one unit to the left or right of optimal will incur the same cost as moving one with only 1 or @@ -99,3 +99,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/ancestors.Rd b/man/ancestors.Rd index 6293c067..5635f530 100644 --- a/man/ancestors.Rd +++ b/man/ancestors.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/align.R \name{ancestors} \alias{ancestors} -\title{Routine function to get ancestors of a subject} +\title{Ancestors indexes of a subject} \usage{ ancestors(idx, momx, dadx) } @@ -20,11 +20,16 @@ A vector of ancestor indexes Given the index of one or multiple individual(s), this function iterate through the mom and dad indexes to list out all the ancestors of the said individual(s). -This function is use in the \code{align()} function to +This function is use in the \code{\link[=align]{align()}} function to identify which spouse pairs has a common ancestor and therefore if they need to be connected with a double line (i.e. inbred). } +\examples{ +ancestors(c(1), c(3, 4, 5, 6), c(7, 8, 9, 10)) +ancestors(c(1, 2), c(3, 4, 5, 6), c(7, 8, 9, 10)) +} \seealso{ \code{\link[=align]{align()}} } +\keyword{internal} diff --git a/man/anchor_to_factor.Rd b/man/anchor_to_factor.Rd new file mode 100644 index 00000000..b20ea140 --- /dev/null +++ b/man/anchor_to_factor.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{anchor_to_factor} +\alias{anchor_to_factor} +\title{Anchor variable to ordered factor} +\usage{ +anchor_to_factor(anchor) +} +\arguments{ +\item{anchor}{A character, factor or numeric vector corresponding to +the anchor of the individuals. The following values are recognized: +\itemize{ +\item character() or factor() : "0", "1", "2", "left", "right", "either" +\item numeric() : 1 = "left", 2 = "right", 0 = "either" +}} +} +\value{ +An ordered factor vector containing the transformed variable +"either" < "left" < "right" +} +\description{ +Anchor variable to ordered factor +} +\examples{ +anchor_to_factor(c(1, 2, 0, "left", "right", "either")) +} diff --git a/man/as.data.frame-Pedigree-method.Rd b/man/as.data.frame-Pedigree-method.Rd deleted file mode 100644 index 2b6c0754..00000000 --- a/man/as.data.frame-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{as.data.frame,Pedigree-method} -\alias{as.data.frame,Pedigree-method} -\title{Convert a Pedigree object to a data.frame} -\usage{ -\S4method{as.data.frame}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -A data.frame with the individuals informations. -} -\description{ -Convert a Pedigree object to a data.frame -} diff --git a/man/as.list-Pedigree-method.Rd b/man/as.list-Pedigree-method.Rd deleted file mode 100644 index 5a6660dd..00000000 --- a/man/as.list-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{as.list,Pedigree-method} -\alias{as.list,Pedigree-method} -\title{Convert a Pedigree object to a list} -\usage{ -\S4method{as.list}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -A list with all the slots of the Pedigree object. -} -\description{ -Convert a Pedigree object to a list -} diff --git a/man/auto_hint.Rd b/man/auto_hint.Rd index c53a6183..786c966c 100644 --- a/man/auto_hint.Rd +++ b/man/auto_hint.Rd @@ -2,53 +2,40 @@ % Please edit documentation in R/auto_hint.R \name{auto_hint} \alias{auto_hint} -\title{First initial guess for the alignment of a Pedigree} +\alias{auto_hint,Pedigree-method} +\title{Initial hint for a Pedigree alignment} \usage{ -auto_hint(ped, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE) +\S4method{auto_hint}{Pedigree}(obj, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}. If \code{NULL} then the Hints stored in \strong{obj} will be used.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{reset}{If \code{TRUE}, then even if \code{ped} object has hints, reset -them to the initial values} +\item{reset}{If \code{TRUE}, then even if the Ped object has Hints, reset +them to the initial values.} } \value{ -The \strong{hints} list containing components \code{order} and \code{spouse} +The initial \linkS4class{Hints} object. } \description{ Compute an initial guess for the alignment of a Pedigree } \details{ -A Pedigree structure can contain a \code{hints} object which helps to +A Pedigree structure can contain a \linkS4class{Hints} object which helps to reorder the Pedigree (e.g. left-to-right order of children within family) so as to plot with minimal distortion. This routine is used to create an initial version of the hints. They can then be modified if desired. @@ -57,22 +44,24 @@ This routine would not normally be called by a user. It moves children within families, so that marriages are on the "edge" of a set children, closest to the spouse. For pedigrees that have only a single connection between two families this simple-minded approach works surprisingly well. -For more complex structures hand-tuning of the hints matrix may be required. +For more complex structures hand-tuning of the hints may be required. -The Pedigree in the example below is one where rearranging the founders -greatly decreases the number of extra connections. When \code{auto_hint()} is -called with a a vector of numbers as the second argument, the values for the -founder females are used to order the founder families left to right across -the plot. The values within a sibship are used as the preliminary order of +When \code{auto_hint()} is called with a a vector of numbers as the \strong{hints} +argument, the values for the founder females are used to order the founder +families left to right across the plot. +The values within a sibship are used as the preliminary order of siblings within a family; this may be changed to move one of them to the edge so as to match up with a spouse. The actual values in the vector are not important, only their order. } \examples{ data(sampleped) -ped <- Pedigree(sampleped[sampleped$family == 1, ]) +ped <- Pedigree(sampleped[sampleped$famid == 1, ]) auto_hint(ped) } \seealso{ -\code{\link[=align]{align()}}, \code{\link[=best_hint]{best_hint()}} +\code{\link[=align]{align()}}, \code{\link[=best_hint]{best_hint()}}, \linkS4class{Hints} } +\keyword{alignment,} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/best_hint.Rd b/man/best_hint.Rd index 0eafb54e..eaf0ea81 100644 --- a/man/best_hint.Rd +++ b/man/best_hint.Rd @@ -2,37 +2,41 @@ % Please edit documentation in R/best_hint.R \name{best_hint} \alias{best_hint} -\title{Best hint for alignement} +\alias{best_hint,Pedigree-method} +\title{Best hint for a Pedigree alignment} \usage{ -best_hint(ped, wt = c(1000, 10, 1), tolerance = 0) +\S4method{best_hint}{Pedigree}(obj, wt = c(1000, 10, 1), tolerance = 0) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{wt}{A vector of three weights for the three error measures -\itemize{ +\item{wt}{A vector of three weights for the three error measures. +Default is \code{c(1000, 10, 1)}. +\enumerate{ \item The number of duplicate individuals in the plot \item The sum of the absolute values of the differences in the positions of duplicate individuals \item The sum of the absolute values of the differences between -the center of the children and the parents -Default is \code{c(1000, 10, 1)}. +the center of the children and the parents. }} -\item{tolerance}{The maximum stress level to accept. Default is \code{0}} +\item{tolerance}{The maximum stress level to accept. +Default is \code{0}} } \value{ -The best hint object out of all the permutations +The best Hints object out of all the permutations } \description{ -When computer time is cheap, use this routine to get a 'best' Pedigree. +When computer time is cheap, use this routine to get a \emph{best} +Pedigree alignment. This routine will try all possible founder orders, and return the one -with the least 'stress'. +with the least \strong{stress}. } \details{ -The auto_hint routine will rearrange sibling order, but not founder order. -This calls auto_hint with every possible founder order, and finds that -plot with the least 'stress'. +The \code{\link[=auto_hint]{auto_hint()}} routine will rearrange sibling order, but not +founder order. +This calls \code{\link[=auto_hint]{auto_hint()}} with every possible founder order, and finds that +plot with the least "stress". The stress is computed as a weighted sum of three error measures: \itemize{ \item nbArcs The number of duplicate individuals in the plot @@ -42,10 +46,10 @@ positions of duplicate individuals the center of the children and the parents } -\eqn{stress = - wt[1] \times nbArcs + - wt[2] \times lgArcs + - wt[3] \times lgParentsChilds +\deqn{stress = + wt[1] * nbArcs + + wt[2] * lgArcs + + wt[3] * lgParentsChilds } If during the search, a plot is found with a stress level less than @@ -53,9 +57,11 @@ If during the search, a plot is found with a stress level less than } \examples{ data(sampleped) -ped <- Pedigree(sampleped[sampleped$family == 1,]) +ped <- Pedigree(sampleped[sampleped$famid == 1,]) best_hint(ped) } \seealso{ -\code{\link[=auto_hint]{auto_hint()}} +\code{\link[=auto_hint]{auto_hint()}}, \code{\link[=align]{align()}} } +\keyword{alignment,} +\keyword{auto_hint} diff --git a/man/bit_size.Rd b/man/bit_size.Rd index e398c5b4..4c2d1da1 100644 --- a/man/bit_size.Rd +++ b/man/bit_size.Rd @@ -1,51 +1,51 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bit_size.R -\docType{methods} \name{bit_size} \alias{bit_size} -\alias{bit_size,character-method} -\alias{bit_size,character} +\alias{bit_size,character_OR_integer-method} \alias{bit_size,Pedigree-method} -\alias{bit_size,Pedigree} -\title{Get Pedigree bit_size} +\alias{bit_size,Ped-method} +\title{Bit size of a Pedigree} \usage{ -bit_size(obj, ...) +\S4method{bit_size}{character_OR_integer}(obj, momid, missid = NA_character_) -\S4method{bit_size}{character}(obj, momid, missid = "0") +\S4method{bit_size}{Pedigree}(obj) -\S4method{bit_size}{Pedigree}(obj, missid = "0") +\S4method{bit_size}{Ped}(obj) } \arguments{ -\item{obj}{A Pedigree object or a vector of fathers identifierss} +\item{obj}{A Ped or Pedigree object or a vector of fathers identifiers} -\item{...}{Additional arguments passed to methods} - -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ A list with the following components: \itemize{ -\item bit_size The bit_size of input Pedigree +\item bit_size The bit size of the Pedigree \item nFounder The number of founders in the Pedigree -\item nNonFounder The number of nonfounders in the Pedigree +\item nNonFounder The number of non founders in the Pedigree } } \description{ -Calculate Pedigree bit_size, defined as : +Utility function used in the \code{shrink()} function +to calculate the bit size of a Pedigree. +} +\details{ +The bit size of a Pedigree is defined as : -\eqn{ +\deqn{ 2 \times NbNonFounders - NbFounders } -} -\details{ -This is a utility function used in \code{shrink()} -to calculate the bit_size of a Pedigree. + +Where \code{NbNonFounders} is the number of non founders in the Pedigree +(i.e. individuals with identified parents) and +\code{NbFounders} is the number of founders in the Pedigree +(i.e. individuals without identified parents). } \examples{ data(sampleped) @@ -55,4 +55,5 @@ bit_size(ped) \seealso{ \code{\link[=shrink]{shrink()}} } -\keyword{internal} +\keyword{internal,} +\keyword{shrink} diff --git a/man/check_columns.Rd b/man/check_columns.Rd index 89250e3d..0791d4df 100644 --- a/man/check_columns.Rd +++ b/man/check_columns.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{check_columns} \alias{check_columns} -\title{Check for columns name usage} +\title{Check columns presence in a dataframe} \usage{ check_columns( df, @@ -65,8 +65,11 @@ df <- data.frame(ColN1 = c(1, 2), ColN2 = 4, ColU1 = 'B', ColU2 = '1', ColTU1 = 'A', ColTU2 = 3, ColNR1 = 4, ColNR2 = 5) -tryCatch(check_columns(df, c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), - c('ColTU1', 'ColTU2')), error = function(e) print(e)) +tryCatch( + check_columns(df, + c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), + c('ColTU1', 'ColTU2') +), error = function(e) print(e)) } \keyword{internal} diff --git a/man/check_hints.Rd b/man/check_hints.Rd deleted file mode 100644 index 149f2d99..00000000 --- a/man/check_hints.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/check_hints.R -\name{check_hints} -\alias{check_hints} -\title{Detect hints inconsistencies} -\usage{ -check_hints(hints, sex) -} -\arguments{ -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} - -\item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: -\itemize{ -\item character() or factor() : "f", "m", "woman", "man", "male", "female", -"unknown", "terminated" -\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" -}} -} -\value{ -Nothing, but will stop if there is a problem. -} -\description{ -This routine tries to detect inconsistencies in spousal hints. -} -\details{ -These arise in \code{auto_hint()} with complex Pedigrees. -One can have ABA (subject A is on both the left and the right of B), -cycles, etc. -Users can introduce problems as well if they modify the hints. -} -\examples{ -data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -ht1 <- auto_hint(ped1) -check_hints(ht1, ped1$ped$sex) - -} -\seealso{ -\code{\link[=auto_hint]{auto_hint()}}, \code{\link[=best_hint]{best_hint()}} -} -\keyword{internal} diff --git a/man/check_num_na.Rd b/man/check_num_na.Rd index 3e0627cd..0ee6d4c3 100644 --- a/man/check_num_na.Rd +++ b/man/check_num_na.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{check_num_na} \alias{check_num_na} -\title{Check is numeric} +\title{Is numeric or NA} \usage{ check_num_na(var, na_as_num = TRUE) } @@ -13,13 +13,13 @@ check_num_na(var, na_as_num = TRUE) considered as numerical values} } \value{ -A vector of boolean of the same size as \code{var} +A vector of boolean of the same size as \strong{var} } \description{ Check if a variable given is numeric or NA } \details{ -Check if the values in \code{var} are numeric or if they are -NA in the case that \code{na_as_num} is set to TRUE. +Check if the values in \strong{var} are numeric or if they are +\code{NA} in the case that \code{na_as_num} is set to TRUE. } \keyword{internal} diff --git a/man/check_slot_fd.Rd b/man/check_slot_fd.Rd index 0731ac6b..51ac5eb3 100644 --- a/man/check_slot_fd.Rd +++ b/man/check_slot_fd.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R +% Please edit documentation in R/AllValidity.R \name{check_slot_fd} \alias{check_slot_fd} \title{Check if the fields are present in an object slot} diff --git a/man/check_values.Rd b/man/check_values.Rd index e2eb9b15..c0cc1589 100644 --- a/man/check_values.Rd +++ b/man/check_values.Rd @@ -1,22 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R +% Please edit documentation in R/AllValidity.R \name{check_values} \alias{check_values} \title{Check values in a slot} \usage{ -check_values(val, ref, present = TRUE) +check_values(val, ref, name = NULL, present = TRUE) } \arguments{ -\item{present}{A logical value indicating if the values should be present -or not} - -\item{obj}{An object.} +\item{val}{A vector of values to check.} -\item{slot}{A slot of the object.} +\item{ref}{A vector of reference values.} -\item{column}{A column of the slot.} +\item{name}{A character vector with the name of the values to check.} -\item{values}{A vector of values to check.} +\item{present}{A logical value indicating if the values should be present +or not} } \value{ A character vector with the errors if any. diff --git a/man/circfun.Rd b/man/circfun.Rd index 31819db8..04885b73 100644 --- a/man/circfun.Rd +++ b/man/circfun.Rd @@ -2,19 +2,27 @@ % Please edit documentation in R/plot_fct.R \name{circfun} \alias{circfun} -\title{Generate a circular element} +\title{Circular element} \usage{ circfun(nslice, n = 50) } \arguments{ -\item{nslice}{number of slices in the circle} +\item{nslice}{Number of slices in the circle} \item{n}{Total number of points in the circle} } \value{ -a list of x and y coordinates +A list of x and y coordinates per slice. } \description{ -Generate a circular element +Create a list of x and y coordinates for a circle +with a given number of slices. } -\keyword{internal} +\examples{ + +circfun(1) +circfun(1, 10) +circfun(4, 50) +} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/descendants.Rd b/man/descendants.Rd index 49308efe..420cbb35 100644 --- a/man/descendants.Rd +++ b/man/descendants.Rd @@ -1,31 +1,28 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/descendants.R -\docType{methods} \name{descendants} \alias{descendants} -\alias{descendants,character,character-method} -\alias{descendants,character} -\alias{descendants,character,Pedigree-method} -\alias{descendants,Pedigree} -\title{Find all the descendants} +\alias{descendants,character_OR_integer,character_OR_integer-method} +\alias{descendants,character_OR_integer,Pedigree-method} +\alias{descendants,character_OR_integer,Ped-method} +\title{Descendants of individuals} \usage{ -descendants(idlist, obj, ...) +\S4method{descendants}{character_OR_integer,character_OR_integer}(idlist, obj, dadid, momid) -\S4method{descendants}{character,character}(idlist, obj, dadid, momid) +\S4method{descendants}{character_OR_integer,Pedigree}(idlist, obj) -\S4method{descendants}{character,Pedigree}(idlist, obj) +\S4method{descendants}{character_OR_integer,Ped}(idlist, obj) } \arguments{ \item{idlist}{List of individuals identifiers to be considered} -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A Ped or Pedigree object or a vector of the +individuals identifiers.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -34,7 +31,7 @@ The list is not ordered. } \description{ Find all the descendants of a particular list of individuals -given a Pedigree +given a Pedigree object. } \examples{ data("sampleped") diff --git a/man/draw_arc.Rd b/man/draw_arc.Rd index 1a0be3c5..328d8f0c 100644 --- a/man/draw_arc.Rd +++ b/man/draw_arc.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_arc} \alias{draw_arc} -\title{Draw arcs for multiple instances of a subject} +\title{Draw arcs} \usage{ draw_arc(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black") } @@ -17,16 +17,18 @@ draw_arc(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black") \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{lwd}{line width} +\item{lwd}{Line width} -\item{col}{line color} +\item{col}{Line color} } \value{ -Plot the arcs or add it to a ggplot object +Plot the arcs to the current device +or add it to a ggplot object } \description{ -Draw arcs for multiple instances of a subject +Draw arcs } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_polygon.Rd b/man/draw_polygon.Rd index 9dbdf111..7135bc60 100644 --- a/man/draw_polygon.Rd +++ b/man/draw_polygon.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_polygon} \alias{draw_polygon} -\title{Draw a polygon for a Pedigree} +\title{Draw a polygon} \usage{ draw_polygon( x, @@ -22,20 +22,22 @@ draw_polygon( \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{fill}{fill color} +\item{fill}{Fill color} -\item{border}{border color} +\item{border}{Border color} -\item{density}{density of shading} +\item{density}{Density of shading} -\item{angle}{angle of shading} +\item{angle}{Angle of shading} } \value{ -Plot the polygon or add it to a ggplot object +Plot the polygon to the current device +or add it to a ggplot object } \description{ -Draw a polygon for a Pedigree +Draw a polygon } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_segment.Rd b/man/draw_segment.Rd index 4eeee997..804b836a 100644 --- a/man/draw_segment.Rd +++ b/man/draw_segment.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_segment} \alias{draw_segment} -\title{Draw segments for a Pedigree} +\title{Draw segments} \usage{ draw_segment( x0, @@ -27,18 +27,20 @@ draw_segment( \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{col}{line color} +\item{col}{Line color} -\item{lwd}{line width} +\item{lwd}{Line width} -\item{lty}{line type} +\item{lty}{Line type} } \value{ -Plot the segments or add it to a ggplot object +Plot the segments to the current device +or add it to a ggplot object } \description{ -Draw segments for a Pedigree +Draw segments } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_text.Rd b/man/draw_text.Rd index e6476e2f..4aedd8b3 100644 --- a/man/draw_text.Rd +++ b/man/draw_text.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_text} \alias{draw_text} -\title{Draw text for a Pedigree} +\title{Draw texts} \usage{ draw_text( x, @@ -21,24 +21,26 @@ draw_text( \item{y}{y coordinates} -\item{label}{text to be displayed} +\item{label}{Text to be displayed} \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} -\item{col}{text color} +\item{col}{Text color} \item{adjx}{x adjustment} \item{adjy}{y adjustment} } \value{ -Plot the text or add it to a ggplot object +Plot the text to the current device +or add it to a ggplot object } \description{ -Draw text for a Pedigree +Draw texts } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/duporder.Rd b/man/duporder.Rd index 632722bd..a8732d74 100644 --- a/man/duporder.Rd +++ b/man/duporder.Rd @@ -2,39 +2,25 @@ % Please edit documentation in R/auto_hint.R \name{duporder} \alias{duporder} -\title{Routine to find the duplicate pairs of a subject} +\title{Find the duplicate pairs of a subject} \usage{ -duporder(idlist, plist, lev, ped) +duporder(idlist, plist, lev, obj) } \arguments{ \item{idlist}{List of individuals identifiers to be considered} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ A matrix of duplicate pairs } \description{ -Routine to find the duplicate pairs of a subject +Find the duplicate pairs of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -44,4 +30,5 @@ the order they should be plotted. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/exclude_stray_marryin.Rd b/man/exclude_stray_marryin.Rd index 86cb95c2..57dbd3e2 100644 --- a/man/exclude_stray_marryin.Rd +++ b/man/exclude_stray_marryin.Rd @@ -7,12 +7,12 @@ exclude_stray_marryin(id, dadid, momid) } \arguments{ -\item{id}{Vector of subject identifiers} +\item{id}{A character vector with the identifiers of each individuals} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -20,6 +20,10 @@ Returns a data frame of subject identifiers and their parents. The data frame is trimmed of any founders who are not parents. } \description{ -Exclude from a Pedigree any founders who are not parents. +Exclude any founders who are not parents. } -\keyword{internal} +\seealso{ +\code{\link[=shrink]{shrink()}} +} +\keyword{internal,} +\keyword{shrink} diff --git a/man/exclude_unavail_founders.Rd b/man/exclude_unavail_founders.Rd index 5da23d06..0b7856ce 100644 --- a/man/exclude_unavail_founders.Rd +++ b/man/exclude_unavail_founders.Rd @@ -4,26 +4,24 @@ \alias{exclude_unavail_founders} \title{Exclude unavailable founders} \usage{ -exclude_unavail_founders(id, dadid, momid, avail, missid = "0") +exclude_unavail_founders(id, dadid, momid, avail, missid = NA_character_) } \arguments{ -\item{id}{Vector of subject identifiers} +\item{id}{A character vector with the identifiers of each individuals} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{missid}{Character defining the missing ids} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ Returns a list with the following components: @@ -36,6 +34,10 @@ Returns a list with the following components: } } \description{ -Exclude from a Pedigree any unavailable founders. +Exclude any unavailable founders. +} +\seealso{ +\code{\link[=shrink]{shrink()}} } -\keyword{internal} +\keyword{internal,} +\keyword{shrink} diff --git a/man/extract-methods.Rd b/man/extract-methods.Rd deleted file mode 100644 index 2b946732..00000000 --- a/man/extract-methods.Rd +++ /dev/null @@ -1,123 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\name{ped} -\alias{ped} -\alias{ped,Pedigree-method} -\alias{rel} -\alias{rel,Pedigree-method} -\alias{scales} -\alias{scales,Pedigree-method} -\alias{hints} -\alias{hints,Pedigree-method} -\alias{show,Pedigree-method} -\alias{summary,Pedigree-method} -\alias{[[,Pedigree,ANY,missing-method} -\alias{$,Pedigree-method} -\alias{$<-,Pedigree-method} -\alias{sub_sel_hints} -\alias{sub_sel_hints,Pedigree-method} -\alias{[,Pedigree,ANY,ANY,ANY-method} -\alias{[,Pedigree,missing,ANY,ANY-method} -\alias{[,Pedigree,ANY,missing,ANY-method} -\title{Pedigree ped accessors} -\usage{ -ped(object) - -rel(object) - -scales(object) - -hints(object) - -\S4method{show}{Pedigree}(object) - -\S4method{summary}{Pedigree}(object) - -\S4method{[[}{Pedigree,ANY,missing}(x, i, j, ..., drop = TRUE) - -\S4method{$}{Pedigree}(x, name) - -\S4method{$}{Pedigree}(x, name) <- value - -sub_sel_hints(hints, index) - -\S4method{[}{Pedigree,ANY,ANY,ANY}(x, i, j, drop = TRUE) - -\S4method{[}{Pedigree,missing,ANY,ANY}(x, i, j, drop = TRUE) - -\S4method{[}{Pedigree,ANY,missing,ANY}(x, i, j, drop = TRUE) -} -\arguments{ -\item{object}{A Pedigree object.} - -\item{x}{A Pedigree object.} - -\item{i}{A vector of individuals id or a vector of index.} - -\item{j}{A vector of columns names.} - -\item{...}{Other arguments.} - -\item{drop}{A logical value indicating if the dimensions should be dropped.} - -\item{name}{A slot name.} - -\item{value}{A vector of values to replace.} - -\item{hints}{A list of hints} - -\item{index}{A vector of index} -} -\value{ -The slot \code{ped} present in the Pedigree object. - -The slot \code{rel} present in the Pedigree object. - -The slot \code{scales} present in the Pedigree object. - -The slot \code{hints} present in the Pedigree object. - -A character vector with the informations about the object. - -A character vector with the summary of the object. - -The slot \code{i} present in the Pedigree object. - -The slot \code{name} present in the Pedigree object. - -The Pedigree object with the slot \code{name} replaced by \code{value}. - -A list of hints subsetted - -A Pedigree object subsetted. - -A Pedigree object subsetted. - -A Pedigree object subsetted. -} -\description{ -Pedigree rel accessors - -Pedigree scales accessors - -Pedigree hints accessors - -Pedigree show method - -Pedigree summary method. - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object - -Replace parts of a Pedigree object - -Subset the hints list based on the index given - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object -} -\keyword{internal} diff --git a/man/family_check.Rd b/man/family_check.Rd index 41792931..100aaf5d 100644 --- a/man/family_check.Rd +++ b/man/family_check.Rd @@ -1,32 +1,31 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/family_check.R -\docType{methods} \name{family_check} \alias{family_check} -\alias{family_check,character-method} -\alias{family_check,character} +\alias{family_check,character_OR_integer-method} \alias{family_check,Pedigree-method} -\alias{family_check,Pedigree} +\alias{family_check,Ped-method} \title{Check family} \usage{ -family_check(obj, ...) - -\S4method{family_check}{character}(obj, dadid, momid, family, newfam) +\S4method{family_check}{character_OR_integer}(obj, dadid, momid, famid, newfam) \S4method{family_check}{Pedigree}(obj) + +\S4method{family_check}{Ped}(obj) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{family}{A vector of family identifiers} +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} \item{newfam}{The result of a call to \code{make_famid()}. If this has already been computed by the user, adding it as an argument shortens the running @@ -34,10 +33,10 @@ time somewhat.} } \value{ a data frame with one row for each unique family id in the -\code{family} argument or the one detected in the Pedigree object. +\code{famid} argument or the one detected in the Pedigree object. Components of the output are: \itemize{ -\item \code{family} : The family id, as entered into the data set +\item \code{famid} : The family id, as entered into the data set \item \code{n} : Number of subjects in the family \item \code{unrelated} : Number of them that appear to be unrelated to anyone else in the entire Pedigree. This is usually marry-ins with no @@ -60,7 +59,7 @@ Error check for a family classification Given a family id vector, also compute the familial grouping from first principles using the parenting data, and compare the results. -The \code{make_famid} function is used to create a de novo family id from the +The \code{\link[=make_famid]{make_famid()}} function is used to create a de novo family id from the parentage data, and this is compared to the family id given in the data. If there are any joins, then an attribute 'join' is attached. @@ -82,19 +81,8 @@ fcheck.combined <- with(sampleped, family_check(id, dadid, momid, rep(1, nrow(sampleped)))) fcheck.combined -# make person 120's father be her son. -sampleped[20, 3] <- 131 -fcheck1.bad <- try( - { - with(sampleped, family_check(id, father, mother, family)) - }, - silent = FALSE -) - -## fcheck1.bad is a try-error - } \seealso{ -\code{\link[=make_famid]{make_famid()}}, \code{\link[=kinship]{kinship()}} +\code{\link[=make_famid]{make_famid()}} } \keyword{internal} diff --git a/man/find_avail_affected.Rd b/man/find_avail_affected.Rd index 46afc347..f8071489 100644 --- a/man/find_avail_affected.Rd +++ b/man/find_avail_affected.Rd @@ -2,31 +2,35 @@ % Please edit documentation in R/find_avail_affected.R \name{find_avail_affected} \alias{find_avail_affected} -\title{Find a single person to trim from a Pedigree whose is available} +\alias{find_avail_affected,Ped-method} +\alias{find_avail_affected,Pedigree-method} +\title{Find single affected and available individual from a Pedigree} \usage{ -find_avail_affected(ped, avail = ped(ped)$avail, affstatus = NA) +\S4method{find_avail_affected}{Ped}(obj, avail = NULL, affected = NULL, affstatus = NA) + +\S4method{find_avail_affected}{Pedigree}(obj, avail = NULL, affected = NULL, affstatus = NA) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{affstatus}{Affection status to search for.} } \value{ A list is returned with the following components \itemize{ -\item ped The new Pedigree object +\item ped The new Ped object \item newAvail Vector of availability status of trimmed individuals \item idTrimmed Vector of IDs of trimmed individuals -\item isTrimmed logical value indicating whether Pedigree has been trimmed -\item bit_size Bit size of the trimmed Pedigree +\item isTrimmed logical value indicating whether Ped object has been trimmed +\item bit_size Bit size of the trimmed Ped } } \description{ @@ -34,9 +38,12 @@ Finds one subject from among available non-parents with indicated affection status. } \details{ -When used within pedigree.shrink, this function is called with the first +When used within \code{\link[=shrink]{shrink()}}, this function is called with the first affected indicator, if the affected item in the Pedigree is a matrix of multiple affected indicators. + +If \strong{avail} or \strong{affected} is null, then the function will use the +corresponding Ped accessor. } \examples{ data(sampleped) @@ -46,3 +53,5 @@ find_avail_affected(ped, affstatus = 1) \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/find_avail_noninform.Rd b/man/find_avail_noninform.Rd index 66ab9e05..7570b9b7 100644 --- a/man/find_avail_noninform.Rd +++ b/man/find_avail_noninform.Rd @@ -2,39 +2,40 @@ % Please edit documentation in R/find_avail_noninform.R \name{find_avail_noninform} \alias{find_avail_noninform} +\alias{find_avail_noninform,Ped-method} +\alias{find_avail_noninform,Pedigree-method} \title{Find uninformative but available subject} \usage{ -find_avail_noninform(ped, avail = ped(ped)$avail, missid = "0") +\S4method{find_avail_noninform}{Ped}(obj, avail = NULL, affected = NULL) + +\S4method{find_avail_noninform}{Pedigree}(obj, avail = NULL, affected = NULL) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} } \value{ Vector of subject ids who can be removed by having lowest informativeness. } \description{ -Find uninformative but available subject +Finds subjects from among available non-parents with all affection +equal to \code{0}. } \details{ -Find subjects from a Pedigree who are available and uninformative - Identify subjects to remove from a Pedigree who are available but -non-informative. This is the second step to remove subjects in -pedigree.shrink if the Pedigree does not meet the desired bit size. +non-informative (unaffected). This is the second step to remove subjects in +\code{\link[=shrink]{shrink()}} if the Pedigree does not meet the desired bit size. + +If \strong{avail} or \strong{affected} is null, then the function will use the +corresponding Ped accessor. } \examples{ data(sampleped) @@ -45,3 +46,5 @@ find_avail_noninform(ped) \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/find_unavailable.Rd b/man/find_unavailable.Rd index 7ab7250e..c48630f2 100644 --- a/man/find_unavailable.Rd +++ b/man/find_unavailable.Rd @@ -2,31 +2,34 @@ % Please edit documentation in R/find_unavailable.R \name{find_unavailable} \alias{find_unavailable} +\alias{find_unavailable,Ped-method} +\alias{find_unavailable,Pedigree-method} \title{Find unavailable subjects in a Pedigree} \usage{ -find_unavailable(ped, avail = ped(ped)$avail) +\S4method{find_unavailable}{Ped}(obj, avail = NULL) + +\S4method{find_unavailable}{Pedigree}(obj, avail = NULL) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} } \value{ Returns a vector of subject ids for who can be removed. } \description{ -Find the ID of subjects in a Pedigree iteratively, as anyone who is not -available and does not have an available descendant by successively removing -unavailable terminal nodes. +Find the identifiers of subjects in a Pedigree iteratively, +as anyone who is not available and does not have an available +descendant by successively removing unavailable terminal nodes. } \details{ +If \strong{avail} is null, then the function will use the +corresponding Ped accessor. + Originally written as pedTrim by Steve Iturria, modified by Dan Schaid 2007, and now split into the two separate functions: \code{find_unavailable()}, and \code{trim()} to do the tasks separately. @@ -38,18 +41,22 @@ isolated after trimming their unavailable offspring, and If the subject ids are character, make sure none of the characters in the ids is a colon (":"), which is a special character used to concatenate and split subjects within the utility. +The \code{trim()} functions is now replaced by the \code{subset()} function. } \section{Side Effects}{ - relation matrix from \code{trim} is trimmed of any + +Relation matrix from subsetting is trimmed of any special relations that include the subjects to trim. } \examples{ data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) find_unavailable(ped1) } \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/findsibs.Rd b/man/findsibs.Rd index 3ae0296d..2deb2084 100644 --- a/man/findsibs.Rd +++ b/man/findsibs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/auto_hint.R \name{findsibs} \alias{findsibs} -\title{Routine to find the siblings of a subject} +\title{Find the siblings of a subject} \usage{ findsibs(idpos, plist, lev) } @@ -10,21 +10,7 @@ findsibs(idpos, plist, lev) \item{idpos}{The position of the subject} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} } @@ -32,7 +18,7 @@ It contains the following components: The positions of the siblings } \description{ -Routine to find the siblings of a subject +Find the siblings of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -41,4 +27,5 @@ It finds the siblings of a subject. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/findspouse.Rd b/man/findspouse.Rd index 71a9c3db..047b2149 100644 --- a/man/findspouse.Rd +++ b/man/findspouse.Rd @@ -2,39 +2,25 @@ % Please edit documentation in R/auto_hint.R \name{findspouse} \alias{findspouse} -\title{Routine to find the spouse of a subject} +\title{Find the spouse of a subject} \usage{ -findspouse(idpos, plist, lev, ped) +findspouse(idpos, plist, lev, obj) } \arguments{ \item{idpos}{The position of the subject} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ The position of the spouse } \description{ -Routine to find the spouse of a subject +Find the spouse of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -43,4 +29,5 @@ It finds the spouse of a subject. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/fix_parents.Rd b/man/fix_parents.Rd index 8465f0a0..7d26e8c7 100644 --- a/man/fix_parents.Rd +++ b/man/fix_parents.Rd @@ -3,44 +3,41 @@ \name{fix_parents} \alias{fix_parents} \alias{fix_parents,character-method} -\alias{fix_parents,character} \alias{fix_parents,data.frame-method} -\title{Fix details on the parents for children of the Pedigree} +\title{Fix parents relationship and gender} \usage{ -fix_parents(obj, ...) +\S4method{fix_parents}{character}(obj, dadid, momid, sex, famid = NULL, missid = NA_character_) -\S4method{fix_parents}{character}(obj, dadid, momid, sex, family = NULL, missid = "0") - -\S4method{fix_parents}{data.frame}(obj, delete = FALSE, filter = NULL, missid = "0") +\S4method{fix_parents}{data.frame}(obj, delete = FALSE, filter = NULL, missid = NA_character_) } \arguments{ \item{obj}{A data.frame or a vector of the individuals identifiers. If a dataframe is given it must contain the columns \code{id}, \code{dadid}, -\code{momid}, \code{sex} and \code{family}. Family is optional.} - -\item{...}{Additional arguments passed to methods} +\code{momid}, \code{sex} and \code{famid} (optional).} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} -\item{family}{Optional family identification set it to NULL to invalidate. -If used it will modify the ids of the individuals by pasting it with an _.} +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} \item{delete}{Boolean defining if missing parents needs to be: \itemize{ @@ -56,7 +53,7 @@ A data.frame with id, dadid, momid, sex as columns with the relationships fixed. } \description{ -Fix the sex of parents, add parents that are missing from the Pedigree +Fix the sex of parents, add parents that are missing from the data. Can be used with a dataframe or a vector of the different individuals informations. } @@ -64,16 +61,17 @@ different individuals informations. First look to add parents whose ids are given in momid/dadid. Second, fix sex of parents. Last look to add second parent for children for whom only one parent id is given. -If a family vector is given the family id will be added to the ids of all -individuals (id, dadid, momid) separated by an underscore befor proceeding. +If a \strong{famid} vector is given the family id will be added to the ids of all +individuals (\code{id}, \code{dadid}, \code{momid}) +separated by an underscore before proceeding. \subsection{Special case for dataframe}{ -Check for presence of both parents id in the \code{id} field. -If not both presence behaviour depend of \code{delete} parameter +Check for presence of both parents id in the \strong{id} field. +If not both presence behaviour depend of \strong{delete} parameter \itemize{ -\item If TRUE then use fix_parents function and merge back the other fields +\item If \code{TRUE} then use fix_parents function and merge back the other fields in the dataframe then set availability to O for non available parents. -\item If FALSE then delete the id of missing parents +\item If \code{FALSE} then delete the id of missing parents } } } @@ -93,10 +91,9 @@ test1char <- data.frame( ) test1newmom <- with(test1char, fix_parents(id, father, mother, sex, - missid = '0' + missid = NA_character_ )) -newped <- Pedigree(test1newmom) -as.data.frame(newped) +Pedigree(test1newmom) } \author{ diff --git a/man/generate_aff_inds.Rd b/man/generate_aff_inds.Rd index d61a6f6c..f8779a53 100644 --- a/man/generate_aff_inds.Rd +++ b/man/generate_aff_inds.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/generate_aff_inds.R \name{generate_aff_inds} \alias{generate_aff_inds} -\title{Process the information for affection} +\title{Process the affection informations} \usage{ generate_aff_inds( values, @@ -37,14 +37,31 @@ The different columns are: } } \description{ -Process the information for affection +Perform transformation uppon a vector given as the one +containing the affection status to obtain an \code{affected} binary state. } \details{ -Perform transformation uppon a column given as the one -containing affection status to get an \code{affected} column usable for -the rest of the script +This function helps to configure a binary state from a character or +numeric variable. +\subsection{If the variable is a \code{character} or a \code{factor}:}{ + +In this case the affected state will depend on the modality provided as +an affected status. All individuals with a value corresponding to one of the +element in the vector \strong{mods_aff} will be considered as affected. +} + +\subsection{If the variable is \code{numeric}:}{ + +In this case the affected state will be \code{TRUE} if the value of the individual +is above the \strong{threshold} if \strong{sup_thres_aff} is \code{TRUE} and \code{FALSE} +otherwise. +} } \examples{ generate_aff_inds(c(1, 2, 3, 4, 5), threshold = 3, sup_thres_aff = TRUE) generate_aff_inds(c("A", "B", "C", "A", "V", "B"), mods_aff = c("A", "B")) } +\author{ +Louis Le Nézet +} +\keyword{generate_scales} diff --git a/man/generate_border.Rd b/man/generate_border.Rd index f33ce0b7..3c1fc4a9 100644 --- a/man/generate_border.Rd +++ b/man/generate_border.Rd @@ -2,30 +2,35 @@ % Please edit documentation in R/generate_colors.R \name{generate_border} \alias{generate_border} -\title{Process the colors based on affection and availability} +\title{Process the border colors based on availability} \usage{ -generate_border(avail, colors_avail = c("green", "black")) +generate_border(values, colors_avail = c("green", "black")) } \arguments{ -\item{avail}{The vector containing the availability status. -The values need to be numeric and can only be 0, 1 or NA.} +\item{values}{The vector containing the values to process as available.} \item{colors_avail}{Set of 2 colors to use for the box's border of an -individual. The first color will be used for available individual (avail -== 1) and the second for the unavailable individual (avail == 0).} +individual. The first color will be used for available individual +(\code{avail == 1}) and the second for the unavailable individual +(\code{avail == 0}).} } \value{ -A dataframe containing the scale to use for the availability -status. +A list of three elements +\itemize{ +\item \code{mods} : The processed values column as a numeric factor +\item \code{avail} : A logical vector indicating if the individual is available +\item \code{sc_bord} : A dataframe containing the description of each modality of the +scale } -\description{ -Process the colors based on affection and availability } -\details{ -Perform transformation uppon a column given as the one +\description{ +Perform transformation uppon a vector given as the one containing the availability status to compute the border color. +The vector given will be transformed using the \code{\link[=vect_to_binary]{vect_to_binary()}} +function. } \examples{ generate_border(c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA)) } +\keyword{generate_scales} diff --git a/man/generate_colors.Rd b/man/generate_colors.Rd index aa631296..a6452615 100644 --- a/man/generate_colors.Rd +++ b/man/generate_colors.Rd @@ -1,28 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/generate_colors.R -\docType{methods} \name{generate_colors} \alias{generate_colors} \alias{generate_colors,character-method} -\alias{generate_colors,character} \alias{generate_colors,numeric-method} -\alias{generate_colors,numeric} -\alias{generate_colors,logical-method} -\alias{generate_colors,logical} \alias{generate_colors,Pedigree-method} -\alias{generate_colors,Pedigree} -\title{Process the colors based on affection and availability} +\title{Process the filling and border colors based on affection and availability} \usage{ -generate_colors(obj, ...) - \S4method{generate_colors}{character}( obj, avail, mods_aff = NULL, - threshold = 0.5, - sup_thres_aff = TRUE, keep_full_scale = FALSE, - breaks = 3, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), colors_avail = c("green", "black") @@ -31,20 +20,6 @@ generate_colors(obj, ...) \S4method{generate_colors}{numeric}( obj, avail, - mods_aff = NULL, - threshold = 0.5, - sup_thres_aff = TRUE, - keep_full_scale = FALSE, - breaks = 3, - colors_aff = c("yellow2", "red"), - colors_unaff = c("white", "steelblue4"), - colors_avail = c("green", "black") -) - -\S4method{generate_colors}{logical}( - obj, - avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, keep_full_scale = FALSE, @@ -72,21 +47,31 @@ generate_colors(obj, ...) } \arguments{ \item{obj}{A Pedigree object or a vector containing the affection status for -each individuals. The affection status can be numeric, logical or character.} +each individuals. The affection status can be numeric or a character.} -\item{...}{Other parameters to pass to the \code{generate_colors} function} - -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} \item{mods_aff}{Vector of modality to consider as affected in the case where the \code{values} is a factor.} +\item{keep_full_scale}{Boolean defining if the affection values need to +be set as a scale. If \code{values} is numeric the filling scale will be +calculated based on the values and the number of breaks given. +If \code{values} isn't numeric then each levels will get it's own color} + +\item{colors_aff}{Set of increasing colors to use for the filling of the +affected individuls.} + +\item{colors_unaff}{Set of increasing colors to use for the filling of the +unaffected individuls.} + +\item{colors_avail}{Set of 2 colors to use for the box's border of an +individual. The first color will be used for available individual +(\code{avail == 1}) and the second for the unavailable individual +(\code{avail == 0}).} + \item{threshold}{Numeric value separating the affected and healthy subject in the case where the \code{values} is numeric.} @@ -97,31 +82,18 @@ if the value of \code{values} is stricly above the \code{threshold}. If \code{FALSE}, the individuals will be considered affected if the value is stricly under the \code{threshold}.} -\item{keep_full_scale}{Boolean defining if the affection values need to -be set as a scale. If \code{values} is numeric the filling scale will be -calculated based on the values and the number of breaks given. -If \code{values} isn't numeric then each levels will get it's own color} - \item{breaks}{Number of breaks to use when using full scale with numeric values. The same number of breaks will be done for values from affected individuals and unaffected individuals.} -\item{colors_aff}{Set of increasing colors to use for the filling of the -affected individuls.} - -\item{colors_unaff}{Set of increasing colors to use for the filling of the -unaffected individuls.} - -\item{colors_avail}{Set of 2 colors to use for the box's border of an -individual. The first color will be used for available individual (avail -== 1) and the second for the unavailable individual (avail == 0).} - -\item{col_aff}{A string with the column name to use for the affection status.} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} \item{add_to_scale}{Boolean defining if the scales need to be added to the existing scales or if they need to replace the existing scales.} -\item{col_avail}{The name of the column containing the availability status.} +\item{col_avail}{A character vector with the name of the column to be used +for the availability status.} \item{reset}{If \code{TRUE} the scale of the specified column will be reset if already present.} @@ -129,32 +101,44 @@ already present.} \value{ \subsection{When used with a vector}{ -A list of three elements +A list of two elements \itemize{ -\item A vector containing the transformed filling modalities -\item A dataframe containing the description of each filling modalities -\item A dataframe containing the description of the border modalities +\item The list containing the filling colors processed and their description +\item The list containing the border colors processed and their description } } \subsection{When used with a Pedigree object}{ The Pedigree object with the \code{affected} and \code{avail} columns -processed accordingly. - -The Pedigree scales slots are updated +processed accordingly as well as the \code{scales} slot updated. } } \description{ -Process the colors based on affection and availability -} -\details{ Perform transformation uppon a dataframe given to compute the colors for the filling and the border of the individuals based on the affection and availability status. } +\details{ +The colors will be set using the \code{\link[=generate_fill]{generate_fill()}} and the +\code{\link[=generate_border]{generate_border()}} functions respectively for the filling and the border. +} \examples{ + +generate_colors( + c("A", "B", "A", "B", NA, "A", "B", "A", "B", NA), + c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), + mods_aff = "A", +) + +generate_colors( + c(10, 0, 5, 7, NA, 6, 2, 1, 3, NA), + c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), + threshold = 3, keep_full_scale = TRUE +) data("sampleped") ped <- Pedigree(sampleped) -generate_colors(ped, "affected", add_to_scale=FALSE)$scales +ped <- generate_colors(ped, "affected", add_to_scale=FALSE) +scales(ped) } +\keyword{generate_scales} diff --git a/man/generate_fill.Rd b/man/generate_fill.Rd index 044ab2a4..c72aff9f 100644 --- a/man/generate_fill.Rd +++ b/man/generate_fill.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/generate_colors.R \name{generate_fill} \alias{generate_fill} -\title{Process the colors based on affection} +\title{Process the filling colors based on affection} \usage{ generate_fill( values, @@ -17,7 +17,9 @@ generate_fill( \arguments{ \item{values}{The vector containing the values to process as affection.} -\item{affected}{The vector containing the affection status TRUE/FALSE.} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{labels}{The vector containing the labels to use for the affection.} @@ -37,21 +39,47 @@ affected individuls.} unaffected individuls.} } \value{ -A list of two elements +A list of three elements \itemize{ -\item The processed values column as a numeric factor -\item A dataframe containing the description of each modality of the scale +\item \code{mods} : The processed values column as a numeric factor +\item \code{affected} : A logical vector indicating if the individual is affected +\item \code{sc_fill} : A dataframe containing the description of each modality of the +scale } } \description{ -Process the colors based on affection -} -\details{ Perform transformation uppon a column given as the one containing affection status to compute the filling color. } +\details{ +The colors will be set using the +\code{\link[grDevices:colorRamp]{grDevices::colorRampPalette()}} function +with the colors given as parameters. + +The colors will be set as follow: +\itemize{ +\item If \strong{keep_full_scale} is \code{FALSE}: +Then the affected individuals will get the first color of the +\strong{colors_aff} vector and the unaffected individuals will get the +first color of the \strong{colors_unaff} vector. +\item If \strong{keep_full_scale} is \code{TRUE}: +\itemize{ +\item If \strong{values} isn't numeric: +Each levels of the affected \strong{values} vector will get it's own color from +the \strong{colors_aff} vector using the \code{\link[grDevices:colorRamp]{grDevices::colorRampPalette()}} and +the same will be done for the unaffected individuals using the +\strong{colors_unaff}. +\item If \strong{values} is numeric: +The mean of the affected individuals will be compared to the mean of the +unaffected individuals and the colors will be set up such as the color +gradient follow the direction of the affection. +} +} +} \examples{ aff <- generate_aff_inds(seq_len(5), threshold = 3, sup_thres_aff = TRUE) generate_fill(seq_len(5), aff$affected, aff$labels) generate_fill(seq_len(5), aff$affected, aff$labels, keep_full_scale = TRUE) + } +\keyword{generate_scales} diff --git a/man/get_twin_rel.Rd b/man/get_twin_rel.Rd index a542a631..73373c90 100644 --- a/man/get_twin_rel.Rd +++ b/man/get_twin_rel.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/auto_hint.R \name{get_twin_rel} \alias{get_twin_rel} -\title{Routine to get twin relationships} +\title{Get twin relationships} \usage{ -get_twin_rel(ped) +get_twin_rel(obj) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ A list containing components @@ -18,16 +18,16 @@ A list containing components } } \description{ -Routine to get twin relationships +Get twin relationships } \details{ This routine function determine the twin relationships -in a Pedigree. It complete the missing twin relationships for -triplets, quads, etc. It also determine the order of the twins +in a Pedigree. It determine the order of the twins in the Pedigree. It is used by \code{auto_hint()}. } \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/ibd_matrix.Rd b/man/ibd_matrix.Rd index 4b3cb289..55049902 100644 --- a/man/ibd_matrix.Rd +++ b/man/ibd_matrix.Rd @@ -2,14 +2,16 @@ % Please edit documentation in R/ibd_matrix.R \name{ibd_matrix} \alias{ibd_matrix} -\title{Create an IBD matrix} +\title{IBD matrix} \usage{ ibd_matrix(id1, id2, ibd, idmap, diagonal) } \arguments{ -\item{id1}{First subject identifiers} +\item{id1}{A character vector with the id of the first individuals of each +pairs} -\item{id2}{Second subject identifiers} +\item{id2}{A character vector with the id of the second individuals of each +pairs} \item{ibd}{the IBD value for that pair} diff --git a/man/is_disconnected.Rd b/man/is_disconnected.Rd index aeb92c8e..15d8f298 100644 --- a/man/is_disconnected.Rd +++ b/man/is_disconnected.Rd @@ -2,28 +2,34 @@ % Please edit documentation in R/utils.R \name{is_disconnected} \alias{is_disconnected} -\title{Check wich individuals are disconnected} +\title{Are individuals disconnected} \usage{ is_disconnected(id, dadid, momid) } \arguments{ -\item{id}{A vector of each subjects identifiers} - \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ -A vector of boolean of the same size as \code{id} -with TRUE if the individual is disconnected and FALSE otherwise +A vector of boolean of the same size as \strong{id} +with \code{TRUE} if the individual is disconnected and +\code{FALSE} otherwise } \description{ Check which individuals are disconnected. } \details{ An individuals is considered disconnected if the kinship with -all the other individuals is 0. +all the other individuals is \code{0}. +} +\examples{ +is_disconnected( + c("1", "2", "3", "4", "5"), + c("3", "3", NA, NA, NA), + c("4", "4", NA, NA, NA) +) } \keyword{internal} diff --git a/man/is_founder.Rd b/man/is_founder.Rd index e09e09da..c5c5b154 100644 --- a/man/is_founder.Rd +++ b/man/is_founder.Rd @@ -2,28 +2,30 @@ % Please edit documentation in R/utils.R \name{is_founder} \alias{is_founder} -\title{Check wich individuals are founders} +\title{Are individuals founders} \usage{ -is_founder(momid, dadid, missid = "0") +is_founder(momid, dadid, missid = NA_character_) } \arguments{ -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ -A vector of boolean of the same size as \code{dadid} and \code{momid} -with TRUE if the individual has no parents (i.e is a founder) and FALSE -otherwise. +A vector of boolean of the same size as \strong{dadid} and \strong{momid} +with \code{TRUE} if the individual has no parents (i.e is a founder) and +\code{FALSE} otherwise. } \description{ Check which individuals are founders. } +\examples{ +is_founder(c("3", "3", NA, NA), c("4", "4", NA, NA)) +} \keyword{internal} diff --git a/man/is_informative.Rd b/man/is_informative.Rd index 53153e83..006968a4 100644 --- a/man/is_informative.Rd +++ b/man/is_informative.Rd @@ -3,44 +3,25 @@ \docType{methods} \name{is_informative} \alias{is_informative} -\alias{is_informative,character-method} -\alias{is_informative,character} +\alias{is_informative,character_OR_integer-method} \alias{is_informative,Pedigree-method} -\alias{is_informative,Pedigree} -\title{Is informative} +\title{Find informative individuals} \usage{ -is_informative(obj, ...) +\S4method{is_informative}{character_OR_integer}(obj, avail, affected, informative = "AvAf") -\S4method{is_informative}{character}(obj, avail, affected, informative = "AvAf", missid = "0") - -\S4method{is_informative}{Pedigree}( - obj, - col_aff = NULL, - informative = "AvAf", - missid = "0", - reset = FALSE -) +\S4method{is_informative}{Pedigree}(obj, col_aff = NULL, informative = "AvAf", reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} - -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{informative}{Informative individuals selection can take 5 values: \itemize{ @@ -53,44 +34,49 @@ is_informative(obj, ...) \item A boolean }} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{col_aff}{A string with the column name to use for the affection status.} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} -\item{reset}{If \code{TRUE}, the \code{id_inf} column is reset} +\item{reset}{If \code{TRUE}, the \code{isinf} slot is reset} } \value{ \subsection{When obj is a vector}{ -A vector of individuals informative identifiers +A vector of individuals informative identifiers. } \subsection{When obj is a Pedigree}{ -A list containing the Pedigree object and the vector of individuals -identifiers. -The Pedigree object will have a new column named 'id_inf' containing 1 for -informative individuals and 0 otherwise. +The Pedigree object with its \code{isinf} slot updated. } } \description{ Select the ids of the informative individuals. } \details{ -Depending on the informative parameter, the function will extract -the ids of the informative individuals. In the case of a numeric vector, -the function will return the same vector. In the case of a boolean, the -function will return the ids of the individuals if TRUE, NA otherwise. +Depending on the \strong{informative} parameter, the function will +extract the ids of the informative individuals. In the case of a +numeric vector, the function will return the same vector. +In the case of a boolean, the function will return the ids of the +individuals if TRUE, NA otherwise. In the case of a string, the function will return the ids of the corresponding informative individuals based on the avail and affected columns. } \examples{ + +is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +is_informative(c("A", "B", "C", "D", "E"), informative = c(1, 2)) +is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), + affected = c(0, 1, 0, 1, 1), informative = "AvAf") +is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), + affected = c(0, 1, 0, 1, 1), informative = "AvOrAf") +is_informative(c("A", "B", "C", "D", "E"), + informative = c(TRUE, FALSE, TRUE, FALSE, TRUE)) + data("sampleped") ped <- Pedigree(sampleped) -is_informative(ped, col_aff = "affection_aff") - +ped <- is_informative(ped, col_aff = "affection_mods") +isinf(ped(ped)) } diff --git a/man/is_parent.Rd b/man/is_parent.Rd index 5ad27b20..8a277438 100644 --- a/man/is_parent.Rd +++ b/man/is_parent.Rd @@ -2,29 +2,40 @@ % Please edit documentation in R/utils.R \name{is_parent} \alias{is_parent} -\title{Check wich individuals are parents} +\alias{is_parent,character_OR_integer-method} +\alias{is_parent,Ped-method} +\title{Are individuals parents} \usage{ -is_parent(id, dadid, momid, missid = "0") +\S4method{is_parent}{character_OR_integer}(obj, dadid, momid, missid = NA_character_) + +\S4method{is_parent}{Ped}(obj, missid = NA_character_) } \arguments{ -\item{id}{A vector of each subjects identifiers} +\item{obj}{A vector of each subjects identifiers or a Ped object} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ -A vector of boolean of the same size as \code{id} +A vector of boolean of the same size as \strong{obj} with TRUE if the individual is a parent and FALSE otherwise } \description{ Check which individuals are parents. } +\examples{ + +is_parent(c("1", "2", "3", "4"), c("3", "3", NA, NA), c("4", "4", NA, NA)) + +data(sampleped) +ped <- Pedigree(sampleped) +is_parent(ped(ped)) +} \keyword{internal} diff --git a/man/is_valid.Rd b/man/is_valid.Rd deleted file mode 100644 index 84d7fc09..00000000 --- a/man/is_valid.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R -\name{is_valid} -\alias{is_valid} -\title{Pedigree validity method.} -\usage{ -is_valid(object) -} -\arguments{ -\item{object}{A Pedigree object.} -} -\value{ -A logical value or a character vector with the errors. -} -\description{ -Check if the Pedigree object is valid. -} -\details{ -It will check : -the fields of the slots -the values in the columns of the ped, rel and scale slot -} -\keyword{internal} diff --git a/man/is_valid_hints.Rd b/man/is_valid_hints.Rd new file mode 100644 index 00000000..734fe26a --- /dev/null +++ b/man/is_valid_hints.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_hints} +\alias{is_valid_hints} +\title{Check if a Hints object is valid} +\usage{ +is_valid_hints(object) +} +\arguments{ +\item{object}{A Hints object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Check if horder and spouse slots are valid: +\itemize{ +\item horder is named numeric vector +\item spouse is a data.frame +\itemize{ +\item Has the three \code{idr}, \code{idl}, \code{anchor} columns +\item \code{idr} and \code{idl} are different and doesn't contains \code{NA} +\item \code{idr} and \code{idl} couple are unique +\item \code{anchor} column only have \code{right}, \code{left}, \code{either} values +} +\item all ids in spouse needs to be in the names of the horder vector +} +} +\keyword{internal} diff --git a/man/is_valid_ped.Rd b/man/is_valid_ped.Rd new file mode 100644 index 00000000..058d93a0 --- /dev/null +++ b/man/is_valid_ped.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_ped} +\alias{is_valid_ped} +\title{Check if a Ped object is valid} +\usage{ +is_valid_ped(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the ped ids slots have the right values +\item Check that the sex, steril, status, avail and affected slots have the +right values +\item Check that dad are male and mom are female +\item Check that individuals have both parents or none +} +} +\keyword{internal} diff --git a/man/is_valid_pedigree.Rd b/man/is_valid_pedigree.Rd new file mode 100644 index 00000000..9868edba --- /dev/null +++ b/man/is_valid_pedigree.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_pedigree} +\alias{is_valid_pedigree} +\title{Check if a Pedigree object is valid} +\usage{ +is_valid_pedigree(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the all Rel id are in the Ped object +\item Check that twins have same parents +\item Check that MZ twins have same sex +\item Check that all columns used in scales are in the Ped object +\item Check that all fill & border modalities are in the Ped object column +\item Check that all id used in Hints object are in the Ped object +\item Check that all spouse in Hints object are male / female +} +} +\keyword{internal} diff --git a/man/is_valid_rel.Rd b/man/is_valid_rel.Rd new file mode 100644 index 00000000..eb639956 --- /dev/null +++ b/man/is_valid_rel.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_rel} +\alias{is_valid_rel} +\title{Check if a Rel object is valid} +\usage{ +is_valid_rel(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the "id1", "id2", "code", "famid" slots exist +\item Check that the "code" slots have the right values +(i.e. "MZ twin", "DZ twin", "UZ twin", "Spouse") +\item Check that all "id1" are different to "id2" +\item Check that all "id1" are smaller than "id2" +\item Check that no duplicate relation are present +} +} +\keyword{internal} diff --git a/man/is_valid_scales.Rd b/man/is_valid_scales.Rd new file mode 100644 index 00000000..49dc2830 --- /dev/null +++ b/man/is_valid_scales.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_scales} +\alias{is_valid_scales} +\title{Check if a Scales object is valid} +\usage{ +is_valid_scales(object) +} +\arguments{ +\item{object}{A Scales object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Check if the fill and border slots are valid: +\itemize{ +\item fill slot is a data.frame with "order", "column_values", +"column_mods", "mods", "labels", "affected", "fill", +"density", "angle" columns. +\itemize{ +\item "affected" is logical. +\item "density", "angle", "order", "mods" are numeric. +\item "column_values", "column_mods", "labels", "fill" are +character. +} +\item border slot is a data.frame with "column_values", +"column_mods", "mods", "labels", "border" columns. +\itemize{ +\item "column_values", "column_mods", "labels", "border" are +character. +\item "mods" is numeric. +} +} +} +\keyword{internal} diff --git a/man/kindepth.Rd b/man/kindepth.Rd index 50e9963c..65c65d41 100755 --- a/man/kindepth.Rd +++ b/man/kindepth.Rd @@ -1,59 +1,65 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/kindepth.R -\docType{methods} \name{kindepth} \alias{kindepth} -\alias{kindepth,character-method} -\alias{kindepth,character} +\alias{kindepth,character_OR_integer-method} \alias{kindepth,Pedigree-method} -\alias{kindepth,Pedigree} -\title{Compute the depth of each subject in a Pedigree} +\alias{kindepth,Ped-method} +\title{Individual's depth in a pedigree} \usage{ kindepth(obj, ...) -\S4method{kindepth}{character}(obj, dadid, momid, align_parents = FALSE) +\S4method{kindepth}{character_OR_integer}(obj, dadid, momid, align_parents = FALSE) \S4method{kindepth}{Pedigree}(obj, align_parents = FALSE) + +\S4method{kindepth}{Ped}(obj, align_parents = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{...}{Additional arguments} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{align_parents}{If \code{align_parents=T}, go one step further and try to -make both parents of each child have the same depth. +\item{align_parents}{If \code{align_parents = TRUE}, go one step further +and try to make both parents of each child have the same depth. (This is not always possible). -It helps the drawing program by lining up pedigrees that 'join in the middle' -via a marriage.} +It helps the drawing program by lining up pedigrees that +'join in the middle' via a marriage.} } \value{ -an integer vector containing the depth for each subject +An integer vector containing the depth for each subject } \description{ Computes the depth of each subject in the Pedigree. } \details{ -Mark each person as to their depth in a Pedigree; 0 for a founder, otherwise +Mark each person as to their depth in a Pedigree; \code{0} for a founder, +otherwise : -\eqn{depth = 1 + \max(fatherDepth, motherDepth)} +\deqn{depth = 1 + \max(fatherDepth, motherDepth)} -In the case of an inbred Pedigree a perfect alignment obeying -\code{extra=TRUE} may not exist. +In the case of an inbred Pedigree a perfect alignment may not exist. } \examples{ +kindepth( + c("A", "B", "C", "D", "E"), + c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0") +) data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) kindepth(ped1) } \seealso{ \code{\link[=align]{align()}} } \author{ -Terry Therneau +Terry Therneau, updated by Louis Le Nézet } diff --git a/man/kinship.Rd b/man/kinship.Rd index 93429ced..3c8e8e93 100755 --- a/man/kinship.Rd +++ b/man/kinship.Rd @@ -1,41 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/kinship.R -\docType{methods} \name{kinship} \alias{kinship} +\alias{kinship,Ped-method} \alias{kinship,character-method} -\alias{kinship,character} \alias{kinship,Pedigree-method} -\alias{kinship,Pedigree} -\title{Compute a kinship matrix} +\title{Kinship matrix} \usage{ -kinship(obj, ...) +\S4method{kinship}{Ped}(obj, chrtype = "autosome") \S4method{kinship}{character}(obj, dadid, momid, sex, chrtype = "autosome") \S4method{kinship}{Pedigree}(obj, chrtype = "autosome") } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A Pedigree or Ped object or a vector of subject identifiers.} -\item{...}{Additional arguments passed to methods} +\item{chrtype}{chromosome type. The currently supported types are +'autosome' and 'X' or 'x'.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} - -\item{chrtype}{chromosome type. The currently supported types are -'autosome' and 'X' or 'x'.} } \value{ \subsection{When obj is a vector}{ @@ -43,25 +41,25 @@ the gender of the individuals. The following values are recognized: A matrix of kinship coefficients. } -\subsection{When obj is a pedigree}{ +\subsection{When obj is a Pedigree}{ A matrix of kinship coefficients ordered by families present -in the pedigree. +in the Pedigree object. } } \description{ -Compute the kinship matrix for a set of related autosomal subjects. The -function is generic, and can accept a Pedigree, vector as +Compute the kinship matrix for a set of related autosomal subjects. +The function is generic, and can accept a Pedigree, a Ped or a vector as the first argument. } \details{ -The function will usually be called with a Pedigree the -third form is provided for backwards compatibility with an earlier release -of the library that was less capable. The first argument is named \code{id} -for the same reason. Note that when using the third form any information on +The function will usually be called with a Pedigree. +The call with a Ped or a vector is provided for backwards compatibility +with an earlier release of the library that was less capable. +Note that when using with a Ped or a vector, any information on twins is not available to the function. -When called with a pedigree, the routine +When called with a Pedigree, the routine will create a block-diagonal-symmetric sparse matrix object of class \code{dsCMatrix}. Since the \verb{[i, j]} value of the result is 0 for any two unrelated individuals i and j and a \code{Matrix} utilizes sparse @@ -83,15 +81,23 @@ The computation is based on a recursive algorithm described in Lange, which assumes that the founder alleles are all independent. } \section{References}{ - K Lange, Mathematical and Statistical Methods for + +K Lange, Mathematical and Statistical Methods for Genetic Analysis, Springer-Verlag, New York, 1997. } \examples{ + +kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1)) +kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1), + chrtype = "x" +) + data(sampleped) ped <- Pedigree(sampleped) kinship(ped) - } \seealso{ \code{\link[=make_famid]{make_famid()}}, \code{\link[=kindepth]{kindepth()}} diff --git a/man/length-Pedigree-method.Rd b/man/length-Pedigree-method.Rd deleted file mode 100644 index 66aca38e..00000000 --- a/man/length-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{length,Pedigree-method} -\alias{length,Pedigree-method} -\title{Compute the length of a Pedigree object} -\usage{ -\S4method{length}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -The number of individuals in the Pedigree object. -} -\description{ -Compute the length of a Pedigree object -} diff --git a/man/make_famid.Rd b/man/make_famid.Rd index ad7439cf..1f9805d5 100644 --- a/man/make_famid.Rd +++ b/man/make_famid.Rd @@ -1,29 +1,23 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/make_famid.R -\docType{methods} \name{make_famid} \alias{make_famid} \alias{make_famid,character-method} -\alias{make_famid,character} \alias{make_famid,Pedigree-method} -\alias{make_famid,Pedigree} -\title{Get family id} +\title{Compute family id} \usage{ -make_famid(obj, ...) - \S4method{make_famid}{character}(obj, dadid, momid) \S4method{make_famid}{Pedigree}(obj) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -35,10 +29,11 @@ An integer vector giving family groupings \subsection{When used with a Pedigree object}{ An updated Pedigree object with the family id added +and with all ids updated } } \description{ -Construct a family id from Pedigree information +Construct a family identifier from pedigree information } \details{ Create a vector of length n, giving the family 'tree' number of each @@ -47,6 +42,13 @@ tree 1, otherwise the tree numbers represent the disconnected subfamilies. Singleton subjects give a zero for family number. } \examples{ + +make_famid( + c("A", "B", "C", "D", "E", "F"), + c("C", "D", "0", "0", "0", "0"), + c("E", "E", "0", "0", "0", "0") +) + data(sampleped) ped1 <- Pedigree(sampleped[,-1]) make_famid(ped1) diff --git a/man/min_dist_inf.Rd b/man/min_dist_inf.Rd index 485c8c3b..9dac04ed 100644 --- a/man/min_dist_inf.Rd +++ b/man/min_dist_inf.Rd @@ -1,61 +1,44 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/min_dist_inf.R -\docType{methods} \name{min_dist_inf} \alias{min_dist_inf} \alias{min_dist_inf,character-method} -\alias{min_dist_inf,character} \alias{min_dist_inf,Pedigree-method} -\alias{min_dist_inf,Pedigree} +\alias{min_dist_inf,Ped-method} \title{Minimum distance to the informative individuals} \usage{ -min_dist_inf(obj, ...) +\S4method{min_dist_inf}{character}(obj, dadid, momid, sex, id_inf) -\S4method{min_dist_inf}{character}(obj, dadid, momid, sex, avail, affected, informative = "AvAf") +\S4method{min_dist_inf}{Pedigree}(obj, col_aff = NULL, informative = "AvAf", reset = FALSE, ...) -\S4method{min_dist_inf}{Pedigree}( - obj, - col_aff = NULL, - informative = "AvAf", - missid = "0", - reset = FALSE, - ... -) +\S4method{min_dist_inf}{Ped}(obj, informative = "AvAf", reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{...}{Additional arguments} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{id_inf}{An identifiers vector of informative individuals.} -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} \item{informative}{Informative individuals selection can take 5 values: \itemize{ @@ -68,14 +51,7 @@ the gender of the individuals. The following values are recognized: \item A boolean }} -\item{col_aff}{A string with the column name to use for the affection status.} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{reset}{If TRUE, the \code{kin} and if \code{id_inf} columns is reset} +\item{reset}{If TRUE, the \code{kin} and if \code{isinf} columns is reset} } \value{ \subsection{When obj is a vector}{ @@ -87,8 +63,9 @@ and all the others corresponding to the order of the individuals in the \subsection{When obj is a Pedigree}{ -The Pedigree object with a new column named 'kin' containing the kinship -degree. +The Pedigree object with a new slot named 'kin' containing the minimum +distance between each individuals and the informative individuals. +The \code{isinf} slot is also updated with the informative individuals. } } \description{ @@ -107,10 +84,18 @@ is 0.5 and the minimum distance is 1. Each time the kinship degree is divided by 2, the minimum distance is increased by 1. } \examples{ + +min_dist_inf( + c("A", "B", "C", "D", "E"), + c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), + sex = c(1, 2, 1, 2, 1), + id_inf = c("D", "E") +) + data(sampleped) ped <- Pedigree(sampleped) -min_dist_inf(ped, col_aff = "affection_aff")$ped - +kin(ped(min_dist_inf(ped, col_aff = "affection_mods"))) } \seealso{ \code{\link[=kinship]{kinship()}} diff --git a/man/minnbreast.Rd b/man/minnbreast.Rd index f07282c2..d5eeba2c 100644 --- a/man/minnbreast.Rd +++ b/man/minnbreast.Rd @@ -8,34 +8,31 @@ A data frame with 28081 observations, one line per subject, on the following 14 variables. \itemize{ -\item \code{id} subject identifier -\item \code{proband} if 1, this subject is one of the original +\item \code{id} : Subject identifier +\item \code{proband} : If 1, this subject is one of the original 426 probands -\item \code{fatherid} identifier of the father, if the father is part of +\item \code{fatherid} : Identifier of the father, if the father is part of the data set; zero otherwise -\item \code{motherid} identifier of the mother, if the mother is part of +\item \code{motherid} : Identifier of the mother, if the mother is part of the data set; zero otherwise -\item \code{famid} family identifier -\item \code{endage} age at last follow-up or incident cancer -\item \code{cancer} 1= breast cancer (females) or prostate cancer (males), -0=censored -\item \code{yob} year of birth -\item \code{education} amount of education: 1-8 years, 9-12 years, high -school graduate, vocational education -beyond high school, some college but did not graduate, college graduate, -post-graduate education, refused to -answer on the questionnaire -\item \code{marstat} marital status: married, living with someone in a +\item \code{famid} : Family identifier +\item \code{endage} : Age at last follow-up or incident cancer +\item \code{cancer} : \code{1} = breast cancer (females) or prostate cancer (males), +\code{0} = censored +\item \code{yob} : Year of birth +\item \code{education} : Amount of education: 1-8 years, 9-12 years, high +school graduate, vocational education beyond high school, +some college but did not graduate, college graduate, +post-graduate education, refused to answer on the questionnaire +\item \code{marstat} : Marital status: married, living with someone in a marriage-like relationship, separated or divorced, widowed, never married, refused to answer the questionaire -\item \code{everpreg} ever pregnant: never pregnant at the time of -baseline survey, ever pregnant at the time -of baseline survey -\item \code{parity} number of births -\item \code{nbreast} number of breast biopsies -\item \code{sex} M or F -\item \code{bcpc} part of one of the families in the breast/prostate -cancer substudy: 0=no, 1=yes. +\item \code{everpreg} : Ever pregnant at the time of baseline survey +\item \code{parity} : Number of births +\item \code{nbreast} : Number of breast biopsies +\item \code{sex} : \code{M} or \code{F} +\item \code{bcpc} : Part of one of the families in the breast / prostate +cancer substudy: \code{0} = no, \code{1} = yes. Note that subjects who were recruited to the overall study after the date of the BP substudy are coded as zero. } @@ -46,7 +43,7 @@ data(minnbreast) \description{ Data from the Minnesota Breast Cancer Family Study. This contains extended pedigrees from 426 families, each identified by -a single proband in 1945-52, with follow up for incident breast cancer. +a single proband in 1945-1952, with follow up for incident breast cancer. } \details{ The original study was conducted by Dr. Elving Anderson at the @@ -56,11 +53,13 @@ University Hospital were enrolled, and information gathered on parents, siblings, offspring, aunts / uncles, and grandparents with the goal of understanding possible familial aspects of brest cancer. In 1991 the study was resurrected by Dr Tom Sellers. + Of the original 544 he excluded 58 prevalent cases, along with another 19 who had less than 2 living relatives at the time of Dr Anderson's survey. Of the remaining 462 families 10 had no living members, 23 could not be located and 8 refused, leaving 426 families on whom updated pedigrees were obtained. + This gave a study with 13351 males and 12699 females (5183 marry-ins). Primary questions were the relationship of early life exposures, breast density, and pharmacogenomics on incident breast cancer risk. @@ -77,10 +76,12 @@ breastped <- Pedigree(minnbreast, cols_ren_ped = list( "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", "gender" = "sex", "family" = "famid" - ) + ), missid = "0", col_aff = "cancer" ) -print(breastped) -#plot(breastped) #plot family 8, proband is solid, slash for cancers +summary(breastped) +scales(breastped) +#plot family 8, proband is solid, slash for cancers +#plot(breastped[famid(breastped) == "8"]) } \references{ Epidemiologic and genetic follow-up study of 544 Minnesota breast cancer diff --git a/man/na_to_length.Rd b/man/na_to_length.Rd new file mode 100644 index 00000000..e7fb8cbf --- /dev/null +++ b/man/na_to_length.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllConstructor.R +\name{na_to_length} +\alias{na_to_length} +\title{NA to specific length} +\usage{ +na_to_length(x, temp, value) +} +\arguments{ +\item{x}{The vector to check.} + +\item{temp}{A template vector to use to determine the length.} + +\item{value}{The value to use to fill the vector.} +} +\value{ +A vector with the same length as temp. +} +\description{ +Check if all value in a vector is \code{NA}. +If so set all of them to a new value matching the length +of the template. +If not check that the size of the vector is equal to +the template. +} +\examples{ + +na_to_length(NA, rep(0, 4), "NewValue") +na_to_length(c(1, 2, 3, NA), rep(0, 4), "NewValue") +} +\keyword{internal} diff --git a/man/norm_ped.Rd b/man/norm_ped.Rd index 563d1d55..c72f9d19 100644 --- a/man/norm_ped.Rd +++ b/man/norm_ped.Rd @@ -2,53 +2,88 @@ % Please edit documentation in R/norm_data.R \name{norm_ped} \alias{norm_ped} -\title{Normalise dataframe} +\title{Normalise a Ped object dataframe} \usage{ -norm_ped(ped_df, na_strings = c("NA", ""), missid = "0", try_num = FALSE) +norm_ped( + ped_df, + na_strings = c("NA", ""), + missid = NA_character_, + try_num = FALSE +) } \arguments{ \item{ped_df}{A data.frame with the individuals informations. -The minimum columns required are \code{indID}, \code{fatherId}, \code{motherId} and -\code{gender}. -The \code{family} column can also be used to specify the family of the -individuals and will be merge to the \code{id} field separated by an -underscore. -The following columns are also recognize \code{sterilisation}, \code{available}, -\code{vitalStatus}, \code{affection}. The four of them will be transformed with the -\code{\link[=vect_to_binary]{vect_to_binary()}} function. -They respectively correspond to the sterilisation status, -the availability status, the death status and the affection status -of the individuals. The values recognized for those columns are \code{1} or -\code{0}.} +The minimum columns required are: + +\if{html}{\out{
}}\preformatted{- `indID` individual identifiers -> `id` +- `fatherId` biological fathers identifiers -> `dadid` +- `motherId` biological mothers identifiers -> `momdid` +- `gender` sex of the individual -> `sex` +- `family` family identifiers -> `famid` +}\if{html}{\out{
}} + +The \code{family} column, if provided, will be merged to the \emph{ids} field +separated by an underscore using the \code{\link[=upd_famid_id]{upd_famid_id()}} function. + +The following columns are also recognize and will be transformed with the +\code{\link[=vect_to_binary]{vect_to_binary()}} function: -\item{na_strings}{Vector of strings to be considered as NA values} +\if{html}{\out{
}}\preformatted{- `sterilisation` status -> `steril` +- `available` status -> `avail` +- `vitalStatus`, is the individual dead -> `status` +- `affection` status -> `affected` +}\if{html}{\out{
}} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +The values recognized for those columns are \code{1} or \code{0}, \code{TRUE} or \code{FALSE}.} + +\item{na_strings}{Vector of strings to be considered as NA values.} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} \item{try_num}{Boolean defining if the function should try to convert all the columns to numeric.} } \value{ -A dataframe with the errors identified in the \code{error} column +A dataframe with different variable correctly standardized +and with the errors identified in the \code{error} column } \description{ -Normalise dataframe for Pedigree object +Normalise dataframe for a Ped object } \details{ Normalise a dataframe and check for columns correspondance -to be able to use it as an input to create Pedigree object. +to be able to use it as an input to create a Ped object. Multiple test are done and errors are checked. -Sex is calculated based in the \code{gender} column the following notations -are accepted: f, woman, female, 2 and m, man, male, 1. +Sex is calculated based on the \code{gender} column. + The \code{steril} column need to be a boolean either TRUE, FALSE or 'NA'. Will be considered available any individual with no 'NA' values in the \code{available} column. Duplicated \code{indId} will nullify the relationship of the individual. All individuals with errors will be remove from the dataframe and will be transfered to the error dataframe. + +A number of checks are done to ensure the dataframe is correct: +\subsection{On identifiers:}{ +\itemize{ +\item All ids (id, dadid, momid, famid) are not empty (\verb{!= ""}) +\item All \code{id} are unique (no duplicated) +\item All \code{dadid} and \code{momid} are unique in the id column (no duplicated) +\item id is not the same as dadid or momid +\item Either have both parents or none +} +} + +\subsection{On sex}{ +\itemize{ +\item All sex code are either \code{male}, \code{female}, \code{terminated} or \code{unknown}. +\item No parents are steril +\item All fathers are male +\item All mothers are female +} +} } \examples{ df <- data.frame( @@ -57,10 +92,17 @@ df <- data.frame( motherId = c(0, 0, 2, 2, 0, 5, 2, 0, 8, 8), gender = c(1, 2, "m", "man", "f", "male", "m", "m", "f", "f"), available = c("A", "1", 0, NA, 1, 0, 1, 0, 1, 0), - family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), + famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), sterilisation = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, "TRUE"), vitalStatus = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0), affection = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0) ) -norm_ped(df) +tryCatch( + norm_ped(df), + error = function(e) print(e) +) + +} +\seealso{ +\code{\link[=Ped]{Ped()}}, \linkS4class{Ped}, \code{\link[=Pedigree]{Pedigree()}} } diff --git a/man/norm_rel.Rd b/man/norm_rel.Rd index 556692f7..1cd6238b 100644 --- a/man/norm_rel.Rd +++ b/man/norm_rel.Rd @@ -2,16 +2,18 @@ % Please edit documentation in R/norm_data.R \name{norm_rel} \alias{norm_rel} -\title{Normalise relationship dataframe} +\title{Normalise a Rel object dataframe} \usage{ -norm_rel(rel_df, na_strings = c("NA", ""), missid = "0") +norm_rel(rel_df, na_strings = c("NA", ""), missid = NA_character_) } \arguments{ \item{rel_df}{A data.frame with the special relationships between -individuals. +individuals. See \code{\link[=Rel]{Rel()}} for more informations. The minimum columns required are \code{id1}, \code{id2} and \code{code}. -The \code{family} column can also be used to specify the family +The \code{famid} column can also be used to specify the family of the individuals. +If a matrix is given, the columns needs to be ordered as +\code{id1}, \code{id2}, \code{code} and \code{famid}. The code values are: \itemize{ \item \code{1} = Monozygotic twin @@ -23,26 +25,47 @@ The code values are: The value relation code recognized by the function are the one defined by the \code{\link[=rel_code_to_factor]{rel_code_to_factor()}} function.} -\item{na_strings}{Vector of strings to be considered as NA values} +\item{na_strings}{Vector of strings to be considered as NA values.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ A dataframe with the errors identified } \description{ -Normalise relationship dataframe for Pedigree object +Normalise a dataframe and check for columns correspondance +to be able to use it as an input to create a Ped object. +} +\details{ +The \code{famid} column, if provided, will be merged to the \emph{ids} field +separated by an underscore using the \code{\link[=upd_famid_id]{upd_famid_id()}} function. +The \code{code} column will be transformed with the \code{\link[=rel_code_to_factor]{rel_code_to_factor()}}. +Multiple test are done and errors are checked. + +A number of checks are done to ensure the dataframe is correct: +\subsection{On identifiers:}{ +\itemize{ +\item All ids (id1, id2) are not empty (\verb{!= ""}) +\item \code{id1} and \code{id2} are not the same +} +} + +\subsection{On code}{ +\itemize{ +\item All code are recognised as either "MZ twin", "DZ twin", "UZ twin" or +"Spouse" +} +} } \examples{ df <- data.frame( - indId1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), - indId2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), + id1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), + id2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), code = c("MZ twin", "DZ twin", "UZ twin", "Spouse", 1, 2, 3, 4, "MzTwin", "sp oUse"), - family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) + famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) ) norm_rel(df) diff --git a/man/num_child.Rd b/man/num_child.Rd index f9e924a7..2886d92a 100644 --- a/man/num_child.Rd +++ b/man/num_child.Rd @@ -1,36 +1,32 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/num_child.R -\docType{methods} \name{num_child} \alias{num_child} -\alias{num_child,character-method} -\alias{num_child,character} +\alias{num_child,character_OR_integer-method} \alias{num_child,Pedigree-method} -\alias{num_child,Pedigree} -\title{Number of child} +\title{Number of childs} \usage{ -num_child(obj, ...) - -\S4method{num_child}{character}(obj, dadid, momid, rel_df = NULL, missid = "0") +\S4method{num_child}{character_OR_integer}(obj, dadid, momid, rel_df = NULL, missid = NA_character_) \S4method{num_child}{Pedigree}(obj, reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{rel_df}{A data.frame with the special relationships between -individuals. +individuals. See \code{\link[=Rel]{Rel()}} for more informations. The minimum columns required are \code{id1}, \code{id2} and \code{code}. -The \code{family} column can also be used to specify the family +The \code{famid} column can also be used to specify the family of the individuals. +If a matrix is given, the columns needs to be ordered as +\code{id1}, \code{id2}, \code{code} and \code{famid}. The code values are: \itemize{ \item \code{1} = Monozygotic twin @@ -42,10 +38,9 @@ The code values are: The value relation code recognized by the function are the one defined by the \code{\link[=rel_code_to_factor]{rel_code_to_factor()}} function.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} \item{reset}{If TRUE, the \code{num_child_tot}, \code{num_child_ind} and the \code{num_child_dir} columns are reset.} @@ -66,17 +61,29 @@ Pedigree \code{ped} slot. } } \description{ -Compute the number of child per individual +Compute the number of childs per individual } \details{ Compute the number of direct child but also the number of indirect child given by the ones related with the linked spouses. -If a relation ship matrix is given, then even if no children is present +If a relation ship dataframe is given, then even if no children is present between 2 spouses, the indirect childs will still be added. } \examples{ + +num_child( + obj = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), + dadid = c("3", "3", "6", "8", "0", "0", "0", "0", "0", "0"), + momid = c("4", "5", "7", "9", "0", "0", "0", "0", "0", "0"), + rel_df = data.frame( + id1 = "10", + id2 = "3", + code = "Spouse" + ) +) + data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -ped1 <- num_child(ped1) -summary(ped1$ped) +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +ped1 <- num_child(ped1, reset = TRUE) +summary(ped(ped1)) } diff --git a/man/paste0max.Rd b/man/paste0max.Rd index f679a4a1..db81a1cc 100644 --- a/man/paste0max.Rd +++ b/man/paste0max.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R +% Please edit documentation in R/AllValidity.R \name{paste0max} \alias{paste0max} \title{Print0 to max} \usage{ -paste0max(x, max = 5, ...) +paste0max(x, max = 5, sep = "", ...) } \arguments{ \item{x}{A vector.} diff --git a/man/ped_to_legdf.Rd b/man/ped_to_legdf.Rd index e4e0d1c6..abcb04e9 100644 --- a/man/ped_to_legdf.Rd +++ b/man/ped_to_legdf.Rd @@ -1,20 +1,20 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ped_to_legdf.R -\docType{methods} \name{ped_to_legdf} \alias{ped_to_legdf} -\title{Convert a Pedigree to a legend data frame of element to plot} +\alias{ped_to_legdf,Pedigree-method} +\title{Create plotting legend data frame from a Pedigree} \usage{ -ped_to_legdf(ped, boxh = 1, boxw = 1, cex = 1, adjx = 0, adjy = 0) +\S4method{ped_to_legdf}{Pedigree}(obj, boxh = 1, boxw = 1, cex = 1, adjx = 0, adjy = 0) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{boxh}{Height of the legend boxes} +\item{boxh}{Height of the polygons elements} -\item{boxw}{Width of the legend boxes} +\item{boxw}{Width of the polygons elements} -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} \item{adjx}{default=0. Controls the horizontal text adjustment of the labels in the legend.} @@ -27,12 +27,32 @@ A list containing the legend data frame and the user coordinates. } \description{ Convert a Pedigree to a legend data frame for it to -be plotted with afterwards with \code{\link[=plot_fromdf]{plot_fromdf()}}. +be plotted afterwards with \code{\link[=plot_fromdf]{plot_fromdf()}}. +} +\details{ +The data frame contains the following columns: +\itemize{ +\item \code{x0}, \code{y0}, \code{x1}, \code{y1}: coordinates of the elements +\item \code{type}: type of the elements +\item \code{fill}: fill color of the elements +\item \code{border}: border color of the elements +\item \code{angle}: angle of the shading of the elements +\item \code{density}: density of the shading of the elements +\item \code{cex}: size of the elements +\item \code{label}: label of the elements +\item \code{tips}: tips of the elements (used for the tooltips) +\item \code{adjx}: horizontal text adjustment of the labels +\item \code{adjy}: vertical text adjustment of the labels +} + +All those columns are used by \code{\link[=plot_fromdf]{plot_fromdf()}} to plot the graph. } \examples{ data("sampleped") ped <- Pedigree(sampleped) leg_df <- ped_to_legdf(ped) -summary(leg_df$leg_df) -plot_fromdf(leg_df$leg_df) +summary(leg_df$df) +plot_fromdf(leg_df$df, usr = c(-1,15,0,7)) } +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/ped_to_plotdf.Rd b/man/ped_to_plotdf.Rd index e2921653..fff3dedb 100644 --- a/man/ped_to_plotdf.Rd +++ b/man/ped_to_plotdf.Rd @@ -2,15 +2,16 @@ % Please edit documentation in R/ped_to_plotdf.R \name{ped_to_plotdf} \alias{ped_to_plotdf} -\title{Convert a Pedigree to a data frame of element to plot} +\alias{ped_to_plotdf,Pedigree-method} +\title{Create plotting data frame from a Pedigree} \usage{ -ped_to_plotdf( - ped, - packed = FALSE, - width = 10, +\S4method{ped_to_plotdf}{Pedigree}( + obj, + packed = TRUE, + width = 6, align = c(1.5, 2), subreg = NULL, - cex = 0.5, + cex = 1, symbolsize = cex, pconnect = 0.5, branch = 0.6, @@ -20,33 +21,35 @@ ped_to_plotdf( ) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{...}{Other arguments passed to \code{\link[=par]{par()}}} -\item{width}{for a packed output, the minimum width of the plot, in +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} + +\item{width}{For a packed output, the minimum width of the plot, in inches.} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{subreg}{4-element vector for (min x, max x, min depth, max depth), +\item{subreg}{A 4-element vector for (min x, max x, min depth, max depth), used to edit away portions of the plot coordinates returned by \code{\link[=align]{align()}}. This is useful for zooming in on a particular region of the Pedigree.} -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} -\item{symbolsize}{size of the symbols} +\item{symbolsize}{Size of the symbols} -\item{pconnect}{when connecting parent to children the program will try to +\item{pconnect}{When connecting parent to children the program will try to make the connecting line as close to vertical as possible, subject to it lying inside the endpoints of the line that connects the children by at least \code{pconnect} people. Setting this option to a large number will @@ -55,26 +58,51 @@ force the line to connect at the midpoint of the children.} \item{branch}{defines how much angle is used to connect various levels of nuclear families.} -\item{aff_mark}{if TRUE, add a aff_mark to each box corresponding to the +\item{aff_mark}{If \code{TRUE}, add a aff_mark to each box corresponding to the value of the affection column for each filling scale.} -\item{label}{if not NULL, add a label to each box corresponding to the +\item{label}{If not \code{NULL}, add a label to each box corresponding to the value of the column given.} - -\item{...}{other arguments passed to \code{\link[=par]{par()}}} } \value{ A list containing the data frame and the user coordinates. } \description{ -Convert a Pedigree to a data frame of element to plot +Convert a Pedigree to a data frame with all the elements and their +characteristic for them to be plotted afterwards with \code{\link[=plot_fromdf]{plot_fromdf()}}. +} +\details{ +The data frame contains the following columns: +\itemize{ +\item \code{x0}, \code{y0}, \code{x1}, \code{y1}: coordinates of the elements +\item \code{type}: type of the elements +\item \code{fill}: fill color of the elements +\item \code{border}: border color of the elements +\item \code{angle}: angle of the shading of the elements +\item \code{density}: density of the shading of the elements +\item \code{cex}: size of the elements +\item \code{label}: label of the elements +\item \code{tips}: tips of the elements (used for the tooltips) +\item \code{adjx}: horizontal text adjustment of the labels +\item \code{adjy}: vertical text adjustment of the labels +} + +All those columns are used by \code{\link[=plot_fromdf]{plot_fromdf()}} to plot the graph. } \examples{ + data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == 1,]) -ped_to_plotdf(ped1) +ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) +plot_df <- ped_to_plotdf(ped1) +summary(plot_df$df) +plot_fromdf(plot_df$df, usr = plot_df$par_usr$usr, + boxh = plot_df$par_usr$boxh, boxw = plot_df$par_usr$boxw +) + } \seealso{ \code{\link[=plot_fromdf]{plot_fromdf()}} \code{\link[=ped_to_legdf]{ped_to_legdf()}} } +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/permute.Rd b/man/permute.Rd new file mode 100644 index 00000000..3b345c62 --- /dev/null +++ b/man/permute.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/best_hint.R +\name{permute} +\alias{permute} +\title{Generate all possible permutation} +\usage{ +permute(x) +} +\arguments{ +\item{x}{A vector of length \strong{n}} +} +\value{ +A matrix with \strong{n} cols and \strong{n!} rows +} +\description{ +Given a vector of length \strong{n}, generate all possible permutations of +the numbers 1 to \strong{n}. +This is a recursive routine, and is not very efficient. +} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/plot-Pedigree-missing-method.Rd b/man/plot-Pedigree-missing-method.Rd index 84231ce4..b8f741f9 100644 --- a/man/plot-Pedigree-missing-method.Rd +++ b/man/plot-Pedigree-missing-method.Rd @@ -34,44 +34,44 @@ \arguments{ \item{x}{A Pedigree object.} -\item{aff_mark}{if TRUE, add a aff_mark to each box corresponding to the +\item{aff_mark}{If \code{TRUE}, add a aff_mark to each box corresponding to the value of the affection column for each filling scale.} -\item{label}{if not NULL, add a label to each box corresponding to the +\item{label}{If not \code{NULL}, add a label to each box corresponding to the value of the column given.} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} -\item{symbolsize}{size of the symbols} +\item{symbolsize}{Size of the symbols} \item{branch}{defines how much angle is used to connect various levels of nuclear families.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{width}{for a packed output, the minimum width of the plot, in +\item{width}{For a packed output, the minimum width of the plot, in inches.} \item{title}{The title of the plot.} -\item{subreg}{4-element vector for (min x, max x, min depth, max depth), +\item{subreg}{A 4-element vector for (min x, max x, min depth, max depth), used to edit away portions of the plot coordinates returned by \code{\link[=align]{align()}}. This is useful for zooming in on a particular region of the Pedigree.} -\item{pconnect}{when connecting parent to children the program will try to +\item{pconnect}{When connecting parent to children the program will try to make the connecting line as close to vertical as possible, subject to it lying inside the endpoints of the line that connects the children by at least \code{pconnect} people. Setting this option to a large number will @@ -80,7 +80,7 @@ force the line to connect at the midpoint of the children.} \item{fam_to_plot}{default=1. If the Pedigree contains multiple families, this parameter can be used to select which family to plot. It can be a numeric value or a character value. If numeric, it is the -index of the family to plot returned by \code{unique(x$ped$family)}. +index of the family to plot returned by \code{unique(x$ped$famid)}. If character, it is the family id to plot.} \item{legend}{default=FALSE. If TRUE, a legend will be added to the plot.} @@ -110,33 +110,43 @@ an invisible list containing } } \description{ -plot objects created with the Pedigree function +This function is used to plot a Pedigree object. + +It is a wrapper for \code{\link[=plot_fromdf]{plot_fromdf()}} and \code{\link[=ped_to_plotdf]{ped_to_plotdf()}} as well as +\code{\link[=ped_to_legdf]{ped_to_legdf()}} if \code{legend = TRUE}. } \details{ Two important parameters control the looks of the result. One is the user specified maximum width. The smallest possible width is the maximum number of subjects on a line, if the user's suggestion is too low it is -increased to 1 + that amount (to give just a little wiggle room). To make a -Pedigree where all children are centered under parents simply make the width -large enough, however, the symbols may get very small. +increased to 1 + that amount (to give just a little wiggle room). + +To make a Pedigree where all children are centered under parents simply +make the width large enough, however, the symbols may get very small. The second is \code{align}, a vector of 2 alignment parameters \code{a} and \code{b}. For each set of siblings at a set of locations \code{x} and with parents at -\code{p=c(p1,p2)} the alignment penalty is \deqn{(1/k^a)\sum{i=1}{k} [(x_i - -(p1+p2)/2)]^2} sum(x- mean(p))^2/(k^a) where k is the number of siblings in -the set. +\code{p=c(p1,p2)} the alignment penalty is + +\deqn{(1/k^a)\sum{i=1}{k} [(x_i - (p1+p2)/2)]^2} + +\deqn{\sum(x- \overline(p))^2/(k^a)} + +Where k is the number of siblings in the set. + When \code{a = 1} moving a sibship with \code{k} sibs one unit to the left or right of optimal will incur the same cost as moving one with only 1 or two sibs out of place. -If \code{a=0} then large sibships are harder to move than small ones, + +If \code{a = 0} then large sibships are harder to move than small ones, with the default value \code{a = 1.5} they are slightly easier to move than small ones. The rationale for the default is as long as the parents are somewhere between the first and last siblings the result looks fairly -good, so we are more flexible with the spacing of a large family. By -tethering all the sibs to a single spot they are kept close to each other. +good, so we are more flexible with the spacing of a large family. +By tethering all the sibs to a single spot they are kept close to each other. The alignment penalty for spouses is \eqn{b(x_1 - x_2)^2}{b *(x1-x2)^2}, -which tends to keep them together. The size of \code{b} controls the relative +which tends to keep them together. The size of \code{b} controls the relative importance of sib-parent and spouse-spouse closeness. } \section{Side Effects}{ @@ -152,3 +162,4 @@ pedAll <- Pedigree(sampleped) \seealso{ \code{\link[=Pedigree]{Pedigree()}} } +\keyword{Pedigree-plot} diff --git a/man/plot_fromdf.Rd b/man/plot_fromdf.Rd index 31cca332..e576cce5 100644 --- a/man/plot_fromdf.Rd +++ b/man/plot_fromdf.Rd @@ -44,11 +44,11 @@ Only used for \code{segments} and \code{arc}. \item{title}{The title of the plot.} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{boxw}{Width of the legend boxes} +\item{boxw}{Width of the polygons elements} -\item{boxh}{Height of the legend boxes} +\item{boxh}{Height of the polygons elements} \item{add_to_existing}{If \code{TRUE}, the plot will be added to the current plot.} @@ -57,13 +57,19 @@ plot.} an invisible ggplot object and a plot on the current plotting device } \description{ -This function is used to create a plot from a data.frame. If ggplot_gen is -set to TRUE, the plot will be generated with ggplot2 and will be returned -invisibly. +This function is used to create a plot from a data.frame. + +If \code{ggplot_gen = TRUE}, the plot will be generated with ggplot2 and +will be returned invisibly. } \examples{ data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == 1,]) +ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) lst <- ped_to_plotdf(ped1) -#plot_fromdf(lst$df, lst$usr) +#plot_fromdf(lst$df, lst$par_usr$usr, +# boxw = lst$par_usr$boxw, boxh = lst$par_usr$boxh +#) + } +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/polyfun.Rd b/man/polyfun.Rd index a7b086da..51b3b169 100644 --- a/man/polyfun.Rd +++ b/man/polyfun.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/plot_fct.R \name{polyfun} \alias{polyfun} -\title{Generate a polygonal element} +\title{Polygonal element} \usage{ polyfun(nslice, coor) } \arguments{ -\item{nslice}{number of slices in the polygon} +\item{nslice}{Number of slices in the polygon} \item{coor}{Element form which to generate the polygon containing x and y coordinates and theta} @@ -16,6 +16,16 @@ containing x and y coordinates and theta} a list of x and y coordinates } \description{ -Generate a polygonal element +Create a list of x and y coordinates for a polygon +with a given number of slices and a list of coordinates +for the polygon. } -\keyword{internal} +\examples{ +polyfun(2, list( + x = c(-0.5, -0.5, 0.5, 0.5), + y = c(-0.5, 0.5, 0.5, -0.5), + theta = -c(3, 5, 7, 9) * pi / 4 +)) +} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/polygons.Rd b/man/polygons.Rd index 7a4ed743..b27daa7e 100644 --- a/man/polygons.Rd +++ b/man/polygons.Rd @@ -2,18 +2,26 @@ % Please edit documentation in R/plot_fct.R \name{polygons} \alias{polygons} -\title{Create a list of the different polygonal elements} +\title{List of polygonal elements} \usage{ polygons(nslice = 1) } \arguments{ -\item{nslice}{number of slices in each element} +\item{nslice}{Number of slices in each element +If nslice > 1, the elements are created with \code{\link[=polyfun]{polyfun()}}.} } \value{ a list of polygonal elements with x, y coordinates -and theta +and theta by slice. } \description{ -Create a list of the different polygonal elements +Create a list of polygonal elements with x, y coordinates +and theta for the square, circle, diamond and triangle. +The number of slices in each element can be specified. } -\keyword{internal} +\examples{ +polygons() +polygons(4) +} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/prefix_famid.Rd b/man/prefix_famid.Rd deleted file mode 100644 index 48de66f8..00000000 --- a/man/prefix_famid.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/norm_data.R -\name{prefix_famid} -\alias{prefix_famid} -\title{Compute id with family id} -\usage{ -prefix_famid(family_id, ind_id, missid = "0") -} -\arguments{ -\item{family_id}{The family id} - -\item{ind_id}{The individual id} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} -} -\value{ -The id with the family id merged -} -\description{ -Compute id with family id if the family id available -} -\keyword{internal} diff --git a/man/rel_code_to_factor.Rd b/man/rel_code_to_factor.Rd index 3278a274..968bec1b 100644 --- a/man/rel_code_to_factor.Rd +++ b/man/rel_code_to_factor.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/utils.R \name{rel_code_to_factor} \alias{rel_code_to_factor} -\title{Transform a relationship code variable to an ordered factor} +\title{Relationship code variable to ordered factor} \usage{ -rel_code_to_factor(rel_code) +rel_code_to_factor(code) } \arguments{ -\item{rel_code}{A character, factor or numeric vector corresponding to +\item{code}{A character, factor or numeric vector corresponding to the relation code of the individuals: \itemize{ \item MZ twin = Monozygotic twin @@ -25,7 +25,7 @@ an ordered factor vector containing the transformed variable "MZ twin" < "DZ twin" < "UZ twin" < "Spouse" } \description{ -Transform a relationship code variable to an ordered factor +Relationship code variable to ordered factor } \examples{ rel_code_to_factor(c(1, 2, 3, 4, "MZ twin", "DZ twin", "UZ twin", "Spouse")) diff --git a/man/sampleped.Rd b/man/sampleped.Rd index 365963fb..0f32a96d 100644 --- a/man/sampleped.Rd +++ b/man/sampleped.Rd @@ -3,27 +3,27 @@ \docType{data} \name{sampleped} \alias{sampleped} -\title{samplepedigree data} +\title{Sampleped data} \format{ A data frame with 55 observations, one line per subject, on the following 7 variables. \itemize{ -\item \code{family} family identifier -\item \code{id} subject identifier -\item \code{dadid} identifier of the father, if the father is part of the +\item \code{famid} : Family identifier +\item \code{id} : Subject identifier +\item \code{dadid} : Identifier of the father, if the father is part of the data set; zero otherwise -\item \code{momid} identifier of the mother, if the mother is part of the +\item \code{momid} : Identifier of the mother, if the mother is part of the data set; zero otherwise -\item \code{sex} 1 for male or 2 for F -\item \code{affected} 1 or 0 -\item \code{available} 1 or 0 +\item \code{sex} : \code{1} for male or \code{2} for female +\item \code{affected} : \code{1} or \code{0} +\item \code{avail} : \code{1} or \code{0} } } \usage{ data("sampleped") } \description{ -Small sample pedigree data set. +Small sample pedigree data set for testing purposes. } \details{ This is a small fictive pedigree data set, with 55 diff --git a/man/set_plot_area.Rd b/man/set_plot_area.Rd index 2b74565f..7cfc1cd7 100644 --- a/man/set_plot_area.Rd +++ b/man/set_plot_area.Rd @@ -7,23 +7,24 @@ set_plot_area(cex, id, maxlev, xrange, symbolsize, ...) } \arguments{ -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} -\item{id}{A vector of each subjects identifiers} +\item{id}{A character vector with the identifiers of each individuals} -\item{maxlev}{maximum level} +\item{maxlev}{Maximum level} -\item{xrange}{range of x values} +\item{xrange}{Range of x values} -\item{symbolsize}{size of the symbols} +\item{symbolsize}{Size of the symbols} -\item{...}{other arguments passed to \code{\link[=par]{par()}}} +\item{...}{Other arguments passed to \code{\link[=par]{par()}}} } \value{ -a list of user coordinates, old par, box width, box height, +List of user coordinates, old par, box width, box height, label height and leg height } \description{ Set plotting area } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/sex_to_factor.Rd b/man/sex_to_factor.Rd index ba4f11bb..91db0cfc 100644 --- a/man/sex_to_factor.Rd +++ b/man/sex_to_factor.Rd @@ -2,13 +2,15 @@ % Please edit documentation in R/utils.R \name{sex_to_factor} \alias{sex_to_factor} -\title{Transform a gender variable to an ordered factor} +\title{Gender variable to ordered factor} \usage{ sex_to_factor(sex) } \arguments{ \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" @@ -20,7 +22,7 @@ an ordered factor vector containing the transformed variable "male" < "female" < "unknown" < "terminated" } \description{ -Transform a gender variable to an ordered factor +Gender variable to ordered factor } \examples{ sex_to_factor(c(1, 2, 3, 4, "f", "m", "man", "female")) diff --git a/man/shift.Rd b/man/shift.Rd index 31cf6396..11eba380 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/auto_hint.R \name{shift} \alias{shift} -\title{Routine to shift set of siblings to the left or right} +\title{Shift set of siblings to the left or right} \usage{ shift(id, sibs, goleft, hint, twinrel, twinset) } @@ -23,7 +23,7 @@ shift(id, sibs, goleft, hint, twinrel, twinset) The updated hint vector } \description{ -Routine to shift set of siblings to the left or right +Shift set of siblings to the left or right } \details{ This routine is used by \code{auto_hint()}. @@ -37,4 +37,5 @@ together within the set of twins. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/shrink.Rd b/man/shrink.Rd index d540ea7d..001652e9 100644 --- a/man/shrink.Rd +++ b/man/shrink.Rd @@ -2,33 +2,24 @@ % Please edit documentation in R/shrink.R \name{shrink} \alias{shrink} +\alias{shrink,Pedigree-method} +\alias{shrink,Ped-method} \title{Shrink Pedigree object} \usage{ -shrink( - ped, - avail = ped(ped)$avail, - affected = ped(ped)$affected, - max_bits = 16 -) +\S4method{shrink}{Pedigree}(obj, avail = NULL, affected = NULL, max_bits = 16) + +\S4method{shrink}{Ped}(obj, avail = NULL, affected = NULL, max_bits = 16) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree or Ped object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{max_bits}{Optional, the bit size for which to shrink the Pedigree} } @@ -50,6 +41,9 @@ Shrink Pedigree object to specified bit size with priority placed on trimming uninformative subjects. The algorithm is useful for getting a Pedigree condensed to a minimally informative size for algorithms or testing that are limited by size of the Pedigree. + +If \strong{avail} or \strong{affected} are \code{NULL}, they are extracted with their +corresponding accessors from the Ped object. } \details{ Iteratively remove subjects from the Pedigree. The random removal of members @@ -67,7 +61,7 @@ individuals (chosen at random if there are multiple of the same status): } \examples{ data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == '1',]) +ped1 <- Pedigree(sampleped[sampleped$famid == '1',]) shrink(ped1, max_bits = 12) } @@ -75,5 +69,7 @@ shrink(ped1, max_bits = 12) \code{\link[=Pedigree]{Pedigree()}}, \code{\link[=bit_size]{bit_size()}} } \author{ -Original by Dan Schaid, updated by Jason Sinnwell +Original by Dan Schaid, +updated by Jason Sinnwell and Louis Le Nézet } +\keyword{shrink} diff --git a/man/subregion.Rd b/man/subregion.Rd index 552e5cb4..184a2fd4 100644 --- a/man/subregion.Rd +++ b/man/subregion.Rd @@ -2,37 +2,24 @@ % Please edit documentation in R/plot_fct.R \name{subregion} \alias{subregion} -\title{Routine to subset a Pedigree} +\title{Subset a region of a Pedigree} \usage{ subregion(plist, subreg) } \arguments{ \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} -\item{subreg}{4-element vector for (min x, max x, min depth, max depth), +\item{subreg}{A 4-element vector for (min x, max x, min depth, max depth), used to edit away portions of the plot coordinates returned by \code{\link[=align]{align()}}. This is useful for zooming in on a particular region of the Pedigree.} } \value{ -a Pedigree +A Pedigree structure with the specified region } \description{ -Routine to subset a Pedigree +Subset a region of a Pedigree } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/trim.Rd b/man/trim.Rd deleted file mode 100644 index 876b92c6..00000000 --- a/man/trim.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/trim.R -\name{trim} -\alias{trim} -\title{Trim a Pedigree} -\usage{ -trim(ped, id_rm, missid = "0") -} -\arguments{ -\item{ped}{A Pedigree object} - -\item{id_rm}{Vector of ids to remove} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} -} -\value{ -A Pedigree object with the subjects removed -} -\description{ -Carries out the removal of the subjects identified from a Pedigree object. -} -\examples{ -data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -trim(ped1, "1_101") -} diff --git a/man/unrelated.Rd b/man/unrelated.Rd index f3e0a39a..f9173521 100644 --- a/man/unrelated.Rd +++ b/man/unrelated.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/unrelated.R \name{unrelated} \alias{unrelated} -\title{Get unrelated subjects} +\alias{unrelated,Ped-method} +\alias{unrelated,Pedigree-method} +\title{Find Unrelated subjects} \usage{ -unrelated(ped, avail = ped(ped)$avail) +\S4method{unrelated}{Ped}(obj, avail = NULL) + +\S4method{unrelated}{Pedigree}(obj, avail = NULL) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree or Ped object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} } \value{ A vector of the ids of subjects that are unrelated. @@ -27,7 +27,8 @@ Pedigree. \details{ Determine set of maximum number of unrelated available subjects from a Pedigree, given vectors id, father, and mother for a Pedigree structure, and -status vector of T/F for whether each subject is available (e.g. has DNA) +status vector of \code{TRUE} / \code{FALSE} for whether each subject is +available (e.g. has DNA). This is a greedy algorithm that uses the kinship matrix, sequentially removing rows/cols that are non-zero for subjects that have the most number @@ -36,22 +37,24 @@ that has the most number of zeros, and then remove any cols and their corresponding rows that are non-zero. To account for ties of the count of zeros for rows, a random choice is made. Hence, running this function multiple times can return different sets of unrelated subjects. + +If \strong{avail} is \code{NULL}, it is extracted with its +corresponding accessor from the Ped object. } \examples{ -data(sampleped) -fam1 <- sampleped[sampleped$family == 1, ] - +data(sampleped) +fam1 <- sampleped[sampleped$famid == 1, ] ped1 <- Pedigree(fam1) - -## to see plot: -id1 <- unrelated(ped1) - -id1 +unrelated(ped1) ## some possible vectors ## [1] '110' '113' '133' '109' ## [1] '113' '118' '141' '109' ## [1] '113' '118' '140' '109' ## [1] '110' '113' '116' '109' ## [1] '113' '133' '141' '109' + +} +\author{ +Dan Schaid and Shannon McDonnell updated by Jason Sinnwell } diff --git a/man/upd_famid_id.Rd b/man/upd_famid_id.Rd new file mode 100644 index 00000000..9d05c33b --- /dev/null +++ b/man/upd_famid_id.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make_famid.R +\name{upd_famid_id} +\alias{upd_famid_id} +\alias{upd_famid_id,character,ANY-method} +\alias{upd_famid_id,Ped,character_OR_integer-method} +\alias{upd_famid_id,Ped,missing-method} +\alias{upd_famid_id,Rel,character_OR_integer-method} +\alias{upd_famid_id,Rel,missing-method} +\alias{upd_famid_id,Pedigree,character_OR_integer-method} +\alias{upd_famid_id,Pedigree,missing-method} +\title{Update family prefix in individuals id} +\usage{ +\S4method{upd_famid_id}{character,ANY}(obj, famid, missid = NA_character_) + +\S4method{upd_famid_id}{Ped,character_OR_integer}(obj, famid) + +\S4method{upd_famid_id}{Ped,missing}(obj) + +\S4method{upd_famid_id}{Rel,character_OR_integer}(obj, famid) + +\S4method{upd_famid_id}{Rel,missing}(obj) + +\S4method{upd_famid_id}{Pedigree,character_OR_integer}(obj, famid) + +\S4method{upd_famid_id}{Pedigree,missing}(obj) +} +\arguments{ +\item{obj}{Ped or Pedigree object or a character vector of individual ids} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} +} +\value{ +A character vector of individual ids with family prefix +updated +} +\description{ +Update the family prefix in the individuals identifiers. +Individuals identifiers are constructed as follow \strong{famid}_\strong{id}. +Therefore to update their family prefix the ids are split by the +first underscore and the first part is overwritten by \strong{famid}. +} +\details{ +If famid is \emph{missing}, then the \code{famid()} function will be called on the +object. +} +\examples{ + +upd_famid_id(c("1", "2", "B_3"), c("A", "B", "A")) +upd_famid_id(c("1", "B_2", "C_3", "4"), c("A", NA, "A", NA)) + +data(sampleped) +ped1 <- Pedigree(sampleped[,-1]) +id(ped(ped1)) +new_fam <- make_famid(id(ped(ped1)), dadid(ped(ped1)), momid(ped(ped1))) +id(ped(upd_famid_id(ped1, new_fam))) + +data(sampleped) +ped1 <- Pedigree(sampleped[,-1]) +make_famid(ped1) +} diff --git a/man/useful_inds.Rd b/man/useful_inds.Rd index c01bb27b..0e6a04b9 100644 --- a/man/useful_inds.Rd +++ b/man/useful_inds.Rd @@ -1,16 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/useful_inds.R -\docType{methods} \name{useful_inds} \alias{useful_inds} \alias{useful_inds,character-method} -\alias{useful_inds,character} \alias{useful_inds,Pedigree-method} -\alias{useful_inds,Pedigree} -\title{Compute the usefulness of individuals} +\alias{useful_inds,Ped-method} +\title{Usefulness of individuals} \usage{ -useful_inds(obj, ...) - \S4method{useful_inds}{character}( obj, dadid, @@ -19,44 +15,30 @@ useful_inds(obj, ...) affected, num_child_tot, informative = "AvAf", - keep_infos = FALSE, - missid = "0" + keep_infos = FALSE ) -\S4method{useful_inds}{Pedigree}( - obj, - informative = "AvAf", - keep_infos = FALSE, - missid = "0", - reset = FALSE -) +\S4method{useful_inds}{Pedigree}(obj, informative = "AvAf", keep_infos = FALSE, reset = FALSE) + +\S4method{useful_inds}{Ped}(obj, informative = "AvAf", keep_infos = FALSE, reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{num_child_tot}{A numeric vector of the number of children of each individuals} @@ -75,11 +57,6 @@ individuals} \item{keep_infos}{Boolean to indicate if individuals with unknown status but available or reverse should be kept} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - \item{reset}{Boolean to indicate if the \code{useful} column should be reset} } \value{ @@ -88,24 +65,25 @@ The default for \code{missid} is \code{"0"}.} A vector of useful individuals identifiers } -\subsection{When obj is a Pedigree}{ +\subsection{When obj is a Pedigree or Ped object}{ -The Pedigree object with a new column named 'useful' containing 1 for -useful individuals and 0 otherwise. +The Pedigree or Ped object with the slot 'useful' containing \code{TRUE} for +useful individuals and \code{FALSE} otherwise. } } \description{ -Check for usefulness of individuals +Compute the usefulness of individuals } \details{ Check for the informativeness of the individuals based on the informative parameter given, the number of children and the usefulness -of their parents. A \code{useful} column is added to the dataframe with the +of their parents. A \code{useful} slot is added to the Ped object with the usefulness of the individual. This boolean is hereditary. } \examples{ + data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -ped1 <- num_child(ped1) -useful_inds(ped1, informative = "AvAf")$ped +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +ped(useful_inds(ped1, informative = "AvAf")) } +\keyword{shrink} diff --git a/man/vect_to_binary.Rd b/man/vect_to_binary.Rd index 3e8fa830..16aaf7f0 100644 --- a/man/vect_to_binary.Rd +++ b/man/vect_to_binary.Rd @@ -2,28 +2,34 @@ % Please edit documentation in R/utils.R \name{vect_to_binary} \alias{vect_to_binary} -\title{Transform a vector variable to binary vector} +\title{Vector variable to binary vector} \usage{ -vect_to_binary(vect) +vect_to_binary(vect, logical = FALSE) } \arguments{ \item{vect}{A character, factor, logical or numeric vector corresponding to -a binary variable (i.e. 0 or 1). +a binary variable (i.e. \code{0} or \code{1}). The following values are recognized: \itemize{ \item character() or factor() : "TRUE", "FALSE", "0", "1", "NA" will be -respectively transformed to 1, 0, 0, 1, NA. +respectively transformed to \code{1}, \code{0}, \code{0}, \code{1}, \code{NA}. Spaces and case are ignored. All other values will be transformed to NA. -\item numeric() : 0 and 1 are kept, all other values are transformed to NA. -\item logical() : TRUE and FALSE are tansformed to 1 and 0. +\item numeric() : \code{0} and \code{1} are kept, all other values are transformed to NA. +\item logical() : \code{TRUE} and \code{FALSE} are tansformed to \code{1} and \code{0}. }} + +\item{logical}{Boolean defining if the output should be a logical vector +instead of a numeric vector (i.e. \code{0} and \code{1} becomes \code{FALSE} and `TRUE).} } \value{ -numeric binary vector of the same size as \code{vect} with 0 and 1 +numeric binary vector of the same size as \strong{vect} +with \code{0} and \code{1} } \description{ -Transform a vector variable to binary vector +Transform a vector to a binary vector. +All values that are not \code{0}, \code{1}, \code{TRUE}, \code{FALSE}, or \code{NA} +are transformed to \code{NA}. } \examples{ vect_to_binary( diff --git a/tests/testthat.R b/tests/testthat.R index 6d001372..f2100946 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -11,5 +11,6 @@ library(testthat) library(Pedixplorer) library(vdiffr) +withr::local_options(width = 150) test_check("Pedixplorer") TRUE diff --git a/tests/testthat/_snaps/align.md b/tests/testthat/_snaps/align.md index 2bf3b1ff..00527de5 100644 --- a/tests/testthat/_snaps/align.md +++ b/tests/testthat/_snaps/align.md @@ -4,60 +4,60 @@ plist Output $n - [1] 8 19 22 8 + [1] 4 17 21 14 $nid [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] - [1,] 5 6 7 8 35 36 42 43 0 0 0 0 0 0 - [2,] 1 2 15 17 16 18 19 20 3 4 37 38 44 45 - [3,] 9 29 30 31 32 33 34 9 10 11 12 18 14 15 - [4,] 21 22 23 24 25 26 27 28 0 0 0 0 0 0 - [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] - [1,] 0 0 0 0 0 0 0 0 - [2,] 46 47 48 50 49 0 0 0 - [3,] 39 40 41 51 52 53 54 55 - [4,] 0 0 0 0 0 0 0 0 + [1,] 35 36 42 43 0 0 0 0 0 0 0 0 0 0 + [2,] 1 2 3 4 37 38 44 45 46 47 48 50 49 5 + [3,] 9 10 11 12 14 39 40 41 51 52 53 54 55 14 + [4,] 21 22 23 24 27 28 25 26 29 30 31 32 33 34 + [,15] [,16] [,17] [,18] [,19] [,20] [,21] + [1,] 0 0 0 0 0 0 0 + [2,] 6 7 8 0 0 0 0 + [3,] 15 12 18 17 16 19 20 + [4,] 0 0 0 0 0 0 0 $pos - [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] - [1,] 3.750000e+00 4.750000 6.500000 7.500000 11.75536 12.75536 17.71 18.71 - [2,] 9.470274e-16 1.000000 2.000000 3.000000 4.00000 5.00000 6.00 7.00 - [3,] 1.000000e-02 1.010000 2.010000 3.010000 4.01000 5.01000 6.01 7.01 - [4,] 6.009999e+00 7.009999 8.009999 9.009999 10.01000 11.01000 12.01 13.01 - [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] - [1,] 0.00000 0.00000 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - [2,] 10.50072 11.50072 13.01 14.01 15.01 16.01 17.01 18.01 19.01 20.01 21.01 - [3,] 8.01000 9.01000 10.01 11.01 12.01 13.01 14.01 15.01 16.01 17.01 18.01 - [4,] 0.00000 0.00000 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - [,20] [,21] [,22] - [1,] 0.00 0.00 0.00 - [2,] 0.00 0.00 0.00 - [3,] 19.01 20.01 21.01 - [4,] 0.00 0.00 0.00 + [,1] [,2] [,3] [,4] [,5] [,6] [,7] + [1,] 3.803847e+00 4.803847 9.576274 10.576274 0.000000 0.000000 0.000000 + [2,] -6.622646e-13 1.000000 2.803847 3.803847 4.803847 5.803847 6.876275 + [3,] 8.609109e-17 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 + [4,] -1.645370e-15 1.000000 2.000000 3.000000 11.010000 12.010000 13.010000 + [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] + [1,] 0.000000 0.000000 0.000000 0.00000 0.00000 0.00000 0.00 0.00 0.00 + [2,] 7.876275 8.876275 9.876275 10.87627 11.87627 12.87627 16.25 17.25 19.01 + [3,] 7.000000 8.000000 9.000000 10.00000 11.00000 12.00000 13.00 14.00 15.00 + [4,] 14.010000 15.010000 16.010000 17.01000 18.01000 19.01000 20.01 0.00 0.00 + [,17] [,18] [,19] [,20] [,21] + [1,] 0.00 0 0 0 0 + [2,] 20.01 0 0 0 0 + [3,] 16.00 17 18 19 20 + [4,] 0.00 0 0 0 0 $fam [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - [2,] 0 0 1 0 1 1 1 3 5 0 0 5 0 7 - [3,] 1 4 7 7 7 7 7 0 9 9 9 0 9 0 - [4,] 8 8 8 8 11 11 13 13 0 0 0 0 0 0 - [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] - [1,] 0 0 0 0 0 0 0 0 - [2,] 7 7 7 0 7 0 0 0 - [3,] 11 11 11 13 13 18 18 18 - [4,] 0 0 0 0 0 0 0 0 + [2,] 0 0 1 0 0 1 0 3 3 3 3 0 3 0 + [3,] 1 3 3 3 3 5 5 5 7 7 12 12 12 0 + [4,] 1 1 1 1 14 14 16 16 18 20 20 20 20 20 + [,15] [,16] [,17] [,18] [,19] [,20] [,21] + [1,] 0 0 0 0 0 0 0 + [2,] 0 0 0 0 0 0 0 + [3,] 14 0 14 0 14 14 16 + [4,] 0 0 0 0 0 0 0 $spouse [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] - [1,] 1 0 1 0 1 0 1 0 0 0 0 0 0 0 - [2,] 1 0 0 1 0 0 1 0 1 0 1 0 1 0 - [3,] 0 0 0 0 0 0 0 1 0 0 1 0 1 0 + [1,] 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + [2,] 1 0 1 0 1 0 1 0 0 0 0 1 0 1 + [3,] 1 0 0 0 0 0 0 0 0 0 0 0 0 1 [4,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] - [1,] 0 0 0 0 0 0 0 0 - [2,] 0 0 0 1 0 0 0 0 - [3,] 0 0 0 0 0 0 0 0 - [4,] 0 0 0 0 0 0 0 0 + [,15] [,16] [,17] [,18] [,19] [,20] [,21] + [1,] 0 0 0 0 0 0 0 + [2,] 0 1 0 0 0 0 0 + [3,] 0 1 0 1 0 1 0 + [4,] 0 0 0 0 0 0 0 # besthint works @@ -66,59 +66,54 @@ plist Output $n - [1] 8 19 21 8 + [1] 4 17 21 14 $nid [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] - [1,] 7 8 5 6 35 36 42 43 0 0 0 0 0 0 - [2,] 20 19 15 17 16 18 1 2 3 4 37 38 44 45 - [3,] 30 31 32 33 34 29 9 10 11 12 18 14 15 39 - [4,] 21 22 23 24 25 26 27 28 0 0 0 0 0 0 + [1,] 35 36 42 43 0 0 0 0 0 0 0 0 0 0 + [2,] 7 8 5 6 3 4 37 38 1 2 44 45 46 47 + [3,] 20 19 17 16 15 18 12 15 14 11 10 39 40 41 + [4,] 30 31 32 33 34 29 25 26 27 28 21 22 23 24 [,15] [,16] [,17] [,18] [,19] [,20] [,21] [1,] 0 0 0 0 0 0 0 - [2,] 46 47 48 50 49 0 0 - [3,] 40 41 51 52 53 54 55 + [2,] 48 50 49 0 0 0 0 + [3,] 10 9 51 52 53 54 55 [4,] 0 0 0 0 0 0 0 $pos - [,1] [,2] [,3] [,4] [,5] [,6] [,7] - [1,] 1.827438e-01 1.182744 3.182744 4.182744 10.751665 11.751665 16.709999 - [2,] 6.827438e-01 1.682744 2.682744 3.682744 4.682744 5.682744 6.682744 - [3,] 7.566676e-14 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 - [4,] 4.999999e+00 5.999999 6.999999 7.999999 9.000000 10.000000 11.000000 - [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] - [1,] 17.709999 0.00000 0.00000 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - [2,] 7.682744 9.49333 10.49333 12.01 13.01 14.01 15.01 16.01 17.01 18.01 19.01 - [3,] 7.000000 8.00000 9.00000 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 - [4,] 12.000000 0.00000 0.00000 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 - [,19] [,20] [,21] - [1,] 0.00 0.00 0.00 - [2,] 20.01 0.00 0.00 - [3,] 18.01 19.01 20.01 - [4,] 0.00 0.00 0.00 + [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] + [1,] 9.010000e+00 10.01 16.71 17.71 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + [2,] 1.855743e-15 1.00 2.76 3.76 8.01 9.01 10.01 11.01 12.01 13.01 14.01 + [3,] 1.000000e-02 1.01 2.01 3.01 4.01 5.01 6.01 7.01 8.01 9.01 10.01 + [4,] 2.880160e-14 1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00 9.00 13.01 + [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] + [1,] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + [2,] 15.01 16.01 17.01 18.01 19.01 20.01 0.00 0.00 0.00 0.00 + [3,] 11.01 12.01 13.01 14.01 15.01 16.01 17.01 18.01 19.01 20.01 + [4,] 14.01 15.01 16.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 $fam [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - [2,] 1 3 3 0 3 3 0 0 5 0 0 5 0 7 - [3,] 1 1 1 1 1 4 7 9 9 9 0 9 0 11 - [4,] 7 7 7 7 10 10 12 12 0 0 0 0 0 0 + [2,] 0 0 0 0 1 0 0 1 0 0 0 3 3 3 + [3,] 1 3 0 3 3 3 5 0 5 5 5 7 7 7 + [4,] 1 1 1 1 1 3 6 6 8 8 15 15 15 15 [,15] [,16] [,17] [,18] [,19] [,20] [,21] [1,] 0 0 0 0 0 0 0 - [2,] 7 7 7 0 7 0 0 - [3,] 11 11 13 13 18 18 18 + [2,] 3 0 3 0 0 0 0 + [3,] 0 9 11 11 16 16 16 [4,] 0 0 0 0 0 0 0 $spouse [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] - [1,] 1 0 1 0 1 0 1 0 0 0 0 0 0 0 - [2,] 1 0 0 1 0 0 1 0 1 0 1 0 1 0 - [3,] 0 0 0 0 0 0 1 0 0 1 0 1 0 0 + [1,] 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + [2,] 1 0 1 0 1 0 1 0 1 0 1 0 0 0 + [3,] 1 0 1 0 0 1 0 1 0 0 0 0 0 0 [4,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [,15] [,16] [,17] [,18] [,19] [,20] [,21] [1,] 0 0 0 0 0 0 0 - [2,] 0 0 0 1 0 0 0 - [3,] 0 0 0 0 0 0 0 + [2,] 0 1 0 0 0 0 0 + [3,] 1 0 0 0 0 0 0 [4,] 0 0 0 0 0 0 0 diff --git a/tests/testthat/_snaps/align/best-hint.svg b/tests/testthat/_snaps/align/best-hint.svg new file mode 100644 index 00000000..92cca9a1 --- /dev/null +++ b/tests/testthat/_snaps/align/best-hint.svg @@ -0,0 +1,293 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 +0 +0 +0 +1 +1 +1 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +0 +1 +0 +0 +0 +1 +0 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +1 +135 +107 +120 +130 +136 +108 +119 +131 +201 +105 +117 +132 +202 +106 +116 +133 +103 +115 +134 +104 +118 +129 +137 +112 +125 +138 +115 +126 +101 +114 +127 +102 +111 +128 +203 +110 +121 +204 +139 +122 +205 +140 +123 +206 +141 +124 +207 +110 +209 +109 +208 +210 +211 +212 +213 +214 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/align/double-wife.svg b/tests/testthat/_snaps/align/double-wife.svg deleted file mode 100644 index 96a6b28c..00000000 --- a/tests/testthat/_snaps/align/double-wife.svg +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - -1 -4 -5 -2 -3 -7 -6 - - - - - - - - - - - - - - - - - - - - diff --git a/tests/testthat/_snaps/align/sampleped-norel.svg b/tests/testthat/_snaps/align/sampleped-norel.svg new file mode 100644 index 00000000..721fc7c3 --- /dev/null +++ b/tests/testthat/_snaps/align/sampleped-norel.svg @@ -0,0 +1,293 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +0 +1 +1 +1 +0 +1 +1 +1 +0 +0 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +1 +0 +1 +0 +0 +0 +0 +1 +0 +1 +1 +0 +1 +1 +0 +1 +1 +1 +1 +0 +135 +101 +109 +121 +136 +102 +110 +122 +201 +103 +111 +123 +202 +104 +112 +124 +137 +114 +127 +138 +139 +128 +203 +140 +125 +204 +141 +126 +205 +210 +129 +206 +211 +130 +207 +212 +131 +209 +213 +132 +208 +214 +133 +105 +114 +134 +106 +115 +107 +112 +108 +118 +117 +116 +119 +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/align/sampleped-withrel.svg b/tests/testthat/_snaps/align/sampleped-withrel.svg new file mode 100644 index 00000000..3e650bfe --- /dev/null +++ b/tests/testthat/_snaps/align/sampleped-withrel.svg @@ -0,0 +1,305 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +0 +1 +1 +0 +0 +1 +1 +1 +0 +0 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +1 +0 +0 +0 +1 +0 +0 +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +0 +0 +1 +1 +1 +1 +1 +0 +135 +101 +209 +121 +136 +102 +109 +122 +201 +103 +110 +123 +202 +104 +112 +124 +137 +111 +127 +138 +114 +128 +203 +113 +125 +204 +139 +126 +205 +140 +129 +206 +141 +130 +207 +210 +131 +209 +211 +132 +208 +212 +133 +105 +213 +134 +106 +214 +107 +114 +108 +115 +112 +118 +117 +116 +119 +120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/class.md b/tests/testthat/_snaps/class.md new file mode 100644 index 00000000..1e92f50d --- /dev/null +++ b/tests/testthat/_snaps/class.md @@ -0,0 +1,74 @@ +# Class ped work + + Code + ped2 + Output + Ped object with 2 individuals and 0 metadata columns: + id dadid momid sex famid + + ID5 ID5 female + ID4 ID4 male + steril status avail affected useful kin isinf + + ID5 + ID4 + num_child_tot num_child_dir num_child_ind + + ID5 0 0 0 + ID4 0 0 0 + +# Rel class works + + Code + rel2 + Output + Rel object with 2 relationshipswith 1 MZ twin, 0 DZ twin, 0 UZ twin, 1 Spouse: + id1 id2 code famid | A + | + 1 ID3 ID5 MZ twin | 1 + 2 ID2 ID4 Spouse | 2 + +# Hints class works + + Code + hts0 + Output + An object of class "Hints" + Slot "horder": + ID1 ID2 ID3 ID4 + 1 2 3 4 + + Slot "spouse": + idl idr anchor + 1 ID1 ID3 left + 2 ID2 ID4 right + + +# Scales class works + + invalid class "Scales" object: 1: Fill slot affected column(s) must be logical + invalid class "Scales" object: 2: Fill slot angle, order, mods column(s) must be numeric + invalid class "Scales" object: 3: Fill slot column_mods column(s) must be character + +--- + + invalid class "Scales" object: 1: Border slot labels column(s) must be character + invalid class "Scales" object: 2: Border slot mods column(s) must be numeric + +--- + + Code + scl0 + Output + An object of class "Scales" + Slot "fill": + order column_values column_mods mods labels affected fill density angle + 1 2 ID1 ID1 1 ID1 TRUE ID3 1 90 + 2 3 ID2 ID2 2 ID2 FALSE ID2 2 60 + + Slot "border": + column_values column_mods mods labels border + 1 ID1 ID1 1 Lab1 ID1 + 2 ID2 ID2 2 Lab2 ID2 + + diff --git a/tests/testthat/_snaps/fix_parents.md b/tests/testthat/_snaps/fix_parents.md new file mode 100644 index 00000000..1f426749 --- /dev/null +++ b/tests/testthat/_snaps/fix_parents.md @@ -0,0 +1,16 @@ +# fix_parents works with number + + invalid class "Ped" object: dadid values '100', '200' should be in '1', '2', '3', '4', '5'... + +# fix_parents works with character + + invalid class "Ped" object: momid values 'fam112' should be in 'fam101', 'fam102', 'fam103', 'fam104', 'fam105'... + +# fix_parents works with sex errors + + invalid class "Ped" object: dadid values '2_209' should be in '2_201', '2_202', '2_203', '2_204', '2_205'... + +# fix_parents works with famid + + invalid class "Ped" object: dadid values '2_209' should be in '1_101', '1_102', '1_103', '1_104', '1_105'... + diff --git a/tests/testthat/_snaps/is_informative.md b/tests/testthat/_snaps/is_informative.md new file mode 100644 index 00000000..e581e1eb --- /dev/null +++ b/tests/testthat/_snaps/is_informative.md @@ -0,0 +1,4 @@ +# is_informative works with Pedigree + + The column test is not in the scales fill + diff --git a/tests/testthat/_snaps/kindepth/double-marriage.svg b/tests/testthat/_snaps/kindepth/double-marriage.svg new file mode 100644 index 00000000..b065e98f --- /dev/null +++ b/tests/testthat/_snaps/kindepth/double-marriage.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +1 +3 +7 +11 +2 +4 +9 +12 +5 +8 +6 +10 +9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/kindepth/niece-uncle-spouse.svg b/tests/testthat/_snaps/kindepth/niece-uncle-spouse.svg new file mode 100644 index 00000000..3241c57d --- /dev/null +++ b/tests/testthat/_snaps/kindepth/niece-uncle-spouse.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + +1 +3 +5 +7 +2 +4 +6 +5 + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/norm_data.md b/tests/testthat/_snaps/norm_data.md index 5c600add..b446de48 100644 --- a/tests/testthat/_snaps/norm_data.md +++ b/tests/testthat/_snaps/norm_data.md @@ -14,53 +14,66 @@ 8 8 0 0 1 FALSE 0 3 D 9 8 2 0 2 FALSE 3 A 10 9 9 8 3 FALSE Ab 5 B - sex steril avail id dadid momid - 1 terminated 1 NA 1 3 4 - 2 male 0 1 2 0 0 - 3 male 0 0 3 8 7 - 4 female 0 NA 4 6 5 - 5 female 0 NA 5 0 0 - 6 male 0 0 6 0 - 7 male 0 NA 7 0 0 - 8 male 0 0 8 0 0 - 9 female 0 NA 8 2 0 - 10 unknown 0 NA 9 9 8 - error - 1 - 2 isSterilButIsParent - 3 fatherIdDuplicated - 4 - 5 - 6 isSterilButIsParent - 7 isMotherButNotFemale - 8 selfIdDuplicated_isMotherAndFather_isMotherButNotFemale - 9 selfIdDuplicated_oneParentMissing_isMotherAndFather_isFatherButNotMale - 10 motherIdDuplicated_isItsOwnParent_isFatherButNotMale - affected status family vitalStatus affection - 1 NA NA NA NA - 2 NA NA NA NA - 3 NA NA NA NA - 4 NA NA NA NA - 5 NA NA NA NA - 6 NA NA NA NA - 7 NA NA NA NA - 8 NA NA NA NA - 9 NA NA NA NA - 10 NA NA NA NA + sex steril status avail id dadid momid famid + 1 terminated TRUE NA NA 1 3 4 + 2 male FALSE NA TRUE 2 0 0 + 3 male FALSE NA FALSE 3 8 7 + 4 female FALSE NA NA 4 6 5 + 5 female FALSE NA NA 5 0 0 + 6 male FALSE NA FALSE 6 0 + 7 female FALSE NA NA 7 0 0 + 8 female FALSE NA FALSE 8 0 0 + 9 female FALSE NA NA 8 2 0 + 10 male FALSE NA NA 9 9 8 + error affected family + 1 NA + 2 isSterilButIsParent NA + 3 fatherIdDuplicated NA + 4 NA + 5 NA + 6 oneParentMissing_isSterilButIsParent NA + 7 NA + 8 selfIdDuplicated_isMotherAndFather_isFatherButNotMale NA + 9 selfIdDuplicated_isMotherAndFather_isFatherButNotMale NA + 10 motherIdDuplicated_isItsOwnParent NA + vitalStatus affection + 1 NA NA + 2 NA NA + 3 NA NA + 4 NA NA + 5 NA NA + 6 NA NA + 7 NA NA + 8 NA NA + 9 NA NA + 10 NA NA # Norm rel Code rel_df Output - indId1 indId2 code family id1 id2 error - 1 1 2 MZ twin 1 1_1 1_2 - 2 1 3 DZ twin 1 1_1 1_3 - 3 2 3 UZ twin 1 1_2 1_3 - 4 1 2 Spouse 2 2_1 2_2 - 5 3 4 MZ twin 2 2_3 2_4 - 6 6 7 2 2_6 2_7 CodeNotRecognise - 7 8 8 Spouse 2 2_8 2_8 SameId - 8 9 0 Spouse 1 1_9 0 - 9 B 1 1_B indId1length0_CodeNotRecognise + id1 id2 code famid error + 1 1 2 MZ twin + 2 1 3 DZ twin + 3 2 3 UZ twin + 4 1 2 Spouse + 5 3 4 MZ twin + 6 6 7 CodeNotRecognise + 7 8 8 Spouse SameId + 8 9 0 Spouse + 9 B indId1length0_CodeNotRecognise + +--- + + Code + norm_rel(rel_df, missid = "0") + Output + id1 id2 code famid error + 1 1 2 MZ twin + 2 3 2 DZ twin + 3 3 1 DZ twin + 4 3 4 MZ twin + 5 7 Other CodeNotRecognise + 6 spo Use 9 CodeNotRecognise diff --git a/tests/testthat/_snaps/ped_to_legdf.md b/tests/testthat/_snaps/ped_to_legdf.md index e8b5d088..4dce50d6 100644 --- a/tests/testthat/_snaps/ped_to_legdf.md +++ b/tests/testthat/_snaps/ped_to_legdf.md @@ -3,13 +3,13 @@ Code lst Output - $leg_df + $df id x0 y0 x1 y1 type fill border angle 1 titles 0.20000 0.0 NA NA text black NA 2 titles 4.46750 0.0 NA NA text black NA 3 titles 9.20250 0.0 NA NA text black NA 4 titles 14.56333 0.0 NA NA text black NA - 5 titles 19.92417 0.0 NA NA text black NA + 5 titles 20.28375 0.0 NA NA text black NA 6 sex 0.00000 1.0 NA NA square_1_1 white black NA 7 sex 0.00000 3.0 NA NA circle_1_1 white black NA 8 sex_label 1.20000 1.5 NA NA text black NA @@ -33,32 +33,32 @@ 26 affected_2_1 14.36333 3.0 NA NA square_3_2 red black NA 27 affected_label_2_0 15.56333 1.5 NA NA text black NA 28 affected_label_2_1 15.56333 3.5 NA NA text black NA - 29 aff_bkg_3_1 19.72417 1.0 NA NA square_1_1 white black NA - 30 aff_bkg_3_2 19.72417 3.0 NA NA square_1_1 white black NA - 31 aff_bkg_3_3 19.72417 5.0 NA NA square_1_1 white black NA - 32 aff_bkg_3_4 19.72417 7.0 NA NA square_1_1 white black NA - 33 aff_bkg_3_5 19.72417 9.0 NA NA square_1_1 white black NA - 34 aff_bkg_3_6 19.72417 11.0 NA NA square_1_1 white black NA - 35 affected_3_1 19.72417 1.0 NA NA square_3_3 #FFFFFF black NA - 36 affected_3_2 19.72417 3.0 NA NA square_3_3 #9AB1C4 black NA - 37 affected_3_3 19.72417 5.0 NA NA square_3_3 #36648B black NA - 38 affected_3_4 19.72417 7.0 NA NA square_3_3 #FFC0CB black NA - 39 affected_3_5 19.72417 9.0 NA NA square_3_3 #CF70DD black NA - 40 affected_3_6 19.72417 11.0 NA NA square_3_3 #A020F0 black NA - 41 affected_label_3_1 20.92417 1.5 NA NA text black NA - 42 affected_label_3_2 20.92417 3.5 NA NA text black NA - 43 affected_label_3_3 20.92417 5.5 NA NA text black NA - 44 affected_label_3_4 20.92417 7.5 NA NA text black NA - 45 affected_label_3_5 20.92417 9.5 NA NA text black NA - 46 affected_label_3_6 20.92417 11.5 NA NA text black NA + 29 aff_bkg_3_1 20.08375 1.0 NA NA square_1_1 white black NA + 30 aff_bkg_3_2 20.08375 3.0 NA NA square_1_1 white black NA + 31 aff_bkg_3_3 20.08375 5.0 NA NA square_1_1 white black NA + 32 aff_bkg_3_4 20.08375 7.0 NA NA square_1_1 white black NA + 33 aff_bkg_3_5 20.08375 9.0 NA NA square_1_1 white black NA + 34 aff_bkg_3_6 20.08375 11.0 NA NA square_1_1 white black NA + 35 affected_3_1 20.08375 1.0 NA NA square_3_3 #FFFFFF black NA + 36 affected_3_2 20.08375 3.0 NA NA square_3_3 #9AB1C4 black NA + 37 affected_3_3 20.08375 5.0 NA NA square_3_3 #36648B black NA + 38 affected_3_4 20.08375 7.0 NA NA square_3_3 #FFC0CB black NA + 39 affected_3_5 20.08375 9.0 NA NA square_3_3 #CF70DD black NA + 40 affected_3_6 20.08375 11.0 NA NA square_3_3 #A020F0 black NA + 41 affected_label_3_1 21.28375 1.5 NA NA text black NA + 42 affected_label_3_2 21.28375 3.5 NA NA text black NA + 43 affected_label_3_3 21.28375 5.5 NA NA text black NA + 44 affected_label_3_4 21.28375 7.5 NA NA text black NA + 45 affected_label_3_5 21.28375 9.5 NA NA text black NA + 46 affected_label_3_6 21.28375 11.5 NA NA text black NA 47 max_lim 0.00000 0.0 NA NA text black NA - 48 max_lim 26.30875 11.0 NA NA text black NA + 48 max_lim 26.66833 11.0 NA NA text black NA density cex label tips adjx adjy 1 NA 1.2 Sex 0 1 2 NA 1.2 Border 0 1 3 NA 1.2 affection 0 1 4 NA 1.2 avail 0 1 - 5 NA 1.2 indId 0 1 + 5 NA 1.2 val_num 0 1 6 NA NA NA NA 7 NA NA NA NA 8 NA 0.8 Male 0 1 @@ -80,8 +80,8 @@ 24 NA NA NA NA 25 NA NA NA NA 26 NA NA NA NA - 27 NA 0.8 Healthy <= to 0.5 0 1 - 28 NA 0.8 Affected > to 0.5 0 1 + 27 NA 0.8 Healthy are FALSE 0 1 + 28 NA 0.8 Affected are TRUE 0 1 29 NA NA NA NA 30 NA NA NA NA 31 NA NA NA NA @@ -114,7 +114,7 @@ [1] 0.8 $par_usr$usr - [1] 0.00000 26.30875 0.00000 11.50000 + [1] 0.00000 26.66833 0.00000 11.50000 diff --git a/tests/testthat/_snaps/ped_to_legdf/legend-alone.svg b/tests/testthat/_snaps/ped_to_legdf/legend-alone.svg index 033fd718..fbd9f873 100644 --- a/tests/testthat/_snaps/ped_to_legdf/legend-alone.svg +++ b/tests/testthat/_snaps/ped_to_legdf/legend-alone.svg @@ -25,51 +25,51 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -Sex -Border -affection -avail -indId -Male -Female -Non Available -Available -Healthy <= to 0.5 -Affected > to 0.5 -NA -Healthy <= to 0.5 -Affected > to 0.5 -Healthy <= to 115 : [101,106] -Healthy <= to 115 : (106,110] -Healthy <= to 115 : (110,115] -Affected > to 115 : [116,124] -Affected > to 115 : (124,133] -Affected > to 115 : (133,141] + + + + + + + + + + + + + + + + + + + + + + + + + + +Sex +Border +affection +avail +val_num +Male +Female +Non Available +Available +Healthy <= to 0.5 +Affected > to 0.5 +NA +Healthy are FALSE +Affected are TRUE +Healthy <= to 115 : [101,106] +Healthy <= to 115 : (106,110] +Healthy <= to 115 : (110,115] +Affected > to 115 : [116,124] +Affected > to 115 : (124,133] +Affected > to 115 : (133,141] diff --git a/tests/testthat/_snaps/ped_to_legdf/plot-with-legend.svg b/tests/testthat/_snaps/ped_to_legdf/plot-with-legend.svg index de578bab..7469a019 100644 --- a/tests/testthat/_snaps/ped_to_legdf/plot-with-legend.svg +++ b/tests/testthat/_snaps/ped_to_legdf/plot-with-legend.svg @@ -18,317 +18,312 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -1_105 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1_135 1_101 -1_109 -1_121 -1_106 -1_102 -1_129 -1_122 -1_107 -1_115 -1_130 -1_123 -1_108 -1_117 -1_131 -1_124 -1_135 -1_116 -1_132 -1_125 -1_136 -1_118 -1_133 -1_126 -1_119 -1_134 -1_127 -1_120 -1_109 -1_128 -1_103 -1_110 -1_104 -1_111 -1_137 -1_112 -1_138 -1_118 -1_114 -1_115 -1_139 -1_140 -1_141 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +1_109 +1_121 +1_136 +1_102 +1_110 +1_122 +1_103 +1_111 +1_123 +1_104 +1_112 +1_124 +1_137 +1_114 +1_127 +1_138 +1_139 +1_128 +1_105 +1_140 +1_125 +1_106 +1_141 +1_126 +1_107 +1_114 +1_129 +1_108 +1_115 +1_130 +1_112 +1_131 +1_118 +1_132 +1_117 +1_133 +1_116 +1_134 +1_119 +1_120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sex -Border -affection -avail -indId -Male -Female -Non Available -Available -Healthy <= to 0.5 -Affected > to 0.5 -NA -Healthy <= to 0.5 -Affected > to 0.5 -Healthy <= to 115 : [101,106] -Healthy <= to 115 : (106,110] -Healthy <= to 115 : (110,115] -Affected > to 115 : [116,124] -Affected > to 115 : (124,133] -Affected > to 115 : (133,141] + + + + + + + + + + + + + + + + + + + + + + + + + + + +Sex +Border +affection +avail +val_num +Male +Female +Non Available +Available +Healthy <= to 0.5 +Affected > to 0.5 +NA +Healthy are FALSE +Affected are TRUE +Healthy <= to 115 : [101,106] +Healthy <= to 115 : (106,110] +Healthy <= to 115 : (110,115] +Affected > to 115 : [116,124] +Affected > to 115 : (124,133] +Affected > to 115 : (133,141] diff --git a/tests/testthat/_snaps/plot.md b/tests/testthat/_snaps/plot.md index 36eb673a..60a40fac 100644 --- a/tests/testthat/_snaps/plot.md +++ b/tests/testthat/_snaps/plot.md @@ -1,281 +1,179 @@ -# Pedigree other test +# Pedigree plotting test Code lst Output $df - id x0 y0 - 1 polygon 1.499999763 1.000000 - 2 polygon 0.499999820 2.000000 - 3 polygon 0.000000000 3.000000 - 4 polygon 0.499999995 4.000000 - 5 polygon 2.499999763 1.000000 - 6 polygon 1.499999820 2.000000 - 7 polygon 1.000000000 3.000000 - 8 polygon 2.499999820 2.000000 - 9 polygon 2.499999735 3.000000 - 10 polygon 3.499999820 2.000000 - 11 polygon 3.499999735 3.000000 - 12 aff_mark 1.492925717 1.013154 - 13 aff_mark 0.492925773 2.013154 - 14 aff_mark -0.007074047 3.013154 - 15 aff_mark 0.492925948 4.013154 - 16 aff_mark 2.492925717 1.013154 - 17 aff_mark 1.492925773 2.013154 - 18 aff_mark 0.992925953 3.013154 - 19 aff_mark 2.492925773 2.013154 - 20 aff_mark 2.492925688 3.013154 - 21 aff_mark 3.492925773 2.013154 - 22 aff_mark 3.492925688 3.013154 - 23 polygon 1.499999763 1.000000 - 24 polygon 0.499999820 2.000000 - 25 polygon 0.000000000 3.000000 - 26 polygon 0.499999995 4.000000 - 27 polygon 2.499999763 1.000000 - 28 polygon 1.499999820 2.000000 - 29 polygon 1.000000000 3.000000 - 30 polygon 2.499999820 2.000000 - 31 polygon 2.499999735 3.000000 - 32 polygon 3.499999820 2.000000 - 33 polygon 3.499999735 3.000000 - 34 aff_mark 1.507073810 1.013154 - 35 aff_mark 0.507073867 2.013154 - 36 aff_mark 0.007074047 3.013154 - 37 aff_mark 0.507074042 4.013154 - 38 aff_mark 2.507073810 1.013154 - 39 aff_mark 1.507073867 2.013154 - 40 aff_mark 1.007074047 3.013154 - 41 aff_mark 2.507073867 2.013154 - 42 aff_mark 2.507073782 3.013154 - 43 aff_mark 3.507073867 2.013154 - 44 aff_mark 3.507073782 3.013154 - 45 dead 1.483022051 1.028940 - 46 dead 0.483022108 2.028940 - 47 dead 2.483022051 1.028940 - 48 dead 1.483022108 2.028940 - 49 id 1.499999763 1.052769 - 50 id 0.499999820 2.052769 - 51 id 0.000000000 3.052769 - 52 id 0.499999995 4.052769 - 53 id 2.499999763 1.052769 - 54 id 1.499999820 2.052769 - 55 id 1.000000000 3.052769 - 56 id 2.499999820 2.052769 - 57 id 2.499999735 3.052769 - 58 id 3.499999820 2.052769 - 59 id 3.499999735 3.052769 - 60 line_spouses 1.514147857 1.013154 - 61 line_spouses 0.514147914 2.013154 - 62 line_spouses 0.014148094 3.013154 - 63 line_spouses 2.514147914 2.013154 - 64 line_spouses2 0.014148094 3.015785 - 65 line_children_vertical 0.499999820 2.000000 - 66 line_children_vertical 3.499999820 2.000000 - 67 line_children_horizontal 0.499999820 1.960537 - 68 line_parent_mid 1.999999763 1.960537 - 69 line_parent_mid 1.999999763 1.676322 - 70 line_parent_mid 1.999999763 1.297369 - 71 line_children_vertical 1.000000000 3.000000 - 72 line_children_horizontal 1.000000000 2.960537 - 73 line_parent_mid 1.000000000 2.960537 - 74 line_parent_mid 1.000000000 2.676322 - 75 line_parent_mid 0.999999820 2.297369 - 76 line_children_vertical 2.499999735 3.000000 - 77 line_children_vertical 3.499999735 3.000000 - 78 label_children_twin3 2.999999735 2.980268 - 79 line_children_horizontal 2.999999735 2.960537 - 80 line_parent_mid 2.999999735 2.960537 - 81 line_parent_mid 2.999999735 2.676322 - 82 line_parent_mid 2.999999820 2.297369 - 83 line_children_vertical 0.499999995 4.000000 - 84 line_children_horizontal 0.499999995 3.960537 - 85 line_parent_mid 0.499999995 3.960537 - 86 line_parent_mid 0.499999995 3.676322 - 87 line_parent_mid 0.500000000 3.297369 - 88 arc 0.000000000 3.000000 - x1 y1 type fill border - 1 NA NA square_2_1 red black - 2 NA NA square_2_1 white green - 3 NA NA square_2_1 red green - 4 NA NA circle_2_1 white black - 5 NA NA circle_2_1 white green - 6 NA NA circle_2_1 red green - 7 NA NA circle_2_1 white black - 8 NA NA square_2_1 white green - 9 NA NA square_2_1 red green - 10 NA NA circle_2_1 red black - 11 NA NA square_2_1 white green - 12 NA NA text black - 13 NA NA text black - 14 NA NA text black - 15 NA NA text black - 16 NA NA text black - 17 NA NA text black - 18 NA NA text black - 19 NA NA text black - 20 NA NA text black - 21 NA NA text black - 22 NA NA text black - 23 NA NA square_2_2 white black - 24 NA NA square_2_2 white green - 25 NA NA square_2_2 white green - 26 NA NA circle_2_2 white black - 27 NA NA circle_2_2 grey green - 28 NA NA circle_2_2 #c300ff green - 29 NA NA circle_2_2 #c300ff black - 30 NA NA square_2_2 #c300ff green - 31 NA NA square_2_2 white green - 32 NA NA circle_2_2 white black - 33 NA NA square_2_2 white green - 34 NA NA text black - 35 NA NA text black - 36 NA NA text black - 37 NA NA text black - 38 NA NA text black - 39 NA NA text black - 40 NA NA text black - 41 NA NA text black - 42 NA NA text black - 43 NA NA text black - 44 NA NA text black - 45 1.5169775 0.9973691 segments black - 46 0.5169775 1.9973691 segments black - 47 2.5169775 0.9973691 segments black - 48 1.5169775 1.9973691 segments black - 49 NA NA text black - 50 NA NA text black - 51 NA NA text black - 52 NA NA text black - 53 NA NA text black - 54 NA NA text black - 55 NA NA text black - 56 NA NA text black - 57 NA NA text black - 58 NA NA text black - 59 NA NA text black - 60 2.4858517 1.0131545 segments black - 61 1.4858517 2.0131545 segments black - 62 0.9858519 3.0131545 segments black - 63 3.4858517 2.0131545 segments black - 64 0.9858519 3.0157854 segments black - 65 0.4999998 1.9605366 segments black - 66 3.4999998 1.9605366 segments black - 67 3.4999998 1.9605366 segments black - 68 1.9999998 1.6763220 segments black - 69 1.9999998 1.2973691 segments black - 70 1.9999998 1.0131545 segments black - 71 1.0000000 2.9605366 segments black - 72 1.0000000 2.9605366 segments black - 73 1.0000000 2.6763220 segments black - 74 0.9999998 2.2973691 segments black - 75 0.9999998 2.0131545 segments black - 76 2.9999997 2.9605366 segments black - 77 2.9999997 2.9605366 segments black - 78 NA NA text black - 79 2.9999997 2.9605366 segments black - 80 2.9999997 2.6763220 segments black - 81 2.9999998 2.2973691 segments black - 82 2.9999998 2.0131545 segments black - 83 0.5000000 3.9605366 segments black - 84 0.5000000 3.9605366 segments black - 85 0.5000000 3.6763220 segments black - 86 0.5000000 3.2973691 segments black - 87 0.5000000 3.0131545 segments black - 88 2.4999997 3.0000000 arc black - angle density cex label tips adjx adjy - 1 NA NA NA NA NA - 2 NA NA NA NA NA - 3 NA NA NA NA NA - 4 NA NA NA NA NA - 5 NA NA NA NA NA - 6 NA NA NA NA NA - 7 NA NA NA NA NA - 8 NA NA NA NA NA - 9 NA NA NA NA NA - 10 NA NA NA NA NA - 11 NA NA NA NA NA - 12 NA NA 0.5 1 NA NA - 13 NA NA 0.5 0 NA NA - 14 NA NA 0.5 1 NA NA - 15 NA NA 0.5 0 NA NA - 16 NA NA 0.5 0 NA NA - 17 NA NA 0.5 1 NA NA - 18 NA NA 0.5 0 NA NA - 19 NA NA 0.5 0 NA NA - 20 NA NA 0.5 1 NA NA - 21 NA NA 0.5 1 NA NA - 22 NA NA 0.5 0 NA NA - 23 NA NA NA NA NA - 24 NA NA NA NA NA - 25 NA NA NA NA NA - 26 NA NA NA NA NA - 27 NA NA NA NA NA - 28 NA NA NA NA NA - 29 NA NA NA NA NA - 30 NA NA NA NA NA - 31 NA NA NA NA NA - 32 NA NA NA NA NA - 33 NA NA NA NA NA - 34 NA NA 0.5 0 NA NA - 35 NA NA 0.5 0 NA NA - 36 NA NA 0.5 0 NA NA - 37 NA NA 0.5 0 NA NA - 38 NA NA 0.5 NA NA - 39 NA NA 0.5 1 NA NA - 40 NA NA 0.5 1 NA NA - 41 NA NA 0.5 1 NA NA - 42 NA NA 0.5 0 NA NA - 43 NA NA 0.5 0 NA NA - 44 NA NA 0.5 0 NA NA - 45 NA NA 0.5 NA NA - 46 NA NA 0.5 NA NA - 47 NA NA 0.5 NA NA - 48 NA NA 0.5 NA NA - 49 NA NA 0.5 1_1 NA NA - 50 NA NA 0.5 1_3 NA NA - 51 NA NA 0.5 1_8 NA NA - 52 NA NA 0.5 1_10 NA NA - 53 NA NA 0.5 1_2 NA NA - 54 NA NA 0.5 1_5 NA NA - 55 NA NA 0.5 1_7 NA NA - 56 NA NA 0.5 1_6 NA NA - 57 NA NA 0.5 1_8 NA NA - 58 NA NA 0.5 1_4 NA NA - 59 NA NA 0.5 1_9 NA NA - 60 NA NA 0.5 NA NA - 61 NA NA 0.5 NA NA - 62 NA NA 0.5 NA NA - 63 NA NA 0.5 NA NA - 64 NA NA 0.5 NA NA - 65 NA NA 0.5 NA NA - 66 NA NA 0.5 NA NA - 67 NA NA 0.5 NA NA - 68 NA NA 0.5 NA NA - 69 NA NA 0.5 NA NA - 70 NA NA 0.5 NA NA - 71 NA NA 0.5 NA NA - 72 NA NA 0.5 NA NA - 73 NA NA 0.5 NA NA - 74 NA NA 0.5 NA NA - 75 NA NA 0.5 NA NA - 76 NA NA 0.5 NA NA - 77 NA NA 0.5 NA NA - 78 NA NA 0.5 ? NA NA - 79 NA NA 0.5 NA NA - 80 NA NA 0.5 NA NA - 81 NA NA 0.5 NA NA - 82 NA NA 0.5 NA NA - 83 NA NA 0.5 NA NA - 84 NA NA 0.5 NA NA - 85 NA NA 0.5 NA NA - 86 NA NA 0.5 NA NA - 87 NA NA 0.5 NA NA - 88 NA NA 0.5 NA NA + id x0 y0 x1 y1 + 1 polygon 1.000000e+00 1.000000 NA NA + 2 polygon 1.619369e-08 2.000000 NA NA + 3 polygon 7.071068e-01 3.000000 NA NA + 4 polygon 1.207107e+00 4.000000 NA NA + 5 polygon 2.000000e+00 1.000000 NA NA + 6 polygon 1.000000e+00 2.000000 NA NA + 7 polygon 1.707107e+00 3.000000 NA NA + 8 polygon 2.000000e+00 2.000000 NA NA + 9 polygon 2.707107e+00 3.000000 NA NA + 10 polygon 3.000000e+00 2.000000 NA NA + 11 aff_mark 9.751432e-01 1.054658 NA NA + 12 aff_mark -2.485673e-02 2.054658 NA NA + 13 aff_mark 6.822500e-01 3.054658 NA NA + 14 aff_mark 1.182250e+00 4.054658 NA NA + 15 aff_mark 1.975143e+00 1.054658 NA NA + 16 aff_mark 9.751433e-01 2.054658 NA NA + 17 aff_mark 1.682250e+00 3.054658 NA NA + 18 aff_mark 1.975143e+00 2.054658 NA NA + 19 aff_mark 2.682250e+00 3.054658 NA NA + 20 aff_mark 2.975143e+00 2.054658 NA NA + 21 polygon 1.000000e+00 1.000000 NA NA + 22 polygon 1.619369e-08 2.000000 NA NA + 23 polygon 7.071068e-01 3.000000 NA NA + 24 polygon 1.207107e+00 4.000000 NA NA + 25 polygon 2.000000e+00 1.000000 NA NA + 26 polygon 1.000000e+00 2.000000 NA NA + 27 polygon 1.707107e+00 3.000000 NA NA + 28 polygon 2.000000e+00 2.000000 NA NA + 29 polygon 2.707107e+00 3.000000 NA NA + 30 polygon 3.000000e+00 2.000000 NA NA + 31 aff_mark 1.024857e+00 1.054658 NA NA + 32 aff_mark 2.485676e-02 2.054658 NA NA + 33 aff_mark 7.319635e-01 3.054658 NA NA + 34 aff_mark 1.231963e+00 4.054658 NA NA + 35 aff_mark 2.024857e+00 1.054658 NA NA + 36 aff_mark 1.024857e+00 2.054658 NA NA + 37 aff_mark 1.731964e+00 3.054658 NA NA + 38 aff_mark 2.024857e+00 2.054658 NA NA + 39 aff_mark 2.731964e+00 3.054658 NA NA + 40 aff_mark 3.024857e+00 2.054658 NA NA + 41 dead 9.403438e-01 1.120249 1.059656e+00 0.9890683 + 42 dead -5.965618e-02 2.120249 5.965621e-02 1.9890683 + 43 dead 1.940344e+00 1.120249 2.059656e+00 0.9890683 + 44 dead 9.403438e-01 2.120249 1.059656e+00 1.9890683 + 45 id 1.000000e+00 1.164290 NA NA + 46 id 1.619369e-08 2.164290 NA NA + 47 id 7.071068e-01 3.164290 NA NA + 48 id 1.207107e+00 4.164290 NA NA + 49 id 2.000000e+00 1.164290 NA NA + 50 id 1.000000e+00 2.164290 NA NA + 51 id 1.707107e+00 3.164290 NA NA + 52 id 2.000000e+00 2.164290 NA NA + 53 id 2.707107e+00 3.164290 NA NA + 54 id 3.000000e+00 2.164290 NA NA + 55 line_spouses 1.049713e+00 1.054658 1.950286e+00 1.0546585 + 56 line_spouses 4.971351e-02 2.054658 9.502865e-01 2.0546585 + 57 line_spouses 7.568203e-01 3.054658 1.657393e+00 3.0546585 + 58 line_spouses 2.049714e+00 2.054658 2.950287e+00 2.0546585 + 59 line_spouses2 7.568203e-01 3.065590 1.657393e+00 3.0655902 + 60 line_children_vertical 1.619369e-08 2.000000 1.619369e-08 1.8360245 + 61 line_children_vertical 3.000000e+00 2.000000 3.000000e+00 1.8360245 + 62 line_children_horizontal 1.619369e-08 1.836025 3.000000e+00 1.8360245 + 63 line_parent_mid 1.500000e+00 1.836025 1.500000e+00 1.6016147 + 64 line_parent_mid 1.500000e+00 1.601615 1.500000e+00 1.2890683 + 65 line_parent_mid 1.500000e+00 1.289068 1.500000e+00 1.0546585 + 66 line_children_vertical 7.071068e-01 3.000000 7.071068e-01 2.8360245 + 67 line_children_horizontal 7.071068e-01 2.836025 7.071068e-01 2.8360245 + 68 line_parent_mid 7.071068e-01 2.836025 7.071068e-01 2.6016147 + 69 line_parent_mid 7.071068e-01 2.601615 5.000000e-01 2.2890683 + 70 line_parent_mid 5.000000e-01 2.289068 5.000000e-01 2.0546585 + 71 line_children_vertical 1.707107e+00 3.000000 2.207107e+00 2.8360245 + 72 line_children_vertical 2.707107e+00 3.000000 2.207107e+00 2.8360245 + 73 label_children_twin3 2.207107e+00 2.918012 NA NA + 74 line_children_horizontal 2.207107e+00 2.836025 2.207107e+00 2.8360245 + 75 line_parent_mid 2.207107e+00 2.836025 2.207107e+00 2.6016147 + 76 line_parent_mid 2.207107e+00 2.601615 2.500000e+00 2.2890683 + 77 line_parent_mid 2.500000e+00 2.289068 2.500000e+00 2.0546585 + 78 line_children_vertical 1.207107e+00 4.000000 1.207107e+00 3.8360245 + 79 line_children_horizontal 1.207107e+00 3.836025 1.207107e+00 3.8360245 + 80 line_parent_mid 1.207107e+00 3.836025 1.207107e+00 3.6016147 + 81 line_parent_mid 1.207107e+00 3.601615 1.207107e+00 3.2890683 + 82 line_parent_mid 1.207107e+00 3.289068 1.207107e+00 3.0546585 + type fill border angle density cex label tips adjx adjy + 1 square_2_1 red black NA NA NA NA NA + 2 square_2_1 white green NA NA NA NA NA + 3 circle_2_1 white black NA NA NA NA NA + 4 circle_2_1 white black NA NA NA NA NA + 5 circle_2_1 white green NA NA NA NA NA + 6 circle_2_1 red green NA NA NA NA NA + 7 square_2_1 red green NA NA NA NA NA + 8 square_2_1 white green NA NA NA NA NA + 9 square_2_1 white green NA NA NA NA NA + 10 circle_2_1 red black NA NA NA NA NA + 11 text black NA NA 1 1 NA NA + 12 text black NA NA 1 0 NA NA + 13 text black NA NA 1 0 NA NA + 14 text black NA NA 1 0 NA NA + 15 text black NA NA 1 0 NA NA + 16 text black NA NA 1 1 NA NA + 17 text black NA NA 1 1 NA NA + 18 text black NA NA 1 0 NA NA + 19 text black NA NA 1 0 NA NA + 20 text black NA NA 1 1 NA NA + 21 square_2_2 white black NA NA NA NA NA + 22 square_2_2 white green NA NA NA NA NA + 23 circle_2_2 #c300ff black NA NA NA NA NA + 24 circle_2_2 white black NA NA NA NA NA + 25 circle_2_2 grey green NA NA NA NA NA + 26 circle_2_2 #c300ff green NA NA NA NA NA + 27 square_2_2 white green NA NA NA NA NA + 28 square_2_2 #c300ff green NA NA NA NA NA + 29 square_2_2 white green NA NA NA NA NA + 30 circle_2_2 white black NA NA NA NA NA + 31 text black NA NA 1 0 NA NA + 32 text black NA NA 1 0 NA NA + 33 text black NA NA 1 1 NA NA + 34 text black NA NA 1 0 NA NA + 35 text black NA NA 1 NA NA + 36 text black NA NA 1 1 NA NA + 37 text black NA NA 1 0 NA NA + 38 text black NA NA 1 1 NA NA + 39 text black NA NA 1 0 NA NA + 40 text black NA NA 1 0 NA NA + 41 segments black NA NA 1 NA NA + 42 segments black NA NA 1 NA NA + 43 segments black NA NA 1 NA NA + 44 segments black NA NA 1 NA NA + 45 text black NA NA 1 1_1 NA NA + 46 text black NA NA 1 1_3 NA NA + 47 text black NA NA 1 1_7 NA NA + 48 text black NA NA 1 1_10 NA NA + 49 text black NA NA 1 1_2 NA NA + 50 text black NA NA 1 1_5 NA NA + 51 text black NA NA 1 1_8 NA NA + 52 text black NA NA 1 1_6 NA NA + 53 text black NA NA 1 1_9 NA NA + 54 text black NA NA 1 1_4 NA NA + 55 segments black NA NA 1 NA NA + 56 segments black NA NA 1 NA NA + 57 segments black NA NA 1 NA NA + 58 segments black NA NA 1 NA NA + 59 segments black NA NA 1 NA NA + 60 segments black NA NA 1 NA NA + 61 segments black NA NA 1 NA NA + 62 segments black NA NA 1 NA NA + 63 segments black NA NA 1 NA NA + 64 segments black NA NA 1 NA NA + 65 segments black NA NA 1 NA NA + 66 segments black NA NA 1 NA NA + 67 segments black NA NA 1 NA NA + 68 segments black NA NA 1 NA NA + 69 segments black NA NA 1 NA NA + 70 segments black NA NA 1 NA NA + 71 segments black NA NA 1 NA NA + 72 segments black NA NA 1 NA NA + 73 text black NA NA 1 ? NA NA + 74 segments black NA NA 1 NA NA + 75 segments black NA NA 1 NA NA + 76 segments black NA NA 1 NA NA + 77 segments black NA NA 1 NA NA + 78 segments black NA NA 1 NA NA + 79 segments black NA NA 1 NA NA + 80 segments black NA NA 1 NA NA + 81 segments black NA NA 1 NA NA + 82 segments black NA NA 1 NA NA $par_usr $par_usr$usr - [1] -0.01414809 3.51414791 4.11605893 - [4] 1.00000000 + [1] -0.04971348 3.04971351 4.28881699 1.00000000 $par_usr$old_par $par_usr$old_par$xpd @@ -283,16 +181,16 @@ $par_usr$boxw - [1] 0.02829619 + [1] 0.09942699 $par_usr$boxh - [1] 0.02630893 + [1] 0.109317 $par_usr$labh - [1] 0.02205037 + [1] 0.04581105 $par_usr$legh - [1] 0.0394634 + [1] 0.1639755 diff --git a/tests/testthat/_snaps/plot/ped-2-affections-ggplot.svg b/tests/testthat/_snaps/plot/ped-2-affections-ggplot.svg index db459144..47fb68ce 100644 --- a/tests/testthat/_snaps/plot/ped-2-affections-ggplot.svg +++ b/tests/testthat/_snaps/plot/ped-2-affections-ggplot.svg @@ -27,93 +27,87 @@ - - - - - - - - - - - - - - - - - - - - - - -1 -0 -1 -0 -0 -1 -0 -0 -1 -1 -0 -0 -0 -0 -0 -1 -1 -1 -0 -0 -0 -1_1 -1_3 -1_8 -1_10 -1_2 -1_5 -1_7 -1_6 -1_8 -1_4 -1_9 -? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + +1 +0 +0 +0 +0 +1 +1 +0 +0 +1 +0 +0 +1 +0 +1 +0 +1 +0 +0 +1_1 +1_3 +1_7 +1_10 +1_2 +1_5 +1_8 +1_6 +1_9 +1_4 +? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Pedigree diff --git a/tests/testthat/_snaps/plot/ped-simple-affection-ggplot.svg b/tests/testthat/_snaps/plot/ped-simple-affection-ggplot.svg index 0475effe..87854e5b 100644 --- a/tests/testthat/_snaps/plot/ped-simple-affection-ggplot.svg +++ b/tests/testthat/_snaps/plot/ped-simple-affection-ggplot.svg @@ -27,71 +27,67 @@ - - - - - - - - - - - -1_1 -1_3 -1_8 -1_10 -1_2 -1_5 -1_7 -1_6 -1_8 -1_4 -1_9 -0 -0 -0 -0 -1 -1 -1 -0 -0 -0 -? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + +1_1 +1_3 +1_7 +1_10 +1_2 +1_5 +1_8 +1_6 +1_9 +1_4 +0 +0 +1 +0 +1 +0 +1 +0 +0 +? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/ped-simple-affection.svg b/tests/testthat/_snaps/plot/ped-simple-affection.svg index 4c658f93..b3800dee 100644 --- a/tests/testthat/_snaps/plot/ped-simple-affection.svg +++ b/tests/testthat/_snaps/plot/ped-simple-affection.svg @@ -18,71 +18,67 @@ - - - - - - - - - + + + + + + + + + - -1 -0 -1 -0 -0 -1 -0 -0 -1 +1 +0 +0 +0 +0 +1 +1 +0 +0 1 -0 -1_1 -1_3 -1_8 -1_10 -1_2 -1_5 -1_7 -1_6 -1_8 +1_1 +1_3 +1_7 +1_10 +1_2 +1_5 +1_8 +1_6 +1_9 1_4 -1_9 -? - - - - - - - - - - +? + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/ped1.svg b/tests/testthat/_snaps/plot/ped1.svg index 7de422a4..ac09ae64 100644 --- a/tests/testthat/_snaps/plot/ped1.svg +++ b/tests/testthat/_snaps/plot/ped1.svg @@ -18,215 +18,211 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 -0 -1 -1 -0 -0 -1 -0 -0 -0 -0 -1 -1 -1 -1 -0 -0 -1 -0 -0 -1 -1 -1 -0 -0 -1 -1 -1 -0 -1 -1 -1 -1 -0 -1 -0 -0 -1_105 +0 +1 +1 +1 +0 +1 +1 +0 +0 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +1 +0 +1 +0 +1 +1 +1 +0 +1_135 1_101 -1_109 -1_121 -1_106 -1_102 -1_129 -1_122 -1_107 -1_115 -1_130 -1_123 -1_108 -1_117 -1_131 -1_124 -1_135 -1_116 -1_132 -1_125 -1_136 -1_118 -1_133 -1_126 -1_119 -1_134 -1_127 -1_120 -1_109 -1_128 -1_103 -1_110 -1_104 -1_111 -1_137 -1_112 -1_138 -1_118 -1_114 -1_115 -1_139 -1_140 -1_141 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +1_109 +1_121 +1_136 +1_102 +1_110 +1_122 +1_103 +1_111 +1_123 +1_104 +1_112 +1_124 +1_137 +1_114 +1_127 +1_138 +1_139 +1_128 +1_105 +1_140 +1_125 +1_106 +1_141 +1_126 +1_107 +1_114 +1_129 +1_108 +1_115 +1_130 +1_112 +1_131 +1_118 +1_132 +1_117 +1_133 +1_116 +1_134 +1_119 +1_120 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/ped1reorder.svg b/tests/testthat/_snaps/plot/ped1reorder.svg index 478328ff..9fdc07f8 100644 --- a/tests/testthat/_snaps/plot/ped1reorder.svg +++ b/tests/testthat/_snaps/plot/ped1reorder.svg @@ -18,211 +18,215 @@ - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 -0 -0 -0 -1 -0 -1 -0 -1 -1 -1 -0 -1 -1 -0 -1 -1 -0 -0 -1 -0 -1 -0 -1 -1 -1 -1 -0 -1 -0 -0 -0 -1 -0 -0 -1 -1_135 -1_137 +1 +0 +0 +1 +0 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 +0 +0 +1 +0 +1 +0 +0 +0 +1 +0 +0 +1 +1 +0 +1 +0 +1 +1 +1 +1 +0 +1_135 +1_137 1_139 -1_125 -1_136 -1_138 -1_140 -1_126 -1_105 -1_103 -1_141 -1_127 -1_106 -1_104 -1_111 -1_128 -1_107 -1_101 -1_112 -1_121 -1_108 -1_102 -1_118 -1_122 -1_115 -1_114 -1_123 -1_117 -1_115 -1_124 -1_116 -1_110 -1_118 -1_109 -1_119 -1_129 -1_120 -1_130 -1_131 -1_132 -1_133 -1_134 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +1_121 +1_136 +1_138 +1_140 +1_122 +1_103 +1_141 +1_123 +1_104 +1_111 +1_124 +1_101 +1_112 +1_127 +1_102 +1_110 +1_128 +1_105 +1_109 +1_125 +1_106 +1_114 +1_126 +1_107 +1_109 +1_129 +1_108 +1_114 +1_130 +1_115 +1_131 +1_112 +1_132 +1_118 +1_133 +1_117 +1_134 +1_116 +1_119 +1_120 + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/shrink/pedigree-shrink-1.svg b/tests/testthat/_snaps/shrink/pedigree-shrink-1.svg index 2ff3ec03..3ac05063 100644 --- a/tests/testthat/_snaps/shrink/pedigree-shrink-1.svg +++ b/tests/testthat/_snaps/shrink/pedigree-shrink-1.svg @@ -18,33 +18,56 @@ - - - + + + - - - - - - - - + + + + + + + + - - - - - + + + + + - - - - - - - + + + + + + + +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 5_44 5_70 5_46 diff --git a/tests/testthat/_snaps/shrink/pedigree-shrink-2.svg b/tests/testthat/_snaps/shrink/pedigree-shrink-2.svg index 679635af..ada83a66 100644 --- a/tests/testthat/_snaps/shrink/pedigree-shrink-2.svg +++ b/tests/testthat/_snaps/shrink/pedigree-shrink-2.svg @@ -18,36 +18,65 @@ - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +0 +0 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 8_135 8_161 8_137 diff --git a/tests/testthat/_snaps/useful_inds.md b/tests/testthat/_snaps/useful_inds.md new file mode 100644 index 00000000..ea421f19 --- /dev/null +++ b/tests/testthat/_snaps/useful_inds.md @@ -0,0 +1,4 @@ +# useful_inds works with Pedigree + + The useful slot already has values in the Ped object and reset is set to FALSE + diff --git a/tests/testthat/test-align.R b/tests/testthat/test-align.R index b5e9a130..fb724e52 100644 --- a/tests/testthat/test-align.R +++ b/tests/testthat/test-align.R @@ -1,72 +1,105 @@ test_that("align works", { data("sampleped") - ped <- Pedigree(sampleped) - ped1 <- ped[ped(ped)$family == 1] + pedi <- Pedigree(sampleped) + ped1 <- pedi[famid(pedi) == "1"] plist1 <- align(ped1) - expect_equal(plist1$n, c(6, 12, 17, 8)) + expect_equal(plist1$n, c(2, 10, 16, 14)) - ped2 <- ped[ped(ped)$family == 2] - withr::local_options(width = 50) + ped2 <- pedi[famid(pedi) == 2] plist2 <- align(ped2) expect_equal(plist2$n, c(2, 7, 5)) - plist <- align(ped) + plist <- align(pedi) expect_equal(plist[["1"]]$n, plist1$n) expect_equal(plist[["2"]]$n, plist2$n) - }) test_that("test auto_hint works", { data("sampleped") - ped <- Pedigree(sampleped) - expect_equal(sum(kindepth(ped)), 73) - expect_error(auto_hint(ped)) #this fixes up marriages and such + pedi <- Pedigree(sampleped) + expect_equal(sum(kindepth(pedi)), 73) + expect_error(auto_hint(pedi)) # Works only on 1 family - ped <- Pedigree(sampleped[-1]) - newhint <- auto_hint(ped) - plist <- align(ped, packed = TRUE, + pedi <- Pedigree(sampleped[-1]) + newhint <- auto_hint(pedi) + plist <- align(pedi, packed = TRUE, align = TRUE, width = 8, hints = newhint ) expect_snapshot(plist) + + ## With rel matrix + rel_df <- data.frame( + id1 = c(112, 113, 133, 209), + id2 = c(110, 114, 132, 109), + code = c(1, 4, 4, 4) + ) + pedi <- Pedigree(sampleped[-1], rel_df) + newhint <- auto_hint(pedi) + expect_equal(horder(newhint), + setNames(c( + 1, 2, 3, 4, 5, 6, 7, 8, 1, 1, 3, 2, 1, 4, 1, + 3, 9, 2, 4, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 2, 3, 10, 11, 11, 12, 13, 4, + 5, 12, 13, 14, 15, 16, 17, 18, 14, 15, 16, 17, 18 + ), id(ped(pedi))) + ) + expect_equal(as.data.frame(spouse(newhint)), + data.frame( + idl = c("112", "114", "109"), + idr = c("118", "115", "110"), + anchor = anchor_to_factor(c("right", "right", "left")) + ) + ) }) -test_that("test alignement with inbreeding and relationship matrix", { +test_that("test alignment with inbreeding and relationship matrix", { data("sampleped") rel_df <- data.frame( id1 = c(112, 113, 133, 209), id2 = c(110, 114, 132, 109), code = c(1, 4, 4, 4) ) - ped <- Pedigree(sampleped[-1], rel_df) - plist <- align(ped) + ped_withrel <- Pedigree(sampleped[-1], rel_df) + plist <- align(ped_withrel) - ped_sr <- Pedigree(sampleped[-1]) - plist_sr <- align(ped_sr) + ped_norel <- Pedigree(sampleped[-1]) + plist_sr <- align(ped_norel) expect_equal(plist$nid[1, ], - c(35, 36, 5, 6, 7, 8, 42, 43, rep(0, 16)) + c(35, 36, 42, 43, rep(0, 19)) ) expect_equal(plist_sr$nid[1, ], - c(5, 6, 7, 8, 35, 36, 42, 43, rep(0, 14)) + c(35, 36, 42, 43, rep(0, 17)) + ) + vdiffr::expect_doppelganger("sampleped_withrel", + function() plot(ped_withrel) + ) + vdiffr::expect_doppelganger("sampleped_norel", + function() plot(ped_norel) ) }) test_that("besthint works", { data("sampleped") - ped <- Pedigree(sampleped) - expect_error(best_hint(ped)) #this fixes up marriages and such + pedi <- Pedigree(sampleped) + expect_error(best_hint(pedi)) #this fixes up marriages and such ped1 <- Pedigree(sampleped[-1]) - newhint1 <- best_hint(ped1) + hints(ped1) <- best_hint(ped1) + + vdiffr::expect_doppelganger("Best hint", + function() plot(ped1) + ) + plist <- align(ped1, packed = TRUE, - align = TRUE, width = 8, hints = newhint1 + align = TRUE, width = 8 ) expect_snapshot(plist) }) -test_that("Alignement with spouse", { +test_that("Alignment with spouse", { data(sampleped) - df1 <- sampleped[sampleped$family == 1, ] + df1 <- sampleped[sampleped$famid == 1, ] relate1 <- data.frame( indId1 = 113, indId2 = 114, @@ -75,29 +108,20 @@ test_that("Alignement with spouse", { ) ped1 <- Pedigree(df1, relate1) hints <- auto_hint(ped1) - expect_equal(as.vector(hints$spouse), c(9, 10, 2)) - expect_equal(hints$order, - c( - 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, - 2, 3, 5, 4, 5, 6, 7, 8, 9, 10, - 1, 2, 3, 4, 5, 6, 7, 8, 6, 7, - 8, 9, 10, 11, 6, 7, 11, 12, 12, - 13, 14 + expect_equal(spouse(hints), + data.frame( + idl = c("1_112", "1_114", "1_109"), + idr = c("1_118", "1_115", "1_110"), + anchor = anchor_to_factor(c("right", "right", "left")) ) ) - align(ped1) -}) - -test_that("Double wife", { - ## reported on github in 2023 - ## version 1.9.6 failed to plot subject 3 second marriage and kids - ## fix in 9/2023 to revert to some version 1.8.5 version of kindepth - df <- data.frame( - id = 1:7, dadid = c(0, 0, 0, 1, 3, 0, 3), - momid = c(0, 0, 0, 2, 4, 0, 6), sex = c(1, 2, 1, 2, 1, 2, 1) + expect_equal(horder(hints), + setNames(c( + 1, 2, 3, 4, 5, 6, 7, 8, 1, 1, + 2, 3, 1, 4, 1, 3, 9, 2, 4, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 2, 3, 10, 11, 11, + 12, 13 + ), id(ped(ped1))) ) - ped <- Pedigree(df) - vdiffr::expect_doppelganger("double_wife", - function() plot(ped) - ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-bitSize.R b/tests/testthat/test-bitSize.R index a6e8a682..84bfcab0 100644 --- a/tests/testthat/test-bitSize.R +++ b/tests/testthat/test-bitSize.R @@ -3,10 +3,19 @@ test_that("bit_size works", { minnped <- Pedigree(minnbreast, cols_ren_ped = list( "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", "gender" = "sex", "family" = "famid" - )) - bs_ped <- bit_size(minnped, missid = "0") - bs_char <- bit_size(as.character(minnbreast$fatherid), - as.character(minnbreast$motherid), missid = "0" + ), missid = "0") + bs_pedi <- bit_size(minnped) + bs_char <- bit_size( + as.character(minnbreast$fatherid), + as.character(minnbreast$motherid), + missid = "0" ) + + ped <- with(minnbreast, + Ped(id, sex, fatherid, motherid, missid = "0") + ) + bs_ped <- bit_size(ped) + expect_equal(bs_ped, bs_char) + expect_equal(bs_pedi, bs_char) }) diff --git a/tests/testthat/test-class.R b/tests/testthat/test-class.R new file mode 100644 index 00000000..18257e6c --- /dev/null +++ b/tests/testthat/test-class.R @@ -0,0 +1,298 @@ +test_that("Class ped work", { + ped0 <- new("Ped") + expect_s4_class(ped0, "Ped") + expect_equal(length(ped0), 0) + expect_equal(length(as.list(ped0)), 15) + expect_equal(dim(as.data.frame(ped0)), c(0, 15)) + + ped2 <- Ped( + obj = c("ID5", "ID4"), + momid = c(NA, NA), + dadid = c(NA, NA), + sex = factor(c("female", "male")) + ) + expect_s4_class(ped2, "Ped") + expect_equal(length(ped2), 2) + expect_equal(length(as.list(ped2)), 15) + expect_equal(dim(as.data.frame(ped2)), c(2, 15)) + expect_snapshot(ped2) + + expect_error(Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c(NA, NA), + dadid = c(NA, NA) + )) + expect_error(Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c(NA, NA, NA), + dadid = c(NA, NA, NA), + sex = c(1, 2) + )) + + expect_error(Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c("ID3", NA, NA), + dadid = c(NA, NA, NA), + sex = c("male", "female", "male") + )) + expect_no_error(Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c("ID2", NA, NA), + dadid = c("ID3", NA, NA), + sex = c("male", "female", "male") + )) + expect_no_error(Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c("ID2", NA, NA), + dadid = c("ID3", NA, NA), + sex = c("male", "female", "male"), + famid = c("F1", "F1", "F2") + )) + expect_error(Ped( + obj = c("ID3", "ID2", ""), + momid = c("ID2", NA, NA), + dadid = c("ID3", NA, NA), + sex = c("male", "female", "male"), + famid = c("F1", "F1", "F2") + )) + + #### Metadata #### + ped3 <- Ped( + obj = c("ID1", "ID2", "ID3"), + momid = c("ID2", NA, NA), + dadid = c("ID3", NA, NA), + sex = c("male", "female", "male"), + famid = c("F1", "F1", "F2") + ) + + mcols(ped3) <- list(A = c("test", 1, 3), B = c("test3", 6, 8)) + mcols(ped3)$Test <- c("test2", 3, 4) + expect_equal(dim(mcols(ped3)), c(3, 3)) + + expect_error(mcols(ped3) <- list( + A = c("test", 1, 3, 6), B = c("test3", 6, 8, 9) + )) + expect_equal(length(as.list(ped3)), 18) + expect_equal(dim(as.data.frame(ped3)), c(3, 18)) + + df <- data.frame( + id = c("ID1", "ID2", "ID3"), + momid = c("ID2", NA, NA), + dadid = c("ID3", NA, NA), + sex = c("male", "female", "male"), + famid = c("F1", "F1", "F2"), + test = c("test", 1, 3), + test2 = c("test2", 3, 4) + ) + ped3 <- Ped(df) + + expect_equal(ped3[1]@id, "ID1") + expect_equal(ped3[1:2]@id, c("ID1", "ID2")) + + expect_equal(dim(as.data.frame(ped3)), c(3, 17)) + expect_equal(dim(mcols(ped3)), c(3, 2)) + + expect_error(c(ped3, ped3)) + ped5 <- suppressWarnings(c(ped3, ped2)) + + expect_equal(dim(as.data.frame(ped5)), c(5, 17)) + + ## Subsetting + expect_error(subset(ped3, "ID1")) + ped1_char <- subset(ped3, "ID1", del_parents = TRUE) + ped1_num <- subset(ped3, 1, del_parents = TRUE) + ped1_log <- subset(ped3, c(TRUE, FALSE, FALSE), del_parents = TRUE) + + expect_equal(ped1_char, ped1_num) + expect_equal(ped1_char, ped1_log) +}) + +test_that("Rel class works", { + rel0 <- new("Rel") + expect_s4_class(rel0, "Rel") + expect_equal(length(rel0), 0) + expect_equal(length(as.list(rel0)), 4) + expect_equal(dim(as.data.frame(rel0)), c(0, 4)) + rel2 <- Rel( + obj = c("ID5", "ID4"), + id2 = c("ID3", "ID2"), + code = c(1, 4), + ) + mcols(rel2) <- list("A" = c(1, 2)) + expect_s4_class(rel2, "Rel") + expect_equal(length(rel2), 2) + expect_equal(length(as.list(rel2)), 5) + expect_equal(dim(as.data.frame(rel2)), c(2, 5)) + expect_snapshot(rel2) + + expect_error(rel4 <- c(rel2, rel2)) + + expect_error(rel3 <- Rel( + obj = c("ID5", "ID2", "ID4"), + id2 = c("ID3", "ID3", "ID2"), + code = c(1, 2), + )) + + rel3 <- Rel( + obj = c("ID6", "ID2", "ID4"), + id2 = c("ID3", "ID3", "ID1"), + code = c(1, 2, 3), + ) + expect_equal(dim(as.data.frame(c(rel3, rel2))), c(5, 5)) + + expect_equal(length(subset(rel3, "ID6")), 0) + expect_equal(length(subset(rel3, c("ID2", "ID3"))), 1) + expect_equal(length(subset(rel3, c("ID2", "ID3", "ID6"))), 2) + +}) + +test_that("Hints class works", { + ## From scratch + hts0 <- Hints() + expect_equal(horder(hts0), numeric()) + expect_error(horder(hts0) <- c(1, 2)) + horder(hts0) <- c("ID1" = 1, "ID2" = 2) + expect_equal(horder(hts0), c("ID1" = 1, "ID2" = 2)) + expect_equal(dim(spouse(hts0)), c(0, 3)) + expect_error(spouse(hts0) <- data.frame( + idl = c("ID1", "ID2"), + idr = c("ID3", "ID4"), + anchor = factor(c("left", "right")) + )) + horder(hts0) <- c("ID1" = 1, "ID2" = 2, "ID3" = 3, "ID4" = 4) + spouse(hts0) <- data.frame( + idl = c("ID1", "ID2"), + idr = c("ID3", "ID4"), + anchor = factor(c("left", "right")) + ) + expect_equal(dim(spouse(hts0)), c(2, 3)) + expect_snapshot(hts0) + + ## With constructor + hts2 <- Hints( + horder = c("ID1" = 1, "ID2" = 2, "ID3" = 3, "ID4" = 4), + spouse = data.frame( + idl = c("ID1", "ID2"), + idr = c("ID3", "ID4"), + anchor = factor(c("left", "right")) + ) + ) + expect_equal(hts0, hts2) + + ## With missing values + expect_error(Hints(horder = c("ID1" = 1, "ID2" = NA, "ID3" = 3, "ID4" = 4))) + + expect_error(Hints( + horder = c("ID1" = 1, "ID2" = 2, "ID3" = 3, "ID4" = 4), + spouse = data.frame( + idl = c("ID1", "ID2"), + idr = c("ID3", NA), + anchor = factor(c("left", "right")) + ) + )) + + hts1 <- subset(hts2, "ID1") + expect_equal(horder(hts1), c("ID1" = 1)) + expect_equal(dim(spouse(hts1)), c(0, 3)) + + hts13 <- subset(hts2, c("ID1", "ID3")) + expect_equal(horder(hts13), c("ID1" = 1, "ID3" = 3)) + expect_equal(dim(spouse(hts13)), c(1, 3)) +}) + +test_that("Scales class works", { + ## From scratch + scl0 <- Scales() + expect_equal(dim(fill(scl0)), c(0, 9)) + expect_equal(dim(border(scl0)), c(0, 5)) + + expect_error(fill(scl0) <- c("ID1", "ID2")) + expect_error(border(scl0) <- c("ID1", "ID2")) + + expect_error(fill(scl0)$column_values <- c("ID1", "ID2")) + + expect_snapshot_error(fill(scl0) <- data.frame( + order = c("A", 3), + column_values = c("ID1", "ID2"), + column_mods = c(1, 2), + mods = c("ID1", "ID2"), + labels = c("ID1", "ID2"), + affected = c("A", FALSE), + fill = c("ID1", "ID2"), + density = c(1, 2), + angle = c("A", 60) + )) + expect_snapshot_error(border(scl0) <- data.frame( + column_values = c("ID1", "ID2"), + column_mods = c("ID1", "ID2"), + mods = c("ID1", "ID2"), + labels = c(1, 2), + border = c("ID1", "ID2") + )) + + + fill(scl0) <- data.frame( + order = c(2, 3), + column_values = c("ID1", "ID2"), + column_mods = c("ID1", "ID2"), + mods = c(1, 2), + labels = c("ID1", "ID2"), + affected = c(TRUE, FALSE), + fill = c("ID1", "ID2"), + density = c(1, 2), + angle = c(90, 60) + ) + expect_equal(dim(fill(scl0)), c(2, 9)) + fill(scl0)$fill[1] <- "ID3" + expect_equal(fill(scl0)$fill[1], "ID3") + + border(scl0) <- data.frame( + column_values = c("ID1", "ID2"), + column_mods = c("ID1", "ID2"), + mods = c(1, 2), + labels = c("Lab1", "Lab2"), + border = c("ID1", "ID2") + ) + + expect_equal(dim(border(scl0)), c(2, 5)) + expect_snapshot(scl0) + + ## With constructor + scl2 <- Scales( + fill = data.frame( + order = c(2, 3), + column_values = c("ID1", "ID2"), + column_mods = c("ID1", "ID2"), + mods = c(1, 2), + labels = c("ID1", "ID2"), + affected = c(TRUE, FALSE), + fill = c("ID3", "ID2"), + density = c(1, 2), + angle = c(90, 60) + ), + border = data.frame( + column_values = c("ID1", "ID2"), + column_mods = c("ID1", "ID2"), + mods = c(1, 2), + labels = c("Lab1", "Lab2"), + border = c("ID1", "ID2") + ) + ) + expect_equal(scl2, scl0) +}) + +test_that("Pedigree class works", { + pedi <- Pedigree() + expect_equal(length(pedi), 0) + expect_equal(length(as.list(pedi)), 4) + expect_s4_class(scales(pedi), "Scales") + expect_s4_class(hints(pedi), "Hints") + expect_s4_class(ped(pedi), "Ped") + expect_s4_class(rel(pedi), "Rel") + expect_equal(horder(pedi), numeric()) + expect_equal(dim(spouse(pedi)), c(0, 3)) + expect_equal(dim(fill(pedi)), c(0, 9)) + expect_equal(dim(border(pedi)), c(0, 5)) + expect_equal(length(ped(pedi)), 0) + expect_equal(length(rel(pedi)), 0) +}) diff --git a/tests/testthat/test-descendants.R b/tests/testthat/test-descendants.R index 6298b56e..ebcb1f91 100644 --- a/tests/testthat/test-descendants.R +++ b/tests/testthat/test-descendants.R @@ -1,10 +1,16 @@ test_that("descendants works", { data("sampleped") idlist <- c("1_101", "2_208") - ped <- Pedigree(sampleped) - desc <- descendants(idlist, ped) + + desc_char <- with(sampleped, descendants(c("101", "208"), id, dadid, momid)) + + pedi <- Pedigree(sampleped) + desc <- descendants(idlist, pedi) expect_equal(desc, c( "1_109", "2_212", "2_213", "2_214", "1_121", "1_122", "1_123", "1_124" )) + ped <- with(sampleped, Ped(id, sex, dadid, momid, famid, missid = "0")) + idlist <- c("101", "208") + expect_equal(descendants(idlist, ped), desc_char) }) diff --git a/tests/testthat/test-fix_parents.R b/tests/testthat/test-fix_parents.R index 888e7e7a..6a271d53 100644 --- a/tests/testthat/test-fix_parents.R +++ b/tests/testthat/test-fix_parents.R @@ -2,9 +2,9 @@ test_that("fix_parents works with number", { materdf <- data.frame(id = 1:5, momid = c(0, 1, 1, 2, 2), sex = "female") materdf$dadid <- materdf$momid * 100 materdf <- as.data.frame(lapply(materdf, as.character)) - expect_error(Pedigree(materdf)) + expect_snapshot_error(Pedigree(materdf, missid = "0")) peddf <- with(materdf, fix_parents(id, dadid, momid, sex, missid = "0")) - expect_no_error(Pedigree(peddf)) + expect_no_error(Pedigree(peddf, missid = "0")) }) test_that("fix_parents works with character", { @@ -17,51 +17,41 @@ test_that("fix_parents works with character", { "fam107", "fam107", "fam107", "fam112" ) ) - expect_error(Pedigree(test1char)) + expect_snapshot_error(Pedigree(test1char, missid = "0")) test1newmom <- with(test1char, fix_parents(id, dadid, momid, sex, missid = "0") ) - expect_no_error(Pedigree(test1newmom)) + expect_no_error(Pedigree(test1newmom, missid = "0")) }) test_that("fix_parents works with sex errors", { data("sampleped") - datped2 <- sampleped[sampleped$family %in% 2, ] + datped2 <- sampleped[sampleped$famid %in% 2, ] datped2[datped2$id %in% 203, "sex"] <- 2 datped2 <- datped2[-which(datped2$id %in% 209), ] ## this gets an error - expect_warning(Pedigree(datped2)) + expect_snapshot_error(Pedigree(datped2)) - ## This fix the error - datped2[, c("id", "momid", "dadid")] <- as.data.frame(lapply( - datped2[, c("id", "momid", "dadid")], as.character - )) - fixped2 <- with(datped2, fix_parents(id, dadid, momid, sex, missid = "0")) + ## This fix the error and keep the dataframe dimensions + fixped2 <- with(datped2, + fix_parents(id, dadid, momid, sex, missid = NA_character_) + ) + expect_equal(fixped2$sex[fixped2$id == 203], 1) + expect_contains(fixped2$id, "209") expect_no_error(Pedigree(fixped2)) }) - -test_that("fix_parents_df works with sex errors and with family", { +test_that("fix_parents works with famid", { data("sampleped") - datped2 <- sampleped[sampleped$family %in% 2, ] - # Set individual 203 as female - datped2[datped2$id %in% 203, "sex"] <- 2 - # Delete individual 209 from id - datped2 <- datped2[-which(datped2$id %in% 209), ] + datped <- sampleped[-which(sampleped$id %in% 209), ] ## this gets an error - expect_warning(Pedigree(datped2)) + expect_snapshot_error(Pedigree(datped)) + fixped <- fix_parents(datped) - ## This fix the error and keep the dataframe dimensions - datped2[, c("id", "momid", "dadid")] <- as.data.frame(lapply( - datped2[, c("id", "momid", "dadid")], as.character - )) - fixped2 <- fix_parents(datped2, delete = TRUE) - expect_no_error(Pedigree(fixped2)) - expect_equal(dim(fixped2), c(13, 7)) - - fixped2 <- fix_parents(datped2, delete = FALSE) - expect_no_error(Pedigree(fixped2)) - expect_equal(dim(fixped2), c(14, 7)) -}) \ No newline at end of file + expect_contains(fixped$id, "2_209") + expect_equal(fixped$sex[fixped$id == "2_209"], 1) + expect_equal(fixped$famid[fixped$id == "2_209"], "2") + expect_no_error(Pedigree(fixped)) +}) diff --git a/tests/testthat/test-generate_scales.R b/tests/testthat/test-generate_scales.R index 2f9aae7e..0f5eeefb 100644 --- a/tests/testthat/test-generate_scales.R +++ b/tests/testthat/test-generate_scales.R @@ -37,10 +37,15 @@ test_that("generate aff inds works", { }) test_that("generate border works", { + vect_to_binary(ped_df$avail) border <- generate_border(ped_df$avail) - expect_equal(border$mods, c(NA, 1, 0)) - expect_equal(border$border, c("grey", "green", "black")) - expect_equal(border$labels, c("NA", "Available", "Non Available")) + expect_equal(border$mods, c(0, 1, NA, NA, NA, 0, NA, NA, 0, NA)) + expect_equal(border$avail, + c(FALSE, TRUE, NA, NA, NA, FALSE, NA, NA, FALSE, NA) + ) + expect_equal(border$sc_bord$mods, c(NA, 1, 0)) + expect_equal(border$sc_bord$border, c("grey", "green", "black")) + expect_equal(border$sc_bord$labels, c("NA", "Available", "Non Available")) }) test_that("generate fill full scale off", { @@ -51,9 +56,9 @@ test_that("generate fill full scale off", { aff_fact$labels, keep_full_scale = FALSE ) expect_equal(list_num$mods, aff_num$mods) - expect_equal(list_num$fill_scale$fill, c("white", "red", "grey")) + expect_equal(list_num$sc_fill$fill, c("white", "red", "grey")) expect_equal(list_fact$mods, aff_fact$mods) - expect_equal(list_fact$fill_scale$fill, c("grey", "white", "red")) + expect_equal(list_fact$sc_fill$fill, c("grey", "white", "red")) }) test_that("generate fill full scale on", { @@ -65,12 +70,12 @@ test_that("generate fill full scale on", { ) expect_equal( - list_num$fill_scale$fill, c("#FFFFFF", "#9AB1C4", "#36648B", + list_num$sc_fill$fill, c("#FFFFFF", "#9AB1C4", "#36648B", "#FF0000", "grey", "#F67700", "#EEEE00" ) ) expect_equal( - list_fact$fill_scale$fill, + list_fact$sc_fill$fill, c("grey", "#FFFFFF", "#FF0000", "#EEEE00", "#36648B") ) @@ -80,7 +85,7 @@ test_that("generate fill full scale on", { list_num_rev <- generate_fill(ped_df$NumOther, aff_num_notsup$affected, aff_num_notsup$labels, keep_full_scale = TRUE ) - expect_equal(list_num_rev$fill_scale$fill, c( + expect_equal(list_num_rev$sc_fill$fill, c( "#FF0000", "#F67700", "#EEEE00", "#FFFFFF", "grey", "#9AB1C4", "#36648B" )) @@ -88,31 +93,32 @@ test_that("generate fill full scale on", { test_that("generate colors works on Pedigree object", { data("sampleped") - ped <- Pedigree(sampleped[sampleped$family == "1", -1]) - ped(ped)$id <- as.numeric(ped(ped)$id) - ped_aff <- generate_colors(ped, col_aff = "id", + ped <- Pedigree(sampleped[sampleped$famid == "1", -1]) + mcols(ped)$"id_num" <- as.numeric(id(ped(ped))) + ped_aff <- generate_colors(ped, col_aff = "id_num", threshold = 120, sup_thres_aff = TRUE, add_to_scale = FALSE ) - expect_equal(ped_aff$ped$id_aff, c(rep(0, 20), rep(1, 21))) - expect_equal(ped_aff$scales$fill$fill, c("white", "red")) - expect_equal(ped_aff$scales$fill$labels, + expect_equal(mcols(ped_aff)$id_num_mods, c(rep(0, 20), rep(1, 21))) + expect_equal(sum(mcols(ped_aff)$avail_mods), 16) + expect_equal(fill(ped_aff)$fill, c("white", "red")) + expect_equal(fill(ped_aff)$labels, c("Healthy <= to 120", "Affected > to 120") ) - expect_equal(ped_aff$scales$fill$mods, c(0, 1)) - expect_equal(ped_aff$scales$fill$affected, c("FALSE", "TRUE")) + expect_equal(fill(ped_aff)$mods, c(0, 1)) + expect_equal(fill(ped_aff)$affected, c(FALSE, TRUE)) }) test_that("generate with full scale", { data("sampleped") + sampleped$val_num <- as.numeric(sampleped$id) ped <- Pedigree(sampleped) - ped <- ped[ped(ped)$family == "1", ] - ped(ped)$indId <- as.numeric(ped(ped)$indId) - ped <- generate_colors(ped, add_to_scale = FALSE, "indId", threshold = 115, + ped <- ped[famid(ped) == "1"] + ped <- generate_colors( + ped, add_to_scale = FALSE, "val_num", threshold = 115, colors_aff = c("pink", "purple"), keep_full_scale = TRUE ) - expect_equal(scales(ped)$fill$labels[c(1, 4)], + expect_equal(fill(ped)$labels[c(1, 4)], c("Healthy <= to 115 : [101,106]", "Affected > to 115 : [116,124]") ) - expect(nrow(scales(ped)$fill), 6) + expect(nrow(fill(ped)), 6) }) -TRUE \ No newline at end of file diff --git a/tests/testthat/test-is_informative.R b/tests/testthat/test-is_informative.R index e438250c..86b404a8 100644 --- a/tests/testthat/test-is_informative.R +++ b/tests/testthat/test-is_informative.R @@ -3,8 +3,8 @@ test_that("is_informative works", { # Test for character id <- as.character(sampleped$id) - avail <- sampleped$available - affected <- sampleped$affected + avail <- sampleped$avail + affected <- sampleped$affection expect_equal(is_informative(id, avail, affected), c( @@ -25,28 +25,24 @@ test_that("is_informative works", { 23 ) expect_equal( - is_informative(id, avail, affected, informative = c(1, 110, 150, 214)), + is_informative( + id, avail, affected, + informative = c("1", "110", "150", "214") + ), c("110", "214") ) + length(id) expect_equal( - is_informative(id, avail, affected, informative = c(TRUE, FALSE, TRUE)), + is_informative( + id, avail, affected, + informative = c(TRUE, FALSE, TRUE, rep(FALSE, 52)) + ), c("101", "103") ) expect_equal( length(is_informative(id, avail, affected, informative = "All")), 55 ) - - sampleped$avail <- sampleped$available - sampleped$id <- as.character(sampleped$id) - expect_equal(with(sampleped, - is_informative(id, avail, affected, informative = "AvAf") - ), - c( - "110", "116", "118", "119", "124", "127", - "128", "201", "203", "206", "207", "214" - ) - ) }) test_that("is_informative works with Pedigree", { @@ -59,47 +55,49 @@ test_that("is_informative works with Pedigree", { ) - df <- is_informative(ped, col_aff = "affection_aff", + ped_upd <- is_informative(ped, col_aff = "affection_mods", informative = "AvAf" - )$ped + ) + expect_equal( - df$id[df$id_inf == 1], + id(ped(ped_upd))[isinf(ped(ped_upd)) == TRUE], c( "1_110", "1_116", "1_118", "1_119", "1_124", "1_127", "1_128", "2_201", "2_203", "2_206", "2_207", "2_214" ) ) ped <- Pedigree(sampleped[c(2:5, 7)]) - expect_error(is_informative(ped, informative = "AvAf")) - expect_error(is_informative(ped, column = "test", informative = "AvAf")) + expect_snapshot_error(is_informative( + ped, col_aff = "test", informative = "AvAf" + )) + ped <- generate_colors(ped, col_aff = "sex", mods_aff = "male", add_to_scale = FALSE ) expect_equal( - sum(is_informative(ped, col_aff = "sex_aff", - informative = "Af" - )$ped$id_inf), - length(ped(ped)[ped(ped)$sex == "male", "id"]) + sum(isinf(ped(is_informative( + ped, col_aff = "sex_mods", informative = "Af" + )))), + length(ped(ped, "id")[ped(ped, "sex") == "male"]) ) data(minnbreast) - summary(minnbreast) ped <- Pedigree(minnbreast, cols_ren_ped = list( "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", - "gender" = "sex" - )) + "gender" = "sex", + "family" = "famid" + ), missid = "0") ped <- generate_colors(ped, col_aff = "education", threshold = 3, sup_thres_aff = TRUE, keep_full_scale = TRUE, add_to_scale = FALSE ) expect_equal( - sum(is_informative(ped, - col_aff = "education_aff", informative = "Af" - )$ped$id_inf - ), + sum(isinf(ped(is_informative( + ped, col_aff = "education_mods", informative = "Af" + )))), sum(minnbreast$education > 3, na.rm = TRUE) ) }) diff --git a/tests/testthat/test-kindepth.R b/tests/testthat/test-kindepth.R index 7a832def..d65884fa 100644 --- a/tests/testthat/test-kindepth.R +++ b/tests/testthat/test-kindepth.R @@ -1,15 +1,15 @@ -test_that("fix_parents_df works with sex errors and with family", { +test_that("Kindepth works", { data("sampleped") - datped2 <- sampleped[sampleped$family %in% 2, ] + datped2 <- sampleped[sampleped$famid %in% 2, ] ## this gets an error ped <- Pedigree(datped2) expect_equal( - kindepth(ped, align = TRUE), + kindepth(ped, align_parents = TRUE), c(0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2) ) expect_equal( - kindepth(ped, align = FALSE), + kindepth(ped, align_parents = FALSE), c(0, 0, 0, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2) ) @@ -17,7 +17,49 @@ test_that("fix_parents_df works with sex errors and with family", { ped <- Pedigree(minnbreast, cols_ren_ped = list( "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", "gender" = "sex", "family" = "famid" - )) + ), missid = "0") expect_equal(sum(kindepth(ped)), 33147) - expect_equal(sum(kindepth(ped, align = TRUE)), 39087) -}) \ No newline at end of file + expect_equal(sum(kindepth(ped, align_parents = TRUE)), 39091) + + df <- data.frame( + id = 1:7, + dadid = c(0, 0, 0, 1, 3, 0, 3), + momid = c(0, 0, 0, 2, 4, 0, 6), + sex = c(1, 2, 1, 2, 1, 2, 1) + ) + pedi <- Pedigree(df, missid = "0") + expect_equal(kindepth(pedi, align_parents = TRUE), c(0, 0, 1, 1, 2, 1, 2)) + expect_equal(kindepth(pedi), c(0, 0, 0, 1, 2, 0, 1)) + + ## Uncle / Niece spouse + df <- data.frame( + id = 1:7, + dadid = c(0, 0, 0, 1, 1, 3, 5), + momid = c(0, 0, 0, 2, 2, 4, 6), + sex = c(1, 2, 1, 2, 1, 2, 1) + ) + pedi <- Pedigree(df, missid = "0") + expect_equal(kindepth(pedi, align_parents = TRUE), c(0, 0, 1, 1, 1, 2, 3)) + vdiffr::expect_doppelganger("Niece Uncle spouse", + function() plot(pedi) + ) + + ## Double marriage + ## reported on github in 2023 + ## version 1.9.6 failed to plot subject 3 second marriage and kids + ## fix in 9/2023 to revert to some version 1.8.5 version of kindepth + df <- data.frame( + id = 1:12, + dadid = c(0, 0, 1, 0, 0, 0, 3, 3, 5, 5, 7, 10), + momid = c(0, 0, 2, 0, 0, 0, 4, 4, 6, 6, 9, 8), + sex = c(1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2) + ) + pedi <- Pedigree(df, missid = "0") + expect_equal( + kindepth(pedi, align_parents = TRUE), + c(0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3) + ) + vdiffr::expect_doppelganger("Double marriage", + function() plot(pedi) + ) +}) diff --git a/tests/testthat/test-kinship.R b/tests/testthat/test-kinship.R index 677e2066..7eb635cb 100644 --- a/tests/testthat/test-kinship.R +++ b/tests/testthat/test-kinship.R @@ -39,19 +39,19 @@ test_that("kinship works", { ## Test with no special relationship kmat_char <- with(twindat, kinship(id, dadid, momid)) - tped <- Pedigree(twindat) + tped <- Pedigree(twindat, missid = "0") kmat_ped <- kinship(tped) expect_equal(kmat_char, kmat_ped) ## Test with no special relationship with chr_type to X kmat_char <- with(twindat, kinship(id, dadid, momid, sex, chrtype = "X")) - tped <- Pedigree(twindat) + tped <- Pedigree(twindat, missid = "0") kmat_ped <- kinship(tped, chrtype = "X") expect_equal(kmat_char, kmat_ped) ## Test with monozygotic relationship - tped <- Pedigree(twindat, relate) + tped <- Pedigree(twindat, relate, missid = "0") kmat <- kinship(tped) ## should show kinship coeff of 0.5 for where MZ twins are @@ -71,7 +71,7 @@ test_that("kinship works", { relate$id2 <- match(relate$id2, indx) - 1 # Build the Pedigree and kinship - tped <- Pedigree(twindat, relate) + tped <- Pedigree(twindat, relate, missid = "0") kmat <- kinship(tped) truth <- matrix( @@ -103,7 +103,7 @@ test_that("Kinship Claus Ekstrom 09/2012", { ) relation <- data.frame(id1 = c(3), id2 = c(4), famid = c(1), code = c(1)) - ped <- Pedigree(mydata, relation) + ped <- Pedigree(mydata, relation, missid = "0") kmat <- kinship(ped) expect_true(all(kmat[3:4, 3:4] == 0.5)) @@ -130,14 +130,14 @@ test_that("kinship works with X chromosoms", { names(ped2df) <- c("fam", "id", "dadid", "momid", "sex") rel_df <- as.data.frame(matrix(c(8, 9, 1), ncol = 3)) names(rel_df) <- c("id1", "id2", "code") - ped2 <- Pedigree(ped2df, rel_df) + ped2 <- Pedigree(ped2df, rel_df, missid = "0") ## regular kinship matrix expect_snapshot(kinship(ped2)) expect_snapshot(kinship(ped2, chr = "X")) ped3 <- ped2 - ped3$ped$sex[10] <- "unknown" + sex(ped(ped3))[10] <- "unknown" ## regular again, should be same as above expect_equal(kinship(ped2), kinship(ped3)) @@ -162,12 +162,12 @@ test_that("Kinship with 2 different family", { ), ncol = 5, byrow = TRUE) ped2df <- as.data.frame(ped2mat) - names(ped2df) <- c("family", "id", "dadid", "momid", "sex") + names(ped2df) <- c("famid", "id", "dadid", "momid", "sex") ## testing when only one subject in a family peddf <- rbind(ped2df, c(2, 1, 0, 0, 1)) - peds <- Pedigree(peddf) + peds <- Pedigree(peddf, missid = "0") kinfam <- kinship(peds) expect_true(all(kinfam["2_1", 1:10] == 0)) @@ -176,7 +176,7 @@ test_that("Kinship with 2 different family", { c(2, 2, 0, 0, 2), c(2, 3, 1, 2, 1) ) - peds <- Pedigree(peddf) + peds <- Pedigree(peddf, missid = "0") kin2fam <- kinship(peds) expect_true(all(kin2fam[11:13, 1:10] == 0)) }) diff --git a/tests/testthat/test-make_famid.R b/tests/testthat/test-make_famid.R index b1ec745b..a351303b 100644 --- a/tests/testthat/test-make_famid.R +++ b/tests/testthat/test-make_famid.R @@ -1,12 +1,3 @@ -test_that("make_famid works", { - id <- as.character(1:20) - mom <- as.character(c(0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 2, 7, 7, 11, 14)) - dad <- as.character(c(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 3, 3, 0, 3, 0, 3, 8, 8, 10, 13)) - famid <- c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) - temp <- make_famid(id, mom, dad) - expect_equal(temp, famid) -}) - rel_df <- c( 213, 214, 1, 3, 210, 211, 2, 3, @@ -15,9 +6,55 @@ rel_df <- c( ) rel_df <- matrix(rel_df, ncol = 4, byrow = TRUE) -dimnames(rel_df) <- list(NULL, c("id1", "id2", "code", "family")) +dimnames(rel_df) <- list(NULL, c("id1", "id2", "code", "famid")) rel_df <- data.frame(rel_df) +test_that("upd_famid_id works", { + id <- c("A_1", "B_", "_3", "4", "E_5_A", "NA", NA_character_) + famid <- c(1, 2, 3, 4, 5, 6, 7) + expect_equal( + upd_famid_id(id, famid), + c("1_1", "2_", "3_3", "4_4", "5_5_A", "6_NA", NA_character_) + ) + + data("sampleped") + + pedi <- Pedigree(sampleped[-1], rel_df[c(1:3)]) + pedi <- make_famid(pedi) + ids_all <- paste(famid(pedi), c(101:141, 201:214), sep = "_") + expect_equal( + id(upd_famid_id(ped(pedi), famid(pedi))), + ids_all + ) + expect_equal( + id(upd_famid_id(ped(pedi))), + ids_all + ) + expect_equal( + id(ped(upd_famid_id(pedi, famid(pedi)))), + ids_all + ) + expect_equal( + id1(rel(upd_famid_id(pedi))), + c("2_213", "2_210", "1_140", "1_133") + ) +}) + +test_that("make_famid works", { + id <- as.character(1:20) + mom <- as.character(c( + 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 2, 7, 7, 11, 14 + )) + dad <- as.character(c( + 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 3, 3, 0, 3, 0, 3, 8, 8, 10, 13 + )) + famid <- as.character( + c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) + ) + temp <- make_famid(id, mom, dad) + expect_equal(temp, famid) +}) + test_that("make_famid works with Pedigree", { ## Simple case with no family id data("sampleped") @@ -25,19 +62,21 @@ test_that("make_famid works with Pedigree", { ped <- make_famid(ped) ## Expected values - fam <- sampleped$family + fam <- sampleped$famid fam[sampleped$id == "113"] <- 0 # singleton id <- paste(fam, c(101:141, 201:214), sep = "_") - expect_equal(ped(ped)$id, id) - expect_equal(rel(ped)$id1, c("2_213", "2_210", "1_140", "1_133")) + expect_equal(id(ped(ped)), id) + expect_equal(id1(rel(ped)), c("2_213", "2_210", "1_140", "1_133")) ## Updating already present family id data("sampleped") - sampleped$family[sampleped$family == "2"] <- 3 + sampleped$famid[sampleped$famid == "2"] <- 3 + rel_df[c(1:3)] ped <- Pedigree(sampleped, rel_df) + ped ped <- make_famid(ped) - expect_equal(ped(ped)$id, id) - expect_equal(rel(ped)$id1, c("2_213", "2_210", "1_140", "1_133")) + expect_equal(id(ped(ped)), id) + expect_equal(id1(rel(ped)), c("2_213", "2_210", "1_140", "1_133")) }) test_that("Family check works", { @@ -46,7 +85,7 @@ test_that("Family check works", { ## check them giving separate ped ids fcheck_df_sep <- with(sampleped, - family_check(id, dadid, momid, family) + family_check(id, dadid, momid, famid) ) fcheck_ped_sep <- family_check(ped) expect_equal(as.numeric(as.vector(fcheck_df_sep[1, ])), c(1, 41, 1, 1, 0)) @@ -56,7 +95,7 @@ test_that("Family check works", { fcheck_df_combined <- with(sampleped, family_check( as.character(id), dadid, momid, rep(1, nrow(sampleped)) )) - sampleped$family[sampleped$family == "2"] <- 1 + sampleped$famid[sampleped$famid == "2"] <- 1 ped <- Pedigree(sampleped) fcheck_ped_combined <- family_check(ped) expect_equal(as.numeric(as.vector(fcheck_df_combined[1, ])), @@ -80,4 +119,3 @@ test_that("Family check works", { ) }) - diff --git a/tests/testthat/test-max_kin_inf.R b/tests/testthat/test-max_kin_inf.R deleted file mode 100644 index 1d4798ee..00000000 --- a/tests/testthat/test-max_kin_inf.R +++ /dev/null @@ -1,36 +0,0 @@ -test_that("min_dist_inf works", { - data("sampleped") - colnames(sampleped) <- c("ped", "id", "dadid", "momid", - "sex", "affected", "avail" - ) - sampleped[c("id", "dadid", "momid")] <- as.data.frame(lapply( - sampleped[c("id", "dadid", "momid")], as.character - ), stringsAsFactors = FALSE) - summary(sampleped) - - res <- with(sampleped, min_dist_inf(id, informative = "AvAf", - dadid, momid, sex, avail, affected - )) - expect_equal(sum(res, na.rm = TRUE), 97) - mxkin <- with(sampleped, min_dist_inf(id, informative = "Av", - dadid, momid, sex, avail, affected - )) - expect_equal(sum(mxkin, na.rm = TRUE), 90) - mxkin <- with(sampleped, min_dist_inf(id, informative = "AvOrAf", - dadid, momid, sex, avail, affected - )) - expect_equal(sum(mxkin, na.rm = TRUE), 76) -}) - -test_that("min_dist_inf works with Pedigree", { - data("sampleped") - ped <- Pedigree(sampleped) - ped <- generate_colors(ped, col_aff = "affection", - threshold = 0.5, sup_thres_aff = TRUE - ) - - mxkin <- min_dist_inf(ped, col_aff = "affection_aff", informative = "Av") - expect_s4_class(mxkin, "Pedigree") - expect_equal(sum(mxkin$ped$kin, na.rm = TRUE), 90) -}) -TRUE diff --git a/tests/testthat/test-min_dist_inf.R b/tests/testthat/test-min_dist_inf.R new file mode 100644 index 00000000..3a83a4a0 --- /dev/null +++ b/tests/testthat/test-min_dist_inf.R @@ -0,0 +1,47 @@ +test_that("min_dist_inf works", { + data("sampleped") + colnames(sampleped) <- c("ped", "id", "dadid", "momid", + "sex", "affected", "avail" + ) + sampleped[c("id", "dadid", "momid")] <- as.data.frame(lapply( + sampleped[c("id", "dadid", "momid")], as.character + ), stringsAsFactors = FALSE) + + id_inf <- with(sampleped, is_informative( + id, avail, affected, informative = "AvAf" + )) + + res <- with(sampleped, + min_dist_inf(id, dadid, momid, sex, id_inf + ) + ) + expect_equal(sum(res, na.rm = TRUE), 97) + + id_inf <- with(sampleped, is_informative( + id, avail, affected, informative = "Av" + )) + mxkin <- with(sampleped, + min_dist_inf(id, dadid, momid, sex, id_inf) + ) + expect_equal(sum(mxkin, na.rm = TRUE), 90) + + id_inf <- with(sampleped, is_informative( + id, avail, affected, informative = "AvOrAf" + )) + mxkin <- with(sampleped, + min_dist_inf(id, dadid, momid, sex, id_inf) + ) + expect_equal(sum(mxkin, na.rm = TRUE), 76) +}) + +test_that("min_dist_inf works with Pedigree", { + data("sampleped") + ped <- Pedigree(sampleped) + ped <- generate_colors(ped, col_aff = "affection", + threshold = 0.5, sup_thres_aff = TRUE + ) + expect_equal(sum(affected(ped(ped)), na.rm = TRUE), 23) + mxkin <- min_dist_inf(ped, col_aff = "affection_mods", informative = "Av") + expect_s4_class(mxkin, "Pedigree") + expect_equal(sum(kin(ped(mxkin)), na.rm = TRUE), 90) +}) diff --git a/tests/testthat/test-norm_data.R b/tests/testthat/test-norm_data.R index 7243831e..2d7b2970 100644 --- a/tests/testthat/test-norm_data.R +++ b/tests/testthat/test-norm_data.R @@ -20,9 +20,9 @@ test_that("Norm ped", { ped_df <- suppressWarnings(norm_ped( ped_df, na_strings = c("None", "NA") )) - expect_equal(dim(ped_df), c(10, 20)) + expect_equal(dim(ped_df), c(10, 21)) expect_snapshot(ped_df) - expect_equal(sum(is.na(ped_df$error)), 3) + expect_equal(sum(is.na(ped_df$error)), 4) }) test_that("Norm rel", { @@ -39,31 +39,26 @@ test_that("Norm rel", { ) rel_df <- matrix(rel_df, ncol = 4, byrow = TRUE) - dimnames(rel_df) <- list(NULL, c("indId1", "indId2", "code", "family")) + dimnames(rel_df) <- list(NULL, c("id1", "id2", "code", "family")) rel_df <- data.frame(rel_df) rel_df <- norm_rel(rel_df) - expect_equal(dim(rel_df), c(9, 7)) + expect_equal(dim(rel_df), c(9, 5)) expect_snapshot(rel_df) expect_equal(sum(is.na(rel_df$error)), 6) -}) - -test_that("prefix_famid works", { - family_id <- NULL - ind_id <- c("A", "B", "0", NA) - missid <- "0" - - a <- prefix_famid(family_id, ind_id, missid) - expect_equal(a, ind_id) - - family_id <- "1" - b <- prefix_famid(family_id, ind_id, missid) - expect_equal(b, c("1_A", "1_B", "0", NA)) - - family_id <- c("1", "2", "0", NA) - c <- prefix_famid(family_id, ind_id, missid) - expect_equal(c, c("1_A", "2_B", "0", NA)) - family_id <- c("1", "2", "0") - expect_error(prefix_famid(family_id, ind_id, missid)) -}) \ No newline at end of file + rel_df <- c( + 1, 2, 1, + 1, 3, 2, + 2, 3, 3, + 1, 2, 4, + 3, 4, "MZ twin", + 6, 7, "Other", + 8, "8", "spo Use", + 9, "0", "4" + ) + rel_df <- matrix(rel_df, ncol = 4, byrow = TRUE) + dimnames(rel_df) <- list(NULL, c("id1", "id2", "code", "family")) + rel_df <- data.frame(rel_df) + expect_snapshot(norm_rel(rel_df, missid = "0")) +}) diff --git a/tests/testthat/test-num_child.R b/tests/testthat/test-num_child.R index f8518400..7417b160 100644 --- a/tests/testthat/test-num_child.R +++ b/tests/testthat/test-num_child.R @@ -25,11 +25,7 @@ test_that("Num child", { code = c("Spouse", "Spouse", "MZ twin") ) - id <- df$id - dadid <- df$dadid - momid <- df$momid - - df_num <- num_child(id, dadid, momid, relation) + df_num <- with(df, num_child(id, dadid, momid, relation, missid = "0")) expect_equal(df_num$num_child_dir, c(5, 4, 4, 4, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0) @@ -41,8 +37,9 @@ test_that("Num child", { c(1, 1, 0, 0, 0, 0, 1, 4, 4, 0, 0, 0, 0, 0) ) - ped <- Pedigree(df, relation) - ped <- num_child(ped) - expect_equal(ped(ped)[colnames(df_num)], df_num) + ped <- Pedigree(df, relation, missid = "0") + ped <- num_child(ped, reset = TRUE) + cols <- c("num_child_dir", "num_child_tot", "num_child_ind") + rownames(df_num) <- df_num$id + expect_equal(as.data.frame(ped(ped))[cols], df_num[cols]) }) -TRUE diff --git a/tests/testthat/test-ped_to_legdf.R b/tests/testthat/test-ped_to_legdf.R index 08c4580a..59e9d1e1 100644 --- a/tests/testthat/test-ped_to_legdf.R +++ b/tests/testthat/test-ped_to_legdf.R @@ -15,22 +15,21 @@ new_par <- list( test_that("Pedigree legend works", { data("sampleped") + sampleped$val_num <- as.numeric(sampleped$id) ped <- Pedigree(sampleped) - ped <- ped[ped(ped)$family == "1", ] - ped <- generate_colors(ped, add_to_scale = TRUE, "avail") - ped(ped)$indId <- as.numeric(ped(ped)$indId) + ped <- ped[ped(ped, "famid") == "1"] + ped <- generate_colors(ped, add_to_scale = TRUE, "avail", mods_aff = TRUE) ped <- generate_colors(ped, - add_to_scale = TRUE, "indId", threshold = 115, + add_to_scale = TRUE, "val_num", threshold = 115, colors_aff = c("pink", "purple"), keep_full_scale = TRUE ) - lst <- ped_to_legdf(ped, boxh = 1, boxw = 1, cex = 0.8) expect_snapshot(lst) vdiffr::expect_doppelganger("Legend alone", function() { - plot_fromdf(lst$leg_df, - usr = c(-1, max(lst$leg_df$x0) + 1, -1, max(lst$leg_df$y0) + 1), + plot_fromdf(lst$df, + usr = c(-1, max(lst$df$x0) + 1, -1, max(lst$df$y0) + 1), add_to_existing = FALSE ) } diff --git a/tests/testthat/test-pedigreeClass.R b/tests/testthat/test-pedigreeClass.R index 515f73f2..aea6b3e5 100644 --- a/tests/testthat/test-pedigreeClass.R +++ b/tests/testthat/test-pedigreeClass.R @@ -4,22 +4,23 @@ test_that("Pedigree works", { dadid = character(), momid = character(), sex = numeric(), - family = character(), - available = numeric(), - affected = numeric() + famid = character(), + avail = numeric(), + affection = numeric() )) expect_s4_class(ped, "Pedigree") - expect_equal(nrow(ped@ped), 0) - expect_equal(nrow(ped@rel), 0) - expect_equal(length(ped@scales), 2) - expect_equal(length(ped@scales$fill), 9) - expect_equal(length(ped@scales$border), 4) + expect_equal(length(ped@ped), 0) + expect_equal(length(ped@rel), 0) + expect_equal(dim(fill(ped)), c(0, 9)) + expect_equal(dim(border(ped)), c(0, 5)) + expect_equal(dim(spouse(ped)), c(0, 3)) + expect_equal(length(horder(ped)), 0) }) test_that("Pedigree old usage compatibility", { data(sampleped) ped1 <- with(sampleped, - Pedigree(id, dadid, momid, sex, family, available, affected) + Pedigree(id, dadid, momid, sex, famid, avail, affection) ) expect_equal(ped1, Pedigree(sampleped)) @@ -37,22 +38,25 @@ test_that("Pedigree old usage compatibility", { ), ncol = 5, byrow = TRUE) ped2df <- as.data.frame(ped2mat) - names(ped2df) <- c("family", "id", "dadid", "momid", "sex") + names(ped2df) <- c("famid", "id", "dadid", "momid", "sex") + ped2df$id <- as.integer(ped2df$id) ## 1 2 3 4 5 6 7 8 9 10,11,12,13,14,15,16 ped2df$disease <- c(NA, NA, 1, 0, 0, 0, 0, 1, 1, 1) ped2df$smoker <- c(0, NA, 0, 0, 1, 1, 1, 0, 0, 0) ped2df$available <- c(0, 0, 1, 1, 0, 1, 1, 1, 1, 1) ped2df$status <- c(1, 1, 1, 0, 1, 0, 0, 0, 0, 0) - ped2 <- with(ped2df, Pedigree(id, dadid, momid, sex, family, + ## With vectors + ped2 <- with(ped2df, Pedigree(id, dadid, momid, sex, famid, available, status, affected = cbind(disease, smoker, available), - relation = matrix(c(8, 9, 1, 1), ncol = 4) + rel_df = matrix(c(8, 9, 1, 1), ncol = 4), missid = "0" )) - rel_df <- data.frame(id1 = 8, id2 = 9, code = 1, family = 1) + ## With dataframes + rel_df <- data.frame(id1 = 8, id2 = 9, code = 1, famid = 1) expect_equal(ped2, Pedigree(ped2df, col_aff = c("disease", "smoker", "available"), - rel_df + rel_df, missid = "0" ) ) }) @@ -60,33 +64,30 @@ test_that("Pedigree old usage compatibility", { test_that("Pedigree from sampleped and affectation", { # Here is a case where the levels fail to line up properly data("sampleped") - df1 <- sampleped[sampleped$family == 1, ] - colnames(df1) + df1 <- sampleped[sampleped$famid == 1, ] ped1 <- Pedigree(df1, cols_ren_ped = list( "indId" = "id", "fatherId" = "dadid", "motherId" = "momid", "gender" = "sex", "available" = "avail", - "affection" = "affected" + "affection" = "affected", + "family" = "famid" )) - expect_equal(nrow(ped1@ped), 41) - expect_equal(ncol(ped1@ped), 19) - expect_equal(nrow(ped1@rel), 0) - expect_equal(ncol(ped1@rel), 7) - - expect_error(ped1$ped$id <- "1") - expect_error(ped1$ped$id[1] <- "1") - expect_error(ped1$ped$id[1] <- "102") - expect_error(ped1$ped$dadid[1] <- "101") - expect_no_error(ped1$ped$dadid[3] <- "1_103") - expect_warning(expect_error(ped1$ped$sex[3] <- "103")) - expect_error(ped1$ped$sex[3] <- "female") - expect_error(ped1$ped$sex[3] <- "unknown") - expect_no_error(ped1$ped$sex[41] <- "unknown") - - expect_equal(as.data.frame(ped1), ped1$ped) + expect_equal(dim(as.data.frame(ped(ped1))), c(41, 27)) + expect_equal(dim(as.data.frame(rel(ped1))), c(0, 4)) + + expect_error(id(ped(ped1)) <- "1") + expect_error(id(ped(ped1))[1] <- "1") + expect_error(id(ped(ped1))[1] <- "102") + expect_no_error(id(ped(ped1))[41] <- "142") + expect_equal(id(ped(ped1))[41], "142") + expect_no_error(dadid(ped(ped1))[3] <- "1_103") + expect_warning(expect_error(sex(ped(ped1))[3] <- "103")) + expect_error(sex(ped(ped1))[3] <- "female") + expect_error(sex(ped(ped1))[3] <- "unknown") + expect_no_error(sex(ped(ped1))[41] <- "male") }) test_that("Pedigree subscripting", { @@ -95,71 +96,53 @@ test_that("Pedigree subscripting", { "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", "gender" = "sex", "family" = "famid", "affection" = "cancer" - )) - expect_equal(nrow(ped(minnped)), 28081) - expect_equal(ncol(minnped[["ped"]]), 28) + ), missid = "0") + expect_equal(length(minnped), 28081) + expect_equal(dim(as.data.frame(ped(minnped))), c(28081, 36)) - ped8 <- minnped[ped(minnped)$family == "8", - c("id", "dadid", "momid", "sex", "affection") - ] + ped8 <- minnped[famid(ped(minnped)) == "8"] - expect_equal(nrow(ped8$ped), 40) - expect_equal(ncol(ped8$ped), 11) + expect_equal(dim(as.data.frame(ped(ped8))), c(40, 36)) # Subjects 150, 152, 154, 158 are children, # and 143, 162, 149 are parents and a child droplist <- paste("8", c(150, 152, 154, 158, 143, 162, 149), sep = "_") - keep1 <- !(ped8$ped$id %in% droplist) # logical + keep1 <- !(id(ped(ped8)) %in% droplist) # logical keep2 <- which(keep1) # numeric - keep3 <- as.character(ped8$ped$id[keep1]) # character + keep3 <- as.character(id(ped(ped8))[keep1]) # character keep4 <- factor(keep3) - test1 <- ped8[keep1, ] - test2 <- ped8[keep2, ] - test3 <- ped8[keep3, ] - test4 <- ped8[keep4, ] + test1 <- ped8[keep1] + test2 <- ped8[keep2] + test3 <- ped8[keep3] + test4 <- ped8[keep4] expect_equal(test1, test2) expect_equal(test1, test3) expect_equal(test1, test4) - pedcol <- minnped[, c("id", "dadid", "momid", "sex", "affection")] - expect_equal(nrow(pedcol$ped), 28081) - expect_equal(ncol(pedcol$ped), 11) pedrow <- minnped[c("8_150", "8_163", "8_145", "8_135", "8_136")] - expect_equal(nrow(pedrow$ped), 5) - expect_equal(ncol(pedrow$ped), 28) -}) - -test_that("Pedigree to dataframe", { - data("sampleped") - ped <- Pedigree(sampleped) - expect_equal(dim(as.data.frame(ped)), c(55, 19)) -}) - -test_that("Pedigree length", { - data("sampleped") - ped <- Pedigree(sampleped) - expect_equal(length(ped), 55) + expect_equal(length(pedrow), 5) }) -test_that("Pedigree getters", { +test_that("Pedigree generic", { data("sampleped") - ped <- Pedigree(sampleped) - expect_equal(ped$ped, ped(ped)) - expect_equal(ped$rel, rel(ped)) - expect_equal(ped$scales, scales(ped)) - expect_equal(ped$hints, hints(ped)) - scales(ped)$fill + pedi <- Pedigree(sampleped) + expect_equal(dim(as.data.frame(ped(pedi))), c(55, 27)) + expect_equal(names(as.list(pedi)), c("ped", "rel", "scales", "hints")) + expect_equal(length(pedi), 55) }) -test_that("Pedigree setters", { +test_that("Pedigree accessors", { data("sampleped") - ped <- Pedigree(sampleped) - peddf <- ped(ped) - peddf[1, "family"] <- "2" - ped(ped) <- peddf - expect_equal(ped(ped)[1, "family"], "2") + pedi <- Pedigree(sampleped) + expect_equal(pedi@ped, ped(pedi)) + expect_equal(pedi@rel, rel(pedi)) + expect_equal(pedi@hints, hints(pedi)) + expect_equal(pedi@hints@horder, horder(pedi)) + expect_equal(pedi@hints@spouse, spouse(pedi)) + expect_equal(pedi@scales@fill, fill(pedi)) + expect_equal(pedi@scales@border, border(pedi)) }) diff --git a/tests/testthat/test-plot.R b/tests/testthat/test-plot.R index 8019bb07..3956cb0c 100644 --- a/tests/testthat/test-plot.R +++ b/tests/testthat/test-plot.R @@ -1,4 +1,4 @@ -test_that("Pedigree other test", { +test_that("Pedigree plotting test", { ped2mat <- matrix( c( 1, 1, 0, 0, 1, 1, 0, @@ -13,7 +13,6 @@ test_that("Pedigree other test", { 1, 10, 8, 7, 2, 0, 0 ), ncol = 7, byrow = TRUE ) - withr::local_options(width = 50) ped2df <- as.data.frame(ped2mat) names(ped2df) <- c("family", "indId", "fatherId", "motherId", "gender", "affection", "available" @@ -25,8 +24,8 @@ test_that("Pedigree other test", { ped2df$vitalStatus <- c(1, 1, 1, 0, 1, 0, 0, 8, 0, 0) rel_df <- data.frame(indId1 = 8, indId2 = 9, code = 3, family = 1) - ped <- Pedigree(ped2df, rel_df) - + ped <- Pedigree(ped2df, rel_df, missid = "0") + ped vdiffr::expect_doppelganger("Ped simple affection", function() plot(ped) ) @@ -42,7 +41,7 @@ test_that("Pedigree other test", { lst <- ped_to_plotdf(ped) expect_equal(length(lst), 2) - expect_equal(dim(lst$df), c(88, 15)) + expect_equal(dim(lst$df), c(82, 15)) expect_snapshot(lst) p <- plot(ped, title = "Pedigree", ggplot_gen = TRUE) vdiffr::expect_doppelganger("Ped 2 affections ggplot", @@ -53,7 +52,7 @@ test_that("Pedigree other test", { test_that("Pedigree fails to line up", { # Here is a case where the levels fail to line up properly data(sampleped) - df1 <- sampleped[sampleped$family == "1", ] + df1 <- sampleped[sampleped$famid == "1", ] ped1 <- Pedigree(df1) vdiffr::expect_doppelganger("ped1", function() plot(ped1) diff --git a/tests/testthat/test-shrink.R b/tests/testthat/test-shrink.R index 14894b7d..4ad0be38 100644 --- a/tests/testthat/test-shrink.R +++ b/tests/testthat/test-shrink.R @@ -6,10 +6,10 @@ test_that("Pedigree shrink works", { ped_mb <- Pedigree(minnbreast, cols_ren_ped = list(fatherId = "fatherid", motherId = "motherid", indId = "id", gender = "sex", family = "famid" - ) + ), missid = "0" ) ped_mb <- generate_colors(ped_mb, col_aff = "cancer", add_to_scale = FALSE) - mn2 <- ped_mb[ped_mb$ped$family == "5", ] + mn2 <- ped_mb[famid(ped_mb) == "5"] ## this Pedigree as one person with cancer. The Pedigree is not informative @@ -22,9 +22,10 @@ test_that("Pedigree shrink works", { ) ## breaks in trim - avail <- ifelse(is.na(mn2$ped$cancer), 0, mn2$ped$cancer) + avail(ped(mn2)) <- ifelse(is.na(mcols(mn2)$cancer), 0, mcols(mn2)$cancer) - mn2_s <- shrink(mn2, avail) + find_unavailable(ped(mn2)) + mn2_s <- shrink(mn2) expect_equal(mn2_s$id_lst$unavail, paste("5", c( @@ -35,12 +36,12 @@ test_that("Pedigree shrink works", { ), sep = "_") ) - mn8 <- ped_mb[ped_mb$ped$family == "8", ] + mn8 <- ped_mb[famid(ped_mb) == "8"] vdiffr::expect_doppelganger("Pedigree shrink 2", function() plot(mn8) ) - avail <- ifelse(is.na(mn8$ped$cancer), 0, mn8$ped$cancer) + avail <- ifelse(is.na(mcols(mn8)$cancer), 0, mcols(mn8)$cancer) mn8_s <- shrink(mn8, avail) @@ -59,25 +60,26 @@ test_that("Pedigree shrink error if missing info", { ## use sampleped from the package data("sampleped") ped <- Pedigree(sampleped) - ped2 <- ped[ped(ped)$family == "2", ] - ped2$ped$sex[c(13, 12)] <- c("unknown", "terminated") + ped2 <- ped[famid(ped) == "2"] + sex(ped(ped2))[c(13, 12)] <- c("unknown", "terminated") ## set 2nd col of affected to NA - ped2$ped$affected[c(7, 9)] <- NA - expect_error(shrink(ped = ped2, avail = ped2$ped$affected, max_bits = 32)) + expect_no_error(shrink(ped2, max_bits = 32)) + avail(ped(ped2))[c(7, 9)] <- NA + expect_error(shrink(ped2, max_bits = 32)) }) test_that("Pedigree shrink avail test", { ## use sampleped from the package data("sampleped") ped <- Pedigree(sampleped) - ped1 <- ped[ped(ped)$family == "1", ] + ped1 <- ped[famid(ped) == "1"] set.seed(10) - ped1_s_av_32 <- shrink(ped = ped1, max_bits = 32) + ped1_s_av_32 <- shrink(ped1, max_bits = 32) set.seed(10) - ped1_s_av_25 <- shrink(ped = ped1, max_bits = 25) + ped1_s_av_25 <- shrink(ped1, max_bits = 25) expect_equal(ped1_s_av_32$id_trim, paste("1", c( @@ -97,12 +99,12 @@ test_that("Pedigree shrink avail test", { test_that("Pedigree shrink with character", { ## use sampleped from the package data("sampleped") - sampleped$family[sampleped$family == 1] <- "A" + sampleped$famid[sampleped$famid == 1] <- "A" ped <- Pedigree(sampleped) - ped1 <- ped[ped(ped)$family == "A", ] + ped1 <- ped[famid(ped) == "A"] set.seed(100) - ped1_s_av_32 <- shrink(ped = ped1, max_bits = 32) + ped1_s_av_32 <- shrink(ped1, max_bits = 32) expect_equal(ped1_s_av_32$id_trim, c( "A_101", "A_102", "A_107", "A_108", "A_111", "A_113", "A_121", "A_122", @@ -110,7 +112,7 @@ test_that("Pedigree shrink with character", { )) set.seed(100) - ped1_s_av_25 <- shrink(ped = ped1, max_bits = 25) + ped1_s_av_25 <- shrink(ped1, max_bits = 25) expect_equal(ped1_s_av_25$id_trim, c( "A_101", "A_102", "A_107", "A_108", "A_111", "A_113", "A_121", "A_122", @@ -122,7 +124,7 @@ test_that("Pedigree shrink with character", { test_that("Shrink works", { data("sampleped") ped <- Pedigree(sampleped) - ped2 <- ped[ped(ped)$family == "2", ] + ped2 <- ped[famid(ped) == "2"] ped2_s <- shrink(ped2) vdiffr::expect_doppelganger("Whole ped", @@ -132,4 +134,3 @@ test_that("Shrink works", { function() plot(ped2_s$pedObj, title = "Shrinked ped") ) }) -TRUE diff --git a/tests/testthat/test-unavail.R b/tests/testthat/test-unavail.R index 17cf0eb4..ffc6349e 100644 --- a/tests/testthat/test-unavail.R +++ b/tests/testthat/test-unavail.R @@ -6,8 +6,7 @@ test_that("unavailable detection works", { 101, 102, 107, 108, 111, 113, 121, 122, 123, 131, 132, 134, 139 ), sep = "_"), paste("2", c(205, 210, 213), sep = "_")) ) - find_avail_affected(ped) - ped(ped)$affected[25] <- NA + affected(ped(ped))[25] <- NA expect_equal(as.vector(find_avail_affected(ped)$id_trimmed), "1_125") expect_equal(find_avail_noninform(ped), c(paste("1", c( @@ -21,8 +20,8 @@ test_that("Unrelated detection works", { ped <- Pedigree(sampleped) - ped1 <- ped[ped(ped)$family == 1, ] - ped2 <- ped[ped(ped)$family == 2, ] + ped1 <- ped[famid(ped) == 1] + ped2 <- ped[famid(ped) == 2] set.seed(10) expect_equal(unrelated(ped1), diff --git a/tests/testthat/test-useful_inds.R b/tests/testthat/test-useful_inds.R index 93d76fa9..b7547d04 100644 --- a/tests/testthat/test-useful_inds.R +++ b/tests/testthat/test-useful_inds.R @@ -12,10 +12,10 @@ test_that("useful_inds works", { )[c("id", "num_child_tot")] df <- merge(sampleped, numdf) - use_id <- with(df, + use_id_avaff <- with(df, useful_inds(id, dadid, momid, avail, affected, num_child_tot) ) - expect_equal(df$id[!df$id %in% use_id], + expect_equal(df$id[!df$id %in% use_id_avaff], c("101", "102", "107", "108", "113", "117") ) }) @@ -23,15 +23,14 @@ test_that("useful_inds works", { test_that("useful_inds works with Pedigree", { data("sampleped") ped <- Pedigree(sampleped) - ped <- num_child(ped) + ped <- useful_inds(ped, informative = "Av") - expect_equal(ped(ped)$id[!ped(ped)$useful], + expect_equal(id(ped(ped))[useful(ped(ped)) == 0], c("1_101", "1_102", "1_107", "1_108", "1_117") ) - expect_error(useful_inds(ped, informative = "AvOrAf")) + expect_snapshot_error(useful_inds(ped, informative = "AvOrAf")) ped <- useful_inds(ped, informative = "AvOrAf", reset = TRUE) - expect_equal(ped(ped)$id[!ped(ped)$useful], c("1_101", "1_108")) + expect_equal(id(ped(ped))[useful(ped(ped)) == 0], c("1_101", "1_108")) }) -TRUE diff --git a/tests/testthat/test-validity.R b/tests/testthat/test-validity.R index 984783ca..14b23038 100644 --- a/tests/testthat/test-validity.R +++ b/tests/testthat/test-validity.R @@ -12,7 +12,7 @@ test_that("check_slot_fd works", { expect_error(check_slot_fd(obj, "C", "A")) expect_equal( check_slot_fd(obj, "x", c("B", "C", "D")), - "`C, D` column(s) is not present in slot x." + "'C', 'D' column(s) is not present in slot x." ) expect_equal(check_slot_fd(lst, "X", "a"), NULL) }) @@ -20,27 +20,31 @@ test_that("check_slot_fd works", { test_that("check_values works", { expect_equal( check_values(obj@y$B, 1:15), - "Values 16, 17, 18, 19, 20 should be in 1, 2, 3, 4, 5 ...." + paste0("Values '16', '17', '18', '19', '20' ", + "should be in '1', '2', '3', '4', '5'..." + ) ) expect_equal(check_values(obj@x$A, 1:15), NULL) expect_error(check_values(obj@x[c("B", "A")], 1:15)) expect_equal( check_values(obj@x$B, 1:2), - "Values A, B, C, D, E ... should be in 1, 2." + "Values 'A', 'B', 'C', 'D', 'E'... should be in '1', '2'" ) expect_equal( check_values(obj@x$A, 1:5), - "Values 6, 7, 8, 9, 10 should be in 1, 2, 3, 4, 5." + "Values '6', '7', '8', '9', '10' should be in '1', '2', '3', '4', '5'" ) ## test present = FALSE expect_equal( check_values(obj@x$A, 1:15, present = FALSE), - "Values 1, 2, 3, 4, 5 ... should not be in 1, 2, 3, 4, 5 ...." + paste0("Values '1', '2', '3', '4', '5'... ", + "should not be in '1', '2', '3', '4', '5'..." + ) ) }) test_that("paste0max works", { - expect_equal(paste0max(1:10), "1, 2, 3, 4, 5 ...") - expect_equal(paste0max(1:3), "1, 2, 3") + expect_equal(paste0max(1:10), "'1', '2', '3', '4', '5'...") + expect_equal(paste0max(1:3), "'1', '2', '3'") }) diff --git a/vignettes/pedigree.Rmd b/vignettes/pedigree.Rmd index dab52c54..45057f06 100644 --- a/vignettes/pedigree.Rmd +++ b/vignettes/pedigree.Rmd @@ -1,5 +1,5 @@ --- -title: "Pedixplorer Pedigree" +title: "Pedixplorer tutorial" author: "Louis Le Nézet" date: "31/08/2023" url: "https://github.com/LouisLeNezet/Pedixplorer" @@ -9,44 +9,53 @@ output: toc_depth: 2 header-includes: \usepackage{tabularx} vignette: | - %\VignetteIndexEntry{Pedixplorer Pedigree} + %\VignetteIndexEntry{Pedixplorer tutorial} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 --- ```{r width_control, echo = FALSE} options(width = 100) ``` -Introduction -=============== +# Introduction -This document is a tutorial for the `Pedixplorer` package, with examples of -creating Pedigree objects and kinship matrices and other Pedigree utilities. +This document is a tutorial for the `Pedixplorer` package, with examples +of creating Pedigree objects and kinship matrices and other Pedigree +utilities. -The `Pedixplorer` package contains the outines to handle family data with a -Pedigree object. -The initial purpose was to create correlation structures that describe family -relationships such as kinship and identity-by-descent, which can be used to -model family data in mixed effects models, such as in the coxme function. -It also includes tools for Pedigree drawing and filtering which is focused -on producing compact layouts without intervention. -Recent additions include utilities to trim the Pedigree object with -various criteria, and kinship for the X chromosome. +The `Pedixplorer` package is an updated version of the +[`Kinship2`](https://github.com/mayoverse/kinship2) package, featuring a +change in maintainer and repository from CRAN to Bioconductor for +continued development and support. -Suplementary vignettes are available to explain: +It contains the routines to handle family data with a Pedigree object. +The initial purpose was to create correlation structures that describe +family relationships such as kinship and identity-by-descent, which can +be used to model family data in mixed effects models, such as in the +coxme function. It also includes tools for Pedigree drawing and +filtering which is focused on producing compact layouts without +intervention. Recent additions include utilities to trim the Pedigree +object with various criteria, and kinship for the X chromosome. -- The [Pedigree() constructor]("pedigree_constructor.html") used to create a Pedigree object `vignette("Pedigree() constructor", package = "Pedixplorer")` -- The [alignment algorithm]("alignement_details.html") used create the Pedigree structure `vignette("Pedigree alignement details", package = "Pedixplorer")` -- The [kinship() algorithm]("kinship_details.html") `vignette("Pedigree kinship() details", package = "Pedixplorer")` -- The [plotting algorithm]("pedigree_plot_details.html") used to plot the Pedigree `vignette("Pedigree plotting details", package = "Pedixplorer")` +Supplementary vignettes are available to explain: +- The **$Pedigree$ object** + `vignette("pedigree_object", package = "Pedixplorer")` +- The **alignment algorithm** used create the Pedigree structure + `vignette("pedigree_alignment", package = "Pedixplorer")` +- The **kinship algorithm** + `vignette("pedigree_kinship", package = "Pedixplorer")` +- The **plotting algorithm** used to plot the Pedigree + `vignette("pedigree_plot", package = "Pedixplorer")` +# Installation -Installation -=============== -The `Pedixplorer` package is available on Bioconductor and can be installed -with the following command: +The $Pedixplorer$ package is available on Bioconductor and can be +installed with the following command: ```{r BiocManager_install, eval = FALSE} if (!requireNamespace("BiocManager", quietly = TRUE)) { @@ -61,63 +70,61 @@ The package can then be loaded with the following command: library(Pedixplorer) ``` -The Pedigree S4 object -=================== +# The Pedigree S4 object -The Pedigree object is a list of dataframes that describe the family structure. -It contains the following components: +The $Pedigree$ object is a list of dataframes that describe the family +structure. It contains the following components: -- ped: a dataframe with the Pedigree information -- rel: a dataframe with the relationship information -- scales: a list of 2 dataframe with the filling and borders -informations for the plot -- hints: a list of 2 elements indicating the order and the spouse to -organise the Pedigree structure +- ped: a $Ped$ object with the Pedigree information `help(Ped)`. +- rel: a $Rel$ object with the relationship information `help(Rel)`. +- scales: a $Scales$ object of 2 dataframe with the filling and borders + informations for the plot `help(Scales)`. +- hints: a $Hints$ objects with 2 slots indicating the horder and the + spouse to organise the Pedigree structure `help(Hints)`. -Basic Usage -============= +# Basic Usage ## Example Data -Two datasets are provided within the Pedixplorer package: -+ minnbreast: 17 families from a breast cancer study -+ sampleped: two samplepedigrees, with 41 and 14 subjects +Two datasets are provided within the $Pedixplorer$ package: + minnbreast: +17 families from a breast cancer study + sampleped: two sample pedigrees, +with 41 and 14 subjects -This vignette uses the two pedigrees in ~sampleped~. -For more information on these datasets, see -`help(minnbreast)` and `help(sampleped)`. +This vignette uses the two pedigrees in $sampleped$. For more +information on these datasets, see `help(minnbreast)` and +`help(sampleped)`. ## Pedigree -First, we load $samplepe$ and look at some of the values -in the dataset, and create a *Pedigree* object using the -*Pedigree()* function. -This function automaticaly detect the necessary columns in the dataframe. -If necessary you can modify the columns names with *cols_ren*. -To create a Pedigree object, with multiple families, the dataframe just need -a family column in the *ped_df* dataframe. When this is the case, the family -column will be pasted to the id of each individuals separated by an -underscore to create a unique id for each individual in the Pedigree object. +First, we load $sampleped$ and look at some of the values in the dataset, +and create a $Pedigree$ object using the `Pedigree()` function. This +function automaticaly detect the necessary columns in the dataframe. If +necessary you can modify the columns names with *cols_ren*. To create a +$Pedigree$ object, with multiple families, the dataframe just need a +family column in the *ped_df* dataframe. When this is the case, the +famid column will be pasted to the id of each individuals separated by +an underscore to create a unique id for each individual in the $Pedigree$ +object. ```{r, Pedigree_creation} data("sampleped") -sampleped[1:10, ] -ped <- Pedigree(sampleped) +print(sampleped[1:10, ]) +ped <- Pedigree(sampleped[c(3, 4, 10, 35, 36), ]) print(ped) ``` -For more information on the *Pedigree()* function, see `help(Pedigree)`. +For more information on the `Pedigree()` function, see `help(Pedigree)`. -The {\em Pedigree} object can be subset to individual pedigrees -by their family id. The Pedigree object has a print, summary and plot method, -which we show below. -The print method prints a short summary of the Pedigree. -The summary method prints a more detailed summary of the Pedigree. -Finally the plot method displays the Pedigree. +The $Pedigree$ object can be subset to individual pedigrees by their +family id. The $Pedigree$ object has a print, summary and plot method, +which we show below. The print method prints the $Ped$ and $Rel$ object of +the pedigree. The summary method prints a short summary of the pedigree. +Finally the plot method displays the pedigree. ```{r, ped1} -ped1 <- ped[ped(ped)$family == "1"] -print(ped1) +ped <- Pedigree(sampleped) +print(famid(ped)) +ped1 <- ped[famid(ped) == "1"] summary(ped1) plot(ped1) ``` @@ -128,35 +135,34 @@ You can add a title and a legend to the plot with the following command: plot(ped1, title = "Pedigree 1", legend = TRUE, leg_loc = c(5, 15, 4.5, 5)) ``` -Fixing Pedigree Issues -======================= +# Fixing Pedigree Issues -To "break" the Pedigree, we can manipulate the sex value to not match the -parent value (in this example, we change $203$ from a male to a female, -even though $203$ is a father). To do this, we first subset ~datped2~, -locate the *id* column, and match it to a specific id (in this case, $203$). -Within id $203$, then locate in the *sex* column. +To "break" the pedigree, we can manipulate the sex value to not match +the parent value (in this example, we change $203$ from a male to a +female, even though $203$ is a father). To do this, we first subset +$datped2$, locate the *id* column, and match it to a specific id (in +this case, $203$). Within id $203$, then locate in the *sex* column. Assign this subset to the incorrect value of *2* (female) to change the original/correct value of *1* (male). -To further break the Pedigree, we can delete subjects who seem irrelevant to -the Pedigree (in this example, we delete $209$ because he is a married-in -father). -To do this, we subset ~datped2~ and use the *-which()* function to locate and -delete the specified subject (in this case, $209$). Reassign this code to -~datped22~ to drop the specified subject entirely. +To further break the pedigree, we can delete subjects who seem +irrelevant to the pedigree (in this example, we delete $209$ because he +is a married-in father). To do this, we subset $datped2$ and use the +*-which()* function to locate and delete the specified subject (in this +case, $209$). Reassign this code to $datped22$ to drop the specified +subject entirely. ```{r, datped2} -datped2 <- sampleped[sampleped$family == 2, ] +datped2 <- sampleped[sampleped$famid == 2, ] datped2[datped2$id %in% 203, "sex"] <- 2 datped2 <- datped2[-which(datped2$id %in% 209), ] ``` -An error occurs when the *Pedigree()* function notices that id $203$ is not -coded to be male (*1*) but is a father. To correct this, we simply employ the -*fix_parents()* function to adjust the *sex* value to match either *momid* -or *dadid*. *fix_parents()* will also add back in any deleted subjects, -further fixing the Pedigree. +An error occurs when the `Pedigree()` function notices that id $203$ is +not coded to be male (*1*) but is a father. To correct this, we simply +employ the `fix_parents()` function to adjust the *sex* value to match +either *momid* or *dadid*. `fix_parents()` will also add back in any +deleted subjects, further fixing the Pedigree. ```{r, fixped2} tryout <- try({ @@ -168,46 +174,46 @@ ped2 <- Pedigree(fixped2) plot(ped2) ``` -If the fix is straightforward (changing one sex value based on either being -a mother or father), *fix_parents()* will resolve the issue. If the issue is -more complicated, say if $203$ is coded to be both a father *and* a mother, -*fix_parents()* will not know which one is correct and therefore the issue -will not be resolved. +If the fix is straightforward (changing one sex value based on either +being a mother or father), `fix_parents()` will resolve the issue. If +the issue is more complicated, say if $203$ is coded to be both a father +and a mother, `fix_parents()` will not know which one is correct and +therefore the issue will not be resolved. -Kinship -============== +# Kinship -A common use for pedigrees is to make a matrix of kinship coefficients that -can be used in mixed effect models. A kinship coefficient is the probability -that a randomly selected allele from two people at a given locus will be -identical by descent (IBD), assuming all founder alleles are independent. -For example, we each have two alleles per autosomal marker, so sampling two -alleles with replacement from our own DNA has only $p=0.50$ probability of -getting the same allele twice. +A common use for pedigrees is to make a matrix of kinship coefficients +that can be used in mixed effect models. A kinship coefficient is the +probability that a randomly selected allele from two people at a given +locus will be identical by descent (IBD), assuming all founder alleles +are independent. For example, we each have two alleles per autosomal +marker, so sampling two alleles with replacement from our own DNA has +only $p=0.50$ probability of getting the same allele twice. ## Kinship for Pedigree object -We use {\em kinship} to calculate the kinship matrix for $ped2$. The -result is a special symmetrix matrix class from the -[Matrix R package](https://CRAN.R-project.org/package=Matrix/), -which is stored efficiently to avoid repeating elements. + +We use `kinship()` to calculate the kinship matrix for $ped2$. The +result is a special symmetrix matrix class from the [Matrix R +package](https://CRAN.R-project.org/package=Matrix/), which is stored +efficiently to avoid repeating elements. ```{r, calc_kinship} kin2 <- kinship(ped2) kin2[1:9, 1:9] ``` -For family 2, see that the row and column names match the id in -the figure below, and see that each kinship coefficient with -themselves is $0.50$, siblings are $0.25$ (e.g. $204-205$), and Pedigree -marry-ins only share alleles IBD with their children with coefficient $0.25$ -(e.g. $203-210$). The plot can be used to verify other kinship coefficients. +For family 2, see that the row and column names match the id in the +figure below, and see that each kinship coefficient with themselves is +$0.50$, siblings are $0.25$ (e.g. $204-205$), and pedigree marry-ins +only share alleles IBD with their children with coefficient $0.25$ (e.g. +$203-210$). The plot can be used to verify other kinship coefficients. ## Kinship for Pedigree with multiple families -The kinship function also works on a *Pedigree* object with multiple families. -We show how to create the kinship matrix, then -show a snapshot of them for the two families, where the row and columns names -are the ids of the subject. +The `kinship()` function also works on a $Pedigree$ object with multiple +families. We show how to create the kinship matrix, then show a snapshot +of them for the two families, where the row and columns names are the +ids of the subject. ```{r, kin_all} ped <- Pedigree(sampleped) @@ -221,14 +227,13 @@ kin_all[42:46, 42:46] Specifying twin relationships in a Pedigree with multiple families object is complicated by the fact that the user must specify the family -id to which the ~id1~ and ~id2~ belong. -We show below the relation matrix requires the family id to be in the last -column, with the column names as done below, to make the plotting and -kinship matrices to show up with the monozygotic twins correctly. -We show how to specify monozygosity for subjects 206 and 207 in -samplePedigree 2, and subjects 125 and 126 in Pedigree 1. -We check it by looking at the kinship matrix for these pairs, -which are correctly at $0.5$. +id to which the *id1* and *id2* belong. We show below the relation +matrix requires the family id to be in the last column, with the column +names as done below, to make the plotting and kinship matrices to show +up with the monozygotic twins correctly. We show how to specify +monozygosity for subjects $206$ and $207$ in $ped2$, and subjects +$125$ and $126$ in $ped1$. We check it by looking at the kinship matrix +for these pairs, which are correctly at $0.5$. ```{r, kin_twins} reltwins <- as.data.frame(rbind(c(206, 207, 1, 2), c(125, 126, 1, 1))) @@ -240,43 +245,45 @@ kin_all[24:27, 24:27] kin_all[46:50, 46:50] ``` -Note that subject $113$ is not in Pedigree 1 because they -are a marry-in without children in the Pedigree. Subject $113$ is in their own -Pedigree of size 1 in the $kin_all$ matrix at index $41$. We later show how -to handle such marry-ins for plotting. +Note that subject $113$ is not in $ped1$ because they are a marry-in +without children in the $Pedigree$. Subject $113$ is in their own $Pedigree$ +of size 1 in the $kin_all$ matrix at index $41$. We later show how to +handle such marry-ins for plotting. -Optional Pedigree Informations -=============================== +# Optional Pedigree Informations -We use Pedigree 2 from $sampleped$ to sequentially -add optional information to the Pedigree object. +We use $ped2$ from $sampleped$ to sequentially add optional +information to the $Pedigree$ object. ## Status -The example below shows how to specify a $status$ indicator, such as -vital status. The $sampleped$ data does not include such an -indicator, so we create one to indicate that the first generation of -Pedigree 2, subjects 1 and 2, are deceased. The $status$ indicator -is used to cross out the individuals in the Pedigree plot. + +The example below shows how to specify a $status$ indicator, such as +vital status. The $sampleped$ data does not include such an\ +indicator, so we create one to indicate that the first generation of +$ped2$, subjects $1$ and $2$, are deceased. The $status$ indicator is +used to cross out the individuals in the Pedigree plot. ```{r, status} -df2 <- sampleped[sampleped$family == 2, ] +df2 <- sampleped[sampleped$famid == 2, ] names(df2) df2$status <- c(1, 1, rep(0, 12)) ped2 <- Pedigree(df2) -summary(ped2$ped$status) +summary(status(ped(ped2))) plot(ped2) ``` ## Labels -Here we show how to use the $label$ argument in the plot method to add -additional information under each subject. In the example below, we add -names to the existing plot by adding a new column to the *ped* dataframe. -As space permits, more lines and characters per line can be made using the -a {/em \n} character to indicate a new line. +Here we show how to use the $label$ argument in the plot method to add +additional information under each subject. In the example below, we add +names to the existing plot by adding a new column to the $elementMetadata$ +of the $Ped$ object of the $Pedigree$. + +As space permits, more lines and characters per line can be +made using the a {/em \n} character to indicate a new line. ```{r, labels} -ped2$ped$Names <- c( +mcols(ped2)$Names <- c( "John\nDalton", "Linda", "Jack", "Rachel", "Joe", "Deb", "Lucy", "Ken", "Barb", "Mike", "Matt", "Mindy", "Mark", "Marie\nCurie" @@ -284,80 +291,84 @@ ped2$ped$Names <- c( plot(ped2, label = "Names") ``` - ## Affected Indicators -We show how to specify affected status with a single indicator and -multiple indicators. First, we use the affected indicator from -$sampleped$, which contains 0/1 indicators and NA as missing, and let it -it indicate blue eyes. Next, we create a vector as an indicator for baldness. -And add it as a second filling scale for the plot with -*generate_colors(add_to_scale = TRUE)*. -The plot shapes for each subject is therefore divided into two equal parts and -shaded differently to indicate the two affected indicators. +We show how to specify affected status with a single indicator and +multiple indicators. First, we use the affected indicator from +$sampleped$, which contains $0$/$1$ indicators and $NA$ as missing, and let it +it indicate blue eyes. Next, we create a vector as an indicator for +baldness. And add it as a second filling scale for the plot with +`generate_colors(add_to_scale = TRUE)`. The plot shapes for each subject +is therefore divided into two equal parts and shaded differently to +indicate the two affected indicators. ```{r, two_affection} -df2$bald <- as.factor(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1)) -ped2 <- Pedigree(df2) +mcols(ped2)$bald <- as.factor(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1)) ped2 <- generate_colors(ped2, col_aff = "bald", add_to_scale = TRUE) plot(ped2, legend = TRUE) ``` ## Special Relationships -Special Pedigree relationships can be specified in a matrix -as the $relation$ argument. There are 4 relationships that can -be specified by numeric codes: 1=Monozygotic twins, 2=Dizygotic twins, -3=twins of unknown zygosity, and 4=Spouse. The spouse relationship can -indicate a marry-in when a couple does not have children together. +Special pedigree relationships can be specified in a matrix as the +$relation$ argument. There are 4 relationships that can be specified by +numeric codes: + +- 1 = Monozygotic twins +- 2 = Dizygotic twins +- 3 = twins of unknown zygosity +- 4 = Spouse + +The spouse relationship can indicate a +marry-in when a couple does not have children together. ### Twins -Below, we create a matrix of relationships for monozygotic and -unknown-zygosity twins in the most recent generation of Pedigree 2. -The twin relationships are both represented with -diverging lines from a single point. The monozygotic twins have an -additional line connecting the diverging lines, while the other twins have -a question mark to indicate unknown zygosity. +Below, we create a matrix of relationships for monozygotic and +unknown-zygosity twins in the most recent generation of $ped2$. The +twin relationships are both represented with diverging lines from a +single point. The monozygotic twins have an additional line connecting +the diverging lines, while the other twins have a question mark to +indicate unknown zygosity. ```{r, twins} ## create twin relationships rel_df <- data.frame( - indId1 = c(210, 212), - indId2 = c(211, 213), + indId1 = c("210", "212"), + indId2 = c("211", "213"), code = c(1, 3), - family = c(2, 2) + family = c("2", "2") ) -ped2 <- Pedigree(df2, rel_df) +rel(ped2) <- upd_famid_id(with(rel_df, Rel(indId1, indId2, code, family))) plot(ped2) ``` ### Inbreeding -Another special relationship is inbreeding. Inbreeding of founders implies -the founders' parents are related (the maternal and paternal genes descended -from a single ancestral gene). -One thing we can do is add more people to the Pedigree to show this inbreeding. +Another special relationship is inbreeding. Inbreeding of founders +implies the founders' parents are related (the maternal and paternal +genes descended from a single ancestral gene). One thing we can do is +add more people to the pedigree to show this inbreeding. -To show that a pair of founders (subjects $201$ and $202$) are inbred, we -must show that their parents are siblings. -To do this, we create subjects $197$ and $198$ to be the parents of $201$ -and also create subjects $199$ and $200$ to be the parents of $202$. -To make subjects $198$ and $199$ siblings, we give *them* the same parents, -creating subjects $195$ and $196$. -This results in subjects $201$ and $202$ being first cousins, and therefore -inbred. +To show that a pair of founders (subjects $201$ and $202$) are inbred, +we must show that their parents are siblings. To do this, we create +subjects $197$ and $198$ to be the parents of $201$ and also create +subjects $199$ and $200$ to be the parents of $202$. To make subjects +$198$ and $199$ siblings, we give them the same parents, creating +subjects $195$ and $196$. This results in subjects $201$ and $202$ being +first cousins, and therefore inbred. ```{r, inbreeding} indid <- 195:202 -dadid <- c(0, 0, 0, 196, 196, 0, 197, 199) -momid <- c(0, 0, 0, 195, 195, 0, 198, 200) +dadid <- c(NA, NA, NA, 196, 196, NA, 197, 199) +momid <- c(NA, NA, NA, 195, 195, NA, 198, 200) sex <- c(2, 1, 1, 2, 1, 2, 1, 2) ped3 <- data.frame( - indId = indid, fatherId = dadid, - motherId = momid, gender = sex + id = indid, dadid = dadid, + momid = momid, sex = sex ) -ped4df <- rbind.data.frame(ped2$ped[-c(1, 2), 2:5], ped3) + +ped4df <- rbind.data.frame(df2[-c(1, 2), 2:5], ped3) ped4 <- Pedigree(ped4df) plot(ped4) ``` @@ -365,44 +376,44 @@ plot(ped4) ### Marry-ins Spouse with no child can also be specified with the $rel_df$ argument by -setting the code value to "Spouse" or "4". -If we use the Pedigree 2 from earlier and add a new spouse relationship -between the individuals $212$ and $211$ we get the following plot. +setting the code value to $Spouse$ or $4$. If we use the $ped2$ from +earlier and add a new spouse relationship between the individuals $212$ +and $211$ we get the following plot. ```{r, spouse} ## create twin relationships -rel_df <- data.frame( - indId1 = c(210, 212, 211), - indId2 = c(211, 213, 212), - code = c(1, 3, 4), - family = c(2, 2, 2) +rel_df2 <- data.frame( + id1 = "211", + id2 = "212", + code = 4, + famid = "2" ) -ped2 <- Pedigree(df2, rel_df) +new_rel <- c(rel(ped2), with(rel_df2, Rel(id1, id2, code, famid))) +rel(ped2) <- upd_famid_id(new_rel) plot(ped2) ``` -Pedigree Plot Details -=========================== +# Pedigree Plot Details -The plot method attempts to adhere to many standards -in Pedigree plotting, as presented by -[Bennet et al. 2008](https://pubmed.ncbi.nlm.nih.gov/18792771/). +The plot method attempts to adhere to many standards in pedigree +plotting, as presented by [Bennet et al. +2008](https://pubmed.ncbi.nlm.nih.gov/18792771/). -To show some other tricks with Pedigree plotting, we use Pedigree 1 -from ~sampleped~, which has 41 subjects in 4 generations, including a -generation with double first cousins. After the first marriage of $114$, they -remarried subject $113$ without children between them. If we do not -specify the marriage with the $relation$ argument, the plot method excludes -subject $113$ from the plot. The basic plot of Pedigree 1 is shown -in the figure below. +To show some other tricks with pedigree plotting, we use $ped1$ from +$sampleped$, which has 41 subjects in 4 generations, including a +generation with double first cousins. After the first marriage of $114$, +they remarried subject $113$ without children between them. If we do not +specify the marriage with the $relation$ argument, the plot method +excludes subject $113$ from the plot. The basic plot of $ped1$ is +shown in the figure below. ```{r, plotped1} -df1 <- sampleped[sampleped$family == 1, ] +df1 <- sampleped[sampleped$famid == 1, ] relate1 <- data.frame( - indId1 = 113, - indId2 = 114, + id1 = 113, + id2 = 114, code = 4, - family = 1 + famid = 1 ) ped1 <- Pedigree(df1, relate1) plot(ped1) @@ -410,11 +421,12 @@ plot(ped1) ## Align by Input Order -The plot method does a decent job aligning subjects given the order of the -subjects when the Pedigree object is made, and sometimes has to make two -copies of a subject. If we change the order of the subjects when creating -the Pedigree, we can help the plot method reduce the need to duplicate -subjects, as Figure~\ref{reordPed1} no longer has subject $110$ duplicated. +The plot method does a decent job aligning subjects given the order of +the subjects when the Pedigree object is made, and sometimes has to make +two copies of a subject. If we change the order of the subjects when +creating the Pedigree, we can help the plot method reduce the need to +duplicate subjects, as Figure\~\ref{reordPed1} no longer has subject +$110$ duplicated. ```{r, ordering} df1reord <- df1[c(35:41, 1:34), ] @@ -422,101 +434,112 @@ ped1reord <- Pedigree(df1reord, relate1) plot(ped1reord) ``` -Pedigree Utility Functions -================================= +## Modify the scales + +You can modify the colors of each modality used for the filling as well +as for the bordering by modifying the $Scales$ data.frame. + +To do so you can do as follow: + +```{r, scales modif} +scales(ped1) +fill(ped1)$fill <- c("green", "blue", "purple") +fill(ped1)$density <- c(30, 15, NA) +fill(ped1)$angle <- c(45, 0, NA) +border(ped1)$border <- c("red", "black", "orange") +plot(ped1, legend = TRUE) +``` + +# Pedigree Utility Functions -## Pedigree as a Data.Frame +## Ped as a data.frame -A main features of a Pedigree object are vectors with an element -for each subject. It is sometimes useful to extract these vectors from -the Pedigree object into a $data.frame$ with basic information that can -be used to construct a new Pedigree object. This is possible with the -$as.data.frame()$ method, as shown below. +A main features of a Pedigree object are vectors with an element for +each subject. It is sometimes useful to extract these vectors from the +Pedigree object into a $data.frame$ with basic information that can be +used to construct a new $Pedigree$ object. This is possible with the +`as.data.frame()` method, as shown below. ```{r, ped2df, eval = FALSE} -dfped2 <- as.data.frame(ped2) +dfped2 <- as.data.frame(ped(ped2)) dfped2 ``` ## Subsetting and Trimming -Pedigrees with large size can be a bottleneck for programs that run -calculations on them. The Pedixplorer package contains some routines to -identify which subjects to remove. We show how a subject -(e.g. subject 210) can be removed from -~ped2~, and how the Pedigree object is changed by verifying that the -~relation~ dataframe no longer has the twin relationship between subjects -210 and 211, as indicated by $id1$ and ~id2~. +Pedigrees with large size can be a bottleneck for programs that run +calculations on them. The Pedixplorer package contains some routines to +identify which subjects to remove. We show how a subject (e.g. subject +210) can be removed from ~ped2~, and how the Pedigree object is changed +by verifying that the ~relation~ dataframe no longer has the twin +relationship between subjects 210 and 211, as indicated by $id1$ and +~id2~. ```{r, subset} ped2_rm210 <- ped2[-10] -ped2_rm210$rel -ped2$rel +rel(ped2_rm210) +rel(ped2) ``` -The steps above only work for subsetting by the index of the -Pedigree object vectors, not by the ~id~ of the subjects themselves. -We provide *trim()*, which trims subjects from a Pedigree by -their $id$. Below is an example of removing subject 110, as done above, -then we further trim the Pedigree by a vector of subject ids. We check the -trimming by looking at the $id$ vector and the $relation$ matrix. - -```{r, trim} -ped2_trim210 <- trim(ped2, "2_210") -ped2_trim210$ped$id -ped2_trim210$rel -ped2_trim_more <- trim(ped2_trim210, c("2_212", "2_214")) -ped2_trim_more$ped$id -ped2_trim_more$rel +The steps above also works by the ~id~ of the subjects themselves.\ +We provide *subset()*, which trims subjects from a Pedigree by their +$id$ or other argument. Below is an example of removing subject 110, as +done above, then we further trim the Pedigree by a vector of subject +ids. We check the trimming by looking at the $id$ vector and the +$relation$ matrix. + +```{r, subset_more} +ped2_trim210 <- subset(ped2, "2_210", keep = FALSE) +id(ped(ped2_trim210)) +rel(ped2_trim210) +ped2_trim_more <- subset(ped2_trim210, c("2_212", "2_214"), keep = FALSE) +id(ped(ped2_trim_more)) +rel(ped2_trim_more) ``` -Shrinking -============== +# Shrinking -An additional function in Pedixplorer is *shrink()*, which shrinks a Pedigree -to a specified bit size while maintaining the maximal amount of -information for genetic linkage and association studies. Using an indicator -for availability and affected status, it removes subjects in this order: - + unavailable with no available descendants - + available and are not parents - + available who have missing affected status - + available who are unaffected - + available who are affected +An additional function in Pedixplorer is *shrink()*, which shrinks a +Pedigree to a specified bit size while maintaining the maximal amount of +information for genetic linkage and association studies. Using an +indicator for availability and affected status, it removes subjects in +this order: + unavailable with no available descendants + available and +are not parents + available who have missing affected status + available +who are unaffected + available who are affected -We show how to shrink Pedigree 1 to bit size $30$, which happens -to be the bit size after removing only the unavailable subjects. We show how -to extract the shrunken Pedigree object from the $shrink$ result, -and plot it. +We show how to shrink Pedigree 1 to bit size $30$, which happens to be +the bit size after removing only the unavailable subjects. We show how +to extract the shrunken Pedigree object from the $shrink$ result, and +plot it. ```{r, shrink1} set.seed(200) shrink1_b30 <- shrink(ped1, max_bits = 30) -print(shrink1_b30) +print(shrink1_b30[c(2:8)]) plot(shrink1_b30$pedObj) ``` -Now shrink Pedigree 1 to bit size $25$, which requires removing -subjects who are informative. If there is a tie between multiple subjects -about who to remove, the method randomly chooses one of them. With this -seed setting, the method removes subjects $140$ then $141$. +Now shrink Pedigree 1 to bit size $25$, which requires removing subjects +who are informative. If there is a tie between multiple subjects about +who to remove, the method randomly chooses one of them. With this seed +setting, the method removes subjects $140$ then $141$. ```{r, shrink2} set.seed(10) shrink1_b25 <- shrink(ped1, max_bits = 25) -shrink1_b25 +print(shrink1_b25[c(2:8)]) plot(shrink1_b25$pedObj) ``` -Select Unrelateds -======================= +# Select Unrelateds -In this section we briefly show how to use $unrelated$ to find -a set of the maximum number of unrelated available subjects from a Pedigree. -The input required is a Pedigree object and a vector indicating availability. -In some Pedigrees there are numerous sets of subjects that satisfy the maximum -number of unrelateds, so the method randomly chooses from the set. We -show two sets of subject ids that are selected by the routine and discuss -below. +In this section we briefly show how to use $unrelated$ to find a set of +the maximum number of unrelated available subjects from a Pedigree. The +input required is a Pedigree object and a vector indicating +availability. In some Pedigrees there are numerous sets of subjects that +satisfy the maximum number of unrelateds, so the method randomly chooses +from the set. We show two sets of subject ids that are selected by the +routine and discuss below. ```{r, unrelateds} ped2 <- Pedigree(df2) @@ -527,14 +550,14 @@ set2 <- unrelated(ped2) set2 ``` -We can easily verify the sets selected by $unrelated$ by -referring to Figure~\ref{fixped} and see that subjects $203$ and $206$ are -unrelated to everyone else in the Pedigree except their children. Furthermore, -we see in df2 that of these two, only subject 203 is available. Therefore, -any set of unrelateds who are available must include subject -203 and one of the these subjects: 201, 204, 206, 207, 212, -and 214, as indicated by the kinship matrix for Pedigree 2 subset to those -with availability status of 1. +We can easily verify the sets selected by $unrelated$ by referring to +Figure\~\ref{fixped} and see that subjects $203$ and $206$ are unrelated +to everyone else in the Pedigree except their children. Furthermore, we +see in df2 that of these two, only subject 203 is available. Therefore, +any set of unrelateds who are available must include subject 203 and one +of the these subjects: 201, 204, 206, 207, 212, and 214, as indicated by +the kinship matrix for Pedigree 2 subset to those with availability +status of 1. ```{r, unrelkin} df2 @@ -542,8 +565,7 @@ is_avail <- df2$id[df2$avail == 1] kin2[is_avail, is_avail] ``` -Session information -=================== +# Session information ```{r} sessionInfo() diff --git a/vignettes/alignment_details.Rmd b/vignettes/pedigree_alignment.Rmd similarity index 97% rename from vignettes/alignment_details.Rmd rename to vignettes/pedigree_alignment.Rmd index c2dcc1ef..49ee0739 100644 --- a/vignettes/alignment_details.Rmd +++ b/vignettes/pedigree_alignment.Rmd @@ -1,14 +1,14 @@ --- -title: Pedigree alignement details +title: Pedigree alignment details author: TM Therneau date: '`r format(Sys.time(),"%d %B, %Y")`' output: - rmarkdown::html_vignette: - toc: yes + BiocStyle::html_document: + toc: true toc_depth: 2 header-includes: \usepackage{tabularx} vignette: | - %\VignetteIndexEntry{Pedigree alignement details} + %\VignetteIndexEntry{Pedigree alignment details} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} --- @@ -90,7 +90,7 @@ to figure that out. The first thing to be done is to check on twins. They increase the complexity, since twins need to move together. -The `rel(ped)$code` object is a factor, so first turn that into numeric. +The `rel(ped, "code")` object is a factor, so first turn that into numeric. We create 3 vectors: - $twinrel$ is a matrix containing pairs of twins and @@ -147,10 +147,13 @@ test1 <- data.frame(id = 1:11, dadid = c(0, 0, 1, 1, 1, 0, 0, 6, 6, 6, 9), momid = c(0, 0, 2, 2, 2, 0, 0, 7, 7, 7, 4) ) -ped1 <- Pedigree(test1) +ped1 <- Pedigree(test1, missid = "0") -temp2 <- Pedigree(test1, hints = list( - order = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) +temp2 <- Pedigree(test1, missid = "0", hints = list( + horder = setNames( + c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), + 1:11 + ) )) par(mfrow = c(1, 2)) @@ -196,7 +199,7 @@ We'll look at more complex cases below when looking at the $duporder$ (order the duplicates) function, which returns a matrix with columns 1 and 2 being a pair of duplicates, and 3 a direction. Note that in the following code $idlist$ refers to the row numbers of each subject in the Pedigree, not to -their label `ped(ped)$id`. +their label `ped(ped, "id")`. ## duporder @@ -232,9 +235,9 @@ test2 <- data.frame(id = c(1:13, 21:41), 1, 2, 1, 1, 2 ) ) -ped2 <- Pedigree(test2) -ped2a <- Pedigree(test2, hints = list( - order = seq_along(test2$id) +ped2 <- Pedigree(test2, missid = "0") +ped2a <- Pedigree(test2, missid = "0", hints = list( + horder = setNames(seq_along(test2$id), test2$id) )) par(mfrow = c(1, 2)) plot(ped2a, title = "Before auto_hint") @@ -312,8 +315,8 @@ test3 <- data.frame(id = 1:14, momid = c(0, 7, 1, 1, 1, 0, 0, 12, 0, 1, 7, 0, 7, 12), affected = c(0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0) ) -ped3 <- Pedigree(test3, hints = list( - order = 1:14 +ped3 <- Pedigree(test3, missid = "0", hints = list( + horder = setNames(1:14, 1:14) )) test4 <- data.frame(id = 1:17, @@ -328,8 +331,8 @@ test4 <- data.frame(id = 1:17, ) ) -ped4 <- Pedigree(test4, hints = list( - order = 1:17 +ped4 <- Pedigree(test4, missid = "0", hints = list( + horder = setNames(1:17, 1:17) )) par(mfrow = c(1, 2)) diff --git a/vignettes/kinship_details.Rmd b/vignettes/pedigree_kinship.Rmd similarity index 93% rename from vignettes/kinship_details.Rmd rename to vignettes/pedigree_kinship.Rmd index 4f141461..921242c3 100644 --- a/vignettes/kinship_details.Rmd +++ b/vignettes/pedigree_kinship.Rmd @@ -3,8 +3,8 @@ title: Pedigree kinship() details author: TM Therneau date: '`r format(Sys.time(),"%d %B, %Y")`' output: - rmarkdown::html_vignette: - toc: yes + BiocStyle::html_document: + toc: true toc_depth: 2 header-includes: \usepackage{tabularx} vignette: | @@ -105,7 +105,7 @@ a for loop containing operations on single rows/columns. At one point below we use a vectorized version. It looks like the snippet below -```{r, eval=FALSE} +```{r, kinship_algo, eval=FALSE} for (g in 1:max(depth)) { indx <- which(depth == g) kmat[indx, ] <- (kmat[mother[indx], ] + kmat[father[indx], ]) / 2 @@ -133,7 +133,7 @@ for backwards compatability with an older version of the routine. We give founders a fake parent of subject $n+1$ who is not related to anybody (even themself); it avoids some if-then-else constructions. -## Pedigree Object +## With a Pedigree object The method for a Pedigree object is an almost trivial modification. Since the mother and father are already indexed into the id list it has @@ -179,6 +179,13 @@ If all of the subject ids are unique, across all families, the final matrix is labeled with the subject id, otherwise it is labeled with family/subject. +```{r, kinship} +library(Pedixplorer) +data(sampleped) +ped <- Pedigree(sampleped) +kinship(ped)[35:48, 35:48] +``` + ## MakeKinship The older $makekinship$ function, @@ -204,7 +211,7 @@ Consider the following rather difficult example: | id1|id2| |----|---| | 1 | 2 | -| 1 | 3 | +| 2 | 3 | | 5 | 6 | | 3 | 7 | | 10 | 9 | @@ -224,6 +231,24 @@ Finally, remove the rows that are identical. The result is a set of all pairs of observations in the matrix that correspond to monozygotic pairs. +```{r, kinship twins} +df <- data.frame( + id = c(1, 2, 3, 4, 5, 6, 7, 8), + dadid = c(4, 4, 4, NA, 4, 4, 4, NA), + momid = c(8, 8, 8, NA, 8, 8, 8, NA), + sex = c(1, 1, 1, 1, 2, 2, 1, 2) +) +rel <- data.frame( + id1 = c(1, 3, 6, 7), + id2 = c(2, 2, 5, 3), + code = c(1, 1, 1, 1) +) +ped <- Pedigree(df, rel) +plot(ped) +twins <- c(1, 2, 3, 7, 5, 6) +kinship(ped)[twins, twins] +``` + Session information =================== diff --git a/vignettes/pedigree_constructor.Rmd b/vignettes/pedigree_object.Rmd similarity index 63% rename from vignettes/pedigree_constructor.Rmd rename to vignettes/pedigree_object.Rmd index 9c1a3889..e1510ab8 100644 --- a/vignettes/pedigree_constructor.Rmd +++ b/vignettes/pedigree_object.Rmd @@ -1,14 +1,14 @@ --- -title: Pedigree() constructor -author: Terry Therneau, Elizabeth Atkinson +title: Pedigree object +author: Terry Therneau, Elizabeth Atkinson, Louis Le Nézet date: '`r format(Sys.time(),"%d %B, %Y")`' output: - rmarkdown::html_vignette: - toc: yes + BiocStyle::html_document: + toc: true toc_depth: 2 header-includes: \usepackage{tabularx} vignette: | - %\VignetteIndexEntry{Pedigree() constructor} + %\VignetteIndexEntry{Pedigree object} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} --- @@ -57,7 +57,7 @@ It accepts the following input and 1 = affected. - $status$ Optional, a numeric variable with 0 = censored and 1 = dead. - - $family$ Optional, a numeric or character vector of family + - $famid$ Optional, a numeric or character vector of family identifiers. - $steril$ Optional, a numeric variable with 0 = not steril and 1 = steril. @@ -66,16 +66,14 @@ It accepts the following input - $indId2$ identifier values of the subject pairs - $code$ relationship codification : 1 = Monozygotic twin, 2=Dizygotic twin, 3= twin of unknown zygosity, 4 = Spouse. - - $family$ Optional, a numeric or character vector of family + - $famid$ Optional, a numeric or character vector of family identifiers. - **cols_ren_ped** Optional, a named list for the renaming of the **ped_df** dataframe - **cols_ren_rel** Optional, a named list for the renaming of the **rel_df** dataframe -- **scales** Optional, a list of two dataframe with the scales to use for the - affection status and the other one for the border color (e.g availability). - **normalize** Optional, a logical to know if the data should be normalised. -- **hints** Optional, a list containing the order in which to plot the +- **hints** Optional, a list containing the horder in which to plot the individuals and the matrix of the spouse. ## Notes @@ -107,22 +105,61 @@ Status follows the pattern of the survival routines and remains an integer. Based on the dataframe given for **ped_df** and **rel_df** and their corresponding named list, the columns are renamed for them to be used correctly. +The renaming is done as follow + +```{r, column renaming} +rel_df <- data.frame( + indId1 = c("110", "204"), + indId2 = c("112", "205"), + code = c(1, 2), + family = c("1", "2") +) +cols_ren_rel <- list( + id1 = "indId1", + id2 = "indId2", + famid = "family" +) + +## Rename columns rel +old_cols <- as.vector(unlist(cols_ren_rel)) +new_cols <- names(cols_ren_rel) +cols_to_ren <- match(old_cols, names(rel_df)) +names(rel_df)[cols_to_ren[!is.na(cols_to_ren)]] <- + new_cols[!is.na(cols_to_ren)] +print(rel_df) +``` ## Normalisation If the normalisation process is selected `normalize = TRUE`, then both dataframe will be checked by their dedicated normalization function. It will ensure that all modalities are written correctly and set up the -right way. If a $family$ column is present in the dataframe, then it will +right way. If a $famid$ column is present in the dataframe, then it will be aggregated to the id of each individual and separated by an ''_'' to ensure the uniqueness of the individuals identifiers. +```{r, normalisation} +library(Pedixplorer) +data("sampleped") +cols <- c("sex", "id", "avail") +summary(sampleped[cols]) +ped <- Pedigree(sampleped) +summary(as.data.frame(ped(ped))[cols]) +``` + ### Errors present after the normalisation process If any error is detected after the normalisation process, then the normalised dataframe is gave back to the user with errors column added describing the encountered problems. +```{r, rel_df errors} +rel_wrong <- rel_df +rel_wrong$code[2] <- "A" +df <- Pedigree(sampleped, rel_wrong) +print(df) +``` + ## Validation Now that the data for the Pedigree object creation are ready, they are @@ -137,8 +174,10 @@ This validation step will check up for many errors such as: - $steril$, $status$, $available$, $affected$ only contains 0, 1 or NA values - Father are males and Mother are females - Twins have same parents and MZ twins have same sex +- Hints object is valid and ids contained is in the Ped object +- ... -Pedigree S4 slots +Pedigree Class ======================== After validation an $S4$ object is generated. @@ -146,31 +185,89 @@ This new concept make it possible to easily setup methods for this new type of object. The controls of the parameters is also more precise. -The $Pedigree$ object contains 4 slots: +The $Pedigree$ object contains 4 slots, each of them contains a different +$S4$ object containing a specific type of information used for the Pedigree +construction. -- $ped$ the Pedigree information as a dataframe with at least the -following columns: +- $ped$ a Ped object for the Pedigree information with at least the following + slots: - $id$ the identifiers of the individuals - $dadid$ the identifiers of the fathers - $momid$ the identifiers of the mothers - $sex$ the gender of each individuals -- $rel$ the relationship dataframe describing all special relationship -beetween individuals that can't be descibed in the $ped$ slot. -The minimal columns needed are : +- $rel$ a Rel object describing all special relationship beetween individuals +that can't be descibed in the $ped$ slot. +The minimal slots needed are : - $id1$ the identifiers of the 1st individuals - $id2$ the identifiers of the 2nd individuals - $code$ factor describing the type of relationship ("MZ twin", "DZ twin", "UZ twin", "Spouse") -- $scales$ a list of two elements +- $scales$ a Scales object with two slots : - $fill$ a dataframe describing which modalities in which columns correspond to an affected individuals. Plotting information such as colour, angle and density are also provided - $border$ a dataframe describing which modalities in which columns to use to plot the border of the plot elements. -- $hints$ a list of two elements - - $order$ numeric vector for the ordering of the individuals plotting +- $hints$ a Hints object with two slots : + - $horder$ numeric vector for the ordering of the individuals plotting - $spouse$ a matrix of the spouses +For more information on each object: + +- `help(Ped)` +- `help(Rel)` +- `help(Scales)` +- `help(Hints)` + +Pedigree accessors +======================== + +As the Pedigree object is now an $S4$ class, we have made available a number +of accessors. +Most of them can be used as a getter or as a setter to modify a value in the +correponding slot of the object + +## For the Pedigree object + +- Get/Set slots : ped(), rel(), scales(), hints() +- Wrapper to the Ped object: famid(), mcols() +- Wrapper of the Scales object: fill(), border() +- Wrapper of the Hints object: horder(), spouse() + +## For the Ped object + +- Given in input: id(), dadid(), momid(), famid(), sex() +- Other infos used : affected(), avail(), status() +- Computed : isinf(), kin(), useful() +- Metadata : mcols() + +## For the Rel object + +- id1(), id2(), code(), famid() + +## For the Scales object + +- fill(), border() + +## For the Hints object + +- horder(), spouse() + +## Focus on mcols() + +The mcols() accessors is the one you should use to add more +informations to your individuals. + +```{r, mcols} +ped <- Pedigree(sampleped) +mcols(ped)[8:12] +## Add new columns as a threshold if identifiers of individuals superior +## to a given threshold for example +mcols(ped)$idth <- ifelse(as.numeric(mcols(ped)$indId) < 200, "A", "B") +mcols(ped)$idth +``` + + Pedigree methods ======================== @@ -180,25 +277,47 @@ With this new S4 object comes multiple methods to ease the use of it: - summary() - print() - show() -- as.data.frame() -- setAs() -- [ -- [<- -- [[ -- [[<- -- $ -- $<- +- as.list() +- `[` - shrink() -- trim() - generate_colors() - is_informative() - kindepth() - kinship() - make_famid() +- upd_famid_id() - num_child() - unrelated() - useful_inds() +```{r, pedigree methods} +## We can change the family name based on an other column +ped <- upd_famid_id(ped, mcols(ped)$idth) + +## We can substract a given family +pedA <- ped[famid(ped) == "A"] + +## Plot it +plot(pedA, cex = 0.5) + +## Do a summary +summary(pedA) + +## Coerce it to a list +as.list(pedA)[[1]][1:3] + +## Shrink it to keep only the necessary information +lst1_s <- shrink(pedA, max_bits = 10) +plot(lst1_s$pedObj, cex = 0.5) + +## Compute the kinship individuals matrix +kinship(pedA)[1:10, 1:10] + +## Get the useful individuals +pedA <- useful_inds(pedA, informative = "AvAf") +as.data.frame(ped(pedA))["useful"][1:10,] +``` + Session information =================== diff --git a/vignettes/pedigree_plot_details.Rmd b/vignettes/pedigree_plot.Rmd similarity index 85% rename from vignettes/pedigree_plot_details.Rmd rename to vignettes/pedigree_plot.Rmd index c14cfa78..bb5293af 100644 --- a/vignettes/pedigree_plot_details.Rmd +++ b/vignettes/pedigree_plot.Rmd @@ -1,12 +1,11 @@ - --- title: Pedigree plotting details -author: TM Therneau, JP Sinnwell +author: TM Therneau, JP Sinnwell, Louis Le Nézet date: '`r format(Sys.time(),"%d %B, %Y")`' output: - rmarkdown::html_vignette: - toc: yes - toc_depth: 3 + BiocStyle::html_document: + toc: true + toc_depth: 2 header-includes: \usepackage{tabularx} vignette: | %\VignetteIndexEntry{Pedigree plotting details} @@ -19,14 +18,14 @@ Introduction The plotting function for Pedigrees has 5 tasks - 1. Gather information and check the data. An important step is the call - to align. - 2. Set up the plot region and size the symbols. The program wants to plot - circles and squares, so needs to understand the geometry of the paper, - Pedigree size, and text size to get the right shape and size symbols. - 3. Set up the plot and add the symbols for each subject - 4. Add connecting lines between spouses, and children with parents - 5. Create an invisible return value containing the locations. +1. Gather information and check the data. An important step is the call + to align. +2. Set up the plot region and size the symbols. The program wants to plot + circles and squares, so needs to understand the geometry of the paper, + Pedigree size, and text size to get the right shape and size symbols. +3. Set up the plot and add the symbols for each subject +4. Add connecting lines between spouses, and children with parents +5. Create an invisible return value containing the locations. Another task, not yet completely understood, and certainly not implemented, is how we might break a plot across multiple pages. @@ -36,10 +35,10 @@ Setup The new version of the plotting Pedigree function works in two step. - - **ped_to_plotdf()** create a dataframe from a Pedigree given containing - all the necessary information to plot all the elements of the Pedigree: - "polygons", "text", "segments", "arcs" - - **plot_from_df()** use a given dataframe and plot all the element given +- **ped_to_plotdf()** create a dataframe from a Pedigree given containing +all the necessary information to plot all the elements of the Pedigree: +"polygons", "text", "segments", "arcs" +- **plot_from_df()** use a given dataframe and plot all the element given The advantage of this two step method, is that all the plotting can be parralelised, each element can be customised by the user if necessary and @@ -50,10 +49,8 @@ family will be produced and the first one will be plotted. For more informations about those two functions, see the help page. -```{r, help} -help(ped_to_plotdf) -help(plot_fromdf) -``` +- `help(ped_to_plotdf)` +- `help(plot_fromdf)` Sizing =============== @@ -78,16 +75,16 @@ We should have at least 1/2 of `stemp2` space above and `stemp2` space below. The `stemp3` variable is the height of labels: users may use multi-line ones. Our constraints then are - 1. $(box height + label height) \times maxlev \le height$ : the boxes and - labels have to fit vertically - 2. $(box height) \times (maxlev + (maxlev-1)/2) \le height$ : at least 1/2 - a box of space between each row of boxes - 3. $(box width) \le stemp1$ in inches - 4. $(box width) \le 0.8$ unit in user coordinates, otherwise they appear - to touch - 5. User coordinates go from $min(xrange)- 1/2 box width$ to - $max(xrange) + 1/2 box width$. - 6. the box is square (in inches) +1. $(box height + label height) \times maxlev \le height$ : the boxes and + labels have to fit vertically +2. $(box height) \times (maxlev + (maxlev-1)/2) \le height$ : at least 1/2 + a box of space between each row of boxes +3. $(box width) \le stemp1$ in inches +4. $(box width) \le 0.8$ unit in user coordinates, otherwise they appear + to touch +5. User coordinates go from $min(xrange)- 1/2 box width$ to + $max(xrange) + 1/2 box width$. +6. the box is square (in inches) The first 3 of these are easy. The fourth comes into play only for very packed pedigrees. Assume that the box were the maximum size of .8 units, i.e., minimal @@ -141,7 +138,8 @@ Each polygon is named based on its shape ("square", "circle","diamond", "triangle"), the total number of division of the whole shape, and the position of the division to plot. -```{r, polygons, eval = FALSE} +```{r, polygons} +library(Pedixplorer) types <- c( "square_1_1", # Whole square "circle_2_1", # Semi circle first division @@ -205,7 +203,7 @@ will make up the symbol. These are then used again and again. The collection is kept as a list with the four elements square, circle, diamond and triangle. -Each of these is in turn a list with `max(scales(ped)$fill$order)` elements, +Each of these is in turn a list with `max(fill(ped, "order"))` elements, and each of those in turn a list of x and y coordinates. ### Circfun