diff --git a/.gitignore b/.gitignore index babe1ead..53568a67 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ *.Rproj *.so *.Rprofile -devel/* -Pedixplorer.Rcheck \ No newline at end of file +*.lintr +Pedixplorer* +devel \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..1a638a48 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,50 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "R-Debugger", + "name": "Launch R-Workspace", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}" + }, + { + "type": "R-Debugger", + "name": "Debug R-File", + "request": "launch", + "debugMode": "file", + "workingDirectory": "${workspaceFolder}", + "file": "${file}" + }, + { + "type": "R-Debugger", + "name": "Debug R-Function", + "request": "launch", + "debugMode": "function", + "workingDirectory": "${workspaceFolder}", + "file": "${file}", + "mainFunction": "main", + "allowGlobalDebugging": false + }, + { + "type": "R-Debugger", + "name": "Debug R-Package", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}", + "includePackageScopes": true, + "loadPackages": [ + "." + ] + }, + { + "type": "R-Debugger", + "request": "attach", + "name": "Attach to R process", + "splitOverwrittenOutput": true + } + ] +} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 12520819..a05ccba7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Pedixplorer -Version: 0.99.2 -Date: 2023-09-19 +Version: 1.1.0 +Date: 2023-11-15 Title: Pedigree Functions Authors@R: c( person("Louis", "Le Nézet", email="louislenezet@gmail.com", @@ -24,7 +24,9 @@ Imports: dplyr, tidyr, quadprog, - Matrix + Matrix, + S4Vectors, + testthat Description: Routines to handle family data with a Pedigree object. The initial purpose was to create correlation structures that describe family relationships such as kinship and identity-by-descent, which can be used to model family data @@ -38,39 +40,39 @@ RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr Suggests: - testthat (>= 3.0.0), diffviewer, - vdiffr, - rmarkdown, + vdiffr, + rmarkdown, BiocStyle, knitr, - withr + withr, + magick Config/testthat/edition: 3 biocViews: Software, DataRepresentation, Genetics, Alignment BugReports: https://github.com/LouisLeLezet/Pedixplorer/issues -url: https://github.com/LouisLeNezet/Pedixplorer +URL: https://github.com/LouisLeNezet/Pedixplorer BiocType: Software Collate: - 'Pedigree.R' + 'AllValidity.R' + 'AllClass.R' + 'kindepth.R' + 'kinship.R' + 'utils.R' + 'AllConstructor.R' + 'AllAccessors.R' + 'AllGeneric.R' 'Pedixplorer-package.R' 'alignped4.R' 'alignped3.R' 'alignped2.R' 'alignped1.R' - 'validity.R' - 'pedigreeClass.R' - 'check_hints.R' - 'kindepth.R' 'auto_hint.R' 'align.R' 'best_hint.R' 'bit_size.R' 'data.R' 'descendants.R' - 'make_famid.R' 'family_check.R' - 'kinship.R' - 'utils.R' 'find_unavailable.R' 'find_avail_affected.R' 'find_avail_noninform.R' @@ -79,6 +81,7 @@ Collate: 'generate_colors.R' 'ibd_matrix.R' 'is_informative.R' + 'make_famid.R' 'min_dist_inf.R' 'norm_data.R' 'num_child.R' @@ -88,6 +91,6 @@ Collate: 'plot_fromdf.R' 'plot.R' 'shrink.R' - 'trim.R' 'unrelated.R' 'useful_inds.R' +LazyData: false diff --git a/NAMESPACE b/NAMESPACE index 0e9efe7a..f1549711 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,17 +1,45 @@ # Generated by roxygen2: do not edit by hand +export("affected<-") +export("avail<-") +export("border<-") +export("dadid<-") +export("famid<-") +export("fill<-") +export("hints<-") +export("horder<-") +export("id<-") +export("isinf<-") +export("kin<-") +export("momid<-") +export("ped<-") +export("rel<-") +export("scales<-") +export("sex<-") +export("spouse<-") +export("status<-") +export("useful<-") +export(Hints) +export(Ped) export(Pedigree) +export(Rel) +export(Scales) +export(affected) export(align) -export(alignped1) -export(alignped2) -export(alignped3) -export(alignped4) +export(ancestors) +export(anchor_to_factor) export(auto_hint) +export(avail) export(best_hint) export(bit_size) -export(check_hints) +export(border) +export(circfun) +export(code) +export(dadid) export(descendants) +export(famid) export(family_check) +export(fill) export(find_avail_affected) export(find_avail_noninform) export(find_unavailable) @@ -21,12 +49,23 @@ export(generate_border) export(generate_colors) export(generate_fill) export(hints) +export(horder) export(ibd_matrix) +export(id) +export(id1) +export(id2) +export(is_disconnected) +export(is_founder) export(is_informative) +export(is_valid_scales) +export(isinf) +export(kin) export(kindepth) export(kinship) export(make_famid) export(min_dist_inf) +export(momid) +export(na_to_length) export(norm_ped) export(norm_rel) export(num_child) @@ -34,34 +73,114 @@ export(ped) export(ped_to_legdf) export(ped_to_plotdf) export(plot_fromdf) +export(polyfun) +export(polygons) export(rel) export(rel_code_to_factor) export(scales) +export(sex) export(sex_to_factor) export(shrink) -export(trim) +export(spouse) +export(status) export(unrelated) +export(upd_famid_id) +export(useful) export(useful_inds) export(vect_to_binary) +exportClasses(Hints) +exportClasses(Ped) exportClasses(Pedigree) +exportClasses(Rel) +exportClasses(Scales) +exportMethods("[") +exportMethods("affected<-") +exportMethods("avail<-") +exportMethods("border<-") +exportMethods("dadid<-") +exportMethods("famid<-") +exportMethods("fill<-") +exportMethods("hints<-") +exportMethods("horder<-") +exportMethods("id<-") +exportMethods("isinf<-") +exportMethods("kin<-") +exportMethods("mcols<-") +exportMethods("momid<-") +exportMethods("ped<-") +exportMethods("rel<-") +exportMethods("scales<-") +exportMethods("sex<-") +exportMethods("spouse<-") +exportMethods("status<-") +exportMethods("useful<-") +exportMethods(Hints) +exportMethods(Ped) exportMethods(Pedigree) +exportMethods(Rel) +exportMethods(Scales) +exportMethods(affected) exportMethods(as.data.frame) exportMethods(as.list) +exportMethods(auto_hint) +exportMethods(avail) +exportMethods(border) +exportMethods(code) +exportMethods(dadid) +exportMethods(famid) exportMethods(family_check) +exportMethods(fill) +exportMethods(find_avail_affected) +exportMethods(find_avail_noninform) +exportMethods(find_unavailable) exportMethods(fix_parents) exportMethods(generate_colors) +exportMethods(hints) +exportMethods(horder) +exportMethods(id) +exportMethods(id1) +exportMethods(id2) exportMethods(is_informative) -exportMethods(kindepth) +exportMethods(is_parent) +exportMethods(isinf) +exportMethods(kin) exportMethods(kinship) exportMethods(length) -exportMethods(make_famid) +exportMethods(mcols) exportMethods(min_dist_inf) +exportMethods(momid) exportMethods(num_child) +exportMethods(ped) +exportMethods(ped_to_legdf) +exportMethods(ped_to_plotdf) exportMethods(plot) +exportMethods(rel) +exportMethods(scales) +exportMethods(sex) +exportMethods(show) +exportMethods(shrink) +exportMethods(spouse) +exportMethods(status) +exportMethods(subset) +exportMethods(summary) +exportMethods(unrelated) +exportMethods(upd_famid_id) +exportMethods(useful) exportMethods(useful_inds) importFrom(Matrix,bdiag) importFrom(Matrix,forceSymmetric) importFrom(Matrix,sparseMatrix) +importFrom(S4Vectors,'mcols') +importFrom(S4Vectors,'mcols<-') +importFrom(S4Vectors,as.data.frame) +importFrom(S4Vectors,as.list) +importFrom(S4Vectors,cbind_mcols_for_display) +importFrom(S4Vectors,classNameForDisplay) +importFrom(S4Vectors,makeClassinfoRowForCompactPrinting) +importFrom(S4Vectors,mcols) +importFrom(S4Vectors,parallel_slot_names) +importFrom(S4Vectors,subset) +importFrom(S4Vectors,summary) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,group_by) @@ -98,10 +217,13 @@ importFrom(graphics,segments) importFrom(graphics,strheight) importFrom(graphics,strwidth) importFrom(graphics,text) -importFrom(methods,"slot<-") +importFrom(methods,'slot<-') importFrom(methods,as) +importFrom(methods,callNextMethod) +importFrom(methods,is) importFrom(methods,new) -importFrom(methods,slot) +importFrom(methods,show) +importFrom(methods,slotNames) importFrom(methods,validObject) importFrom(plyr,rbind.fill) importFrom(plyr,revalue) @@ -116,3 +238,4 @@ importFrom(tidyr,pivot_longer) importFrom(tidyr,unite) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) +importFrom(testthat, test_that) diff --git a/R/AllAccessors.R b/R/AllAccessors.R new file mode 100644 index 00000000..3d43a638 --- /dev/null +++ b/R/AllAccessors.R @@ -0,0 +1,1241 @@ +#### S4 Accessors #### +#' @include AllConstructor.R +#' @include AllClass.R +#' @importFrom S4Vectors mcols +NULL + +#### S4 Ped Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Ped object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. For each getter, you have a setter with the +#' same name, to be use as `slot(x) <- value`. +#' The `value` parameter is a vector of the same length as `x`, except +#' for the `mcols()` accessors where `value` is a list or a data.frame with +#' each elements with the same length as `x`. + +##### Id Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("id", function(x) { + standardGeneric("id") +}) + +#' @section Accessors: +#' - `id(x)` : Individuals identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("id", signature(x = "Ped"), function(x) { + x@id +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("id<-", function(x, value) { + standardGeneric("id<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("id<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("id must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for id should be: ", + "equal to the length of the Ped object" + ) + } + x@id <- as.character(value) + validObject(x) + x + } +) + +##### Dadid Accessors ##### +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("dadid", function(x) { + standardGeneric("dadid") +}) + +#' @section Accessors: +#' - `dadid(x)` : Individuals' father identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("dadid", signature(x = "Ped"), function(x) { + x@dadid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("dadid<-", function(x, value) { + standardGeneric("dadid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("dadid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("dadid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for dadid should be: ", + "equal to the length of the Ped object" + ) + } + x@dadid <- as.character(value) + validObject(x) + x + } +) + +##### Momid Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("momid", function(x) { + standardGeneric("momid") +}) + +#' @section Accessors: +#' - `momid(x)` : Individuals' mother identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("momid", signature(x = "Ped"), function(x) { + x@momid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("momid<-", function(x, value) { + standardGeneric("momid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("momid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("momid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for momid should be: ", + "equal to the length of the Ped object" + ) + } + x@momid <- as.character(value) + validObject(x) + x + } +) + +##### Famid Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("famid", function(x) { + standardGeneric("famid") +}) + +#' @section Accessors: +#' - `famid(x)` : Individuals' family identifiers +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Ped"), function(x) { + x@famid +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("famid<-", function(x, value) { + standardGeneric("famid<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("famid<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("famid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for famid should be: ", + "equal to the length of the Ped object" + ) + } + x@famid <- as.character(value) + x <- upd_famid_id(x) + validObject(x) + x + } +) + +##### Sex Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("sex", function(x) { + standardGeneric("sex") +}) + +#' @section Accessors: +#' - `sex(x)` : Individuals' gender +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("sex", signature(x = "Ped"), function(x) { + x@sex +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("sex<-", function(x, value) { + standardGeneric("sex<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("sex<-", + signature(x = "Ped", value = "character_OR_integer"), + function(x, value) { + if ( + ! is.character(value) && + ! is.integer(value) && + ! is.factor(value) + ) { + stop("sex must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for sex should be: ", + "equal to the length of the Ped object" + ) + } + x@sex <- sex_to_factor(value) + validObject(x) + x + } +) + +##### Affected Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("affected", function(x) { + standardGeneric("affected") +}) + +#' @section Accessors: +#' - `affected(x)` : Individuals' affection status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("affected", signature(x = "Ped"), function(x) { + x@affected +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("affected<-", function(x, value) { + standardGeneric("affected<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("affected<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for affected should be: ", + "equal to the length of the Ped object" + ) + } + } + x@affected <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Avail Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("avail", function(x) { + standardGeneric("avail") +}) + +#' @section Accessors: +#' - `avail(x)` : Individuals' availability status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("avail", signature(x = "Ped"), function(x) { + x@avail +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("avail<-", function(x, value) { + standardGeneric("avail<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("avail<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for avail should be: ", + "equal to the length of the Ped object" + ) + } + } + x@avail <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Status Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("status", function(x) { + standardGeneric("status") +}) + +#' @section Accessors: +#' - `status(x)` : Individuals' death status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("status", signature(x = "Ped"), function(x) { + x@status +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("status<-", function(x, value) { + standardGeneric("status<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("status<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for avail should be: ", + "equal to the length of the Ped object" + ) + } + } + x@status <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Isinf Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("isinf", function(x) { + standardGeneric("isinf") +}) + +#' @section Accessors: +#' - `isinf(x)` : Individuals' informativeness status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("isinf", signature(x = "Ped"), function(x) { + x@isinf +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("isinf<-", function(x, value) { + standardGeneric("isinf<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("isinf<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for isinf should be: ", + "equal to the length of the Ped object" + ) + } + } + x@isinf <- value + validObject(x) + x + } +) + +##### Kin Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("kin", function(x) { + standardGeneric("kin") +}) + +#' @section Accessors: +#' - `kin(x)` : Individuals' kinship distance to the +#' informative individuals +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("kin", signature(x = "Ped"), function(x) { + x@kin +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("kin<-", function(x, value) { + standardGeneric("kin<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("kin<-", + signature(x = "Ped", value = "numeric"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for kin should be: ", + "equal to the length of the Ped object" + ) + } + } + x@kin <- value + validObject(x) + x + } +) + +##### Useful Accessors ##### + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("useful", function(x) { + standardGeneric("useful") +}) + +#' @section Accessors: +#' - `useful(x)` : Individuals' usefullness status +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("useful", signature(x = "Ped"), function(x) { + x@useful +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setGeneric("useful<-", function(x, value) { + standardGeneric("useful<-") +}) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("useful<-", + signature(x = "Ped", value = "numeric_OR_logical"), + function(x, value) { + if (length(value) != length(x)) { + if (length(value) == 1) { + value <- rep(value, length(x)) + } else { + stop( + "The length of the new values for useful should be: ", + "equal to the length of the Ped object" + ) + } + } + x@useful <- vect_to_binary(value, logical = TRUE) + validObject(x) + x + } +) + +##### Mcols Accessors ##### + +#' @section Accessors: +#' - `mcols(x)` : Individuals' metadata +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("mcols<-", + signature(x = "Ped", value = "list"), + function(x, value) { + mcols(x) <- as(value, "DataFrame") + x + } +) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("mcols<-", + signature(x = "Ped", value = "data.frame"), + function(x, value) { + mcols(x) <- as(value, "DataFrame") + x + } +) + +#### S4 Rel Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Rel object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. + +##### Code Accessors ##### +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("code", function(x) { + standardGeneric("code") +}) + +#' @section Accessors: +#' - `code(x)` : Relationships' code +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("code", signature(x = "Rel"), function(x) { + x@code +}) + +##### Id1 Accessors ##### +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("id1", function(x) { + standardGeneric("id1") +}) + +#' @section Accessors: +#' - `id1(x)` : Relationships' first individuals' identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("id1", signature(x = "Rel"), function(x) { + x@id1 +}) + +##### Id2 Accessors ##### + +#' @rdname Rel-class +#' @usage NULL +#' @export +setGeneric("id2", function(x) { + standardGeneric("id2") +}) + +#' @section Accessors: +#' - `id2(x)` : Relationships' second individuals' identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("id2", signature(x = "Rel"), function(x) { + x@id2 +}) + +#' @section Accessors: +#' - `famid(x)` : Relationships' individuals' family identifier +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Rel"), function(x) { + x@famid +}) + +#' @section Accessors: +#' - `famid(x) <- value` : Set the relationships' individuals' family +#' identifier +#' - `value` : A character or integer vector of the same length as x +#' with the family identifiers +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("famid<-", + signature(x = "Rel", value = "character_OR_integer"), + function(x, value) { + if (! is.character(value) && ! is.integer(value)) { + stop("famid must be a character or integer vector") + } + if (length(value) != length(x)) { + stop( + "The length of the new values for famid should be: ", + "equal to the length of the Ped object" + ) + } + x@famid <- as.character(value) + x <- upd_famid_id(x) + validObject(x) + x + } +) + +#### S4 Pedigree Accessors #### + +#' @section Accessors: +#' For all the following accessors, the `x` parameters is a Pedigree object. +#' Each getters return a vector of the same length as `x` with the values +#' of the corresponding slot. + +#' @section Accessors: +#' - `famid(x)` : Get the family identifiers of a Pedigree object. This +#' function is a wrapper around `famid(ped(x))`. +#' +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("famid", signature(x = "Pedigree"), function(x) { + famid(ped(x)) +}) + +##### S4 ped Accessors ##### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("ped", function(object, slot) { + standardGeneric("ped") +}) + +#' @section Accessors: +#' - `ped(x, slot)` : Get the value of a specific slot of the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped", + signature(object = "Pedigree", slot = "ANY"), + function(object, slot) { + slot(object@ped, slot) + } +) + +#' @section Accessors: +#' - `ped(x)` : Get the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped", + signature(object = "Pedigree", slot = "missing"), + function(object) { + object@ped + } +) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("ped<-", function(object, slot, value) { + standardGeneric("ped<-") +}) + +#' @section Accessors: +#' - `ped(x, slot) <- value` : Set the value of a specific slot of +#' the Ped object +#' Wrapper of `slot(ped(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped<-", + signature(object = "Pedigree", slot = "ANY", value = "ANY"), + function(object, slot, value) { + ped_slots <- c( + "id", "dadid", "momid", "sex", "famid", + "steril", "status", "avail", "affected", + "kin", "useful", "isinf", + "num_child_tot", "num_child_dir", "num_child_ind" + ) + if (! slot %in% ped_slots) { + stop("slot selected: ", slot, " is not a Ped slot") + } + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + slot(object@ped, slot) <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `ped(x) <- value` : Set the Ped object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "ped<-", + signature(object = "Pedigree", slot = "missing", value = "Ped"), + function(object, slot, value) { + object@ped <- value + validObject(object) + object + } +) + +##### S4 mcols Accessors ##### + +#' @section Accessors: +#' - `mcols(x)` : Get the metadata of a Pedigree object. +#' This function is a wrapper around `mcols(ped(x))`. +#' @rdname Pedigree-class +#' @usage NULL +#' @importFrom S4Vectors 'mcols' +#' @export +setMethod( + "mcols", + signature(x = "Pedigree"), + function(x) { + mcols(x@ped) + } +) + +#' @section Accessors: +#' - `mcols(x) <- value` : Set the metadata of a Pedigree object. +#' This function is a wrapper around `mcols(ped(x)) <- value`. +#' @rdname Pedigree-class +#' @usage NULL +#' @importFrom S4Vectors 'mcols<-' +#' @export +setMethod( + "mcols<-", + signature(x = "Pedigree", value = "ANY"), + function(x, value) { + mcols(x@ped) <- value + x + } +) + +##### S4 rel Accessors ##### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("rel", function(object, slot) { + standardGeneric("rel") +}) + +#' @section Accessors: +#' - `rel(x, slot)` : Get the value of a specific slot of the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel", + signature(object = "Pedigree", slot = "ANY"), + function(object, slot) { + slot(object@rel, slot) + } +) + +#' @section Accessors: +#' - `rel(x)` : Get the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel", + signature(object = "Pedigree", slot = "missing"), + function(object, slot) { + object@rel + } +) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("rel<-", function(object, slot, value) { + standardGeneric("rel<-") +}) + +#' @section Accessors: +#' - `rel(x, slot) <- value` : Set the value of a specific slot of the +#' Rel object +#' Wrapper of `slot(rel(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel<-", + signature(object = "Pedigree", slot = "ANY", value = "ANY"), + function(object, slot, value) { + rel_cols <- c("id1", "id2", "code", "famid") + if (! slot %in% rel_cols) { + stop("slot selected: ", slot, " is not a relationship column") + } + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + slot(object@rel, slot) <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `rel(x) <- value` : Set the Rel object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "rel<-", + signature(object = "Pedigree", slot = "missing", value = "Rel"), + function(object, slot, value) { + object@rel <- value + validObject(object) + object + } +) + +##### S4 scales Accessors #### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("scales", function(object) { + standardGeneric("scales") +}) + +#' @section Accessors: +#' - `scales(x)` : Get the Scales object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("scales", signature(object = "Pedigree"), function(object) { + object@scales +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("scales<-", function(object, value) { + standardGeneric("scales<-") +}) + +#' @section Accessors: +#' - `scales(x) <- value` : Set the Scales object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "scales<-", signature(object = "Pedigree", value = "Scales"), + function(object, value) { + object@scales <- value + object + } +) + +#### S4 fill Accessors #### + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("fill", function(object) { + standardGeneric("fill") +}) + +#' @section Accessors: +#' - `fill(x)` : Get the fill data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod("fill", + signature(object = "Scales"), + function(object) { + object@fill + } +) + +#' @section Accessors: +#' - `fill(x)` : Get the fill data.frame from the Scales object. +#' Wrapper of `fill(scales(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("fill", + signature(object = "Pedigree"), + function(object) { + fill(scales(object)) + } +) + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("fill<-", function(object, value) { + standardGeneric("fill<-") +}) + +#' @section Accessors: +#' - `fill(x) <- value` : Set the fill data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod( + "fill<-", + signature(object = "Scales", value = "data.frame"), + function(object, value) { + object@fill <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `fill(x) <- value` : Set the fill data.frame from the Scales object. +#' Wrapper of `fill(scales(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "fill<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + fill(scales(object)) <- value + validObject(object) + object + } +) + +#### S4 border Accessors #### + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("border", function(object) { + standardGeneric("border") +}) + +#' @section Accessors: +#' - `border(x)` : Get the border data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod("border", + signature(object = "Scales"), + function(object) { + object@border + } +) + +#' @section Accessors: +#' - `border(x)` : Get the border data.frame from the Scales object. +#' Wrapper of `border(scales(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("border", + signature(object = "Pedigree"), + function(object) { + border(scales(object)) + } +) + +#' @rdname Scales-class +#' @usage NULL +#' @export +setGeneric("border<-", function(object, value) { + standardGeneric("border<-") +}) + +#' @section Accessors: +#' - `border(x) <- value` : Set the border data.frame +#' @rdname Scales-class +#' @usage NULL +#' @export +setMethod( + "border<-", + signature(object = "Scales", value = "data.frame"), + function(object, value) { + object@border <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `border(x) <- value` : Set the border data.frame from the Scales object. +#' Wrapper of `border(scales(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "border<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + border(scales(object)) <- value + validObject(object) + object + } +) + +#### S4 hints Accessors #### + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("hints", function(object) { + standardGeneric("hints") +}) + +#' @section Accessors: +#' - `hints(x)` : Get the Hints object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("hints", signature(object = "Pedigree"), function(object) { + object@hints +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("hints<-", function(object, value) { + standardGeneric("hints<-") +}) + +#' @section Accessors: +#' - `hints(x) <- value` : Set the Hints object +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("hints<-", signature(object = "Pedigree", value = "Hints"), function( + object, value +) { + object@hints <- value + validObject(object) + object +}) + +#### S4 horder Accessors #### + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("horder", function(object) { + standardGeneric("horder") +}) + +#' @section Accessors: +#' - `horder(x)` : Get the horder vector +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("horder", "Hints", function(object) { + object@horder +}) + +#' @section Accessors: +#' - `horder(x)` : Get the horder vector from the Hints object. +#' Wrapper of `horder(hints(x))` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("horder", "Pedigree", function(object) { + horder(hints(object)) +}) + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("horder<-", function(object, value) { + standardGeneric("horder<-") +}) + +#' @section Accessors: +#' - `horder(x) <- value` : Set the horder vector +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod( + "horder<-", + signature(object = "Hints", value = "ANY"), + function(object, value) { + if (length(value) > 0 && is.null(names(value))) { + stop("horder must be named") + } + object@horder <- value + validObject(object) + object + } +) + +#' @section Accessors: +#' - `horder(x) <- value` : Set the horder vector from the Hints object. +#' Wrapper of `horder(hints(x)) <- value` +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "horder<-", + signature(object = "Pedigree", value = "ANY"), + function(object, value) { + if (length(value) != length(object)) { + stop( + "The length of the new value should be: ", + "equal to the length of the pedigree" + ) + } + horder(hints(object)) <- value + validObject(object) + object + } +) + +#### S4 spouse Accessors #### + +#' @rdname Hints-class +#' @usage NULL +#' @export +setGeneric("spouse", function(object) { + standardGeneric("spouse") +}) + +#' @section Accessors: +#' - `spouse(x)` : Get the spouse data.frame +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("spouse", signature(object = "Hints"), function(object) { + object@spouse +}) + +#' @section Accessors: +#' - `spouse(x)` : Get the spouse data.frame from the Hints object. +#' Wrapper of `spouse(hints(x))`. +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod("spouse", signature(object = "Pedigree"), function(object) { + spouse(hints(object)) +}) + +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setGeneric("spouse<-", function(object, value) { + standardGeneric("spouse<-") +}) + +#' @section Accessors: +#' - `spouse(x) <- value` : Set the spouse data.frame +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod( + "spouse<-", + signature(object = "Hints", value = "data.frame"), + function(object, value) { + df <- check_columns(value, c("idl", "idr", "anchor")) + df$anchor <- anchor_to_factor(df$anchor) + object@spouse <- df + validObject(object) + object + } +) + +#' @section Accessors: +#' - `spouse(x) <- value` : Set the spouse data.frame from the Hints object. +#' Wrapper of `spouse(hints(x)) <- value`. +#' @rdname Pedigree-class +#' @usage NULL +#' @export +setMethod( + "spouse<-", + signature(object = "Pedigree", value = "data.frame"), + function(object, value) { + spouse(hints(object)) <- value + validObject(object) + object + } +) diff --git a/R/AllClass.R b/R/AllClass.R new file mode 100644 index 00000000..c79d2569 --- /dev/null +++ b/R/AllClass.R @@ -0,0 +1,253 @@ +#' @importFrom S4Vectors parallel_slot_names +NULL + +#### Class integer / character #### + +setClassUnion("character_OR_integer", c("character", "integer")) +setClassUnion("numeric_OR_logical", c("numeric", "logical")) +setClassUnion("missing_OR_NULL", c("missing", "NULL")) + +#### Ped Class #### + +#' Ped object +#' +#' S4 class to represent the identity informations of the individuals +#' in a pedigree. +#' +#' The minimal needed informations are `id`, `dadid`, `momid` and `sex`. +#' The other slots are used to store recognized informations. +#' Additional columns can be added to the Ped object and will be +#' stored in the `elementMetadata` slot of the Ped object. +#' +#' @slot id A character vector with the id of the individuals. +#' @slot dadid A character vector with the id of the father of the individuals. +#' @slot momid A character vector with the id of the mother of the individuals. +#' @slot sex An ordered factor vector for the sex of the individuals +#' (i.e. `male` < `female` < `unknown` < `terminated`). +#' @slot famid A character vector with the family identifiers of the +#' individuals (optional). +#' @slot steril A logical vector with the sterilisation status of the +#' individuals +#' (i.e. `FALSE` = not sterilised, `TRUE` = sterilised, `NA` = unknown). +#' @slot status A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = alive, `TRUE` = dead, `NA` = unknown). +#' @slot avail A logical vector with the availability status of the +#' individuals +#' (i.e. `FALSE` = not available, `TRUE` = available, `NA` = unknown). +#' @slot affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = not affected, `TRUE` = affected, `NA` = unknown). +#' @slot useful A logical vector with the usefulness status of the +#' individuals +#' (i.e. `FALSE` = not useful, `TRUE` = useful). +#' @slot isinf A logical vector indicating if the individual is informative +#' or not +#' (i.e. `FALSE` = not informative, `TRUE` = informative). +#' @slot kin A numeric vector with minimal kinship value between the +#' individuals and the useful individuals. +#' @slot num_child_tot A numeric vector with the total number of children +#' of the individuals. +#' @slot num_child_dir A numeric vector with the number of children +#' of the individuals. +#' @slot num_child_ind A numeric vector with the number of children +#' of the individuals. +#' @slot elementMetadata A DataFrame with the additional metadata columns +#' of the Ped object. +#' @slot metadata Meta informations about the pedigree. +#' +#' @seealso [Pedigree()] +#' @name Ped-class +#' @export +setClass("Ped", + contains = "Vector", + slots = c( + id = "character", + dadid = "character", + momid = "character", + sex = "factor", + famid = "character", + steril = "logical", + status = "logical", + avail = "logical", + affected = "logical", + useful = "logical", + kin = "numeric", + isinf = "logical", + num_child_tot = "numeric", + num_child_dir = "numeric", + num_child_ind = "numeric" + ) +) + +#' @importFrom S4Vectors parallel_slot_names +setMethod("parallel_slot_names", "Ped", + function(x) { + c( + "id", "dadid", "momid", "sex", "famid", + "steril", "status", "avail", "affected", + "useful", "kin", "isinf", + "num_child_tot", "num_child_dir", "num_child_ind", + callNextMethod() + ) + } +) + +setValidity("Ped", is_valid_ped) + +#### Rel Class #### + +#' Rel object +#' +#' S4 class to represent the special relationships in a Pedigree. +#' +#' A Rel object is a list of special relationships +#' between individuals in the pedigree. +#' It is used to create a Pedigree object. +#' The minimal needed informations are `id1`, `id2` and `code`. +#' +#' If a `famid` is provided, the individuals `id` will be aggregated +#' to the `famid` character to ensure the uniqueness of the `id`. +#' +#' @slot id1 A character vector with the id of the first individual. +#' @slot id2 A character vector with the id of the second individual. +#' @slot code An ordered factor vector with the code of the special +#' relationship. +#' +#' (i.e. `MZ twin` < `DZ twin` < `UZ twin` < `Spouse`). +#' @slot famid A character vector with the famid of the individuals. +#' +#' @seealso [Pedigree()] +#' @name Rel-class +#' @export +setClass("Rel", + contains = "Vector", + slots = c( + id1 = "character", + id2 = "character", + code = "factor", + famid = "character" + ) +) + +setMethod("parallel_slot_names", "Rel", + function(x) { + c( + "id1", "id2", "code", "famid", + callNextMethod() + ) + } +) + +setValidity("Rel", is_valid_rel) + +#### Hints Class #### + +#' Hints object +#' +#' The hints are used to specify the order of the individuals in the pedigree +#' and to specify the order of the spouses. +#' +#' @slot horder A numeric named vector with one element per subject in the +#' Pedigree. It determines the relative horizontal order of subjects within +#' a sibship, as well as the relative order of processing for the founder +#' couples. (For this latter, the female founders are ordered as though they +#' were sisters). +#' @slot spouse A data.frame with one row per hinted marriage, usually +#' only a few marriages in a Pedigree will need an added hint, for +#' instance reverse the plot order of a husband/wife pair. +#' Each row contains the identifiers of the left spouse, the right hand spouse, +#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). +#' +#' @seealso [Pedigree()] +#' @rdname Hints-class +#' @export +setClass("Hints", + representation( + horder = "numeric", + spouse = "data.frame" + ) +) + +setValidity("Hints", is_valid_hints) + +#### Scale Class #### + +#' Scales object +#' +#' A Scales object is a list of two data.frame. +#' The first one is used to represent the affection status of the individuals +#' and therefore the filling of the individuals in the pedigree plot. +#' The second one is used to represent the availability status of the +#' individuals and therefore the border color of the individuals in the +#' pedigree plot. +#' +#' @slot fill A data.frame with the informations for the affection status. +#' The columns needed are: +#' - 'order': the order of the affection to be used +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'affected': a logical value indicating if the mod correspond to an affected +#' individuals +#' - 'fill': the color to use for this mods +#' - 'density': the density of the shading +#' - 'angle': the angle of the shading +#' @slot border A data.frame with the informations for the availability status. +#' The columns needed are: +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'border': the color to use for this mods +#' +#' @seealso [Pedigree()] +#' @docType class +#' @rdname Scales-class +#' @export +setClass("Scales", + representation( + fill = "data.frame", + border = "data.frame" + ) +) + +setValidity("Scales", is_valid_scales) + +#### Pedigree Class #### +#' Pedigree object +#' +#' A pedigree is a ensemble of individuals linked to each other into +#' a family tree. +#' A Pedigree object store the informations of the individuals and the +#' special relationships between them. It also permit to store the +#' informations needed to plot the pedigree (i.e. scales and hints). +#' +#' @slot ped A Ped object for the identity informations. See [Ped()] for +#' more informations. +#' @slot rel A Rel object for the special relationships. See [Rel()] for +#' more informations. +#' @slot scales A Scales object for the filling and bordering +#' colors used in the plot. See [Scales()] for more informations. +#' @slot hints A Hints object for the ordering of the +#' individuals in the plot. See [Hints()] for more informations. +#' +#' @seealso [Pedigree()], [Ped()], [Rel()], [Scales()], [Hints()] +#' @rdname Pedigree-class +#' @include AllValidity.R +#' @export +setClass("Pedigree", + representation( + ped = "Ped", # identity data + rel = "Rel", # special relationships + scales = "Scales", # scales for the plot + hints = "Hints" # hints for the plot + ) +) + +setValidity("Pedigree", is_valid_pedigree) diff --git a/R/AllConstructor.R b/R/AllConstructor.R new file mode 100644 index 00000000..2f95ecb8 --- /dev/null +++ b/R/AllConstructor.R @@ -0,0 +1,889 @@ +#' @importFrom methods callNextMethod is new 'slot<-' validObject +NULL + +#' NA to specific length +#' +#' Check if all value in a vector is `NA`. +#' If so set all of them to a new value matching the length +#' of the template. +#' If not check that the size of the vector is equal to +#' the template. +#' +#' @param x The vector to check. +#' @param temp A template vector to use to determine the length. +#' @param value The value to use to fill the vector. +#' +#' @return A vector with the same length as temp. +#' @keywords internal +#' @examples +#' +#' na_to_length(NA, rep(0, 4), "NewValue") +#' na_to_length(c(1, 2, 3, NA), rep(0, 4), "NewValue") +#' @export +na_to_length <- function(x, temp, value) { + if (length(x) == 1 && all(is.na(x))) { + rep(value, length(temp)) + } else { + if (length(x) != length(temp)) { + stop("The length of the vector should be: ", + "equal to the length of the template vector" + ) + } + x + } +} + +#### S4 Ped constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide a vector of the same size for each slot +#' or a `data.frame` with the corresponding columns. +#' +#' The metadata will correspond to the columns that do not correspond +#' to the Ped slots. +#' +#' @param obj A character vector with the id of the individuals or a +#' `data.frame` with all the informations in corresponding columns. +#' @param dadid A vector containing for each subject, the identifiers of the +#' biologicals fathers. +#' @param momid A vector containing for each subject, the identifiers of the +#' biologicals mothers. +#' @param famid A character vector with the family identifiers of the +#' individuals. If provide, will be aggregated to the individuals +#' identifiers separated by an underscore. +#' @param sex A character, factor or numeric vector corresponding to +#' the gender of the individuals. This will be transformed to an ordered factor +#' with the following levels: `male` < `female` < `unknown` < `terminated +#' The following values are recognized: +#' - character() or factor() : "f", "m", "woman", "man", "male", "female", +#' "unknown", "terminated" +#' - numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +#' @param steril A logical vector with the sterilisation status of the +#' individuals +#' (i.e. `FALSE` = not sterilised, `TRUE` = sterilised, `NA` = unknown). +#' @param status A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = alive, `TRUE` = dead, `NA` = unknown). +#' @param avail A logical vector with the availability status of the +#' individuals +#' (i.e. `FALSE` = not available, `TRUE` = available, `NA` = unknown). +#' @param affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = unaffected, `TRUE` = affected, `NA` = unknown). +#' @param missid A character vector with the missing values identifiers. +#' All the id, dadid and momid corresponding to those values will be set +#' to `NA_character_`. +#' @inheritParams check_columns +#' @return A Ped object. +#' @rdname Ped-class +#' @export +#' @include utils.R +#' @usage NULL +setGeneric("Ped", signature = "obj", function(obj, ...) { + standardGeneric("Ped") +}) + +#' @rdname Ped-class +#' @examples +#' +#' data(sampleped) +#' Ped(sampleped) +#' @export +setMethod("Ped", "data.frame", + function(obj, cols_used_init = FALSE, cols_used_del = FALSE) { + col_need <- c("id", "sex", "dadid", "momid") + col_to_use <- c( + "famid", "steril", "status", "avail", "affected", + "kin", "isinf", "useful" + ) + col_used <- c( + "num_child_tot", "num_child_dir", "num_child_ind", + "elementMetadata" + ) + df <- check_columns( + obj, col_need, col_used, col_to_use, + others_cols = TRUE, cols_to_use_init = TRUE, + cols_used_init = cols_used_init, cols_used_del = cols_used_del + ) + + df$famid[is.na(df$famid)] <- NA_character_ + + df$steril <- vect_to_binary(df$steril, logical = TRUE) + df$status <- vect_to_binary(df$status, logical = TRUE) + df$avail <- vect_to_binary(df$avail, logical = TRUE) + df$affected <- vect_to_binary(df$affected, logical = TRUE) + myped <- with(df, Ped( + obj = id, sex = sex, dadid = dadid, momid = momid, + famid = famid, + steril = steril, status = status, avail = avail, + affected = affected + )) + mcols(myped) <- df[, + colnames(df)[!colnames(df) %in% slotNames(myped)] + ] + rownames(mcols(myped)) <- df$id + myped + } +) + +#' @rdname Ped-class +#' @examples +#' +#' Ped( +#' obj = c("1", "2", "3", "4", "5", "6"), +#' dadid = c("4", "4", "6", "0", "0", "0"), +#' momid = c("5", "5", "5", "0", "0", "0"), +#' sex = c(1, 2, 3, 1, 2, 1), +#' missid = "0" +#' ) +#' @export +setMethod("Ped", "character_OR_integer", + function( + obj, sex, dadid, momid, famid = NA, + steril = NA, status = NA, avail = NA, + affected = NA, missid = NA_character_ + ) { + famid <- na_to_length(famid, obj, NA_character_) + id <- as.character(obj) + dadid <- as.character(dadid) + momid <- as.character(momid) + + id[id %in% missid] <- NA_character_ + dadid[dadid %in% missid] <- NA_character_ + momid[momid %in% missid] <- NA_character_ + + sex <- sex_to_factor(sex) + + steril <- na_to_length(steril, id, NA) + status <- na_to_length(status, id, NA) + avail <- na_to_length(avail, id, NA) + affected <- na_to_length(affected, id, NA) + useful <- na_to_length(NA, id, NA) + isinf <- na_to_length(NA, id, NA) + kin <- na_to_length(NA, id, NA_real_) + + df_child <- num_child(id, dadid, momid, rel_df = NULL) + + new( + "Ped", + id = id, dadid = dadid, momid = momid, famid = famid, + sex = sex, steril = steril, status = status, avail = avail, + affected = affected, + useful = useful, kin = kin, isinf = isinf, + num_child_tot = df_child$num_child_tot, + num_child_dir = df_child$num_child_dir, + num_child_ind = df_child$num_child_ind + ) + } +) + +#' @rdname Ped-class +#' @usage NULL +#' @export +setMethod("Ped", "missing", + function(obj) { + new("Ped") + } +) + +#### S4 Rel constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide a vector of the same size for each slot +#' or a `data.frame` with the corresponding columns. +#' +#' @param obj A character vector with the id of the first individuals of each +#' pairs or a `data.frame` with all the informations in corresponding columns. +#' @param id2 A character vector with the id of the second individuals of each +#' pairs +#' @param code A character, factor or numeric vector corresponding to +#' the relation code of the individuals: +#' - MZ twin = Monozygotic twin +#' - DZ twin = Dizygotic twin +#' - UZ twin = twin of unknown zygosity +#' - Spouse = Spouse +#' The following values are recognized: +#' - character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with +#' of without space between the words. The case is not important. +#' - numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +#' @inheritParams Ped +#' +#' @return A Rel object. +#' @rdname Rel-class +#' @export +#' @usage NULL +setGeneric("Rel", signature = "obj", function(obj, ...) { + standardGeneric("Rel") +}) + +#' @rdname Rel-class +#' @export +#' @examples +#' +#' rel_df <- data.frame( +#' id1 = c("1", "2", "3"), +#' id2 = c("2", "3", "4"), +#' code = c(1, 2, 3) +#' ) +#' Rel(rel_df) +setMethod("Rel", "data.frame", + function(obj) { + col_need <- c("id1", "id2", "code") + col_to_use <- c("famid") + df <- check_columns( + obj, col_need, NULL, col_to_use, + cols_to_use_init = TRUE + ) + + with(df, Rel( + obj = id1, id2 = id2, code = code, famid = as.character(famid) + )) + } +) + +#' @rdname Rel-class +#' @export +#' @examples +#' +#' Rel( +#' obj = c("1", "2", "3"), +#' id2 = c("2", "3", "4"), +#' code = c(1, 2, 3) +#' ) +setMethod("Rel", "character_OR_integer", + function( + obj, id2, code, famid = NA_character_ + ) { + famid <- na_to_length(famid, obj, NA_character_) + id1 <- as.character(obj) + id2 <- as.character(id2) + + ## Reorder id1 and id2 + ## id1 is the first in the alphabetic order + ## id2 is the second in the alphabetic order + id1o <- pmin(id1, id2) + id2o <- pmax(id1, id2) + + code <- rel_code_to_factor(code) + + rel <- new( + "Rel", + id1 = id1o, id2 = id2o, code = code, famid = famid + ) + rel + } +) + +#' @rdname Rel-class +#' @usage NULL +#' @export +setMethod("Rel", "missing", + function(obj) { + new("Rel") + } +) + +#### S4 Hints constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You either need to provide **horder** or **spouse** in +#' the dedicated parameters (together or separately), or inside a list. +#' +#' @param horder A named numeric vector with one element per subject in the +#' Pedigree. It determines the relative horizontal order of subjects within a +#' sibship, as well as the relative order of processing for the founder couples. +#' (For this latter, the female founders are ordered as though +#' they were sisters). +#' The names of the vector should be the individual identifiers. +#' @param spouse A data.frame with one row per hinted marriage, usually only +#' a few marriages in a pedigree will need an added hint, for instance reverse +#' the plot order of a husband/wife pair. +#' Each row contains the id of the left spouse (i.e. `idl`), the id of the +#' right hand spouse (i.e. `idr`), and the anchor (i.e : `anchor` : +#' `1` = left, `2` = right, `0` = either). +#' Children will preferentially appear under the parents of the anchored spouse. +#' +#' @return A Hints object. +#' @rdname Hints-class +#' @export +setGeneric("Hints", function(horder, spouse) { + standardGeneric("Hints") +}) + +#' @rdname Hints-class +#' @usage NULL +#' @export +setMethod("Hints", + signature(horder = "Hints", spouse = "missing_OR_NULL"), + function(horder, spouse) { + hints + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' list( +#' horder = c("1" = 1, "2" = 2, "3" = 3), +#' spouse = data.frame( +#' idl = c("1", "2"), +#' idr = c("2", "3"), +#' anchor = c(1, 2) +#' ) +#' ) +#' ) +setMethod("Hints", + signature(horder = "list", spouse = "missing_OR_NULL"), + function(horder, spouse) { + if (all(!c("horder", "spouse") %in% names(horder))) { + stop("hints is a list, ", + "but doesn't contains horder or spouse slot" + ) + } + if ("horder" %in% names(horder)) { + horder <- horder$horder + } else { + horder <- NULL + } + if ("spouse" %in% names(horder)) { + spouse <- horder$spouse + } else { + spouse <- NULL + } + Hints(horder, spouse) + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' horder = c("1" = 1, "2" = 2, "3" = 3), +#' spouse = data.frame( +#' idl = c("1", "2"), +#' idr = c("2", "3"), +#' anchor = c(1, 2) +#' ) +#' ) +setMethod("Hints", + signature(horder = "numeric", spouse = "data.frame"), + function(horder, spouse) { + if (length(horder) > 0 && ( + is.null(names(horder)) || any(!is.numeric(horder)) + )) { + stop("horder must be a named numeric vector") + } + spouse <- check_columns( + spouse, c("idl", "idr", "anchor"), NULL, NULL, + cols_to_use_init = TRUE + ) + spouse$anchor <- anchor_to_factor(spouse$anchor) + new("Hints", horder = horder, spouse = spouse) + } +) + +#' @rdname Hints-class +#' @export +#' @examples +#' +#' Hints( +#' horder = c("1" = 1, "2" = 2, "3" = 3) +#' ) +setMethod("Hints", + signature(horder = "numeric", spouse = "missing_OR_NULL"), + function(horder, spouse) { + if (length(horder) > 0 && ( + is.null(names(horder)) || any(!is.numeric(horder)) + )) { + stop("horder must be a named numeric vector") + } + dfe <- data.frame("idl" = character(), "idr" = character(), + "anchor" = factor() + ) + new("Hints", horder = horder, spouse = dfe) + } +) + +#' @rdname Hints-class +#' @export +#' @usage NULL +setMethod("Hints", + signature(horder = "missing_OR_NULL", spouse = "missing_OR_NULL"), + function(horder, spouse) { + dfe <- data.frame("idl" = character(), "idr" = character(), + "anchor" = factor() + ) + new("Hints", horder = numeric(), spouse = dfe) + } +) + +#### S4 Scales constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' You need to provide both **fill** and **border** in the dedicated parameters. +#' However this is usually done using the [generate_colors()] function with a +#' Pedigree object. +#' +#' @param fill A data.frame with the informations for the affection status. +#' The columns needed are: +#' - 'order': the order of the affection to be used +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'affected': a logical value indicating if the mod correspond to an affected +#' individuals +#' - 'fill': the color to use for this mods +#' - 'density': the density of the shading +#' - 'angle': the angle of the shading +#' @param border A data.frame with the informations for the availability status. +#' The columns needed are: +#' - 'column_values': name of the column containing the raw values in the +#' Ped object +#' - 'column_mods': name of the column containing the mods of the transformed +#' values in the Ped object +#' - 'mods': all the different mods +#' - 'labels': the corresponding labels of each mods +#' - 'border': the color to use for this mods +#' +#' @return A Scales object. +#' @seealso [generate_colors()] +#' @rdname Scales-class +#' @export +setGeneric("Scales", function(fill, border) { + standardGeneric("Scales") +}) + +#' @rdname Scales-class +#' @export +#' @examples +#' +#' Scales( +#' fill = data.frame( +#' order = 1, +#' column_values = "affected", +#' column_mods = "affected_mods", +#' mods = c(0, 1), +#' labels = c("unaffected", "affected"), +#' affected = c(FALSE, TRUE), +#' fill = c("white", "red"), +#' density = c(NA, 20), +#' angle = c(NA, 45) +#' ), +#' border = data.frame( +#' column_values = "avail", +#' column_mods = "avail_mods", +#' mods = c(0, 1), +#' labels = c("not available", "available"), +#' border = c("black", "blue") +#' ) +#' ) +setMethod("Scales", + signature(fill = "data.frame", border = "data.frame"), + function(fill, border) { + fill <- check_columns( + fill, c( + "order", "column_values", "column_mods", "mods", + "labels", "affected", "fill", "density", "angle" + ), NULL, NULL + ) + border <- check_columns( + border, + c("column_values", "column_mods", "mods", "labels", "border"), + NULL, NULL + ) + new("Scales", fill = fill, border = border) + } +) + +#' @rdname Scales-class +#' @export +#' @usage NULL +setMethod("Scales", + signature(fill = "missing", border = "missing"), + function(fill, border) { + fill <- data.frame( + order = numeric(), + column_values = character(), + column_mods = character(), + mods = numeric(), + labels = character(), + affected = logical(), + fill = character(), + density = numeric(), + angle = numeric() + ) + border <- data.frame( + column_values = character(), + column_mods = character(), + mods = numeric(), + labels = character(), + border = character() + ) + new("Scales", fill = fill, border = border) + } +) + +#### S4 Pedigree constructor #### + +#' @description +#' +#' ## Constructor : +#' +#' Main constructor of the package. +#' This constructor help to create a `Pedigree` object from +#' different `data.frame` or a set of vectors. +#' +#' If any errors are found in the data, the function will return +#' the data.frame with the errors of the Ped object and the +#' Rel object. +#' +#' @details +#' If the normalization is set to `TRUE`, then the data will be +#' standardized using the function `norm_ped()` and `norm_rel()`. +#' +#' If a data.frame is given, the columns names needed will depend if +#' the normalization is selected or not. If the normalization is selected, +#' the columns names needed are as follow and if not the columns names +#' needed are in parenthesis: +#' - `indID`: the individual identifier (`id`) +#' - `fatherId`: the identifier of the biological father (`dadid`) +#' - `motherId`: the identifier of the biological mother (`momid`) +#' - `gender`: the sex of the individual (`sex`) +#' - `family`: the family identifier of the individual (`famid`) +#' - `sterilisation`: the sterilisation status of the individual (`steril`) +#' - `available`: the availability status of the individual (`avail`) +#' - `vitalStatus`: the death status of the individual (`status`) +#' - `affection`: the affection status of the individual (`affected`) +#' - `...`: other columns that will be stored in the `elementMetadata` slot +#' +#' The minimum columns required are : +#' - `indID` / `id` +#' - `fatherId` / `dadid` +#' - `motherId` / `momid` +#' - `gender` / `sex` +#' +#' The `family` / `famid` column can also be used to specify the family of the +#' individuals and will be merge to the `indId` / `id` field separated by an +#' underscore. +#' The columns `sterilisation`, `available`, `vitalStatus`, `affection` +#' will be transformed with the [vect_to_binary()] function when the +#' normalisation is selected. +#' If you do not use the normalisation, the columns will be checked to +#' be `0` or `1`. +#' +#' @param obj A vector of the individuals identifiers or a data.frame +#' with the individuals informations. See [Ped()] for more informations. +#' @param rel_df A data.frame with the special relationships between +#' individuals. See [Rel()] for more informations. +#' The minimum columns required are `id1`, `id2` and `code`. +#' The `famid` column can also be used to specify the family +#' of the individuals. +#' If a matrix is given, the columns needs to be ordered as +#' `id1`, `id2`, `code` and `famid`. +#' The code values are: +#' - `1` = Monozygotic twin +#' - `2` = Dizygotic twin +#' - `3` = twin of unknown zygosity +#' - `4` = Spouse +#' +#' The value relation code recognized by the function are the one defined +#' by the [rel_code_to_factor()] function. +#' +#' @param hints A Hints object or a named list containing `horder` and +#' `spouse`. +#' @param cols_ren_ped A named list with the columns to rename for the +#' pedigree dataframe. This is useful if you want to use a dataframe with +#' different column names. The names of the list should be the new column +#' names and the values should be the old column names. The default values +#' are to be used with `normalize = TRUE`. +#' @param cols_ren_rel A named list with the columns to rename for the +#' relationship matrix. This is useful if you want to use a dataframe with +#' different column names. The names of the list should be the new column +#' names and the values should be the old column names. +#' @param normalize A logical to know if the data should be normalised. +#' @inheritDotParams generate_colors +#' @inheritParams Ped +#' @inheritParams is_informative +#' +#' @return A Pedigree object. +#' @export +#' @rdname Pedigree-class +#' @seealso [Ped()], [Rel()], [Scales()] +setGeneric("Pedigree", signature = "obj", + function(obj, ...) standardGeneric("Pedigree") +) + +#' @export +#' @rdname Pedigree-class +#' @param affected A logical vector with the affection status of the +#' individuals +#' (i.e. `FALSE` = unaffected, `TRUE` = affected, `NA` = unknown). +#' Can also be a data.frame with the same length as `obj`. If it is a +#' matrix, it will be converted to a data.frame and the columns will be +#' named after the `col_aff` argument. +#' @details +#' If `affected` is a data.frame, **col_aff** will be overwritten by the column +#' names of the data.frame. +#' @inheritParams generate_colors +#' @examples +#' +#' Pedigree( +#' obj = c("1", "2", "3", "4", "5", "6"), +#' dadid = c("4", "4", "6", "0", "0", "0"), +#' momid = c("5", "5", "5", "0", "0", "0"), +#' sex = c(1, 2, 3, 1, 2, 1), +#' avail = c(0, 1, 0, 1, 0, 1), +#' affected = matrix(c( +#' 0, 1, 0, 1, 0, 1, +#' 1, 1, 1, 1, 1, 1 +#' ), ncol = 2), +#' col_aff = c("aff1", "aff2"), +#' missid = "0", +#' rel_df = matrix(c( +#' "1", "2", 2 +#' ), ncol = 3, byrow = TRUE), +#' ) +setMethod("Pedigree", "character_OR_integer", function(obj, dadid, momid, + sex, famid = NA, avail = NULL, affected = NULL, status = NULL, + steril = NULL, rel_df = NULL, + missid = NA_character_, col_aff = "affection", normalize = TRUE, ... +) { + n <- length(obj) + ## Code transferred from noweb to markdown vignette. + ## Sections from the noweb/vignettes are noted here with + ## Doc: Error and Data Checks + ## Doc: Errors1 + if (length(momid) != n) stop("Mismatched lengths, id and momid") + if (length(dadid) != n) stop("Mismatched lengths, id and momid") + if (length(sex) != n) stop("Mismatched lengths, id and sex") + if (length(steril) != n & !is.null(steril)) { + stop("Mismatched lengths, id and steril") + } + + if (length(avail) != n & !is.null(avail)) { + stop("Mismatched lengths, id and avail") + } + if (length(status) != n & !is.null(status)) { + stop("Mismatched lengths, id and status") + } + + ped_df <- data.frame( + family = famid, + indId = obj, + fatherId = dadid, + motherId = momid, + gender = sex + ) + + if (is.null(affected)) { + ped_df[col_aff] <- NA + } else if (any(!is.na(affected))) { + if (is.vector(affected)) { + ped_df[col_aff] <- affected + } else if (is.data.frame(affected)) { + ped_df <- cbind(ped_df, affected) + col_aff <- colnames(affected) + } else if (is.matrix(affected)) { + affected <- as.data.frame(affected) + if (is.null(colnames(affected))) { + if (length(col_aff) != ncol(affected)) { + stop("The length of col_aff should be equal to the number", + "of columns of affected" + ) + } + colnames(affected) <- col_aff + } + ped_df <- cbind(ped_df, affected) + col_aff <- colnames(affected) + } else { + stop("Affected must be a vector or a data.frame, got:", + class(affected) + ) + } + } + if (any(!is.na(avail))) { + ped_df$available <- avail + } + if (any(!is.na(status))) { + ped_df$vitalStatus <- status + } + if (any(!is.na(steril))) { + ped_df$sterilisation <- steril + } + if (is.null(rel_df)) { + rel_df <- data.frame( + id1 = character(), + id2 = character(), + code = numeric(), + family = character() + ) + } + Pedigree(ped_df, rel_df = rel_df, + missid = missid, col_aff = col_aff, ... + ) +}) + +#' @export +#' @rdname Pedigree-class +#' @examples +#' +#' data(sampleped) +#' Pedigree(sampleped) +setMethod("Pedigree", "data.frame", function( + obj = data.frame( + indId = character(), + fatherId = character(), + motherId = character(), + gender = numeric(), + family = character(), + available = numeric(), + vitalStatus = numeric(), + affection = numeric(), + sterilisation = numeric() + ), + rel_df = data.frame( + id1 = character(), + id2 = character(), + code = numeric(), + famid = character() + ), + cols_ren_ped = list( + indId = "id", + fatherId = "dadid", + motherId = "momid", + family = "famid", + gender = "sex", + sterilisation = "steril", + affection = "affected", + available = "avail", + vitalStatus = "status" + ), + cols_ren_rel = list( + id1 = "indId1", + id2 = "indId2", + famid = "family" + ), + hints = list( + horder = NULL, + spouse = NULL + ), + normalize = TRUE, + missid = NA_character_, + col_aff = "affection", + ... +) { + ped_df <- obj + if (!is.data.frame(ped_df)) { + stop("ped_df must be a data.frame") + } + + if (is.matrix(rel_df)) { + rel_mat <- rel_df + rel_df <- data.frame( + id1 = rel_mat[, 1], + id2 = rel_mat[, 2], + code = rel_mat[, 3] + ) + if (dim(rel_mat)[2] > 3) { + rel_df$family <- rel_mat[, 4] + } + } + + if (!is.data.frame(rel_df)) { + stop("relation must be a matrix or a data.frame") + } + + ## Rename columns ped + old_cols <- as.vector(unlist(cols_ren_ped)) + new_cols <- names(cols_ren_ped) + cols_to_ren <- match(old_cols, names(ped_df)) + names(ped_df)[cols_to_ren[!is.na(cols_to_ren)]] <- + new_cols[!is.na(cols_to_ren)] + + ## Rename columns rel + old_cols <- as.vector(unlist(cols_ren_rel)) + new_cols <- names(cols_ren_rel) + cols_to_ren <- match(old_cols, names(rel_df)) + names(rel_df)[cols_to_ren[!is.na(cols_to_ren)]] <- + new_cols[!is.na(cols_to_ren)] + + ## Set family, id, dadid and momid to character + to_char <- c("family", "indId", "fatherId", "motherId") + to_char <- colnames(ped_df)[colnames(ped_df) %in% to_char] + ped_df[to_char] <- lapply(ped_df[to_char], as.character) + + ## Normalise the data before creating the object + if (normalize) { + ped_df <- norm_ped(ped_df, missid = missid) + rel_df <- norm_rel(rel_df, missid = missid) + } else { + cols_need <- c("id", "dadid", "momid", "sex") + cols_to_use <- c("steril", "avail", "famid", "status", "affected") + ped_df <- check_columns( + ped_df, cols_need, "", cols_to_use, + others_cols = TRUE, cols_to_use_init = TRUE + ) + cols_need <- c("id1", "id2", "code") + cols_to_use <- c("famid") + rel_df <- check_columns( + rel_df, cols_need, "", cols_to_use, cols_to_use_init = TRUE + ) + } + if (any(!is.na(ped_df$error))) { + warning("The Pedigree informations are not valid. ", + "Here is the normalised Pedigree informations ", + "with the identified problems" + ) + return(ped_df) + } + + if (any(!is.na(rel_df$error))) { + warning("The relationship informations are not valid. ", + "Here is the normalised relationship informations ", + "with the identified problems" + ) + return(rel_df) + } + + ped <- Ped(ped_df) + rel <- Rel(rel_df) + hints <- Hints(hints) + scales <- Scales() + + ## Create the object + ped <- new("Pedigree", + ped = ped, rel = rel, + hints = hints, scales = scales + ) + generate_colors(ped, col_aff = col_aff, ...) +} +) + +#' @export +#' @rdname Pedigree-class +#' @usage NULL +setMethod("Pedigree", "missing", function(obj) { + ped <- new("Pedigree", + ped = Ped(), rel = Rel(), + hints = Hints(), scales = Scales() + ) + ped +}) diff --git a/R/AllGeneric.R b/R/AllGeneric.R new file mode 100644 index 00000000..1d8c3bd1 --- /dev/null +++ b/R/AllGeneric.R @@ -0,0 +1,402 @@ +#### S4 Ped generics #### + +#' @section Generics: +#' - `summary(x)`: Compute the summary of a Ped object +#' @export +#' @importFrom S4Vectors classNameForDisplay +#' @importFrom S4Vectors summary +#' @rdname Ped-class +#' @usage NULL +setMethod("summary", "Ped", + function(object) { + object_class <- classNameForDisplay(object) + object_len <- length(object) + object_mcols <- mcols(object, use.names = FALSE) + object_nmc <- if (is.null(object_mcols)) 0L else ncol(object_mcols) + paste0(object_class, " object with ", object_len, " ", + ifelse(object_len == 1L, "individual", "individuals"), + " and ", object_nmc, " metadata ", + ifelse(object_nmc == 1L, "column", "columns") + ) + } +) + +#' @section Generics: +#' - `show(x)`: Convert the Ped object to a data.frame +#' and print it with its summary. +#' @export +#' @importFrom S4Vectors cbind_mcols_for_display +#' @importFrom methods show +#' @importFrom S4Vectors makeClassinfoRowForCompactPrinting +#' @rdname Ped-class +#' @usage NULL +setMethod("show", "Ped", + function(object) { + cat(summary(object), ":\n", sep = "") + df <- as.data.frame(object) + df <- df[, !colnames(df) %in% colnames(mcols(object))] + out <- S4Vectors::cbind_mcols_for_display(df, object) + class_df <- lapply(df, class) + classinfo <- S4Vectors::makeClassinfoRowForCompactPrinting( + object, class_df + ) + stopifnot(identical(colnames(classinfo), colnames(out))) + out <- rbind(classinfo, out) + print(out, quote = FALSE, right = TRUE) + } +) + +#' @section Generics: +#' - `as.list(x)`: Convert a Ped object to a list with +#' the metadata columns at the end. +#' @rdname Ped-class +#' @importFrom methods slotNames +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Ped", function(x) { + to <- list() + for (slot in slotNames(x)) { + if (slot %in% c("metadata", "elementMetadata")) { + next + } else { + to[[slot]] <- slot(x, slot) + } + } + # Add the metadata in separate slot + c(to, as.list(mcols(x))) +}) + +#' @section Generics: +#' - `as.data.frame(x)`: Convert a Ped object to a data.frame with +#' the metadata columns at the end. +#' @rdname Ped-class +#' @importFrom S4Vectors as.data.frame +#' @export +#' @usage NULL +setMethod("as.data.frame", "Ped", function(x) { + lst <- as.list(x) + if (length(unique(lapply(lst, length))) != 1) { + stop("All slots should have the same length") + } + ped_df <- data.frame(lst) + rownames(ped_df) <- ped_df$id + ped_df +}) + +#' @section Generics: +#' - `subset(x, i, del_parents = FALSE, keep = TRUE)`: Subset a Ped object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `del_parents` : A logical value indicating if the parents +#' of the individuals should be deleted. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Ped-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Ped", function(x, i, del_parents = FALSE, keep = TRUE) { + if (is.factor(i)) { + i <- as.character(i) + } + if (is.character(i)) { + i <- x@id %in% i + } else if (!is.numeric(i) & !is.logical(i)) { + stop("i must be a character, an integer or a logical vector") + } + if (!keep) { + i <- !i + } + col_computed <- c( + "num_child_tot", "num_child_dir", "num_child_ind" + ) + ped_df <- as.data.frame(x)[i, ] + ped_df <- ped_df[, ! colnames(ped_df) %in% col_computed] + + if (del_parents) { + ped_df$dadid[!ped_df$dadid %in% ped_df$id] <- NA_character_ + ped_df$momid[!ped_df$momid %in% ped_df$id] <- NA_character_ + } + new_ped <- Ped(ped_df) + validObject(new_ped) + new_ped +}) + +#### S4 Rel generics #### + +#' @section Generics: +#' - `summary(x)`: Compute the summary of a Rel object +#' @export +#' @importFrom S4Vectors classNameForDisplay +#' @importFrom S4Vectors summary +#' @rdname Rel-class +#' @usage NULL +setMethod("summary", "Rel", + function(object) { + object_class <- classNameForDisplay(object) + object_len <- length(object) + tbl <- table(object@code) + if (length(tbl) == 0L) { + sum_codes <- 0L + } else { + sum_codes <- paste0( + paste(tbl, levels(object@code)), collapse = ", " + ) + } + paste0(object_class, " object with ", object_len, " ", + ifelse(object_len == 1L, "relationship", "relationships"), + ifelse(sum_codes == 0L, "", paste0("with ", sum_codes)) + ) + } +) + +#' @section Generics: +#' - `show(x)`: Convert the Rel object to a data.frame +#' and print it with its summary. +#' @export +#' @importFrom S4Vectors cbind_mcols_for_display +#' @importFrom S4Vectors makeClassinfoRowForCompactPrinting +#' @importFrom methods show +#' @rdname Rel-class +#' @usage NULL +setMethod("show", signature(object = "Rel"), + function(object) { + cat(summary(object), ":\n", sep = "") + df <- as.data.frame(object) + df <- df[, !colnames(df) %in% colnames(mcols(object))] + out <- S4Vectors::cbind_mcols_for_display(df, object) + class_df <- lapply(df, class) + classinfo <- S4Vectors::makeClassinfoRowForCompactPrinting( + object, class_df + ) + stopifnot(identical(colnames(classinfo), colnames(out))) + out <- rbind(classinfo, out) + print(out, quote = FALSE, right = TRUE) + } +) + +#' @section Generics: +#' - `as.list(x)`: Convert a Rel object to a list +#' @rdname Rel-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Rel", function(x) { + to <- list() + for (slot in slotNames(x)) { + if (slot %in% c("metadata", "elementMetadata")) { + next + } else { + to[[slot]] <- slot(x, slot) + } + } + # Add the metadata in separate slot + c(to, as.list(mcols(x))) +}) + +#' @section Generics: +#' - `as.data.frame(x)`: Convert a Rel object to a data.frame +#' @rdname Rel-class +#' @importFrom S4Vectors as.data.frame +#' @export +#' @usage NULL +setMethod("as.data.frame", "Rel", function(x) { + lst <- as.list(x) + if (length(unique(lapply(lst, length))) != 1) { + stop("All slots should have the same length") + } + data.frame(lst) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Rel object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Rel-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Rel", function(x, idlist, keep = TRUE) { + if (is.factor(idlist)) { + idlist <- as.character(idlist) + } + if (! is.character(idlist)) { + stop("idlist must be a character") + } + + if (! keep) { + id_all <- c(id1(x), id2(x)) + idlist <- setdiff(id_all, idlist) + } + + rel_df <- as.data.frame(x) + + id1 <- rel_df$id1 %in% idlist + id2 <- rel_df$id2 %in% idlist + rel_df <- rel_df[id1 & id2, ] + new_rel <- Rel(rel_df) + validObject(new_rel) + new_rel +}) + +#### S4 Hints generics #### + +#' @section Generics: +#' - `as.list(x)`: Convert a Hints object to a list +#' @rdname Hints-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Hints", function(x) { + list(horder = x@horder, spouse = x@spouse) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Hints object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Hints-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Hints", function(x, idlist, keep = TRUE) { + horder <- horder(x) + spouse <- spouse(x) + + if (is.factor(idlist)) { + idlist <- as.character(idlist) + } + if (! is.character(idlist)) { + stop("idlist must be a character") + } + + if (length(horder) > 0) { + if (! keep) { + idlist <- setdiff(names(horder), idlist) + } + horder <- horder[names(horder) %in% idlist] + } + + if (nrow(spouse) > 0) { + if (! keep) { + id_all <- c(spouse$idl, spouse$idr) + idlist <- setdiff(id_all, idlist) + } + spouse <- spouse[spouse$idl %in% idlist & spouse$idr %in% idlist, ] + } + new_hints <- Hints(horder = horder, spouse = spouse) + validObject(new_hints) + new_hints +}) + +#### S4 Scales generics #### + +#' @section Generics: +#' - `as.list(x)`: Convert a Scales object to a list +#' @rdname Scales-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Scales", function(x) { + list(fill = x@fill, border = x@border) +}) + +#### S4 Pedigree generics #### + +#' @section Generics: +#' - `length(x)`: Get the length of a Pedigree object. +#' Wrapper of `length(ped(x))`. +#' @rdname Pedigree-class +#' @export +#' @usage NULL +setMethod("length", c(x = "Pedigree"), + function(x) { + length(ped(x)) + } +) + +#' @section Generics: +#' - `show(x)`: Print the information of the Ped and Rel +#' object inside the Pedigree object. +#' @export +#' @importFrom methods show +#' @rdname Pedigree-class +#' @usage NULL +setMethod("show", signature(object = "Pedigree"), function(object) { + cat("Pedigree object with: \n") + print(ped(object)) + print(rel(object)) +}) + +#' @section Generics: +#' - `summary(x)`: Compute the summary of the Ped and Rel object +#' inside the Pedigree object. +#' @export +#' @rdname Pedigree-class +#' @usage NULL +setMethod("summary", signature(object = "Pedigree"), function(object) { + cat("Pedigree object with \n") + print(summary(ped(object))) + print(summary(rel(object))) +}) + +#' @section Generics: +#' - `as.list(x)`: Convert a Pedigree object to a list +#' @rdname Pedigree-class +#' @importFrom S4Vectors as.list +#' @export +#' @usage NULL +setMethod("as.list", "Pedigree", function(x) { + list( + ped = as.list(ped(x)), + rel = as.list(rel(x)), + scales = as.list(scales(x)), + hints = as.list(hints(x)) + ) +}) + +#' @section Generics: +#' - `subset(x, i, keep = TRUE)`: Subset a Pedigree object +#' based on the individuals identifiers given. +#' - `i` : A vector of individuals identifiers to keep. +#' - `del_parents` : A logical value indicating if the parents +#' of the individuals should be deleted. +#' - `keep` : A logical value indicating if the individuals +#' should be kept or deleted. +#' @rdname Pedigree-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("subset", "Pedigree", + function(x, i, del_parents = FALSE, keep = TRUE) { + new_ped <- subset(ped(x), i, del_parents = del_parents, keep = keep) + all_id <- id(new_ped) + new_rel <- subset(rel(x), all_id) + new_hints <- subset(hints(x), all_id) + + new_pedi <- new("Pedigree", + ped = new_ped, rel = new_rel, + hints = new_hints, scales = scales(x) + ) + validObject(new_pedi) + new_pedi + } +) + +#' @section Generics: +#' - `x[i, del_parents, keep]`: Subset a Pedigree object +#' based on the individuals identifiers given. +#' @rdname Pedigree-class +#' @importFrom S4Vectors subset +#' @export +#' @usage NULL +setMethod("[", c(x = "Pedigree", i = "ANY", j = "missing"), + function(x, i, j, del_parents = FALSE, keep = TRUE, drop = TRUE) { + subset(x, i, del_parents, keep) + } +) diff --git a/R/AllValidity.R b/R/AllValidity.R new file mode 100644 index 00000000..ce6b27fe --- /dev/null +++ b/R/AllValidity.R @@ -0,0 +1,577 @@ +#' Print0 to max +#' +#' Print0 the elements inside a vector until a maximum is reached. +#' +#' @param x A vector. +#' @param max The maximum number of elements to print. +#' @param ... Additional arguments passed to print0 +#' +#' @return The character vector aggregated until the maximum is reached. +#' @keywords internal +paste0max <- function(x, max = 5, sep = "", ...) { + lgt <- min(length(x), max) + ext <- ifelse(length(x) > max, "...", "") + paste( + "'", paste0(x[seq_len(lgt)], collapse = "', '"), + "'", ext, sep = sep, ... + ) +} + +#' Check if the fields are present in an object slot +#' +#' @param obj An object. +#' @param slot A slot of object. +#' @param fields A character vector with the fields to check. +#' +#' @return A character vector with the errors if any. +#' @keywords internal +check_slot_fd <- function(obj, slot = NULL, fields = character()) { + if (is.object(obj)) { + obj <- as.list(obj) + } + if (is.null(slot)) { + array_names <- names(obj) + } else if (is.data.frame(obj[[slot]])) { + array_names <- colnames(obj[[slot]]) + } else if (is.list(obj[[slot]])) { + array_names <- names(obj[[slot]]) + } else { + stop( + "Slot ", slot, " is not a data.frame or a list. ", + class(obj[[slot]]), " found." + ) + } + if (length(array_names) == 0) { + paste0( + "No fields in ", slot, + " slot. See Pedigree documentation." + ) + } else if (any(!fields %in% array_names)) { + paste0( + paste0max(fields[!fields %in% array_names]), + " column(s) is not present in slot ", slot, "." + ) + } +} + +#' Check values in a slot +#' +#' Check if the all the values in a slot are in a vector of values. +#' +#' @param val A vector of values to check. +#' @param ref A vector of reference values. +#' @param name A character vector with the name of the values to check. +#' @param present A logical value indicating if the values should be present +#' or not +#' +#' @return A character vector with the errors if any. +#' @keywords internal +check_values <- function(val, ref, name = NULL, present = TRUE) { + if (length(dim(val)) > 1) { + stop("val must be a vector") + } + + if (present) { + val_abs <- !val %in% ref + should <- " should be in " + } else { + val_abs <- val %in% ref + should <- " should not be in " + } + + val_name <- ifelse(is.null(name), "Values ", paste(name, "values ")) + + if (any(val_abs)) { + paste0( + val_name, paste0max(unique(val[val_abs])), should, + paste0max(unique(ref)) + ) + } +} + +#' Check if a Hints object is valid +#' +#' Check if horder and spouse slots are valid: +#' - horder is named numeric vector +#' - spouse is a data.frame +#' - Has the three `idr`, `idl`, `anchor` columns +#' - `idr` and `idl` are different and doesn't contains `NA` +#' - `idr` and `idl` couple are unique +#' - `anchor` column only have `right`, `left`, `either` values +#' - all ids in spouse needs to be in the names of the horder vector +#' @param object A Hints object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' @keywords internal +#' +is_valid_hints <- function(object) { + errors <- c() + + #### Check that the slots are of the right class #### + if (! is.numeric(object@horder)) { + errors <- c(errors, "horder slot must be numeric") + } + if (length(object@horder) > 0 && is.null(names(object@horder))) { + errors <- c(errors, "horder slot should be named") + } + if (! is.data.frame(object@spouse)) { + errors <- c(errors, "spouse slot must be a data.frame") + } + + #### Check that the horder slot is valid #### + errors <- c(errors, check_values( + object@horder, NA_real_, "horder", present = FALSE + )) + + #### Check that the hints spouse data.frame is valid #### + errors <- c(errors, check_slot_fd( + object, "spouse", c("idl", "idr", "anchor") + )) + + if (!is.factor(object@spouse$anchor)) { + errors <- c(errors, "anchor column must be a factor") + } + + errors <- c(errors, check_values( + object@spouse$anchor, c("left", "right", "either"), "anchor" + )) + errors <- c(errors, check_values( + object@spouse$idl, NA_character_, "idl", present = FALSE + )) + errors <- c(errors, check_values( + object@spouse$idr, NA_character_, "idr", present = FALSE + )) + + if (any(object@spouse$idl == object@spouse$idr, na.rm = TRUE)) { + errors <- c(errors, "idl and idr should be different") + } + + idmin <- pmin(object@spouse$idl, object@spouse$idr, na.rm = TRUE) + idmax <- pmax(object@spouse$idl, object@spouse$idr, na.rm = TRUE) + dup <- anyDuplicated(cbind(idmin, idmax)) + if (dup) { + dup <- paste(idmin[dup], idmax[dup], sep = "_") + errors <- c(errors, paste( + "idl and idr couple should be unique:", + paste(dup, collapse = ", "), + "couples are present more than once in the spouse slot." + )) + } + + ## All idl and idr should be in the names of horder + if (length(object@horder) > 0) { + id <- c(object@spouse$idl, object@spouse$idr) + if (any(!id %in% names(object@horder))) { + errors <- c(errors, paste( + "All idl and idr should be in the names of horder" + )) + } + } else { + if (nrow(spouse(object)) > 0) { + errors <- c(errors, paste( + "horder slot should be non empty if spouse slot is non empty" + )) + } + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Scales object is valid +#' +#' Check if the fill and border slots are valid: +#' - fill slot is a data.frame with "order", "column_values", +#' "column_mods", "mods", "labels", "affected", "fill", +#' "density", "angle" columns. +#' - "affected" is logical. +#' - "density", "angle", "order", "mods" are numeric. +#' - "column_values", "column_mods", "labels", "fill" are +#' character. +#' - border slot is a data.frame with "column_values", +#' "column_mods", "mods", "labels", "border" columns. +#' - "column_values", "column_mods", "labels", "border" are +#' character. +#' - "mods" is numeric. +#' +#' @param object A Scales object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' @keywords internal +#' +#' @export +is_valid_scales <- function(object) { + errors <- c() + + fill_cols <- c( + "order", "column_values", "column_mods", "mods", + "labels", "affected", "fill", "density", "angle" + ) + border_cols <- c( + "column_values", "column_mods", "mods", "labels", "border" + ) + errors <- c(errors, check_slot_fd(object, NULL, c("fill", "border"))) + errors <- c(errors, check_slot_fd(object, "fill", fill_cols)) + errors <- c(errors, check_slot_fd(object, "border", border_cols)) + + #### Check that the fill columns have the right values #### + ## Check for logical columns + col_log <- c("affected") + err_log <- col_log[!unlist(lapply(object@fill[col_log], is.logical))] + if (length(err_log) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_log, collapse = ", "), + " column(s) must be logical", sep = "" + )) + } + + ## Check for numeric columns + col_num <- c("density", "angle", "order", "mods") + err_num <- col_num[!unlist(lapply(object@fill[col_num], is.numeric))] + if (length(err_num) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_num, collapse = ", "), + " column(s) must be numeric", sep = "" + )) + } + + ## Check for character columns + col_char <- c( + "column_values", "column_mods", "labels", "fill" + ) + err_char <- col_char[!unlist(lapply( + object@fill[col_char], is.character + ))] + if (length(err_char) > 0) { + errors <- c(errors, paste("Fill slot ", + paste(err_char, collapse = ", "), + " column(s) must be character", sep = "" + )) + } + + #### Check that the border columns have the right values #### + ## Check for character columns + col_char <- c("column_values", "column_mods", "labels", "border") + err_char <- col_char[!unlist(lapply( + object@border[col_char], is.character + ))] + if (length(err_char) > 0) { + errors <- c(errors, paste("Border slot ", + paste(err_char, collapse = ", "), + " column(s) must be character", sep = "" + )) + } + + ## Check for numeric columns + col_num <- c("mods") + err_num <- col_num[!unlist(lapply( + object@border[col_num], is.numeric + ))] + if (length(err_num) > 0) { + errors <- c(errors, paste("Border slot ", + paste(err_num, collapse = ", "), + " column(s) must be numeric", sep = "" + )) + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Ped object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the ped ids slots have the right values +#' 2. Check that the sex, steril, status, avail and affected slots have the +#' right values +#' 3. Check that dad are male and mom are female +#' 4. Check that individuals have both parents or none +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_ped <- function(object) { + missid <- NA_character_ + errors <- c() + + #### Check that the ped columns have the right values #### + # Check for ped@id uniqueness + if (any(duplicated(object@id))) { + errors <- c(errors, "Id in ped slot must be unique") + } + + # Control values for ids + famid <- unique(object@famid) + errors <- c(errors, check_values( + famid, c(""), "famid", present = FALSE + )) + errors <- c(errors, check_values( + object@id, + c( + missid, "", + paste(as.character(missid), famid, sep = "_"), + paste("", famid, sep = "_") + ), + "id", present = FALSE + )) + errors <- c(errors, check_values( + object@dadid, c(object@id, missid), "dadid" + )) + errors <- c(errors, check_values( + object@momid, c(object@id, missid), "momid" + )) + + # Control values for sex, steril, status, avail and affected + sex_code <- c("male", "female", "unknown", "terminated") + errors <- c(errors, check_values(object@sex, sex_code)) + errors <- c(errors, check_values(object@steril, c(0, 1, NA))) + errors <- c(errors, check_values(object@status, c(0, 1, NA))) + errors <- c(errors, check_values(object@avail, c(0, 1, NA))) + errors <- c(errors, check_values(object@affected, c(0, 1, NA))) + + # Control sex for parents + id <- object@id + momid <- object@momid + dadid <- object@dadid + sex <- object@sex + is_dad <- id %in% dadid + is_mom <- id %in% momid + + if (any(sex[is_dad] != "male")) { + errors <- c(errors, "Some dad are not male") + } + if (any(sex[is_mom] != "female")) { + errors <- c(errors, "Some mom are not female") + } + if (any( + (dadid %in% missid & (! momid %in% missid)) | + ((! dadid %in% missid) & momid %in% missid) + )) { + errors <- c(errors, "Individuals should have both parents or none") + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } + + return(errors) +} + +#' Check if a Rel object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the "id1", "id2", "code", "famid" slots exist +#' 2. Check that the "code" slots have the right values +#' (i.e. "MZ twin", "DZ twin", "UZ twin", "Spouse") +#' 3. Check that all "id1" are different to "id2" +#' 4. Check that all "id1" are smaller than "id2" +#' 5. Check that no duplicate relation are present +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_rel <- function(object) { + errors <- c() + + rel_cols <- c("id1", "id2", "code", "famid") + #### Check that the slots have the right columns #### + errors <- c(errors, check_slot_fd(object, NULL, rel_cols)) + + #### Check that the rel columns have the right values #### + codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") + errors <- c(errors, check_values(object@code, codes)) + + #### Check that id1 is different from id2 #### + if (any(object@id1 == object@id2)) { + id1e <- object@id1[object@id1 == object@id2] + id2e <- object@id2[object@id1 == object@id2] + errors <- c(errors, paste( + "id1 '", paste0(id1e, collapse = "', '"), + "' should be different to id2 '", paste0(id2e, collapse = "', '"), + "'.", sep = "" + )) + } + + #### Check that all id1 is smaller than id2 #### + if (any(object@id1 > object@id2)) { + id1b <- object@id1[object@id1 > object@id2] + id2b <- object@id2[object@id1 > object@id2] + errors <- c(errors, paste( + "id1 '", paste0(id1b, collapse = "', '"), + "' should be smaller than id2 '", paste0(id2b, collapse = "', '"), + "'.", sep = "" + )) + } + + #### Check absence of duplicate #### + idr <- paste(object@id1, object@id2, sep = "_") + if (any(duplicated(idr))) { + idd <- idr[duplicated(idr)] + errors <- c(errors, paste( + "Pairs of individuals should be unique", + " ('", paste0(idd, collapse = "', '"), "').", sep = "" + )) + } + + if (length(errors) == 0) { + TRUE + } else { + errors + } +} + +#' Check if a Pedigree object is valid +#' +#' Multiple checks are done here +#' +#' 1. Check that the all Rel id are in the Ped object +#' 2. Check that twins have same parents +#' 3. Check that MZ twins have same sex +#' 4. Check that all columns used in scales are in the Ped object +#' 5. Check that all fill & border modalities are in the Ped object column +#' 6. Check that all id used in Hints object are in the Ped object +#' 7. Check that all spouse in Hints object are male / female +#' +#' @param object A Ped object. +#' +#' @return A character vector with the errors or `TRUE` if no errors. +#' +#' @keywords internal +is_valid_pedigree <- function(object) { + errors <- c() + + #### Check Rel Id in Ped Id #### + errors <- c(errors, check_values( + object@rel@famid, c(object@ped@famid, NA), "Rel famid" + )) + errors <- c(errors, check_values(object@rel@id1, object@ped@id, "Rel id1")) + errors <- c(errors, check_values(object@rel@id2, object@ped@id, "Rel id2")) + + #### Check if twins has same parents #### + code <- object@rel@code + ncode <- as.numeric(code) + id1 <- object@rel@id1 + id2 <- object@rel@id2 + id <- object@ped@id + momid <- object@ped@momid + dadid <- object@ped@dadid + sex <- object@ped@sex + temp1 <- match(id1, id, nomatch = 0) + temp2 <- match(id2, id, nomatch = 0) + if (any(ncode < 4)) { + twins <- (ncode < 4) + if (any(momid[temp1[twins]] != momid[temp2[twins]])) { + errors <- c(errors, "twins found with different mothers") + } + if (any(dadid[temp1[twins]] != dadid[temp2[twins]])) { + errors <- c(errors, "twins found with different fathers") + } + } + + #### Check if the monozygote twins has same gender #### + if (any(ncode == 1)) { + mztwins <- (ncode == 1) + if (any(sex[temp1[mztwins]] != sex[temp2[mztwins]])) { + errors <- c(errors, "MZ twins with different genders") + } + } + + #### Check that the scales columns have the right values #### + ped <- as.data.frame(ped(object)) + errors <- c(errors, check_values( + fill(object)$column_values, colnames(ped), + "fill column_values" + )) + errors <- c(errors, check_values( + fill(object)$column_mods, colnames(ped), + "fill column_mods" + )) + errors <- c(errors, check_values( + border(object)$column, colnames(ped), + "border column" + )) + + #### Check that all fill modalities are present in the pedigree data #### + for (col in unique(fill(object)$column)){ + errors <- c(errors, check_values( + ped[[col]], + fill(object)[fill(object)$column_mods == col, "mods"], + paste("fill column", col) + )) + } + #### Check that all borders modalities are present in the pedigree data #### + for (col in unique(border(object)$column)){ + errors <- c(errors, check_values( + ped[[col]], + border(object)[border(object)$column == col, "mods"], + paste("border column", col) + )) + } + + #### Check that the hints are valid #### + if (length(horder(object)) > 0 && + length(horder(object)) != length(object) + ) { + errors <- c(errors, + "Length for horder component should be equal to Pedigree length" + ) + } + idh <- names(horder(object)) + idh_abs <- idh[!idh %in% id(ped(object))] + if (length(idh_abs) > 0) { + errors <- c(errors, + paste( + "Hints horder id", + paste(idh_abs, sep = ","), + "not present in the Ped object" + ) + ) + } + + idl <- spouse(object)$idl + idr <- spouse(object)$idr + + ## Check for presence of spouses in Ped object + ids_abs <- c(idl, idr)[!c(idl, idr) %in% id(ped(object))] + if (length(ids_abs) > 0) { + errors <- c(errors, paste( + "Hints spouse(s)", + paste(ids_abs, sep = ","), + "not present in the Ped object" + )) + } + + ## Check for sex of spouses + idls <- sex(ped(object))[match(idl, id(ped(object)))] + idlr <- sex(ped(object))[match(idr, id(ped(object)))] + sps <- paste(idls, idlr, sep = "_") + sps <- sps %in% c("female_male", "male_female") + if (any(!sps)) { + errors <- c(errors, paste( + "Hints spouse(s)", + paste(paste(idl[!sps], idr[!sps], sep = "_"), sep = ","), + "not female, male" + )) + } + + #### Errors #### + if (length(errors) == 0) { + TRUE + } else { + errors + } +} \ No newline at end of file diff --git a/R/Pedixplorer-package.R b/R/Pedixplorer-package.R index 3326ede4..2931422a 100644 --- a/R/Pedixplorer-package.R +++ b/R/Pedixplorer-package.R @@ -11,7 +11,8 @@ #' additional functionality and bug fixes. #' #' The package download, NEWS, and README are available on CRAN: -#' \\url{https://cran.r-project.org/package=kinship2} +#' \\url{https://cran.r-project.org/package=kinship2} for the +#' previous version of the package. #' #' @section Functions: #' Below are listed some of the most widely used functions available @@ -24,12 +25,14 @@ #' probability having an allele sampled from two individuals #' be the same via IBD. #' -#' [ped_to_plotdf()] : Method to transform a Pedigree -#' object into a dataframe of graphical elements. -#' Allows extra information to be included in the id under the plot symbol -#' -#' [plot_fromdf()] : Method to plot a Pedigree from a -#' dataframe of graphical elements. +#' [Pedixplorer::plot()] : Method to transform a Pedigree +#' object into a graphical plot. +#' Allows extra information to be included in the id under the +#' plot symbol. +#' This method use the [plot_fromdf()] function to transform the Pedigree +#' object into a data frame of graphical elements, the same is done for the +#' legend with the [ped_to_legdf()] function. +#' When done, the data frames are plotted with the [plot_fromdf()] function. #' #' [shrink()]: Shrink a Pedigree to a specific bit size, #' removing non-informative members first. @@ -49,5 +52,4 @@ #' #' @docType package #' @rdname Pedixplorer_package -#' @keywords internal "_PACKAGE" diff --git a/R/align.R b/R/align.R index 6966b58f..88a1f8b5 100644 --- a/R/align.R +++ b/R/align.R @@ -1,11 +1,9 @@ -## Automatically generated from all.nw using noweb - -#' Routine function to get ancestors of a subject +#' Ancestors indexes of a subject #' #' @description Given the index of one or multiple individual(s), this #' function iterate through the mom and dad indexes to #' list out all the ancestors of the said individual(s). -#' This function is use in the `align()` function to +#' This function is use in the [align()] function to #' identify which spouse pairs has a common ancestor and #' therefore if they need to be connected with a double line #' (i.e. inbred). @@ -14,8 +12,13 @@ #' @param dadx Indexes of the fathers #' @param momx Indexes of the mothers #' +#' @examples +#' ancestors(c(1), c(3, 4, 5, 6), c(7, 8, 9, 10)) +#' ancestors(c(1, 2), c(3, 4, 5, 6), c(7, 8, 9, 10)) #' @return A vector of ancestor indexes +#' @keywords internal #' @seealso [align()] +#' @export ancestors <- function(idx, momx, dadx) { alist <- idx repeat { @@ -26,11 +29,11 @@ ancestors <- function(idx, momx, dadx) { } alist <- newlist } - alist[alist != idx] + alist[!alist %in% idx] } -#' Generate plotting information for a Pedigree +#' Align a Pedigree object #' #' @description #' Given a Pedigree, this function creates helper matrices that describe the @@ -38,61 +41,55 @@ ancestors <- function(idx, momx, dadx) { #' #' @details #' This is an internal routine, used almost exclusively by -#' `ped_to_plotdf()`. The subservient functions `auto_hint()`, -#' `alignped1()`, `alignped2()`, -#' `alignped3()`, and `alignped4()` +#' [ped_to_plotdf()]. +#' +#' The subservient functions [auto_hint()], +#' [alignped1()], [alignped2()], +#' [alignped3()], and [alignped4()] #' contain the bulk of the computation. -#' If the **hints** are missing the `auto_hint()` routine is called to +#' +#' If the **hints** are missing the [auto_hint()] routine is called to #' supply an initial guess. -#' If multiple families are present in the Pedigree, this routine is called -#' once for each family, and the results are combined in the list returned. -#' For more information you can read the associated vignette:align -#' `vignette("alignement_details")`. #' -#' @param ped A Pedigree object -#' @param packed Should the Pedigree be compressed, i.e., allow diagonal -#' lines connecting parents to children in order to have a smaller overall -#' width for the plot. -#' @param width for a packed output, the minimum width of the plot, in +#' If multiple families are present in the **obj** Pedigree, this routine +#' is called once for each family, and the results are combined in the +#' list returned. +#' +#' For more information you can read the associated vignette: +#' `vignette("pedigree_alignment")`. +#' +#' @param obj A Pedigree object +#' @param packed Should the Pedigree be compressed. +#' (i.e. allow diagonal lines connecting parents to children in order +#' to have a smaller overall width for the plot.) +#' @param width For a packed output, the minimum width of the plot, in #' inches. -#' @param align for a packed Pedigree, align children under parents `TRUE`, +#' @param align For a packed Pedigree, align children under parents `TRUE`, #' to the extent possible given the page width, or align to to the left #' margin `FALSE`. #' This argument can be a two element vector, giving the alignment #' parameters, or a logical value. -#' If `TRUE`, the default is `c(1.5, 2)`, or numeric the routine +#' If `TRUE`, the default is `c(1.5, 2)`, or if numeric the routine #' `alignped4()` will be called. -#' @param hints Plotting hints for the Pedigree. -#' This is a list with components `order` and `spouse`, the second one -#' is optional. -#' - **order** is a numeric vector with one element per subject in the -#' Pedigree. It determines the relative order of subjects within a sibship, as -#' well as the relative order of processing for the founder couples. (For this -#' latter, the female founders are ordered as though they were sisters). -#' - **spouse** is a matrix with one row per hinted marriage, usually -#' only a few marriages in a pedigree will need an added hint, for instance -#' reverse the plot order of a husband/wife pair. Each row contains the -#' index of the left spouse, the right hand spouse, and the anchor -#' (i.e : `1` = left, `2` = right, `0` = either). -#' Children will preferentially appear under the parents of the anchored -#' spouse. -#' @inheritParams is_parent +#' @param hints A Hints object or a named list containing `horder` and +#' `spouse`. If `NULL` then the Hints stored in **obj** will be used. +#' @inheritParams Ped #' #' @return A list with components -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n`: A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid`: A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos`: A matrix giving the horizontal position of each plot point +#' - `fam`: A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values +#' - `spouse`: A matrix with values #' - `0` = not a spouse #' - `1` = subject plotted to the immediate right is a spouse #' - `2` = subject plotted to the immediate right is an inbred spouse -#' - twins Optional matrix which will only be present if the Pedigree +#' - `twins`: Optional matrix which will only be present if the Pedigree #' contains twins : #' - `0` = not a twin #' - `1` = sibling to the right is a monozygotic twin @@ -112,143 +109,169 @@ ancestors <- function(idx, momx, dadx) { #' @export #' @include auto_hint.R #' @include kindepth.R -#' @include check_hints.R -#' @include pedigreeClass.R +#' @include AllClass.R #' @include alignped1.R #' @include alignped2.R #' @include alignped3.R #' @include alignped4.R -align <- function(ped, packed = TRUE, width = 10, - align = TRUE, hints = ped$hints, missid = "0" -) { - famlist <- unique(ped(ped)$family) - if (length(famlist) > 1) { - nfam <- length(famlist) - alignment <- vector("list", nfam) - for (i_fam in famlist) { - ped_fam <- ped[ped(ped)$family == i_fam] - alignment[[i_fam]] <- align(ped_fam, packed, width, align) +#' @rdname align +#' @usage NULL +setGeneric("align", signature = "obj", + function(obj, ...) standardGeneric("align") +) + +#' @rdname align +#' @docType methods +setMethod("align", "Pedigree", + function( + obj, packed = TRUE, width = 10, + align = TRUE, hints = NULL, missid = "NA_character_" + ) { + famlist <- unique(famid(obj)) + if (length(famlist) > 1) { + nfam <- length(famlist) + alignment <- vector("list", nfam) + for (i_fam in famlist) { + ped_fam <- obj[famid(obj) == i_fam] + alignment[[i_fam]] <- align(ped_fam, packed, width, align) + } + return(alignment) } - return(alignment) - } - if (is.null(hints$order)) { - hints <- try({ - auto_hint(ped) - }, silent = TRUE) - ## sometimes appears dim(ped) is empty (ped is NULL), so try fix here: - ## (JPS 6/6/17 - if ("try-error" %in% class(hints)) { - hints <- list(order = seq_len(max(1, dim(ped)))) + if (is.null(hints)) { + hints <- hints(obj) } - } else { - check_hints(hints, ped(ped)$sex) - } - ## Doc: Setup-align - n <- length(ped(ped)$id) - - level <- 1 + kindepth(ped, align = TRUE) - horder <- hints$order # relative order of siblings within a family + if (!is(hints, "Hints")) { + if (!is.list(hints)) { + stop("hints argument should be a Hints object or a list") + } + hints <- Hints(hints) + } + if (length(horder(hints)) == 0) { + hints <- try({ + auto_hint(obj) + }, silent = TRUE) + if ("try-error" %in% class(hints)) { + hints <- Hints(horder = setNames( + seq_len(length(ped(obj))), id(ped(obj)) + )) + } + } + ## Doc: Setup-align + n <- length(obj) - if (!is.null(hints$spouse)) { - # start with the hints list - tsex <- ped(ped)$sex[hints$spouse[, 1]] # sex of the left member - spouselist <- cbind(0, 0, 1 + (tsex != "male"), hints$spouse[, 3]) - spouselist[, 1] <- ifelse(tsex == "male", hints$spouse[, 1], - hints$spouse[, 2] - ) - spouselist[, 2] <- ifelse(tsex == "male", hints$spouse[, 2], - hints$spouse[, 1] - ) - } else { - spouselist <- matrix(0L, nrow = 0, ncol = 4) - } + level <- 1 + kindepth(obj, align_parents = TRUE) + ## relative order of siblings within a family + horder <- horder(hints) - if (nrow(rel(ped)) > 0 && any(rel(ped)$code == "Spouse")) { - # Add spouses from the relationship matrix - trel <- rel(ped)[ - rel(ped)$code == "Spouse", c("id1", "id2"), drop = FALSE - ] - trel$id1 <- match(trel$id1, ped(ped)$id) - trel$id2 <- match(trel$id2, ped(ped)$id) - tsex <- ped(ped)$sex[trel[, 1]] - trel[tsex != "male", seq_len(2)] <- trel[tsex != "male", 2:1] - spouselist <- rbind(spouselist, cbind(trel[, 1], trel[, 2], 0, 0)) - } - dad <- match(ped(ped)$dadid, ped(ped)$id, nomatch = 0) - mom <- match(ped(ped)$momid, ped(ped)$id, nomatch = 0) - is_child <- dad > 0 & mom > 0 - if (any(is_child)) { - # add parents - who <- which(is_child) - spouselist <- rbind(spouselist, cbind(dad[who], mom[who], 0, 0)) - } - hash <- spouselist[, 1] * n + spouselist[, 2] - spouselist <- spouselist[!duplicated(hash), , drop = FALSE] - ## Doc: Founders -align - noparents <- (dad[spouselist[, 1]] == 0 & dad[spouselist[, 2]] == 0) - ## Take duplicated mothers and fathers, then founder mothers - dupmom <- spouselist[noparents, 2][duplicated(spouselist[noparents, 2])] - ## Founding mothers with multiple marriages - dupdad <- spouselist[noparents, 1][duplicated(spouselist[noparents, 1])] - ## Founding fathers with multiple marriages - foundmom <- spouselist[ - noparents & !(spouselist[, 1] %in% c(dupmom, dupdad)), 2 - ] # founding mothers - founders <- unique(c(dupmom, dupdad, foundmom)) - # use the hints to order them - founders <- founders[order(horder[founders])] - rval <- alignped1(founders[1], dad, mom, level, horder, packed, spouselist) - if (length(founders) > 1) { - spouselist <- rval$spouselist - for (i in 2:length(founders)) { - rval2 <- alignped1(founders[i], dad, mom, level, horder, packed, - spouselist + if (nrow(spouse(hints)) > 0) { + # start with the hints list + idxl <- match(spouse(hints)$idl, id(ped(obj))) + idxr <- match(spouse(hints)$idr, id(ped(obj))) + tsex <- sex(ped(obj))[idxl] # sex of the left member + spouselist <- cbind( + 0, 0, 1 + (tsex != "male"), as.numeric(spouse(hints)$anchor) ) - spouselist <- rval2$spouselist - rval <- alignped3(rval, rval2, packed) + spouselist[, 1] <- ifelse(tsex == "male", idxl, idxr) + spouselist[, 2] <- ifelse(tsex == "male", idxr, idxl) + } else { + spouselist <- matrix(0L, nrow = 0, ncol = 4) + } + if (any(code(rel(obj)) == "Spouse")) { + # Add spouses from the relationship matrix + trel <- as.data.frame(rel(obj)) + trel <- trel[trel$code == "Spouse", c("id1", "id2")] + trel$id1 <- match(trel$id1, id(ped(obj))) + trel$id2 <- match(trel$id2, id(ped(obj))) + tsex <- sex(ped(obj))[trel$id1] + trel[tsex != "male", seq_len(2)] <- trel[tsex != "male", 2:1] + spouselist <- rbind(spouselist, cbind(trel[, 1], trel[, 2], 0, 0)) } - } - ## Doc: finish-align (1) Unhash out the spouse and nid arrays - nid <- matrix(as.integer(floor(rval$nid)), nrow = nrow(rval$nid)) - spouse <- 1L * (rval$nid != nid) - maxdepth <- nrow(nid) - for (i in (seq_along(spouse))[spouse > 0]) { - a1 <- ancestors(nid[i], mom, dad) - # matrices are in column order - a2 <- ancestors(nid[i + maxdepth], mom, dad) - if (any(duplicated(c(a1, a2)))) { - spouse[i] <- 2 + dad <- match(dadid(ped(obj)), id(ped(obj)), nomatch = 0) + mom <- match(momid(ped(obj)), id(ped(obj)), nomatch = 0) + is_child <- dad > 0 & mom > 0 + if (any(is_child)) { + # add parents + who <- which(is_child) + spouselist <- rbind(spouselist, cbind(dad[who], mom[who], 0, 0)) } - } - ## Doc: finish align(2) - if (nrow(rel(ped)) > 0 && any(rel(ped)$code != "Spouse")) { - twins <- 0 * nid - who <- (rel(ped)$code != "Spouse") - ltwin <- match(rel(ped)[who, "id1"], ped(ped)$id, nomatch = 0) - rtwin <- match(rel(ped)[who, "id2"], ped(ped)$id, nomatch = 0) - ttype <- rel(ped)[who, "code"] - # find where each of them is plotted (any twin only appears once with a - # family id, i.e., under their parents) - # matrix of connected-to-parent ids - ntemp <- ifelse(rval$fam > 0, nid, 0) - ltemp <- (seq_along(ntemp))[match(ltwin, ntemp, nomatch = 0)] - rtemp <- (seq_along(ntemp))[match(rtwin, ntemp, nomatch = 0)] - twins[pmin(ltemp, rtemp)] <- ttype - } else { - twins <- NULL - } - ## Doc: finish align(3) - if ((is.numeric(align) || align) && max(level) > 1) { - pos <- alignped4(rval, spouse > 0, level, width, align) - } else { - pos <- rval$pos - } - if (is.null(twins)) { - list(n = rval$n, nid = nid, pos = pos, fam = rval$fam, spouse = spouse) - } else { - list(n = rval$n, nid = nid, pos = pos, fam = rval$fam, spouse = spouse, - twins = twins + hash <- spouselist[, 1] * n + spouselist[, 2] + spouselist <- spouselist[!duplicated(hash), , drop = FALSE] + ## Doc: Founders -align + noparents <- (dad[spouselist[, 1]] == 0 & dad[spouselist[, 2]] == 0) + ## Take duplicated mothers and fathers, then founder mothers + dupmom <- spouselist[noparents, 2][duplicated(spouselist[noparents, 2])] + ## Founding mothers with multiple marriages + dupdad <- spouselist[noparents, 1][duplicated(spouselist[noparents, 1])] + ## Founding fathers with multiple marriages + foundmom <- spouselist[ + noparents & !(spouselist[, 1] %in% c(dupmom, dupdad)), 2 + ] # founding mothers + founders <- unique(c(dupmom, dupdad, foundmom)) + # use the hints to order them + founders <- founders[order(horder[founders])] + rval <- alignped1( + founders[1], dad, mom, level, horder, packed, spouselist ) + if (length(founders) > 1) { + spouselist <- rval$spouselist + for (i in 2:length(founders)) { + rval2 <- alignped1(founders[i], dad, mom, level, horder, packed, + spouselist + ) + spouselist <- rval2$spouselist + rval <- alignped3(rval, rval2, packed) + } + } + ## Doc: finish-align (1) Unhash out the spouse and nid arrays + nid <- matrix(as.integer(floor(rval$nid)), nrow = nrow(rval$nid)) + spouse <- 1L * (rval$nid != nid) + maxdepth <- nrow(nid) + + for (i in (seq_along(spouse))[spouse > 0]) { + a1 <- ancestors(nid[i], mom, dad) + # matrices are in column order + a2 <- ancestors(nid[i + maxdepth], mom, dad) + if (any(duplicated(c(a1, a2)))) { + spouse[i] <- 2 + } + } + + ## Doc: finish align(2) + if (length(rel(obj)) > 0 && any(code(rel(obj)) != "Spouse")) { + twins <- 0 * nid + who <- (code(rel(obj)) != "Spouse") + ltwin <- match(id1(rel(obj))[who], id(ped(obj)), nomatch = 0) + rtwin <- match(id2(rel(obj))[who], id(ped(obj)), nomatch = 0) + ttype <- code(rel(obj))[who] + ## find where each of them is plotted + ## (any twin only appears once with a + ## family id, i.e., under their parents) + ## matrix of connected-to-parent ids + ntemp <- ifelse(rval$fam > 0, nid, 0) + ltemp <- (seq_along(ntemp))[match(ltwin, ntemp, nomatch = 0)] + rtemp <- (seq_along(ntemp))[match(rtwin, ntemp, nomatch = 0)] + twins[pmin(ltemp, rtemp)] <- ttype + } else { + twins <- NULL + } + ## Doc: finish align(3) + if ((is.numeric(align) || align) && max(level) > 1) { + pos <- alignped4(rval, spouse > 0, level, width, align) + } else { + pos <- rval$pos + } + if (is.null(twins)) { + list( + n = rval$n, nid = nid, pos = pos, + fam = rval$fam, spouse = spouse + ) + } else { + list( + n = rval$n, nid = nid, pos = pos, + fam = rval$fam, spouse = spouse, + twins = twins + ) + } } -} +) \ No newline at end of file diff --git a/R/alignped1.R b/R/alignped1.R index e9d6511c..fe50703d 100644 --- a/R/alignped1.R +++ b/R/alignped1.R @@ -1,24 +1,30 @@ # Automatically generated from all.nw using noweb -#' First routine alignement +#' Alignment first routine #' #' @description -#' First alignement routine which create the subtree founded on a single +#' First alignment routine which create the subtree founded on a single #' subject as though it were the only tree. #' #' @details -#' 1. In this routine the **nid** array consists of the final -#' `nid array + 1/2` of the final spouse array. -#' Note that the **spouselist** matrix will only contain spouse pairs -#' that are not yet processed. The logic for anchoring is slightly tricky. -#' First, if row 4 of the spouselist matrix is 0, we anchor at the first +#' In this routine the **nid** array consists of the final +#' `nid array + 1/2` of the final spouse array. +#' Note that the **spouselist** matrix will only contain spouse pairs +#' that are not yet processed. The logic for anchoring is slightly tricky. +#' +#' ## 1. Anchoring: +#' First, if col 4 of the spouselist matrix is 0, we anchor at the first #' opportunity. Also note that if `spouselist[, 3] == spouselist[, 4]` #' it is the husband who is the anchor (just write out the possibilities). -#' 2. Create the set of 3 return structures, which will be matrices +#' +#' ## 2. Return values initialization: +#' Create the set of 3 return structures, which will be matrices #' with `1 + nspouse` columns. #' If there are children then other routines will widen the result. -#' 3. Create the two complimentary lists **lspouse** and **rspouse** -#' to denote those plotted on the left and on the right. +#' +#' ## 3. Create **lspouse** and **rspouse**: +#' This two complimentary lists denote the spouses plotted on the left +#' and on the right. #' For someone with lots of spouses we try to split them evenly. #' If the number of spouses is odd, then men should have more on #' the right than on the left, women more on the right. @@ -31,11 +37,15 @@ #' `length(rspouse) > 1`. This caused `nleft > length(indx)`. #' A fix was to not let **indx** to be indexed beyond its length, #' fix by JPS 5/2013. -#' 4. For each spouse get the list of children. If there are any we +#' +#' ## 4. List the children: +#' For each spouse get the list of children. If there are any we #' call [alignped2()] to generate their tree and #' then mark the connection to their parent. #' If multiple marriages have children we need to join the trees. -#' 5. To finish up we need to splice together the tree made up from +#' +#' ## 5. Splice the tree: +#' To finish up we need to splice together the tree made up from #' all the kids, which only has data from `lev + 1` down, with the data here. #' There are 3 cases: #' @@ -45,31 +55,28 @@ #' 3. The tree below is narrower, for instance an only child. #' #' @param level Vector of the level of each subject -#' @param horder Vector of the horizontal order of each subject -#' @param spouselist Matrix of the spouses with one row per hinted marriage, -#' usually only a few marriages in a pedigree will need an added hint, for -#' instance reverse the plot order of a husband/wife pair. -#' Each row contains the index of the left spouse, the right hand spouse -#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). -#' @inheritParams align +#' @param spouselist Matrix of spouses with 4 columns: +#' - `1`: husband index +#' - `2`: wife index +#' - `3`: husband anchor +#' - `4`: wife anchor #' @inheritParams ancestors +#' @inheritParams Hints +#' @inheritParams align #' #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) @@ -77,7 +84,7 @@ #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped1 <- function(idx, dadx, momx, level, horder, packed, spouselist) { # Set a few constants maxlev <- max(level) diff --git a/R/alignped2.R b/R/alignped2.R index 5ae238db..afb45533 100644 --- a/R/alignped2.R +++ b/R/alignped2.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Second routine alignement +#' Alignment second routine #' #' @description #' Second of the four co-routines which takes a collection of siblings, @@ -16,7 +16,7 @@ #' When the first sib is processed by `alignped1` then both partners #' (and any children) will be added to the rval structure below. #' When the second sib is processed they will come back as a 1 element tree -#' (the marriage will no longer be on the spouselist), which should be added +#' (the marriage will no longer be on the **spouselist**), which should be added #' onto rval. The rule thus is to not add any 1 element tree whose value #' (which must be `idx[i]` is already in the rval structure for this level. #' @@ -26,27 +26,24 @@ #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) #' ped <- Pedigree(sampleped) #' align(ped) #' -#' @seealso [align()], [alignped1()] -#' @export +#' @seealso [align()] +#' @keywords internal, alignment alignped2 <- function(idx, dadx, momx, level, horder, packed, spouselist) { idx <- idx[order(horder[idx])] # Use the hints to order the sibs rval <- alignped1(idx[1], dadx, momx, level, horder, packed, spouselist) diff --git a/R/alignped3.R b/R/alignped3.R index 153c38bb..31b0f0b4 100644 --- a/R/alignped3.R +++ b/R/alignped3.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Third routine alignement +#' Alignment third routine #' #' @description #' Third of the four co-routines to merges two pedigree trees which @@ -12,9 +12,9 @@ #' need not plot two copies of the same person side by side. #' (When initializing the output structures do not worry about this, #' there is no harm if they are a column bigger than finally needed.) -#' Beyond that the work is simple bookkeeping. +#' Beyond that the work is simple book keeping. #' -#' ## Slide +#' ## 1. Slide: #' #' For the unpacked case, which is the traditional way to draw #' a Pedigree when we can assume the paper is infinitely wide, all parents are @@ -22,31 +22,28 @@ #' merged as solid blocks. On input they both have a left margin of 0. #' Compute how far over we have to slide the right tree. #' -#' ## Merge +#' ## 2. Merge: #' #' Now merge the two trees. Start at the top level and work down. #' -#' @param alt1 Alignement of the first tree -#' @param alt2 Alignement of the second tree +#' @param alt1 Alignment of the first tree +#' @param alt2 Alignment of the second tree #' @param space Space between two subjects #' @inheritParams align #' #' @return A list containing the elements to plot the Pedigree. #' It contains a set of matrices along with the spouselist matrix. #' The latter has marriages removed as they are processed. -#' - n A vector giving the number of subjects on each horizonal level of the +#' - `n` : A vector giving the number of subjects on each horizonal level of the #' plot -#' - nid A matrix with one row for each level, giving the numeric id of +#' - `nid` : A matrix with one row for each level, giving the numeric id of #' each subject plotted. #' (A value of `17` means the 17th subject in the Pedigree). -#' - pos A matrix giving the horizontal position of each plot point -#' - fam A matrix giving the family id of each plot point. +#' - `pos` : A matrix giving the horizontal position of each plot point +#' - `fam` : A matrix giving the family id of each plot point. #' A value of `3` would mean that the two subjects in positions 3 and 4, #' in the row above, are this subject's parents. -#' - spouse A matrix with values -#' - `0` = not a spouse -#' - `1` = subject plotted to the immediate right is a spouse -#' - `2` = subject plotted to the immediate right is an inbred spouse +#' - `spouselist` : Spouse matrix with anchors informations #' #' @examples #' data(sampleped) @@ -54,7 +51,7 @@ #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped3 <- function(alt1, alt2, packed, space = 1) { maxcol <- max(alt1$n + alt2$n) maxlev <- length(alt1$n) diff --git a/R/alignped4.R b/R/alignped4.R index 52e6098f..5b6e1166 100644 --- a/R/alignped4.R +++ b/R/alignped4.R @@ -2,7 +2,7 @@ #' @importFrom quadprog solve.QP NULL -#' Fourth and last routine alignement +#' Alignment fourth routine #' #' @description #' Last routines which attempts to line up children under parents and put @@ -23,13 +23,13 @@ NULL #' For each set of siblings `x` with parents at `p_1` and `p_2` #' the alignment penalty is : #' -#' \eqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} +#' \deqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} #' #' where `k` is the number of siblings in the set. #' -#' Using the fact that when `a = 1` : +#'Using the fact that when `a = 1` : #' -#' \eqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} +#' \deqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} #' #' then moving a sibship with `k` sibs one unit to the left or #' right of optimal will incur the same cost as moving one with only 1 or @@ -77,7 +77,7 @@ NULL #' align(ped) #' #' @seealso [align()] -#' @export +#' @keywords internal, alignment alignped4 <- function(rval, spouse, level, width, align) { ## Doc: alignped4 -part1, spacing across page if (is.logical(align)) diff --git a/R/auto_hint.R b/R/auto_hint.R index e89240d1..cd144066 100644 --- a/R/auto_hint.R +++ b/R/auto_hint.R @@ -1,6 +1,6 @@ # Automatically generated from all.nw using noweb -#' Routine to shift set of siblings to the left or right +#' Shift set of siblings to the left or right #' #' @details This routine is used by `auto_hint()`. #' It shifts a set of siblings to the left or right, so that the @@ -19,7 +19,7 @@ #' #' @return The updated hint vector #' @seealso [auto_hint()] -#' @keywords internal +#' @keywords internal, auto_hint shift <- function(id, sibs, goleft, hint, twinrel, twinset) { if (twinset[id] > 0) { # enough to avoid overlap @@ -74,31 +74,21 @@ shift <- function(id, sibs, goleft, hint, twinrel, twinset) { hint } -#' Routine to find the spouse of a subject +#' Find the spouse of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the spouse of a subject. #' #' @param idpos The position of the subject #' @param plist The alignment structure representing the Pedigree layout. -#' For the differents matrices present in the list, each row represents a -#' level of the Pedigree and each column a potential subject. -#' It contains the following components: -#' - n Vector of the number of subjects per level -#' - nid Matrix of the subjects indexes -#' - pos Matrix of the subjects positions -#' - fam Matrix of the siblings family identifiers -#' - spouse Matrix of the left spouses -#' - `0` = not spouse -#' - `1` = spouse -#' - `2` = inbred spouse. +#' See [align()] for details. #' @param lev The generation level of the subject #' @inheritParams align #' #' @return The position of the spouse #' @seealso [auto_hint()] -#' @keywords internal -findspouse <- function(idpos, plist, lev, ped) { +#' @keywords internal, auto_hint +findspouse <- function(idpos, plist, lev, obj) { lpos <- idpos while (lpos > 1 && plist$spouse[lev, lpos - 1]) { lpos <- lpos - 1 @@ -111,8 +101,8 @@ findspouse <- function(idpos, plist, lev, ped) { stop("auto_hint bug 3") } - opposite <- ped(ped)$sex[plist$nid[lev, lpos:rpos]] != - ped(ped)$sex[plist$nid[lev, idpos]] + opposite <- sex(ped(obj))[plist$nid[lev, lpos:rpos]] != + sex(ped(obj))[plist$nid[lev, idpos]] ## Can happen with a triple marriage if (!any(opposite)) { @@ -122,7 +112,7 @@ findspouse <- function(idpos, plist, lev, ped) { spouse } -#' Routine to find the siblings of a subject +#' Find the siblings of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the siblings of a subject. @@ -131,7 +121,7 @@ findspouse <- function(idpos, plist, lev, ped) { #' #' @return The positions of the siblings #' @seealso [auto_hint()] -#' @keywords internal +#' @keywords internal, auto_hint findsibs <- function(idpos, plist, lev) { family <- plist$fam[lev, idpos] if (family == 0) { @@ -140,20 +130,20 @@ findsibs <- function(idpos, plist, lev) { which(plist$fam[lev, ] == family) } -#' Routine to find the duplicate pairs of a subject +#' Find the duplicate pairs of a subject #' #' @details This routine is used by `auto_hint()`. #' It finds the duplicate pairs of a subject and returns them in #' the order they should be plotted. #' #' @param idlist List of individuals identifiers to be considered -#' @inheritParams align #' @inheritParams findspouse +#' @inheritParams align #' #' @return A matrix of duplicate pairs #' @seealso [auto_hint()] -#' @keywords internal -duporder <- function(idlist, plist, lev, ped) { +#' @keywords internal, auto_hint +duporder <- function(idlist, plist, lev, obj) { temp <- table(idlist) if (all(temp == 1)) { return(matrix(0L, nrow = 0, ncol = 3)) @@ -183,7 +173,7 @@ duporder <- function(idlist, plist, lev, ped) { if (plist$fam[lev, dmat[i, 1]] > 0) { sib1 <- max(findsibs(dmat[i, 1], plist, lev)) } else { - spouse <- findspouse(dmat[i, 1], plist, lev, ped) + spouse <- findspouse(dmat[i, 1], plist, lev, obj) ## If spouse is marry-in then move on without looking ## for sibs if (plist$fam[lev, spouse] == 0) { @@ -196,7 +186,7 @@ duporder <- function(idlist, plist, lev, ped) { if (plist$fam[lev, dmat[i, 2]] > 0) { sib2 <- min(findsibs(dmat[i, 2], plist, lev)) } else { - spouse <- findspouse(dmat[i, 2], plist, lev, ped) + spouse <- findspouse(dmat[i, 2], plist, lev, obj) ## If spouse is marry-in then move on without looking ## for sibs if (plist$fam[lev, spouse] == 0) { @@ -210,38 +200,38 @@ duporder <- function(idlist, plist, lev, ped) { dmat[order(famtouch, dmat[, 1] - dmat[, 2]), , drop = FALSE] } -#' Routine to get twin relationships +#' Get twin relationships #' #' @details This routine function determine the twin relationships -#' in a Pedigree. It complete the missing twin relationships for -#' triplets, quads, etc. It also determine the order of the twins +#' in a Pedigree. It determine the order of the twins #' in the Pedigree. #' It is used by `auto_hint()`. #' #' @inheritParams align -#' @keywords internal +#' +#' @keywords internal, auto_hint #' @return A list containing components #' 1. `twinset` the set of twins #' 2. `twinrel` the twins relationships #' 3. `twinord` the order of the twins #' @seealso [auto_hint()] -get_twin_rel <- function(ped) { - if (is.null(rel(ped))) { +get_twin_rel <- function(obj) { + if (length(rel(obj)) == 0) { relation <- NULL } else { - relation <- cbind( - as.matrix(rel(ped)[, c("id1", "id2")]), - as.numeric(rel(ped)[, "code"]) - ) + relation <- as.data.frame(rel(obj))[, c("id1", "id2", "code")] + relation$code <- as.numeric(relation$code) } - n <- length(ped(ped)$id) - twinset <- rep(0, n) - twinord <- rep(1, n) + n <- length(obj) + twinset <- setNames(rep(0, n), id(ped(obj))) + twinord <- setNames(rep(1, n), id(ped(obj))) twinrel <- NULL - if (!is.null(relation) && any(relation[, 3] < 4)) { + + if (!is.null(relation) && any(relation$code < 4)) { ## Select only siblings relationships - temp <- (relation[, 3] < 4) - twinlist <- unique(c(relation[temp, seq_len(2)])) # list of twin id's + temp <- (relation$code < 4) + ## list of twin id's + twinlist <- unique(unlist(c(relation[temp, seq_len(2)]))) twinrel <- relation[temp, , drop = FALSE] for (i in 2:length(twinlist)) { # Now, for any pair of twins on a line of twinrel, give both @@ -256,18 +246,19 @@ get_twin_rel <- function(ped) { twinord[twinrel[, 2]], twinord[twinrel[, 1]] + 1 ) + twinrel[, 1] <- twinset[twinrel[, 1]] } } list(twinset = twinset, twinrel = twinrel, twinord = twinord) } -#' First initial guess for the alignment of a Pedigree +#' Initial hint for a Pedigree alignment #' #' @description #' Compute an initial guess for the alignment of a Pedigree #' #' @details -#' A Pedigree structure can contain a `hints` object which helps to +#' A Pedigree structure can contain a [Hints-class] object which helps to #' reorder the Pedigree (e.g. left-to-right order of children within family) so #' as to plot with minimal distortion. This routine is used to create an #' initial version of the hints. They can then be modified if desired. @@ -276,64 +267,68 @@ get_twin_rel <- function(ped) { #' within families, so that marriages are on the "edge" of a set children, #' closest to the spouse. For pedigrees that have only a single connection #' between two families this simple-minded approach works surprisingly well. -#' For more complex structures hand-tuning of the hints matrix may be required. +#' For more complex structures hand-tuning of the hints may be required. #' -#' The Pedigree in the example below is one where rearranging the founders -#' greatly decreases the number of extra connections. When `auto_hint()` is -#' called with a a vector of numbers as the second argument, the values for the -#' founder females are used to order the founder families left to right across -#' the plot. The values within a sibship are used as the preliminary order of +#' When `auto_hint()` is called with a a vector of numbers as the **hints** +#' argument, the values for the founder females are used to order the founder +#' families left to right across the plot. +#' The values within a sibship are used as the preliminary order of #' siblings within a family; this may be changed to move one of them to the #' edge so as to match up with a spouse. The actual values in the vector are #' not important, only their order. #' -#' @param reset If `TRUE`, then even if `ped` object has hints, reset -#' them to the initial values +#' @param reset If `TRUE`, then even if the Ped object has Hints, reset +#' them to the initial values. #' @inheritParams align #' -#' @return The **hints** list containing components `order` and `spouse` +#' @return The initial [Hints-class] object. #' -#' @seealso [align()], [best_hint()] +#' @seealso [align()], [best_hint()], [Hints-class] #' @examples #' data(sampleped) -#' ped <- Pedigree(sampleped[sampleped$family == 1, ]) +#' ped <- Pedigree(sampleped[sampleped$famid == 1, ]) #' auto_hint(ped) #' @export -auto_hint <- function( - ped, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE +#' @keywords internal, alignment, auto_hint +#' @usage NULL +setGeneric("auto_hint", signature = "obj", + function(obj, ...) standardGeneric("auto_hint") +) + +#' @rdname auto_hint +#' @export +setMethod("auto_hint", "Pedigree", function(obj, + hints = NULL, packed = TRUE, align = FALSE, reset = FALSE ) { ## full documentation now in vignette: align_code_details.Rmd ## References to those sections appear here as: ## Doc: auto_hint - if ((!is.null(hints(ped)$order) || - !is.null(hints(ped)$spouse) - ) && !reset - ) { - return(hints(ped)) + if (!is.null(hints) && is(hints, "Hints")) { + if ( + (length(horder(hints)) != 0 || length(spouse(hints)) != 0) && !reset + ) { + return(hints(obj)) + } } # nothing to do - if (length(unique(ped(ped)$family)) > 1) { + if (length(unique(famid(ped(obj)))) > 1) { stop("auto_hint only works on Pedigrees with a single family") } - n <- length(ped(ped)$id) - depth <- kindepth(ped, align_parents = TRUE) + n <- length(obj) + depth <- kindepth(obj, align_parents = TRUE) - ## Doc: init-auto_hint + ## Doc: init-auto_hint horder + horder <- setNames(rep(0, n), id(ped(obj))) if (!is.null(hints)) { - if (is.vector(hints)) { - hints <- list(order = hints) - } - if (is.matrix(hints)) { - hints <- list(spouse = hints) - } - if (is.null(hints$order)) { - horder <- integer(n) - } else { - horder <- hints$order + if (is.list(hints)) { + hints <- Hints(hints) + } else if (!is(hints, "Hints")) { + stop("hints must be a list or a Hints object") } + horder <- horder(hints) } else { - horder <- integer(n) + hints <- Hints(horder = horder) } for (i in unique(depth)) { @@ -344,7 +339,7 @@ auto_hint <- function( } } - twin_rel <- get_twin_rel(ped = ped) + twin_rel <- get_twin_rel(obj) twinset <- twin_rel$twinset twinord <- twin_rel$twinord twinrel <- twin_rel$twinrel @@ -360,32 +355,32 @@ auto_hint <- function( } # Then reset to integers - for (i in unique(ped$depth)) { - who <- (ped$depth == i) + for (i in unique(depth)) { + who <- (depth == i) horder[who] <- rank(horder[who]) # there should be no ties } } - if (!is.null(hints)) { - sptemp <- hints$spouse + if (nrow(spouse(hints)) > 0) { + sptemp <- spouse(hints) } else { sptemp <- NULL } - plist <- align(ped, + plist <- align(obj, packed = packed, align = align, - hints = list(order = horder, spouse = sptemp) + hints = Hints(horder = horder, spouse = sptemp) ) ## Doc: fixup-2 - ## Fix if duplicate individuales present + ## Fix if duplicate individuals present maxlev <- nrow(plist$nid) for (lev in seq_len(maxlev)) { # subjects on this level idlist <- plist$nid[lev, seq_len(plist$n[lev])] # duplicates to be dealt with - dpairs <- duporder(idlist, plist, lev, ped) + dpairs <- duporder(idlist, plist, lev, obj) if (nrow(dpairs) == 0) next for (i in seq_len(nrow(dpairs))) { anchor <- spouse <- rep(0, 2) @@ -404,7 +399,7 @@ auto_hint <- function( } } else { # spouse at this location connected to parents ? - spouse[j] <- findspouse(idpos, plist, lev, ped) + spouse[j] <- findspouse(idpos, plist, lev, obj) if (plist$fam[lev, spouse[j]] > 0) { # Yes they are anchor[j] <- 2 # spousal anchor sibs <- idlist[findsibs(spouse[j], plist, lev)] @@ -439,18 +434,39 @@ auto_hint <- function( warning("Unexpected result in auto_hint,", "please contact developer" ) - return(list(order = seq_len(n))) # punt + return(Hints(horder = seq_len(n))) # punt } else { + if (is.vector(temp)) { + temp <- data.frame( + idl = temp[1], idr = temp[2], anchor = temp[3] + ) + } else if (is.matrix(temp)) { + temp <- data.frame( + idl = temp[, 1], idr = temp[, 2], + anchor = temp[, 3] + ) + } sptemp <- rbind(sptemp, temp) } } # # Recompute, since this shifts things on levels below # - plist <- align(ped, + new_spouse <- data.frame( + idl = id(ped(obj))[sptemp$idl], + idr = id(ped(obj))[sptemp$idr], + anchor = anchor_to_factor(sptemp$anchor) + ) + plist <- align(obj, packed = packed, align = align, - hints = list(order = horder, spouse = sptemp) + hints = Hints(horder = horder, spouse = new_spouse) ) } - list(order = horder, spouse = sptemp) -} + + new_spouse <- data.frame( + idl = id(ped(obj))[sptemp$idl], + idr = id(ped(obj))[sptemp$idr], + anchor = anchor_to_factor(sptemp$anchor) + ) + Hints(horder = horder, spouse = new_spouse) +}) diff --git a/R/best_hint.R b/R/best_hint.R index 31b1ff23..a4632cc9 100644 --- a/R/best_hint.R +++ b/R/best_hint.R @@ -1,14 +1,43 @@ -#' Best hint for alignement +#' Generate all possible permutation +#' +#' Given a vector of length **n**, generate all possible permutations of +#' the numbers 1 to **n**. +#' This is a recursive routine, and is not very efficient. +#' +#' @param x A vector of length **n** +#' @return A matrix with **n** cols and **n!** rows +#' @keywords internal, auto_hint +permute <- function(x) { + n <- length(x) + if (n == 1) { + x + } else if (n == 2) { + rbind(x, x[c(2, 1)]) + } else if (n == 3) { + rbind(x[seq_len(3)], x[c(2, 1, 3)], x[c(3, 1, 2)]) + } else { + temp <- paste( + "cbind(x[", seq_len(n), "], permute(x[-", seq_len(n), "]))", + collapse = "," + ) + temp <- paste("rbind(", temp, ")") + eval(parse(text = temp)) + } +} + +#' Best hint for a Pedigree alignment #' #' @description -#' When computer time is cheap, use this routine to get a 'best' Pedigree. +#' When computer time is cheap, use this routine to get a *best* +#' Pedigree alignment. #' This routine will try all possible founder orders, and return the one -#' with the least 'stress'. +#' with the least **stress**. #' #' @details -#' The auto_hint routine will rearrange sibling order, but not founder order. -#' This calls auto_hint with every possible founder order, and finds that -#' plot with the least 'stress'. +#' The [auto_hint()] routine will rearrange sibling order, but not +#' founder order. +#' This calls [auto_hint()] with every possible founder order, and finds that +#' plot with the least "stress". #' The stress is computed as a weighted sum of three error measures: #' #' - nbArcs The number of duplicate individuals in the plot @@ -17,117 +46,125 @@ #' - lgParentsChilds The sum of the absolute values of the differences between #' the center of the children and the parents #' -#' \eqn{stress = -#' wt[1] \times nbArcs + -#' wt[2] \times lgArcs + -#' wt[3] \times lgParentsChilds +#' \deqn{stress = +#' wt[1] * nbArcs + +#' wt[2] * lgArcs + +#' wt[3] * lgParentsChilds #'} #' #' If during the search, a plot is found with a stress level less than #' **tolerance**, the search is terminated. #' -#' @param wt A vector of three weights for the three error measures -#' - The number of duplicate individuals in the plot -#' - The sum of the absolute values of the differences in the -#' positions of duplicate individuals -#' - The sum of the absolute values of the differences between -#' the center of the children and the parents +#' @param wt A vector of three weights for the three error measures. #' Default is `c(1000, 10, 1)`. -#' @param tolerance The maximum stress level to accept. Default is `0` +#' 1. The number of duplicate individuals in the plot +#' 2. The sum of the absolute values of the differences in the +#' positions of duplicate individuals +#' 3. The sum of the absolute values of the differences between +#' the center of the children and the parents. +#' +#' @param tolerance The maximum stress level to accept. +#' Default is `0` #' @inheritParams align #' -#' @return The best hint object out of all the permutations +#' @return The best Hints object out of all the permutations #' -#' @seealso [auto_hint()] +#' @seealso [auto_hint()], [align()] #' @export #' @examples #' data(sampleped) -#' ped <- Pedigree(sampleped[sampleped$family == 1,]) +#' ped <- Pedigree(sampleped[sampleped$famid == 1,]) #' best_hint(ped) #' @include auto_hint.R #' @include align.R -best_hint <- function(ped, wt = c(1000, 10, 1), tolerance = 0) { - # find founders married to founders the female of such pairs - # determines the plot order of founders - mom <- match(ped(ped)$momid, ped(ped)$id) - dad <- match(ped(ped)$dadid, ped(ped)$id) - # founders and marry-ins - founders <- ped(ped)$id[is.na(mom) & is.na(dad)] - fpair <- !(is.na(match(ped(ped)$momid, founders)) | - is.na(match(ped(ped)$dadid, founders)) - ) - # row num of founding moms - fmom <- unique(match(ped(ped)$momid[fpair], ped(ped)$id)) - - # This function generates the permutations one after the other - permute <- function(x) { - n <- length(x) - if (n == 3) { - rbind(x[seq_len(3)], x[c(2, 1, 3)], x[c(3, 1, 2)]) - } else { - temp <- paste( - "cbind(x[", seq_len(n), "], permute(x[-", seq_len(n), "]))", - collapse = "," - ) - temp <- paste("rbind(", temp, ")") - eval(parse(text = temp)) - } +#' @keywords alignment, auto_hint +#' @usage NULL +setGeneric( + "best_hint", signature = "obj", + function(obj, ...) { + standardGeneric("best_hint") } - pmat <- permute(seq_along(fmom)) - # Put the subsets into a random order For most Pedigrees, - # there are several permutations that will give a tolerance - # or near tolerance plot. - # This way we should hit one of them soon. - pmat <- pmat[order(runif(nrow(pmat))), ] +) + +#' @rdname best_hint +setMethod( + "best_hint", "Pedigree", + function(obj, wt = c(1000, 10, 1), tolerance = 0) { - n <- length(ped(ped)$id) - for (perm in seq_len(nrow(pmat))) { - hint <- cbind(seq_len(n), rep(0, n)) - hint[fmom, 1] <- pmat[perm, ] - # this fixes up marriages and such - newhint <- auto_hint(ped, hints = hint[, 1]) - plist <- align( - ped, packed = TRUE, align = TRUE, width = 8, hints = newhint + # find founders married to founders the female of such pairs + # determines the plot order of founders + momid <- momid(ped(obj)) + dadid <- dadid(ped(obj)) + id <- id(ped(obj)) + mom <- match(momid, id) + dad <- match(dadid, id) + # founders and marry-ins + founders <- id[is.na(mom) & is.na(dad)] + fpair <- !(is.na(match(momid, founders)) | + is.na(match(dadid, founders)) ) + # row num of founding moms + fmom <- unique(match(momid[fpair], id)) + pmat <- permute(seq_along(fmom)) + # Put the subsets into a random order For most Pedigrees, + # there are several permutations that will give a tolerance + # or near tolerance plot. + # This way we should hit one of them soon. + pmat <- pmat[order(runif(nrow(pmat))), ] + + n <- length(obj) + for (perm in seq_len(nrow(pmat))) { + hint <- cbind(seq_len(n), rep(0, n)) + hint[fmom, 1] <- pmat[perm, ] + # this fixes up marriages and such + newhint <- auto_hint( + obj, hints = Hints( + horder = setNames(hint[, 1], id(ped(obj))) + ), reset = TRUE + ) + plist <- align( + obj, packed = TRUE, align = TRUE, width = 8, hints = newhint + ) - # Compute the error measures - err <- rep(0, 3) - maxlev <- nrow(plist$nid) - for (lev in seq_len(maxlev)) { - idlist <- plist$nid[lev, seq_len(plist$n[lev])] - dups <- duplicated(idlist) - if (any(dups)) { - err[1] <- err[1] + sum(dups) - for (i in idlist[dups]) { - who <- (seq_along(idlist))[match( - idlist, i, nomatch = 0 - ) > 0] - err[2] <- err[2] + abs(diff(plist$pos[lev, who])) + # Compute the error measures + err <- rep(0, 3) + maxlev <- nrow(plist$nid) + for (lev in seq_len(maxlev)) { + idlist <- plist$nid[lev, seq_len(plist$n[lev])] + dups <- duplicated(idlist) + if (any(dups)) { + err[1] <- err[1] + sum(dups) + for (i in idlist[dups]) { + who <- (seq_along(idlist))[match( + idlist, i, nomatch = 0 + ) > 0] + err[2] <- err[2] + abs(diff(plist$pos[lev, who])) + } } - } - # get parent-child pulls - fam2 <- plist$fam[lev, ] - if (any(fam2 > 0)) { - # center of kids - centers <- tapply(plist$pos[lev, ], fam2, mean) - if (any(fam2 == 0)) { - centers <- centers[-1] + # get parent-child pulls + fam2 <- plist$fam[lev, ] + if (any(fam2 > 0)) { + # center of kids + centers <- tapply(plist$pos[lev, ], fam2, mean) + if (any(fam2 == 0)) { + centers <- centers[-1] + } + # parents + above <- plist$pos[lev - 1, sort(unique(fam2))] + 0.5 + err[3] <- err[3] + sum(abs(centers - above)) } - # parents - above <- plist$pos[lev - 1, sort(unique(fam2))] + 0.5 - err[3] <- err[3] + sum(abs(centers - above)) } - } - # best one so far? - total <- sum(err * wt) - if (perm == 1 || total < besttot) { - besttot <- total - besthint <- newhint + # best one so far? + total <- sum(err * wt) + if (perm == 1 || total < besttot) { + besttot <- total + besthint <- newhint + } + if (besttot <= tolerance) + break # we needn't do better than this! } - if (besttot <= tolerance) - break # we needn't do better than this! + besthint } - besthint -} +) diff --git a/R/bit_size.R b/R/bit_size.R index d1e9e784..c30a70b8 100644 --- a/R/bit_size.R +++ b/R/bit_size.R @@ -1,63 +1,72 @@ -#' Get Pedigree bit_size +#' Bit size of a Pedigree #' -#' @description -#' Calculate Pedigree bit_size, defined as : +#' Utility function used in the `shrink()` function +#' to calculate the bit size of a Pedigree. #' -#' \eqn{ +#' @details +#' The bit size of a Pedigree is defined as : +#' +#' \deqn{ #' 2 \times NbNonFounders - NbFounders #' } #' -#' @details -#' This is a utility function used in `shrink()` -#' to calculate the bit_size of a Pedigree. +#' Where `NbNonFounders` is the number of non founders in the Pedigree +#' (i.e. individuals with identified parents) and +#' `NbFounders` is the number of founders in the Pedigree +#' (i.e. individuals without identified parents). #' -#' @inheritParams kinship -#' @inheritParams is_parent -#' @param obj A Pedigree object or a vector of fathers identifierss +#' @param obj A Ped or Pedigree object or a vector of fathers identifiers +#' @inheritParams Ped #' #' @return A list with the following components: #' -#' - bit_size The bit_size of input Pedigree +#' - bit_size The bit size of the Pedigree #' - nFounder The number of founders in the Pedigree -#' - nNonFounder The number of nonfounders in the Pedigree +#' - nNonFounder The number of non founders in the Pedigree #' #' @seealso [shrink()] -#' @include pedigreeClass.R -#' @docType methods +#' @include AllClass.R #' @examples #' data(sampleped) #' ped <- Pedigree(sampleped) #' bit_size(ped) #' @export -#' @keywords internal +#' @keywords internal, shrink +#' @usage NULL setGeneric("bit_size", signature = "obj", function(obj, ...) standardGeneric("bit_size") ) -#' @docType methods -#' @aliases bit_size,character #' @rdname bit_size -setMethod("bit_size", "character", function(obj, momid, missid = "0") { - dadid <- obj - if (length(dadid) != length(momid)) { - stop("dadid and momid should have the same length") +setMethod("bit_size", + "character_OR_integer", + function(obj, momid, missid = NA_character_) { + dadid <- obj + if (length(dadid) != length(momid)) { + stop("dadid and momid should have the same length") + } + founder <- dadid %in% missid & momid %in% missid + ped_size <- length(dadid) + n_founder <- sum(founder) + n_non_founder <- ped_size - n_founder + bit_size <- 2 * n_non_founder - n_founder + list( + bit_size = bit_size, nFounder = n_founder, + nNonFounder = n_non_founder + ) } - founder <- dadid == missid & momid == missid - ped_size <- length(dadid) - n_founder <- sum(founder) - n_non_founder <- ped_size - n_founder - bit_size <- 2 * n_non_founder - n_founder - list( - bit_size = bit_size, nFounder = n_founder, - nNonFounder = n_non_founder - ) -}) +) -#' @docType methods -#' @aliases bit_size,Pedigree #' @rdname bit_size setMethod("bit_size", "Pedigree", - function(obj, missid = "0") { - bit_size(obj$ped$dadid, obj$ped$momid, missid) + function(obj) { + bit_size(ped(obj)) + } +) + +#' @rdname bit_size +setMethod("bit_size", "Ped", + function(obj) { + bit_size(dadid(obj), momid(obj), NA_character_) } ) \ No newline at end of file diff --git a/R/check_hints.R b/R/check_hints.R deleted file mode 100644 index 7b428f0a..00000000 --- a/R/check_hints.R +++ /dev/null @@ -1,60 +0,0 @@ -## Extracted from checks.Rnw - -#' Detect hints inconsistencies -#' -#' @description -#' This routine tries to detect inconsistencies in spousal hints. -#' -#' @details -#' These arise in `auto_hint()` with complex Pedigrees. -#' One can have ABA (subject A is on both the left and the right of B), -#' cycles, etc. -#' Users can introduce problems as well if they modify the hints. -#' -#' @inheritParams sex_to_factor -#' @inheritParams align -#' -#' @return Nothing, but will stop if there is a problem. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ht1 <- auto_hint(ped1) -#' check_hints(ht1, ped1$ped$sex) -#' -#' @seealso [auto_hint()], [best_hint()] -#' @keywords internal -#' @export -check_hints <- function(hints, sex) { - if (is.null(hints$order)) { - stop("Order component must be present in hints") - } - if (!is.numeric(hints$order)) { - stop("Order component must be numeric") - } - n <- length(sex) - if (length(hints$order) != n) { - stop("Length for order component should be equal to sex length") - } - sex <- as.character(sex_to_factor(sex)) - spouse <- hints$spouse - if (!is.null(spouse)) { - lspouse <- spouse[, 1] - rspouse <- spouse[, 2] - if (any(lspouse < 1 | lspouse > n | rspouse < 1 | rspouse > n)) { - stop("Invalid spouse value, should be between 1 and", n) - } - - temp1 <- (sex[lspouse] == "female" & sex[rspouse] == "male") - temp2 <- (sex[rspouse] == "female" & sex[lspouse] == "male") - if (!all(temp1 | temp2)) { - stop("A marriage is not male/female") - } - - hash <- n * pmax(lspouse, rspouse) + pmin(lspouse, rspouse) - # Turn off this check for now - is set off if someone is married to two - # siblings if (any(duplicated(hash))) stop('Duplicate marriage') - - # TODO Break any loops: A left of B, B left of C, C left of A. Not yet - # done - } -} diff --git a/R/data.R b/R/data.R index 63c843f1..3e32879c 100644 --- a/R/data.R +++ b/R/data.R @@ -2,7 +2,7 @@ #' #' @description Data from the Minnesota Breast Cancer Family Study. #' This contains extended pedigrees from 426 families, each identified by -#' a single proband in 1945-52, with follow up for incident breast cancer. +#' a single proband in 1945-1952, with follow up for incident breast cancer. #' #' @details The original study was conducted by Dr. Elving Anderson at the #' Dight Institute for Human Genetics at the University of Minnesota. @@ -11,11 +11,13 @@ #' siblings, offspring, aunts / uncles, and grandparents with the goal of #' understanding possible familial aspects of brest cancer. In 1991 the #' study was resurrected by Dr Tom Sellers. +#' #' Of the original 544 he excluded 58 prevalent cases, along with another 19 #' who had less than 2 living relatives at the time of Dr Anderson's survey. #' Of the remaining 462 families 10 had no living members, 23 could not be #' located and 8 refused, leaving 426 families on whom updated pedigrees #' were obtained. +#' #' This gave a study with 13351 males and 12699 females (5183 marry-ins). #' Primary questions were the relationship of early life exposures, breast #' density, and pharmacogenomics on incident breast cancer risk. @@ -29,34 +31,31 @@ #' @format A data frame with 28081 observations, one line per subject, on the #' following 14 variables. #' -#' - `id` subject identifier -#' - `proband` if 1, this subject is one of the original +#' - `id` : Subject identifier +#' - `proband` : If 1, this subject is one of the original #' 426 probands -#' - `fatherid` identifier of the father, if the father is part of +#' - `fatherid` : Identifier of the father, if the father is part of #' the data set; zero otherwise -#' - `motherid` identifier of the mother, if the mother is part of +#' - `motherid` : Identifier of the mother, if the mother is part of #' the data set; zero otherwise -#' - `famid` family identifier -#' - `endage` age at last follow-up or incident cancer -#' - `cancer` 1= breast cancer (females) or prostate cancer (males), -#' 0=censored -#' - `yob` year of birth -#' - `education` amount of education: 1-8 years, 9-12 years, high -#' school graduate, vocational education -#' beyond high school, some college but did not graduate, college graduate, -#' post-graduate education, refused to -#' answer on the questionnaire -#' - `marstat` marital status: married, living with someone in a +#' - `famid` : Family identifier +#' - `endage` : Age at last follow-up or incident cancer +#' - `cancer` : `1` = breast cancer (females) or prostate cancer (males), +#' `0` = censored +#' - `yob` : Year of birth +#' - `education` : Amount of education: 1-8 years, 9-12 years, high +#' school graduate, vocational education beyond high school, +#' some college but did not graduate, college graduate, +#' post-graduate education, refused to answer on the questionnaire +#' - `marstat` : Marital status: married, living with someone in a #' marriage-like relationship, separated #' or divorced, widowed, never married, refused to answer the questionaire -#' - `everpreg` ever pregnant: never pregnant at the time of -#' baseline survey, ever pregnant at the time -#' of baseline survey -#' - `parity` number of births -#' - `nbreast` number of breast biopsies -#' - `sex` M or F -#' - `bcpc` part of one of the families in the breast/prostate -#' cancer substudy: 0=no, 1=yes. +#' - `everpreg` : Ever pregnant at the time of baseline survey +#' - `parity` : Number of births +#' - `nbreast` : Number of breast biopsies +#' - `sex` : `M` or `F` +#' - `bcpc` : Part of one of the families in the breast / prostate +#' cancer substudy: `0` = no, `1` = yes. #' Note that subjects who were recruited to the overall study after the date of #' the BP substudy are coded as zero. #' @@ -89,15 +88,17 @@ #' cols_ren_ped = list( #' "indId" = "id", "fatherId" = "fatherid", #' "motherId" = "motherid", "gender" = "sex", "family" = "famid" -#' ) +#' ), missid = "0", col_aff = "cancer" #' ) -#' print(breastped) -#' #plot(breastped) #plot family 8, proband is solid, slash for cancers +#' summary(breastped) +#' scales(breastped) +#' #plot family 8, proband is solid, slash for cancers +#' #plot(breastped[famid(breastped) == "8"]) "minnbreast" -#' samplepedigree data +#' Sampleped data #' -#' @description Small sample pedigree data set. +#' @description Small sample pedigree data set for testing purposes. #' #' @details This is a small fictive pedigree data set, with 55 #' individuals in 2 families. @@ -105,15 +106,15 @@ #' #' @format A data frame with 55 observations, one line per subject, on the #' following 7 variables. -#' - `family` family identifier -#' - `id` subject identifier -#' - `dadid` identifier of the father, if the father is part of the +#' - `famid` : Family identifier +#' - `id` : Subject identifier +#' - `dadid` : Identifier of the father, if the father is part of the #' data set; zero otherwise -#' - `momid` identifier of the mother, if the mother is part of the +#' - `momid` : Identifier of the mother, if the mother is part of the #' data set; zero otherwise -#' - `sex` 1 for male or 2 for F -#' - `affected` 1 or 0 -#' - `available` 1 or 0 +#' - `sex` : `1` for male or `2` for female +#' - `affected` : `1` or `0` +#' - `avail` : `1` or `0` #' #' @usage #' data("sampleped") diff --git a/R/descendants.R b/R/descendants.R index a46d5008..8b362603 100644 --- a/R/descendants.R +++ b/R/descendants.R @@ -1,11 +1,13 @@ -#' Find all the descendants +#' Descendants of individuals #' #' @description #' Find all the descendants of a particular list of individuals -#' given a Pedigree +#' given a Pedigree object. #' +#' @param obj A Ped or Pedigree object or a vector of the +#' individuals identifiers. #' @inheritParams duporder -#' @inheritParams kinship +#' @inheritParams Ped #' #' @return #' Vector of all descendants of the individuals in idlist. @@ -15,24 +17,34 @@ #' data("sampleped") #' ped <- Pedigree(sampleped) #' descendants(c("1_101", "2_208"), ped) -#' @include pedigreeClass.R +#' @include AllClass.R #' @export #' @keywords internal -#' @docType methods +#' @usage NULL setGeneric("descendants", function(idlist, obj, ...) standardGeneric("descendants") ) #' @rdname descendants -#' @docType methods -#' @aliases descendants,character -setMethod("descendants", signature(idlist = "character", obj = "character"), +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "character_OR_integer"), function(idlist, obj, dadid, momid) { - id <- obj - child <- id[!(is.na(match(dadid, idlist)) & - is.na(match(momid, idlist)) + id <- as.character(obj) + idlist <- as.character(idlist) + + if (any(!idlist %in% id)) { + stop( + "All individuals in idlist should be in id ", + idlist[!idlist %in% id] ) - ] + } + + dadid <- as.character(dadid) + momid <- as.character(momid) + child <- id[!( + is.na(match(dadid, idlist)) & + is.na(match(momid, idlist)) + )] descend <- NULL while (length(child > 0)) { newchild <- id[!(is.na(match(dadid, child)) & @@ -47,10 +59,17 @@ setMethod("descendants", signature(idlist = "character", obj = "character"), ) #' @rdname descendants -#' @docType methods -#' @aliases descendants,Pedigree -setMethod("descendants", signature(idlist = "character", obj = "Pedigree"), +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "Pedigree"), + function(idlist, obj) { + descendants(idlist, ped(obj)) + } +) + +#' @rdname descendants +setMethod("descendants", + signature(idlist = "character_OR_integer", obj = "Ped"), function(idlist, obj) { - descendants(idlist, obj$ped$id, obj$ped$dadid, obj$ped$momid) + descendants(as.character(idlist), id(obj), dadid(obj), momid(obj)) } ) diff --git a/R/family_check.R b/R/family_check.R index ea0c6485..91afe72e 100644 --- a/R/family_check.R +++ b/R/family_check.R @@ -9,23 +9,22 @@ #' Given a family id vector, also compute the familial grouping from first #' principles using the parenting data, and compare the results. #' -#' The `make_famid` function is used to create a de novo family id from the +#' The [make_famid()] function is used to create a de novo family id from the #' parentage data, and this is compared to the family id given in the data. #' #' If there are any joins, then an attribute 'join' is attached. #' It will be a matrix with family as row labels, new-family-id as the columns, #' and the number of subjects as entries. #' -#' @inheritParams kinship -#' @param family A vector of family identifiers +#' @inheritParams Ped #' @param newfam The result of a call to `make_famid()`. If this has already #' been computed by the user, adding it as an argument shortens the running #' time somewhat. #' #' @return a data frame with one row for each unique family id in the -#' `family` argument or the one detected in the Pedigree object. +#' `famid` argument or the one detected in the Pedigree object. #' Components of the output are: -#' - `family` : The family id, as entered into the data set +#' - `famid` : The family id, as entered into the data set #' - `n` : Number of subjects in the family #' - `unrelated` : Number of them that appear to be unrelated to #' anyone else in the entire Pedigree. This is usually marry-ins with no @@ -53,45 +52,32 @@ #' rep(1, nrow(sampleped)))) #' fcheck.combined #' -#' # make person 120's father be her son. -#' sampleped[20, 3] <- 131 -#' fcheck1.bad <- try( -#' { -#' with(sampleped, family_check(id, father, mother, family)) -#' }, -#' silent = FALSE -#' ) -#' -#' ## fcheck1.bad is a try-error -#' -#' @seealso [make_famid()], [kinship()] -#' @include pedigreeClass.R +#' @seealso [make_famid()] +#' @include AllClass.R #' @keywords internal -#' @docType methods #' @export +#' @usage NULL setGeneric("family_check", signature = "obj", function(obj, ...) standardGeneric("family_check") ) #' @rdname family_check -#' @include make_famid.R -#' @aliases family_check,character #' @export -setMethod("family_check", "character", - function(obj, dadid, momid, family, newfam) { +setMethod("family_check", "character_OR_integer", + function(obj, dadid, momid, famid, newfam) { id <- obj - if (is.numeric(family) && any(is.na(family))) { + if (is.numeric(famid) && any(is.na(famid))) { stop("Family id of missing not allowed") } - nfam <- length(unique(family)) + nfam <- length(unique(famid)) if (missing(newfam)) { newfam <- make_famid(id, dadid, momid) - } else if (length(newfam) != length(family)) { + } else if (length(newfam) != length(famid)) { stop("Invalid length for newfam") } - xtab <- table(family, newfam) + xtab <- table(famid, newfam) if (any(newfam == 0)) { unrelated <- xtab[, 1] xtab <- xtab[, -1, drop = FALSE] @@ -105,8 +91,8 @@ setMethod("family_check", "character", temp <- apply((xtab > 0) * outer(rep(1, nfam), joins - 1), 1, sum) - out <- data.frame(family = dimnames(xtab)[[1]], - n = as.vector(table(family)), unrelated = as.vector(unrelated), + out <- data.frame(famid = dimnames(xtab)[[1]], + n = as.vector(table(famid)), unrelated = as.vector(unrelated), split = as.vector(splits), join = temp, row.names = seq_len(nfam) ) if (any(joins > 1)) { @@ -121,10 +107,17 @@ setMethod("family_check", "character", ) #' @rdname family_check -#' @docType methods -#' @aliases family_check,Pedigree +#' @export setMethod("family_check", "Pedigree", function(obj) { - family_check(obj$ped$id, obj$ped$dadid, obj$ped$momid, obj$ped$family) + family_check(ped(obj)) + } +) + +#' @rdname family_check +#' @export +setMethod("family_check", "Ped", + function(obj) { + family_check(id(obj), dadid(obj), momid(obj), famid(obj)) } ) diff --git a/R/find_avail_affected.R b/R/find_avail_affected.R index 55c209b2..1b88972e 100644 --- a/R/find_avail_affected.R +++ b/R/find_avail_affected.R @@ -1,26 +1,27 @@ -# Automatically generated from all.nw using noweb - -#' Find a single person to trim from a Pedigree whose is available +#' Find single affected and available individual from a Pedigree #' #' @description #' Finds one subject from among available non-parents with indicated affection #' status. #' #' @details -#' When used within pedigree.shrink, this function is called with the first +#' When used within [shrink()], this function is called with the first #' affected indicator, if the affected item in the Pedigree is a matrix of #' multiple affected indicators. #' +#' If **avail** or **affected** is null, then the function will use the +#' corresponding Ped accessor. +#' #' @param affstatus Affection status to search for. -#' @inheritParams align -#' @inheritParams is_informative +#' @param obj A Ped or Pedigree object. +#' @inheritParams Ped #' #' @return A list is returned with the following components -#' - ped The new Pedigree object +#' - ped The new Ped object #' - newAvail Vector of availability status of trimmed individuals #' - idTrimmed Vector of IDs of trimmed individuals -#' - isTrimmed logical value indicating whether Pedigree has been trimmed -#' - bit_size Bit size of the trimmed Pedigree +#' - isTrimmed logical value indicating whether Ped object has been trimmed +#' - bit_size Bit size of the trimmed Ped #' #' @examples #' data(sampleped) @@ -30,61 +31,83 @@ #' @include bit_size.R #' @include utils.R #' @include find_unavailable.R +#' @keywords internal, shrink #' @export -find_avail_affected <- function(ped, avail = ped(ped)$avail, affstatus = NA) { - ped_df <- ped(ped) - ped_df$avail <- avail - not_parent <- !is_parent(ped_df$id, ped_df$dadid, ped_df$momid) +#' @usage NULL +setGeneric("find_avail_affected", signature = "obj", + function(obj, ...) standardGeneric("find_avail_affected") +) - if (is.na(affstatus)) { - possibl_trim <- ped_df$id[not_parent & avail == 1 & - is.na(ped_df$affected) - ] - } else { - possibl_trim <- ped_df$id[not_parent & avail == 1 & - ped_df$affected == affstatus - ] - } - n_trim <- length(possibl_trim) +#' @rdname find_avail_affected +#' @export +setMethod("find_avail_affected", "Ped", + function(obj, avail = NULL, affected = NULL, affstatus = NA) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + affected <- affected(obj) + } + not_parent <- !is_parent(id(obj), dadid(obj), momid(obj)) - if (n_trim == 0) { - return(list( - ped = ped, id_trimmed = NA, is_trimmed = FALSE, - bit_size = bit_size(ped)$bit_size - )) - } + if (is.na(affstatus)) { + possibl_trim <- id(obj)[not_parent & avail == 1 & + is.na(affected) + ] + } else { + possibl_trim <- id(obj)[not_parent & avail == 1 & + affected == affstatus + ] + } + n_trim <- length(possibl_trim) - trim_dat <- NULL + if (n_trim == 0) { + return(list( + ped = obj, id_trimmed = NA, is_trimmed = FALSE, + bit_size = bit_size(obj)$bit_size + )) + } - for (id_trim in possibl_trim) { - tmp_avail <- avail - tmp_avail[ped_df$id == id_trim] <- FALSE - id_rm <- find_unavailable(ped, tmp_avail) - new_ped <- trim(ped, id_rm) - trim_dat <- rbind(trim_dat, c(id = id_trim, - bit_size = bit_size(new_ped)$bit_size - )) - } + trim_dat <- NULL - bits <- trim_dat[, 2] + for (id_trim in possibl_trim) { + tmp_avail <- avail + tmp_avail[id(obj) == id_trim] <- FALSE + id_rm <- find_unavailable(obj, tmp_avail) + new_ped <- subset(obj, id_rm, keep = FALSE, del_parents = TRUE) + trim_dat <- rbind(trim_dat, c(id = id_trim, + bit_size = bit_size(new_ped)$bit_size + )) + } - # trim by subject with min bits. This trims fewer subject than using - # max(bits). - id_trim <- trim_dat[, 1][bits == min(bits)] + bits <- trim_dat[, 2] - ## break ties by random choice - if (length(id_trim) > 1) { - rord <- order(runif(length(id_trim))) - id_trim <- id_trim[rord][1] - } + # trim by subject with min bits. This trims fewer subject than using + # max(bits). + id_trim <- trim_dat[, 1][bits == min(bits)] + + ## break ties by random choice + if (length(id_trim) > 1) { + rord <- order(runif(length(id_trim))) + id_trim <- id_trim[rord][1] + } + + avail[id(obj) == id_trim] <- FALSE + id_rm <- find_unavailable(obj, avail) + new_ped <- subset(obj, id_rm, keep = FALSE, del_parents = TRUE) + new_size <- bit_size(new_ped)$bit_size + avail <- avail[!(id(obj) %in% id_rm)] - avail[ped_df$id == id_trim] <- FALSE - id_rm <- find_unavailable(ped, avail) - new_ped <- trim(ped, id_rm) - new_size <- bit_size(new_ped)$bit_size - avail <- avail[!(ped_df$id %in% id_rm)] + list(ped = new_ped, new_avail = avail, id_trimmed = id_trim, + is_trimmed = TRUE, bit_size = new_size + ) + } +) - list(ped = new_ped, new_avail = avail, id_trimmed = id_trim, - is_trimmed = TRUE, bit_size = new_size - ) -} +#' @rdname find_avail_affected +#' @export +setMethod("find_avail_affected", "Pedigree", + function(obj, avail = NULL, affected = NULL, affstatus = NA) { + find_avail_affected(ped(obj), avail, affected, affstatus) + } +) diff --git a/R/find_avail_noninform.R b/R/find_avail_noninform.R index bff4c84d..35d5907a 100644 --- a/R/find_avail_noninform.R +++ b/R/find_avail_noninform.R @@ -1,18 +1,17 @@ -# Automatically generated from all.nw using noweb - #' Find uninformative but available subject #' -#' @details -#' Find subjects from a Pedigree who are available and uninformative +#' Finds subjects from among available non-parents with all affection +#' equal to `0`. #' #' @details #' Identify subjects to remove from a Pedigree who are available but -#' non-informative. This is the second step to remove subjects in -#' pedigree.shrink if the Pedigree does not meet the desired bit size. +#' non-informative (unaffected). This is the second step to remove subjects in +#' [shrink()] if the Pedigree does not meet the desired bit size. +#' +#' If **avail** or **affected** is null, then the function will use the +#' corresponding Ped accessor. #' -#' @inheritParams align -#' @inheritParams is_informative -#' @inheritParams is_parent +#' @inheritParams find_avail_affected #' #' @examples #' data(sampleped) @@ -23,27 +22,50 @@ #' informativeness. #' #' @seealso [shrink()] +#' @keywords internal, shrink +#' @export +#' @usage NULL +setGeneric("find_avail_noninform", signature = "obj", + function(obj, ...) standardGeneric("find_avail_noninform") +) + +#' @rdname find_avail_noninform #' @export -find_avail_noninform <- function(ped, avail = ped(ped)$avail, missid = "0") { - ## trim persons who are available but not informative b/c not parent by - ## setting their availability to FALSE, then call find_unavailable() JPS - ## 3/10/14 add strings check in case of char ids - ped_df <- ped(ped) - ped_df$avail <- avail +setMethod("find_avail_noninform", "Ped", + function(obj, avail = NULL, affected = NULL) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + ## TODO affected() may need to give back data.frame + affected <- affected(obj) + } + check_parent <- is_parent(id(obj), dadid(obj), momid(obj)) - check_parent <- is_parent(ped_df$id, ped_df$dadid, ped_df$momid) - for (i in seq_along(nrow(ped_df))) { - if (check_parent[i] == FALSE && avail[i] == 1 && - all(ped_df$affected[i] == 0, na.rm = TRUE)) { - ## could use ped$affected[i,] if keep matrix - fa <- ped_df$dadid[i] - mo <- ped_df$momid[i] - if (avail[ped_df$id == fa] && avail[ped_df$id == mo] || - fa == missid || mo == missid) { - ped_df$avail[i] <- FALSE + # For each individual if not a parent and unaffected + # Set its avail to FALSE if both parent avail + # or if one is absent + for (i in seq_along(length(obj))) { + if (check_parent[i] == FALSE && avail[i] == 1 && + all(affected[i] == 0, na.rm = TRUE)) { + ## could use ped$affected[i,] if keep matrix + fa <- dadid(obj)[i] + mo <- momid(obj)[i] + if (avail[id(obj) == fa] && avail[id(obj) == mo] || + is.na(fa) || is.na(mo)) { + avail[i] <- FALSE + } } } + + find_unavailable(obj, avail) } +) - find_unavailable(ped, ped_df$avail) -} +#' @rdname find_avail_noninform +#' @export +setMethod("find_avail_noninform", "Pedigree", + function(obj, avail = NULL, affected = NULL) { + find_avail_noninform(ped(obj), avail, affected) + } +) diff --git a/R/find_unavailable.R b/R/find_unavailable.R index a8f55101..d2785259 100644 --- a/R/find_unavailable.R +++ b/R/find_unavailable.R @@ -1,13 +1,14 @@ -# Automatically generated from all.nw using noweb - #' Find unavailable subjects in a Pedigree #' #' @description -#' Find the ID of subjects in a Pedigree iteratively, as anyone who is not -#' available and does not have an available descendant by successively removing -#' unavailable terminal nodes. +#' Find the identifiers of subjects in a Pedigree iteratively, +#' as anyone who is not available and does not have an available +#' descendant by successively removing unavailable terminal nodes. #' #' @details +#' If **avail** is null, then the function will use the +#' corresponding Ped accessor. +#' #' Originally written as pedTrim by Steve Iturria, modified by Dan Schaid 2007, #' and now split into the two separate functions: #' `find_unavailable()`, and `trim()` to do the tasks separately. @@ -19,77 +20,99 @@ #' If the subject ids are character, make sure none of the characters in the #' ids is a colon (":"), which is a special character #' used to concatenate and split subjects within the utility. +#' The `trim()` functions is now replaced by the `subset()` function. #' #' @inheritParams find_avail_affected #' #' @return Returns a vector of subject ids for who can be #' removed. #' -#' @section Side Effects: relation matrix from `trim` is trimmed of any +#' @section Side Effects: +#' Relation matrix from subsetting is trimmed of any #' special relations that include the subjects to trim. #' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) #' find_unavailable(ped1) #' #' @seealso [shrink()] +#' @keywords internal, shrink #' @include utils.R #' @export -find_unavailable <- function(ped, avail = ped(ped)$avail) { - ## find id within Pedigree anyone who is not available and - ## does not have an available descendant - - ## avail = TRUE/1 if available, FALSE/0 if not +#' @usage NULL +setGeneric("find_unavailable", signature = "obj", + function(obj, ...) standardGeneric("find_unavailable") +) - ## will do this iteratively by successively removing unavailable - ## terminal nodes - ## Steve Iturria, PhD, modified by Dan Schaid - df <- ped(ped) - df$avail <- avail - cont <- TRUE # flag for whether to keep iterating - - df$is_terminal <- (is_parent(df$id, df$dadid, df$momid) == FALSE) - ## JPS 3/10/14 add strings check in case of char ids - while (cont) { - id_to_remove <- df$id[df$is_terminal & df$avail == 0] - if (length(id_to_remove) > 0) { - idx_to_remove <- match(id_to_remove, df$id) - df <- df[-idx_to_remove, ] - df$is_terminal <- - (is_parent(df$id, df$dadid, df$momid) == FALSE) - } else { - cont <- FALSE +#' @rdname find_unavailable +#' @export +setMethod("find_unavailable", "Ped", + function(obj, avail = NULL) { + if (is.null(avail)) { + avail <- avail(obj) } - } + ## find id within Pedigree anyone who is not available and + ## does not have an available descendant + + ## avail = TRUE/1 if available, FALSE/0 if not + + ## will do this iteratively by successively removing unavailable + ## terminal nodes + ## Steve Iturria, PhD, modified by Dan Schaid + + cont <- TRUE # flag for whether to keep iterating + + is_terminal <- (is_parent(obj) == FALSE) + ## JPS 3/10/14 add strings check in case of char ids + obji <- obj + avail(obji) <- avail + while (cont) { + id_to_remove <- id(obji)[is_terminal & avail(obji) == 0] + if (length(id_to_remove) > 0) { + obji <- subset(obji, id_to_remove, keep = FALSE) + is_terminal <- (is_parent(obji) == FALSE) + } else { + cont <- FALSE + } + } + ## A few more clean up steps - ## A few more clean up steps + ## remove unavailable founders + tmp_ped <- exclude_unavail_founders( + id(obji), dadid(obji), momid(obji), avail(obji) + ) - ## remove unavailable founders - tmp_ped <- exclude_unavail_founders( - df$id, - df$dadid, df$momid, df$avail - ) + ## remove stray marry-ins + tmp_ped <- exclude_stray_marryin( + tmp_ped$id, tmp_ped$dadid, tmp_ped$momid + ) - ## remove stray marry-ins - tmp_ped <- exclude_stray_marryin(tmp_ped$id, tmp_ped$dadid, tmp_ped$momid) + id(obj)[is.na(match(id(obj), tmp_ped$id))] + } +) - ped(ped)$id[is.na(match(ped(ped)$id, tmp_ped$id))] -} +#' @rdname find_unavailable +#' @export +setMethod("find_unavailable", "Pedigree", + function(obj, avail = NULL) { + find_unavailable(ped(obj), avail) + } +) #' Exclude stray marry-ins #' #' @description -#' Exclude from a Pedigree any founders who are not parents. +#' Exclude any founders who are not parents. #' -#' @param id Vector of subject identifiers -#' @inheritParams descendants +#' @inheritParams exclude_unavail_founders #' #' @return #' Returns a data frame of subject identifiers and their parents. #' The data frame is trimmed of any founders who are not parents. #' -#' @keywords internal +#' @keywords internal, shrink +#' @seealso [shrink()] exclude_stray_marryin <- function(id, dadid, momid) { # get rid of founders who are not parents (stray available marryins # who are isolated after trimming their unavailable offspring) @@ -108,14 +131,10 @@ exclude_stray_marryin <- function(id, dadid, momid) { #' Exclude unavailable founders #' #' @description -#' Exclude from a Pedigree any unavailable founders. +#' Exclude any unavailable founders. #' -#' @param id Vector of subject identifiers -#' @inheritParams descendants -#' @inheritParams find_avail_affected -#' @param missid Character defining the missing ids -#' -#' @keywords internal +#' @param id A character vector with the identifiers of each individuals +#' @inheritParams Ped #' #' @return #' Returns a list with the following components: @@ -124,11 +143,16 @@ exclude_stray_marryin <- function(id, dadid, momid) { #' - id Vector of subject identifiers #' - dadid Vector of father identifiers #' - momid Vector of mother identifiers -exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { +#' +#' @keywords internal, shrink +#' @seealso [shrink()] +exclude_unavail_founders <- function( + id, dadid, momid, avail, missid = NA_character_ +) { n_old <- length(id) ## zed = TRUE if both parents are present - zed <- dadid != missid & momid != missid + zed <- (!dadid %in% missid) & (!momid %in% missid) ## concat ids to represent marriages. ## Bug if there is ":" in char subj ids marriage <- paste(dadid[zed], momid[zed], sep = ":") @@ -182,8 +206,10 @@ exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { mom <- mom[zed] for (i in seq_len(n)) { ## check if mom and dad are founders (where their parents = 0) - dad_f <- (dadid[id == dad[i]] == 0) & (momid[id == dad[i]] == 0) - mom_f <- (dadid[id == mom[i]] == 0) & (momid[id == mom[i]] == 0) + dad_f <- (dadid[id == dad[i]] %in% missid) & + (momid[id == dad[i]] %in% missid) + mom_f <- (dadid[id == mom[i]] %in% missid) & + (momid[id == mom[i]] %in% missid) both_f <- dad_f & mom_f ## check if mom and dad have avail @@ -191,7 +217,7 @@ exclude_unavail_founders <- function(id, dadid, momid, avail, missid = "0") { mom_avail <- avail[id == mom[i]] ## define both_unavail = T if both mom & dad not avail - both_unavail <- (dad_avail == FALSE & mom_avail == FALSE) + both_unavail <- (dad_avail == 0 & mom_avail == 0) if (both_f && both_unavail) { ## remove mom and dad from ped, and zero-out parent diff --git a/R/fix_parents.R b/R/fix_parents.R index 32cd2bd5..d2bf0050 100644 --- a/R/fix_parents.R +++ b/R/fix_parents.R @@ -1,10 +1,10 @@ #' @importFrom stringr str_split_i NULL -#' Fix details on the parents for children of the Pedigree +#' Fix parents relationship and gender #' #' @description -#' Fix the sex of parents, add parents that are missing from the Pedigree +#' Fix the sex of parents, add parents that are missing from the data. #' Can be used with a dataframe or a vector of the #' different individuals informations. #' @@ -12,24 +12,21 @@ NULL #' First look to add parents whose ids are given in momid/dadid. Second, fix #' sex of parents. Last look to add second parent for children for whom only #' one parent id is given. -#' If a family vector is given the family id will be added to the ids of all -#' individuals (id, dadid, momid) separated by an underscore befor proceeding. +#' If a **famid** vector is given the family id will be added to the ids of all +#' individuals (`id`, `dadid`, `momid`) +#' separated by an underscore before proceeding. #' #' ## Special case for dataframe -#' Check for presence of both parents id in the `id` field. -#' If not both presence behaviour depend of `delete` parameter -#' - If TRUE then use fix_parents function and merge back the other fields +#' Check for presence of both parents id in the **id** field. +#' If not both presence behaviour depend of **delete** parameter +#' - If `TRUE` then use fix_parents function and merge back the other fields #' in the dataframe then set availability to O for non available parents. -#' - If FALSE then delete the id of missing parents +#' - If `FALSE` then delete the id of missing parents #' -#' @param family Optional family identification set it to NULL to invalidate. -#' If used it will modify the ids of the individuals by pasting it with an _. -#' @inheritParams kinship -#' @inheritParams is_parent -#' @inheritParams sex_to_factor +#' @inheritParams Ped #' @param obj A data.frame or a vector of the individuals identifiers. If a #' dataframe is given it must contain the columns `id`, `dadid`, -#' `momid`, `sex` and `family`. Family is optional. +#' `momid`, `sex` and `famid` (optional). #' #' @return A data.frame with id, dadid, momid, sex as columns with the #' relationships fixed. @@ -50,22 +47,21 @@ NULL #' ) #' test1newmom <- with(test1char, fix_parents(id, father, mother, #' sex, -#' missid = '0' +#' missid = NA_character_ #' )) -#' newped <- Pedigree(test1newmom) -#' as.data.frame(newped) +#' Pedigree(test1newmom) #' #' @author Jason Sinnwell #' @export +#' @usage NULL setGeneric("fix_parents", signature = "obj", function(obj, ...) standardGeneric("fix_parents") ) -#' @export #' @rdname fix_parents -#' @aliases fix_parents,character +#' @export setMethod("fix_parents", "character", function( - obj, dadid, momid, sex, family = NULL, missid = "0" + obj, dadid, momid, sex, famid = NULL, missid = NA_character_ ) { ## fix sex of parents add parents that are missing n <- length(obj) @@ -79,7 +75,8 @@ setMethod("fix_parents", "character", function( if (length(sex) != n) { stop("Mismatched lengths, id and sex") } - if (length(family) != n & length(family) > 0) { + + if (length(famid) != n & length(famid) > 0) { stop("Mismatched lengths, id and sex") } @@ -96,20 +93,23 @@ setMethod("fix_parents", "character", function( } else if (mean(sex == 3) > 0.25) { warning("More than 25% of the gender values are 'unknown'") } - if (any(is.na(id) | id == missid)) { + if (any(is.na(id) | id %in% missid)) { stop("Missing value for the id variable") } - id <- prefix_famid(family, id, missid) - dadid <- prefix_famid(family, dadid, missid) - momid <- prefix_famid(family, momid, missid) + if (!is.null(famid)) { + id <- upd_famid_id(id, famid, missid) + dadid <- upd_famid_id(dadid, famid, missid) + momid <- upd_famid_id(momid, famid, missid) + } + addids <- paste("addin", seq_along(id), sep = "-") if (length(grep("^ *$", id)) > 0) { stop("A blank or empty string is not allowed as the id variable") } - nofather <- (is.na(dadid) | dadid == missid) - nomother <- (is.na(momid) | momid == missid) + nofather <- (is.na(dadid) | dadid %in% missid) + nomother <- (is.na(momid) | momid %in% missid) if (any(duplicated(id))) { duplist <- id[duplicated(id)] msg_nb <- min(length(duplist), 6) @@ -117,20 +117,21 @@ setMethod("fix_parents", "character", function( } findex <- match(dadid, id, nomatch = 0) mindex <- match(momid, id, nomatch = 0) + ## dadid given, not found id, so add if (any(findex == 0 & !nofather)) { dadnotfound <- unique(dadid[which(findex == 0 & !nofather)]) id <- c(id, dadnotfound) sex <- c(sex, rep(1, length(dadnotfound))) - dadid <- c(dadid, rep(0, length(dadnotfound))) - momid <- c(momid, rep(0, length(dadnotfound))) + dadid <- c(dadid, rep(missid, length(dadnotfound))) + momid <- c(momid, rep(missid, length(dadnotfound))) } if (any(mindex == 0 & !nomother)) { momnotfound <- unique(momid[which(mindex == 0 & !nomother)]) id <- c(id, momnotfound) sex <- c(sex, rep(2, length(momnotfound))) - dadid <- c(dadid, rep(0, length(momnotfound))) - momid <- c(momid, rep(0, length(momnotfound))) + dadid <- c(dadid, rep(missid, length(momnotfound))) + momid <- c(momid, rep(missid, length(momnotfound))) } if (any(sex[mindex] != 1)) { dadnotmale <- unique((id[findex])[sex[findex] != 1]) @@ -150,8 +151,8 @@ setMethod("fix_parents", "character", function( dadid[nodad_idx] <- addids[seq_along(nodad_idx)] id <- c(id, addids[seq_along(nodad_idx)]) sex <- c(sex, rep(1, length(nodad_idx))) - dadid <- c(dadid, rep(0, length(nodad_idx))) - momid <- c(momid, rep(0, length(nodad_idx))) + dadid <- c(dadid, rep(missid, length(nodad_idx))) + momid <- c(momid, rep(missid, length(nodad_idx))) } ## children with dad in ped, mom missing addids <- addids[!(addids %in% id)] @@ -160,16 +161,16 @@ setMethod("fix_parents", "character", function( momid[nodad_idx] <- addids[seq_along(nodad_idx)] id <- c(id, addids[seq_along(nodad_idx)]) sex <- c(sex, rep(2, length(nodad_idx))) - dadid <- c(dadid, rep(0, length(nodad_idx))) - momid <- c(momid, rep(0, length(nodad_idx))) + dadid <- c(dadid, rep(missid, length(nodad_idx))) + momid <- c(momid, rep(missid, length(nodad_idx))) } - if (is.null(family)) { + if (is.null(famid)) { data.frame(id = id, momid = momid, dadid = dadid, sex = sex) } else { - family <- stringr::str_split_i(id, "_", i = 1) + famid <- make_famid(id, dadid, momid) data.frame( id = id, momid = momid, dadid = dadid, - sex = sex, family = family + sex = sex, famid = famid ) } }) @@ -179,14 +180,13 @@ setMethod("fix_parents", "character", function( #' - `FALSE` : be deleted #' @param filter Filtering column containing `0` or `1` for the #' rows to kept before proceeding. -#' -#' @export #' @rdname fix_parents +#' @export setMethod("fix_parents", "data.frame", function( - obj, delete = FALSE, filter = NULL, missid = "0" + obj, delete = FALSE, filter = NULL, missid = NA_character_ ) { cols_needed <- c("id", "dadid", "momid", "sex", filter) - df <- check_columns(obj, cols_needed, NULL, "family", others_cols = TRUE, + df <- check_columns(obj, cols_needed, NULL, "famid", others_cols = TRUE, cols_to_use_init = TRUE ) df_old <- df @@ -202,18 +202,18 @@ setMethod("fix_parents", "data.frame", function( # One of the parents doesn't not have a line in id dad_present <- match(df$dadid, df$id, nomatch = missid) mom_present <- match(df$momid, df$id, nomatch = missid) - df[dad_present == missid | - mom_present == missid, c("momid", "dadid") + df[dad_present %in% missid | + mom_present %in% missid, c("momid", "dadid") ] <- missid } df_fix <- fix_parents( df$id, df$dadid, df$momid, - df$sex, missid = missid, family = df$family + df$sex, missid = missid, famid = df$famid ) col_used <- which(names(df_old) == "momid" | names(df_old) == "dadid" | names(df_old) == "sex" | - names(df_old) == "family" + names(df_old) == "famid" ) df <- merge(df_old[, -col_used], df_fix, by = "id", all.y = TRUE, all.x = FALSE diff --git a/R/generate_aff_inds.R b/R/generate_aff_inds.R index e9dd61dc..4ca5c67f 100644 --- a/R/generate_aff_inds.R +++ b/R/generate_aff_inds.R @@ -1,11 +1,25 @@ #' @importFrom plyr revalue NULL -#' Process the information for affection +#' Process the affection informations #' -#' @details Perform transformation uppon a column given as the one -#' containing affection status to get an `affected` column usable for -#' the rest of the script +#' @description Perform transformation uppon a vector given as the one +#' containing the affection status to obtain an `affected` binary state. +#' +#' @details This function helps to configure a binary state from a character or +#' numeric variable. +#' +#' ## If the variable is a `character` or a `factor`: +#' +#' In this case the affected state will depend on the modality provided as +#' an affected status. All individuals with a value corresponding to one of the +#' element in the vector **mods_aff** will be considered as affected. +#' +#' ## If the variable is `numeric`: +#' +#' In this case the affected state will be `TRUE` if the value of the individual +#' is above the **threshold** if **sup_thres_aff** is `TRUE` and `FALSE` +#' otherwise. #' #' @param values Vector containing the values of the column to process. #' @param mods_aff Vector of modality to consider as affected in the case @@ -28,6 +42,8 @@ NULL #' @examples #' generate_aff_inds(c(1, 2, 3, 4, 5), threshold = 3, sup_thres_aff = TRUE) #' generate_aff_inds(c("A", "B", "C", "A", "V", "B"), mods_aff = c("A", "B")) +#' @author Louis Le Nézet +#' @keywords generate_scales #' @export generate_aff_inds <- function(values, mods_aff = NULL, threshold = NULL, sup_thres_aff = NULL @@ -58,7 +74,7 @@ generate_aff_inds <- function(values, mods_aff = NULL, } else { # Separate for factors by levels mods_non_aff <- levels(droplevels(as.factor( - values[!values %in% mods_aff] + values[!values %in% as.character(mods_aff)] ))) if (length(mods_non_aff) == 0) { mods_non_aff <- "None" @@ -79,7 +95,6 @@ generate_aff_inds <- function(values, mods_aff = NULL, names(aff_to_use) <- c(aff_lab, healthy_lab) labels <- revalue(as.character(mods), labels_to_use, warn_missing = FALSE) - affected <- revalue(labels, aff_to_use, warn_missing = FALSE) + affected <- as.logical(revalue(labels, aff_to_use, warn_missing = FALSE)) as.data.frame(list(mods = mods, labels = labels, affected = affected)) } -TRUE diff --git a/R/generate_colors.R b/R/generate_colors.R index fde099c1..ae740034 100644 --- a/R/generate_colors.R +++ b/R/generate_colors.R @@ -1,13 +1,33 @@ #' @importFrom plyr revalue NULL -#' Process the colors based on affection +#' Process the filling colors based on affection #' -#' @details Perform transformation uppon a column given as the one +#' @description Perform transformation uppon a column given as the one #' containing affection status to compute the filling color. #' +#' @details The colors will be set using the +#' [grDevices::colorRampPalette()] function +#' with the colors given as parameters. +#' +#' The colors will be set as follow: +#' +#' - If **keep_full_scale** is `FALSE`: +#' Then the affected individuals will get the first color of the +#' **colors_aff** vector and the unaffected individuals will get the +#' first color of the **colors_unaff** vector. +#' - If **keep_full_scale** is `TRUE`: +#' - If **values** isn't numeric: +#' Each levels of the affected **values** vector will get it's own color from +#' the **colors_aff** vector using the [grDevices::colorRampPalette()] and +#' the same will be done for the unaffected individuals using the +#' **colors_unaff**. +#' - If **values** is numeric: +#' The mean of the affected individuals will be compared to the mean of the +#' unaffected individuals and the colors will be set up such as the color +#' gradient follow the direction of the affection. +#' #' @param values The vector containing the values to process as affection. -#' @param affected The vector containing the affection status TRUE/FALSE. #' @param labels The vector containing the labels to use for the affection. #' @param keep_full_scale Boolean defining if the affection values need to #' be set as a scale. If `values` is numeric the filling scale will be @@ -20,15 +40,20 @@ NULL #' affected individuls. #' @param colors_unaff Set of increasing colors to use for the filling of the #' unaffected individuls. +#' @inheritParams Ped #' -#' @return A list of two elements -#' - The processed values column as a numeric factor -#' - A dataframe containing the description of each modality of the scale +#' @return A list of three elements +#' - `mods` : The processed values column as a numeric factor +#' - `affected` : A logical vector indicating if the individual is affected +#' - `sc_fill` : A dataframe containing the description of each modality of the +#' scale #' #' @examples #' aff <- generate_aff_inds(seq_len(5), threshold = 3, sup_thres_aff = TRUE) #' generate_fill(seq_len(5), aff$affected, aff$labels) #' generate_fill(seq_len(5), aff$affected, aff$labels, keep_full_scale = TRUE) +#' +#' @keywords generate_scales #' @export generate_fill <- function( values, affected, labels, @@ -48,7 +73,6 @@ generate_fill <- function( } mods <- fill <- rep(NA, n) - # Affection modality previously used scale <- unique(as.data.frame( list(mods_aff = labels, affected = affected, fill = fill) @@ -63,8 +87,11 @@ generate_fill <- function( # last of aff fill_to_use <- c(colors_unaff[1], colors_aff[-1], "grey") names(fill_to_use) <- c("FALSE", "TRUE", NA) - fill <- revalue(affected, fill_to_use, warn_missing = FALSE) - mods <- revalue(affected, c("FALSE" = 0, "TRUE" = 1), + fill <- revalue( + as.character(affected), fill_to_use, warn_missing = FALSE + ) + mods <- revalue( + as.character(affected), c("FALSE" = 0, "TRUE" = 1), warn_missing = FALSE ) } else { @@ -130,128 +157,140 @@ generate_fill <- function( fill[is.na(fill)] <- "grey" mods <- as.numeric(mods) - scale <- unique(as.data.frame( + sc_fill <- unique(as.data.frame( list( mods = mods, labels = labels, affected = affected, fill = fill, - density = rep(NA, n), angle = rep(NA, n) + density = rep(NA_integer_, n), angle = rep(NA_integer_, n) ) )) - - list(mods = mods, fill_scale = scale) + list(mods = mods, affected = affected, sc_fill = sc_fill) } -#' Process the colors based on affection and availability +#' Process the border colors based on availability #' -#' @details Perform transformation uppon a column given as the one +#' @description Perform transformation uppon a vector given as the one #' containing the availability status to compute the border color. +#' The vector given will be transformed using the [vect_to_binary()] +#' function. #' -#' @param avail The vector containing the availability status. -#' The values need to be numeric and can only be 0, 1 or NA. +#' @param values The vector containing the values to process as available. #' @param colors_avail Set of 2 colors to use for the box's border of an -#' individual. The first color will be used for available individual (avail -#' == 1) and the second for the unavailable individual (avail == 0). +#' individual. The first color will be used for available individual +#' (`avail == 1`) and the second for the unavailable individual +#' (`avail == 0`). #' -#' @return A dataframe containing the scale to use for the availability -#' status. +#' @return A list of three elements +#' - `mods` : The processed values column as a numeric factor +#' - `avail` : A logical vector indicating if the individual is available +#' - `sc_bord` : A dataframe containing the description of each modality of the +#' scale #' #' @examples #' generate_border(c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA)) #' +#' @keywords generate_scales #' @export -generate_border <- function(avail, colors_avail = c("green", "black")) { - if (length(avail) > 0) { - if (! is.numeric(avail) && ! all(is.na(avail))) { - stop("Available variable need to be numeric") - } - - if (!all(levels(as.factor(avail)) %in% c("0", "1", "NA"))) { - stop("Available variable need to have only 0, 1 or NA") - } - } - +generate_border <- function(values, colors_avail = c("green", "black")) { # Set border colors if (length(colors_avail) != 2) { stop("Variable `colors_avail` need to be a vector of 2 colors") } - as.data.frame(list( + mods <- vect_to_binary(values) + avail <- vect_to_binary(values, logical = TRUE) + + sc_bord <- data.frame( column = "avail", mods = c(NA, 1, 0), border = c("grey", colors_avail[1], colors_avail[2]), labels = c("NA", "Available", "Non Available") - )) + ) + + list(mods = mods, avail = avail, sc_bord = sc_bord) } -#' Process the colors based on affection and availability +#' Process the filling and border colors based on affection and availability #' -#' @details Perform transformation uppon a dataframe given to compute +#' @description Perform transformation uppon a dataframe given to compute #' the colors for the filling and the border of the individuals based #' on the affection and availability status. #' +#' @details The colors will be set using the [generate_fill()] and the +#' [generate_border()] functions respectively for the filling and the border. +#' #' @param obj A Pedigree object or a vector containing the affection status for -#' each individuals. The affection status can be numeric, logical or character. -#' @param col_avail The name of the column containing the availability status. -#' @inheritParams is_informative +#' each individuals. The affection status can be numeric or a character. #' @inheritParams generate_fill #' @inheritParams generate_border #' @inheritParams generate_aff_inds +#' @inheritParams Ped #' #' @return #' ## When used with a vector -#' A list of three elements -#' - A vector containing the transformed filling modalities -#' - A dataframe containing the description of each filling modalities -#' - A dataframe containing the description of the border modalities +#' +#' A list of two elements +#' - The list containing the filling colors processed and their description +#' - The list containing the border colors processed and their description #' #' ## When used with a Pedigree object -#' The Pedigree object with the `affected` and `avail` columns -#' processed accordingly. #' -#' The Pedigree scales slots are updated +#' The Pedigree object with the `affected` and `avail` columns +#' processed accordingly as well as the `scales` slot updated. #' -#' @examples -#' data("sampleped") -#' ped <- Pedigree(sampleped) -#' generate_colors(ped, "affected", add_to_scale=FALSE)$scales +#' @keywords generate_scales #' @export +#' @usage NULL setGeneric("generate_colors", signature = "obj", function(obj, ...) standardGeneric("generate_colors") ) -#' @export -#' @aliases generate_colors,character #' @rdname generate_colors +#' @examples +#' +#' generate_colors( +#' c("A", "B", "A", "B", NA, "A", "B", "A", "B", NA), +#' c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), +#' mods_aff = "A", +#' ) +#' @export setMethod("generate_colors", "character", function( obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, - keep_full_scale = FALSE, breaks = 3, + mods_aff = NULL, + keep_full_scale = FALSE, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), colors_avail = c("green", "black") ) { affected_val <- obj affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff + mods_aff = mods_aff ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( + lst_bord <- generate_border(avail, colors_avail) + lst_aff <- generate_fill( affected_val, affected$affected, affected$labels, - keep_full_scale, breaks, colors_aff, colors_unaff + keep_full_scale, NULL, colors_aff, colors_unaff ) - lst_sc$border_scale <- border - lst_sc + list( + fill = lst_aff, + bord = lst_bord + ) } ) -#' @export -#' @aliases generate_colors,numeric #' @rdname generate_colors +#' @examples +#' +#' generate_colors( +#' c(10, 0, 5, 7, NA, 6, 2, 1, 3, NA), +#' c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), +#' threshold = 3, keep_full_scale = TRUE +#' ) +#' @export setMethod("generate_colors", "numeric", function( - obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, + obj, avail, threshold = 0.5, sup_thres_aff = TRUE, keep_full_scale = FALSE, breaks = 3, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), @@ -259,56 +298,38 @@ setMethod("generate_colors", "numeric", ) { affected_val <- obj affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff + mods_aff = NULL, threshold, sup_thres_aff ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( - affected_val, affected$affected, affected$labels, - keep_full_scale, breaks, colors_aff, colors_unaff - ) - - lst_sc$border_scale <- border - lst_sc - } -) -#' @export -#' @aliases generate_colors,logical -#' @rdname generate_colors -setMethod("generate_colors", "logical", - function( - obj, avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, - keep_full_scale = FALSE, breaks = 3, - colors_aff = c("yellow2", "red"), - colors_unaff = c("white", "steelblue4"), - colors_avail = c("green", "black") - ) { - affected_val <- obj - affected <- generate_aff_inds(affected_val, - mods_aff, threshold, sup_thres_aff - ) - border <- generate_border(avail, colors_avail) - lst_sc <- generate_fill( + lst_bord <- generate_border(avail, colors_avail) + lst_aff <- generate_fill( affected_val, affected$affected, affected$labels, keep_full_scale, breaks, colors_aff, colors_unaff ) - lst_sc$border_scale <- border - lst_sc + list( + fill = lst_aff, + bord = lst_bord + ) } ) #' @importFrom plyr rbind.fill -#' @include pedigreeClass.R -#' @docType methods -#' @aliases generate_colors,Pedigree #' @param add_to_scale Boolean defining if the scales need to be added to the #' existing scales or if they need to replace the existing scales. #' @param reset If `TRUE` the scale of the specified column will be reset if #' already present. -#' @param ... Other parameters to pass to the `generate_colors` function +#' @param col_avail A character vector with the name of the column to be used +#' for the availability status. +#' @param col_aff A character vector with the name of the column to be used +#' for the affection status. +#' @examples +#' data("sampleped") +#' ped <- Pedigree(sampleped) +#' ped <- generate_colors(ped, "affected", add_to_scale=FALSE) +#' scales(ped) #' @rdname generate_colors +#' @include AllClass.R #' @export setMethod("generate_colors", "Pedigree", function(obj, @@ -321,7 +342,7 @@ setMethod("generate_colors", "Pedigree", colors_avail = c("green", "black"), reset = TRUE ) { - if (nrow(obj$ped) == 0) { + if (length(obj) == 0) { return(obj) } @@ -337,51 +358,74 @@ setMethod("generate_colors", "Pedigree", return(obj) } - new_col <- paste0(col_aff, "_aff") - df <- check_columns(obj$ped, c(col_aff, col_avail), - "", new_col, others_cols = TRUE + new_fill <- paste0(col_aff, "_mods") + new_bord <- paste0(col_avail, "_mods") + df <- check_columns(as.data.frame(ped(obj)), c(col_aff, col_avail), + "", c(new_fill, new_bord), others_cols = TRUE ) - lst_sc <- generate_colors(df[[col_aff]], df[[col_avail]], - mods_aff, threshold, sup_thres_aff, - keep_full_scale, breaks, - colors_aff, colors_unaff, colors_avail + ## Generate affected individuals + lst_inds <- generate_aff_inds(df[[col_aff]], + mods_aff, threshold, sup_thres_aff ) - df[new_col] <- lst_sc$mods - if (nrow(lst_sc$fill_scale) > 0) { - lst_sc$fill_scale$column_mods <- new_col - lst_sc$fill_scale$column_values <- col_aff - } + ## Create border and fill scales + lst_bord <- generate_border( + df[[col_avail]], colors_avail + ) + lst_fill <- generate_fill( + df[[col_aff]], lst_inds$affected, lst_inds$labels, + keep_full_scale, breaks, colors_aff, colors_unaff + ) - scales <- list( - fill = lst_sc$fill_scale, - border = lst_sc$border_scale + lst_sc <- list( + fill = lst_fill$sc_fill, + border = lst_bord$sc_bord ) - obj$ped <- df + ## Add mods to Pedigree object and update slots + mcols(obj)[new_fill] <- lst_fill$mods + mcols(obj)[new_bord] <- lst_bord$mods + affected(ped(obj)) <- lst_fill$affected + avail(ped(obj)) <- lst_bord$avail + + ## Update scales with correct names + if (nrow(lst_sc$fill) > 0) { + lst_sc$fill$column_mods <- new_fill + lst_sc$fill$column_values <- col_aff + } + + if (nrow(lst_sc$bord) > 0) { + lst_sc$border$column_mods <- new_bord + lst_sc$border$column_values <- col_avail + } + + ## Should the affected scales be added to existing one if (add_to_scale) { - if (col_aff %in% obj$scales$fill$column_values & - !reset) { - stop("The column ", col_aff, " is already in the scales") - } else if (col_aff %in% obj$scales$fill$column_values & reset) { - obj$scales$fill <- obj$scales$fill[ - obj$scales$fill$column_values != col_aff, - ] + if (col_aff %in% fill(obj)$column_values) { + if (!reset) { + stop("The column ", col_aff, " is already in the scales") + } else { + new_order <- unique(fill(obj)[ + fill(obj)$column_values == col_aff, + "order" + ]) + fill(obj) <- fill(obj)[ + fill(obj)$column_values != col_aff, + ] + } + } else { + new_order <- ifelse(nrow(fill(obj)) > 0, + max(fill(obj)$order) + 1, 1 + ) } - new_order <- ifelse(nrow(obj$scales$fill) > 0, - max(obj$scales$fill$order) + 1, 1 - ) - scales$fill$order <- new_order - scales$fill <- rbind.fill(obj$scales$fill, - scales$fill - ) + lst_sc$fill$order <- new_order + lst_sc$fill <- rbind.fill(fill(obj), lst_sc$fill) } else { - scales$fill$order <- 1 + lst_sc$fill$order <- 1 } - - obj$scales <- scales + scales(obj) <- Scales(lst_sc$fill, lst_sc$border) validObject(obj) obj } diff --git a/R/ibd_matrix.R b/R/ibd_matrix.R index 769c0e3d..7166ae47 100644 --- a/R/ibd_matrix.R +++ b/R/ibd_matrix.R @@ -2,7 +2,7 @@ #' @importFrom Matrix sparseMatrix NULL -#' Create an IBD matrix +#' IBD matrix #' #' @description #' Transform identity by descent (IBD) matrix data from the form produced by @@ -21,8 +21,10 @@ NULL #' index file contains the mapping between this new id and the original one. #' The final matrix should be labeled with the original identifiers. #' -#' @param id1 First subject identifiers -#' @param id2 Second subject identifiers +#' @param id1 A character vector with the id of the first individuals of each +#' pairs +#' @param id2 A character vector with the id of the second individuals of each +#' pairs #' @param ibd the IBD value for that pair #' @param idmap an optional 2 column matrix or data frame whose first element #' is the internal value (as found in `id1` and `id2`, and whose @@ -133,4 +135,3 @@ ibd_matrix <- function(id1, id2, ibd, idmap, diagonal) { dimnames = list(dimid, dimid) ) } -TRUE diff --git a/R/is_informative.R b/R/is_informative.R index 35c1f291..90891496 100644 --- a/R/is_informative.R +++ b/R/is_informative.R @@ -1,26 +1,17 @@ -#' @title Is informative +#' Find informative individuals #' #' @description Select the ids of the informative individuals. #' -#' @details Depending on the informative parameter, the function will extract -#' the ids of the informative individuals. In the case of a numeric vector, -#' the function will return the same vector. In the case of a boolean, the -#' function will return the ids of the individuals if TRUE, NA otherwise. +#' @details Depending on the **informative** parameter, the function will +#' extract the ids of the informative individuals. In the case of a +#' numeric vector, the function will return the same vector. +#' In the case of a boolean, the function will return the ids of the +#' individuals if TRUE, NA otherwise. #' In the case of a string, the function will return the ids of the #' corresponding informative individuals based on the avail and affected #' columns. #' -#' @param avail A numeric vector of availability status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : unavailable -#' - `1` : available -#' - `NA` : availability not known -#' @param affected A numeric vector of affection status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : unaffected -#' - `1` : affected -#' - `NA` : affection status not known -#' @param col_aff A string with the column name to use for the affection status. +#' @inheritParams Ped #' @param informative Informative individuals selection can take 5 values: #' - 'AvAf' (available and affected), #' - 'AvOrAf' (available or affected), @@ -29,42 +20,60 @@ #' - 'All' (all individuals) #' - A numeric/character vector of individuals id #' - A boolean -#' @inheritParams kinship -#' @inheritParams is_parent +#' @inheritParams generate_colors #' @return #' #' ## When obj is a vector -#' A vector of individuals informative identifiers +#' A vector of individuals informative identifiers. #' #' ## When obj is a Pedigree -#' A list containing the Pedigree object and the vector of individuals -#' identifiers. -#' The Pedigree object will have a new column named 'id_inf' containing 1 for -#' informative individuals and 0 otherwise. -#' -#' @examples -#' data("sampleped") -#' ped <- Pedigree(sampleped) -#' is_informative(ped, col_aff = "affection_aff") +#' The Pedigree object with its `isinf` slot updated. #' #' @export #' @docType methods +#' @usage NULL setGeneric("is_informative", signature = "obj", function(obj, ...) standardGeneric("is_informative") ) -#' @export #' @rdname is_informative -#' @aliases is_informative,character -#' @docType methods -setMethod("is_informative", "character", function( - obj, avail, affected, informative = "AvAf", missid = "0" +#' @examples +#' +#' is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +#' is_informative(c("A", "B", "C", "D", "E"), informative = c(1, 2)) +#' is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +#' is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), +#' affected = c(0, 1, 0, 1, 1), informative = "AvAf") +#' is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), +#' affected = c(0, 1, 0, 1, 1), informative = "AvOrAf") +#' is_informative(c("A", "B", "C", "D", "E"), +#' informative = c(TRUE, FALSE, TRUE, FALSE, TRUE)) +#' @export +setMethod("is_informative", "character_OR_integer", function( + obj, avail, affected, informative = "AvAf" ) { id <- obj # Selection of all informative individuals depending of the informative # parameter - if (is.numeric(informative)) { - id_inf <- id[match(id, informative, nomatch = 0) != 0] + if (length(informative) > 1) { + if (is.character(informative)) { + id_inf <- id[match(id, informative, nomatch = 0) != 0] + } else if (is.numeric(informative)) { + id_inf <- id[informative] + } else if (is.logical(informative)) { + if (length(informative) != length(id)) { + stop("The length of a logical informative parameter must be", + "equal to the length of the id vector" + ) + } + id_inf <- id[informative] + } else { + stop("The informative parameter must be a character, ", + "logical or numeric" + ) + } + } else if (is.numeric(informative)) { + id_inf <- id[informative] } else if (is.logical(informative)) { id_inf <- ifelse(informative, id, NA) id_inf <- id_inf[!is.na(id_inf)] @@ -90,19 +99,32 @@ setMethod("is_informative", "character", function( unique(id_inf) }) -#' @export #' @rdname is_informative -#' @aliases is_informative,Pedigree -#' @docType methods -#' @param reset If `TRUE`, the `id_inf` column is reset +#' @param reset If `TRUE`, the `isinf` slot is reset +#' @examples +#' +#' data("sampleped") +#' ped <- Pedigree(sampleped) +#' ped <- is_informative(ped, col_aff = "affection_mods") +#' isinf(ped(ped)) +#' @export setMethod("is_informative", "Pedigree", function( - obj, col_aff = NULL, informative = "AvAf", missid = "0", reset = FALSE + obj, col_aff = NULL, informative = "AvAf", reset = FALSE ) { - obj$ped$affected <- NA - aff_scl <- obj$scales$fill + if (!reset & any(!is.na(isinf(ped(obj))))) { + warning( + "The isinf slot already has values in the Ped object", + " and reset is set to FALSE" + ) + return(obj) + } + affected(ped(obj)) <- NA + aff_scl <- fill(obj) + ped_df <- as.data.frame(ped(obj)) if (is.null(col_aff)) { stop("The col_aff argument is required") } + # TODO use the affected columns if (col_aff %in% aff_scl$column_mods) { aff <- aff_scl$mods[aff_scl$affected == TRUE & aff_scl$column_mods == col_aff @@ -110,22 +132,20 @@ setMethod("is_informative", "Pedigree", function( unaff <- aff_scl$mods[aff_scl$affected == FALSE & aff_scl$column_mods == col_aff ] - obj$ped$affected[obj$ped[, col_aff] %in% aff] <- 1 - obj$ped$affected[obj$ped[, col_aff] %in% unaff] <- 0 + ped_df$affected[ped_df[, col_aff] %in% aff] <- 1 + ped_df$affected[ped_df[, col_aff] %in% unaff] <- 0 } else { stop("The column ", col_aff, " is not in the scales fill") } cols_needed <- c("id", "avail", "affected") - obj$ped <- check_columns(obj$ped, cols_needed, "", "", others_cols = TRUE) - id_inf <- is_informative(obj$ped$id, obj$ped$avail, obj$ped$affected, - informative, missid + check_columns(ped_df, cols_needed, "", "", others_cols = TRUE) + id_inf <- is_informative(ped_df$id, ped_df$avail, ped_df$affected, + informative = informative ) - if (!reset) { - check_columns(obj$ped, NULL, "id_inf", NULL) - } - - obj$ped$id_inf <- ifelse(obj$ped$id %in% id_inf, 1, 0) + isinf(ped(obj)) <- vect_to_binary( + ifelse(ped_df$id %in% id_inf, 1, 0), logical = TRUE + ) obj }) diff --git a/R/kindepth.R b/R/kindepth.R index e3827410..636d7334 100644 --- a/R/kindepth.R +++ b/R/kindepth.R @@ -1,42 +1,42 @@ -#' Compute the depth of each subject in a Pedigree +#' Individual's depth in a pedigree #' #' @description #' Computes the depth of each subject in the Pedigree. #' #' @details -#' Mark each person as to their depth in a Pedigree; 0 for a founder, otherwise +#' Mark each person as to their depth in a Pedigree; `0` for a founder, +#' otherwise : #' -#' \eqn{depth = 1 + \max(fatherDepth, motherDepth)} +#' \deqn{depth = 1 + \max(fatherDepth, motherDepth)} #' -#' In the case of an inbred Pedigree a perfect alignment obeying -#' `extra=TRUE` may not exist. +#' In the case of an inbred Pedigree a perfect alignment may not exist. #' -#' @inheritParams kinship -#' @param align_parents If `align_parents=T`, go one step further and try to -#' make both parents of each child have the same depth. +#' @param ... Additional arguments +#' @inheritParams Ped +#' @param align_parents If `align_parents = TRUE`, go one step further +#' and try to make both parents of each child have the same depth. #' (This is not always possible). -#' It helps the drawing program by lining up pedigrees that 'join in the middle' -#' via a marriage. +#' It helps the drawing program by lining up pedigrees that +#' 'join in the middle' via a marriage. #' -#' @return an integer vector containing the depth for each subject +#' @return An integer vector containing the depth for each subject #' -#' @author Terry Therneau +#' @author Terry Therneau, updated by Louis Le Nézet #' @seealso [align()] -#' @include pedigreeClass.R -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' kindepth(ped1) +#' @include AllClass.R #' @export setGeneric("kindepth", signature = "obj", function(obj, ...) standardGeneric("kindepth") ) -#' @export #' @rdname kindepth -#' @aliases kindepth,character -#' @docType methods -setMethod("kindepth", "character", function(obj, dadid, momid, +#' @examples +#' kindepth( +#' c("A", "B", "C", "D", "E"), +#' c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0") +#' ) +setMethod("kindepth", "character_OR_integer", function(obj, dadid, momid, align_parents = FALSE ) { id <- obj @@ -62,7 +62,6 @@ setMethod("kindepth", "character", function(obj, dadid, momid, for (i in seq_len(n)) { child <- match(midx, parents, nomatch = 0) + match(didx, parents, nomatch = 0) # Index of parent's childs - ## version 1.8.5 did not have this check with child_old. ## Keeping it here because it was not the issue being fixed in 9/2023. if (all(child == child_old)) { @@ -112,17 +111,6 @@ setMethod("kindepth", "character", function(obj, dadid, momid, ## It may be possible to do better alignment when the Pedigree has loops, ## but it is definitely beyond this program, perhaps in auto_hint one day. - chaseup <- function(x, midx, didx) { - new <- c(midx[x], didx[x]) # mother and father - new <- new[new > 0] - while (length(new) > 1) { - x <- unique(c(x, new)) - new <- c(midx[new], didx[new]) - new <- new[new > 0] - } - x - } - ## First deal with any parents who are founders They all start with depth 0 dads <- didx[midx > 0 & didx > 0] # the father side of all spouse pairs moms <- midx[midx > 0 & didx > 0] @@ -156,46 +144,74 @@ setMethod("kindepth", "character", function(obj, dadid, momid, npair <- length(dads) done <- rep(FALSE, npair) # couples that are taken care of while (TRUE) { - pairs.to.fix <- which((depth[dads] != depth[moms]) & !done) - if (length(pairs.to.fix) == 0) { + ## Select parents pairs to fix + pairs_to_fix <- which((depth[dads] != depth[moms]) & !done) + + if (length(pairs_to_fix) == 0) { break } - temp <- pmax(depth[dads], depth[moms])[pairs.to.fix] - who <- min(pairs.to.fix[temp == min(temp)]) # the chosen couple + ## Get max depth of all pairs to fix + temp <- pmax(depth[dads], depth[moms])[pairs_to_fix] + ## Select the couple to fix + ## that have the minimal depth + who <- min(pairs_to_fix[temp == min(temp)]) + + ## Good is the individuals with the higher depth good <- moms[who] bad <- dads[who] if (depth[dads[who]] > depth[moms[who]]) { good <- dads[who] bad <- moms[who] } - abad <- chaseup(bad, midx, didx) + + ## Move depth of all bad individuals + ## All id linked to bad + abad <- c(bad, ancestors(bad, midx, didx)) + if (length(abad) == 1 && sum(c(dads, moms) == bad) == 1) { - # simple case, a solitary marry-in + ## Simple case, a solitary marry-in + ## Only one in ancestry and is dad or mom of only one depth[bad] <- depth[good] } else { - agood <- chaseup(good, midx, didx) # ancestors of the 'good' side + ## Ancestors of the 'good' side + agood <- c(good, ancestors(good, midx, didx)) + + ## If individual already in agood not bad + abad1 <- abad[!abad %in% agood] + ## For spouse chasing, I need to exclude the given pair tdad <- dads[-who] tmom <- moms[-who] - while (1) { - ## spouses of any on agood list + ## Get all individuals affiliated to agood + while (TRUE) { + ## Add spouse spouse <- c(tmom[!is.na(match(tdad, agood))], tdad[!is.na(match(tmom, agood))] ) temp <- unique(c(agood, spouse)) - temp <- unique(chaseup(temp, midx, didx)) # parents + + ## Add ancestors + temp <- unique(c(temp, ancestors(temp, midx, didx))) + + ## Add kids kids <- (!is.na(match(midx, temp)) | !is.na(match(didx, temp))) temp <- unique(c(temp, (seq_len(n))[ kids & depth <= depth[good] ])) + if (length(temp) == length(agood)) { + ## If no addition to good ancestors break break } else { + ## Else do other iteration agood <- temp } } + ## Update agood but only if not in abad1 + agood <- agood[!agood %in% abad1] + ## Change all depth if (all(match(abad, agood, nomatch = 0) == 0)) { ## shift it down depth[abad] <- depth[abad] + (depth[good] - depth[bad]) @@ -218,21 +234,31 @@ setMethod("kindepth", "character", function(obj, dadid, momid, done[who] <- TRUE ## This snunk into version 1.9.6, which was part of ## bug: done[dads == bad | moms == bad] <- TRUE - - } ## while(TRUE) + } if (all(depth > 0)) { - stop("You found a bug in kindepth's alignment code!") + stop( + "You found a bug in kindepth's alignment code!", + "Depth found:", depth + ) } depth } ) -#' @export #' @rdname kindepth -#' @aliases kindepth,Pedigree -#' @docType methods +#' @examples +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' kindepth(ped1) setMethod("kindepth", "Pedigree", function(obj, align_parents = FALSE) { - kindepth(obj$ped$id, obj$ped$dadid, obj$ped$momid, align_parents) + kindepth(ped(obj), align_parents) + } +) + +#' @rdname kindepth +setMethod("kindepth", "Ped", + function(obj, align_parents = FALSE) { + kindepth(id(obj), dadid(obj), momid(obj), align_parents) } ) \ No newline at end of file diff --git a/R/kinship.R b/R/kinship.R index 0b7dc18d..4bc4cd45 100644 --- a/R/kinship.R +++ b/R/kinship.R @@ -2,21 +2,21 @@ #' @importFrom Matrix forceSymmetric bdiag NULL -#' Compute a kinship matrix +#' Kinship matrix #' #' @description -#' Compute the kinship matrix for a set of related autosomal subjects. The -#' function is generic, and can accept a Pedigree, vector as +#' Compute the kinship matrix for a set of related autosomal subjects. +#' The function is generic, and can accept a Pedigree, a Ped or a vector as #' the first argument. #' #' @details -#' The function will usually be called with a Pedigree the -#' third form is provided for backwards compatibility with an earlier release -#' of the library that was less capable. The first argument is named `id` -#' for the same reason. Note that when using the third form any information on +#' The function will usually be called with a Pedigree. +#' The call with a Ped or a vector is provided for backwards compatibility +#' with an earlier release of the library that was less capable. +#' Note that when using with a Ped or a vector, any information on #' twins is not available to the function. #' -#' When called with a pedigree, the routine +#' When called with a Pedigree, the routine #' will create a block-diagonal-symmetric sparse matrix object of class #' `dsCMatrix`. Since the `[i, j]` value of the result is 0 for any two #' unrelated individuals i and j and a `Matrix` utilizes sparse @@ -37,40 +37,51 @@ NULL #' The computation is based on a recursive algorithm described in Lange, which #' assumes that the founder alleles are all independent. #' -#' @param obj A pedigree object or a vector of subject identifiers. +#' @param obj A Pedigree or Ped object or a vector of subject identifiers. #' @param chrtype chromosome type. The currently supported types are #' 'autosome' and 'X' or 'x'. -#' @param ... Additional arguments passed to methods -#' @inheritParams sex_to_factor -#' @inheritParams is_parent +#' @inheritParams Ped #' #' @return #' ## When obj is a vector #' A matrix of kinship coefficients. -#' ## When obj is a pedigree +#' ## When obj is a Pedigree #' A matrix of kinship coefficients ordered by families present -#' in the pedigree. +#' in the Pedigree object. #' -#' @examples -#' data(sampleped) -#' ped <- Pedigree(sampleped) -#' kinship(ped) -#' -#' @section References: K Lange, Mathematical and Statistical Methods for +#' @section References: +#' K Lange, Mathematical and Statistical Methods for #' Genetic Analysis, Springer-Verlag, New York, 1997. #' @seealso [make_famid()], [kindepth()] -#' @include pedigreeClass.R +#' @include AllClass.R #' @include utils.R #' @export -#' @docType methods +#' @usage NULL setGeneric("kinship", signature = "obj", function(obj, ...) standardGeneric("kinship") ) +#' @rdname kinship #' @export +setMethod("kinship", "Ped", + function(obj, chrtype = "autosome"){ + kinship( + id(obj), dadid(obj), momid(obj), + sex(obj), chrtype = chrtype + ) + } +) + #' @rdname kinship -#' @aliases kinship,character -#' @docType methods +#' @examples +#' +#' kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1)) +#' kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1), +#' chrtype = "x" +#' ) +#' @export setMethod("kinship", "character", function(obj, dadid, momid, sex, chrtype = "autosome") { id <- obj @@ -133,13 +144,16 @@ setMethod("kinship", "character", ) #' @include kindepth.R -#' @export #' @rdname kinship -#' @aliases kinship,Pedigree -#' @docType methods +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' kinship(ped) +#' @export setMethod("kinship", "Pedigree", function(obj, chrtype = "autosome") { - famlist <- unique(obj$ped$family) + famlist <- unique(famid(obj)) nfam <- length(famlist) matlist <- vector("list", nfam) ## The possibly reorderd list of id values @@ -147,26 +161,44 @@ setMethod("kinship", "Pedigree", for (i_fam in seq_along(famlist)) { if (is.na(famlist[i_fam])) { # If no family provided - tped <- obj[is.na(obj$ped$family)] + tped <- obj[is.na(famid(obj))] } else { ## Pedigree for this family - tped <- obj[obj$ped$family == famlist[i_fam]] + tped <- obj[famid(obj) == famlist[i_fam]] } temp <- try({ chrtype <- match.arg(casefold(chrtype), c("autosome", "x")) - n <- length(ped(tped)$id) + n <- length(id(ped(tped))) pdepth <- kindepth(tped) - mom_row <- match(ped(tped)$momid, ped(tped)$id, nomatch = n + 1) - dad_row <- match(ped(tped)$dadid, ped(tped)$id, nomatch = n + 1) + mom_row <- match( + momid(ped(tped)), + id(ped(tped)), + nomatch = n + 1 + ) + dad_row <- match( + dadid(ped(tped)), + id(ped(tped)), + nomatch = n + 1 + ) # Are there any MZ twins to worry about? have_mz <- FALSE - if (!is.null(rel(tped)) && any(rel(tped)$code == "MZ twin")) { + if (length(rel(tped)) > 0 && + any(code(rel(tped)) == "MZ twin") + ) { have_mz <- TRUE ## Doc: MakeMZIndex - temp <- which(rel(tped)$code == "MZ twin") + temp <- which(code(rel(tped)) == "MZ twin") ## drop=FALSE added in case only one MZ twin set - id1x <- match(rel(tped)$id1, ped(tped)$id, nomatch = NA) - id2x <- match(rel(tped)$id2, ped(tped)$id, nomatch = NA) + id1x <- match( + id1(rel(tped)), + id(ped(tped)), + nomatch = NA + ) + id2x <- match( + id2(rel(tped)), + id(ped(tped)), + nomatch = NA + ) if (any(is.na(id1x)) | any(is.na(id2x))) { stop("All individuals in relationship matrix", "should be present in the pedigree informations" @@ -209,7 +241,7 @@ setMethod("kinship", "Pedigree", if (chrtype == "autosome") { if (n == 1) { kmat <- matrix(0.5, 1, 1, - dimnames = list(ped(tped)$id, ped(tped)$id) + dimnames = list(id(ped(tped)), id(ped(tped))) ) } else { kmat <- diag(c(rep(0.5, n), 0)) # founders @@ -232,10 +264,10 @@ setMethod("kinship", "Pedigree", } } } else if (chrtype == "x") { - sex <- as.numeric(ped(tped)$sex) # 1=female, 2=male + sex <- as.numeric(sex(ped(tped))) # 1=female, 2=male if (n == 1) { kmat <- matrix(sex / 2, 1, 1, - dimnames = list(ped(tped)$id, ped(tped)$id) + dimnames = list(id(ped(tped)), id(ped(tped))) ) } else { ## 1 for males, 1/2 for females @@ -265,7 +297,7 @@ setMethod("kinship", "Pedigree", } if (n > 1) { kmat <- kmat[seq_len(n), seq_len(n)] - dimnames(kmat) <- list(ped(tped)$id, ped(tped)$id) + dimnames(kmat) <- list(id(ped(tped)), id(ped(tped))) } kmat }, silent = TRUE) @@ -275,7 +307,7 @@ setMethod("kinship", "Pedigree", matlist[[i_fam]] <- temp } ## deprecated in Matrix: as(forceSymmetric(temp), 'dsCMatrix') - idlist[[i_fam]] <- ped(tped)$id + idlist[[i_fam]] <- id(ped(tped)) } if (length(famlist) == 1) { as(matlist[[1]], diff --git a/R/make_famid.R b/R/make_famid.R index 339ce4a7..0832e671 100644 --- a/R/make_famid.R +++ b/R/make_famid.R @@ -1,9 +1,7 @@ -# $Id: make_famid.s,v 1.7 2003/01/07 15:47:08 therneau Exp - -#' Get family id +#' Compute family id #' #' @description -#' Construct a family id from Pedigree information +#' Construct a family identifier from pedigree information #' #' @details #' Create a vector of length n, giving the family 'tree' number of each @@ -11,7 +9,7 @@ #' tree 1, otherwise the tree numbers represent the disconnected subfamilies. #' Singleton subjects give a zero for family number. #' -#' @inheritParams kinship +#' @inheritParams Ped #' #' @return #' ## When used with a character vector @@ -19,21 +17,23 @@ #' #' ## When used with a Pedigree object #' An updated Pedigree object with the family id added +#' and with all ids updated #' #' @seealso [kinship()] -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[,-1]) -#' make_famid(ped1) #' @export +#' @usage NULL setGeneric("make_famid", signature = "obj", function(obj, ...) standardGeneric("make_famid") ) -#' @export #' @rdname make_famid -#' @aliases make_famid,character -#' @docType methods +#' @examples +#' +#' make_famid( +#' c("A", "B", "C", "D", "E", "F"), +#' c("C", "D", "0", "0", "0", "0"), +#' c("E", "E", "0", "0", "0", "0") +#' ) setMethod("make_famid", "character", function(obj, dadid, momid) { id <- obj @@ -83,9 +83,9 @@ setMethod("make_famid", "character", if (any(xx == 1)) { singles <- as.integer(names(xx[xx == 1])) # famid of singletons famid[!is.na(match(famid, singles))] <- 0 # set singletons to 0 - match(famid, sort(unique(famid))) - 1 # renumber + as.character(match(famid, sort(unique(famid))) - 1) # renumber } else { - match(famid, sort(unique(famid))) + as.character(match(famid, sort(unique(famid)))) } # renumber, no zeros } else { stop("Bug in routine: seem to have found an infinite loop") @@ -93,24 +93,21 @@ setMethod("make_famid", "character", } ) -#' @export #' @rdname make_famid -#' @aliases make_famid,Pedigree -#' @docType methods -#' @include pedigreeClass.R +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' make_famid(ped1) setMethod("make_famid", "Pedigree", function(obj) { - ped <- obj - family <- make_famid(ped(ped)$id, ped(ped)$dadid, ped(ped)$momid) - col_ped_compute <- c("sex", "avail", "id", "dadid", "momid", - "family", "momid", "error", "steril", "status" + famid <- make_famid( + id(ped(obj)), dadid(ped(obj)), momid(ped(obj)) ) - ped_df <- ped(ped)[! colnames(ped(ped)) %in% col_ped_compute] - ped_df$family <- family - col_rel_compute <- c("family", "error") - rel_df <- rel(ped)[! colnames(rel(ped)) %in% col_rel_compute] - fam_id1 <- family[match(rel_df$id1, ped(ped)$id)] - fam_id2 <- family[match(rel_df$id2, ped(ped)$id)] + obj@ped@famid <- famid + + fam_id1 <- famid[match(id1(rel(obj)), id(ped(obj)))] + fam_id2 <- famid[match(id2(rel(obj)), id(ped(obj)))] if (any(fam_id1 != fam_id2)) { stop("The two individuals in the relationship", @@ -118,9 +115,151 @@ setMethod("make_famid", "Pedigree", ) } - rel_df$family <- fam_id1 - Pedigree(ped_df, rel_df, - scales = scales(ped), normalize = TRUE + obj@rel@famid <- fam_id1 + obj <- upd_famid_id(obj) + validObject(obj) + obj + } +) + +#' Update family prefix in individuals id +#' +#' Update the family prefix in the individuals identifiers. +#' Individuals identifiers are constructed as follow **famid**_**id**. +#' Therefore to update their family prefix the ids are split by the +#' first underscore and the first part is overwritten by **famid**. +#' +#' If famid is *missing*, then the `famid()` function will be called on the +#' object. +#' +#' @param obj Ped or Pedigree object or a character vector of individual ids +#' @inheritParams Ped +#' +#' @return A character vector of individual ids with family prefix +#' updated +#' +#' @export +#' @usage NULL +setGeneric("upd_famid_id", + function(obj, famid, ...) standardGeneric("upd_famid_id") +) + +#' @rdname upd_famid_id +#' @examples +#' +#' upd_famid_id(c("1", "2", "B_3"), c("A", "B", "A")) +#' upd_famid_id(c("1", "B_2", "C_3", "4"), c("A", NA, "A", NA)) +#' @export +setMethod("upd_famid_id", "character", + function(obj, famid, missid = NA_character_) { + if (length(obj) != length(famid)) { + stop("id and famid must have the same length") + } + id <- obj[!obj %in% missid] + famid <- famid[!obj %in% missid] + if (! is.character(id)) { + stop("id must be a character vector") + } + id[!str_detect(id, "_")] <- paste0("_", id[!str_detect(id, "_")]) + ids <- str_split_fixed(id, "_", 2) + ids[, 1] <- famid + new_ids <- ifelse( + ids[, 1] %in% missid, + ids[, 2], + paste(ids[, 1], ids[, 2], sep = "_") ) + obj[!obj %in% missid] <- new_ids + obj + } +) + +#' @rdname upd_famid_id +#' @export +setMethod("upd_famid_id", + signature(obj = "Ped", famid = "character_OR_integer"), + function(obj, famid) { + obj@id <- upd_famid_id(id(obj), famid) + obj@dadid <- upd_famid_id(dadid(obj), famid) + obj@momid <- upd_famid_id(momid(obj), famid) + obj@famid <- famid + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Ped", famid = "missing"), + function(obj) { + obj@id <- upd_famid_id(id(obj), famid(obj)) + obj@dadid <- upd_famid_id(dadid(obj), famid(obj)) + obj@momid <- upd_famid_id(momid(obj), famid(obj)) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Rel", famid = "character_OR_integer"), + function(obj, famid) { + obj@id1 <- upd_famid_id(id1(obj), famid) + obj@id2 <- upd_famid_id(id2(obj), famid) + obj@famid <- famid + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +setMethod("upd_famid_id", + signature(obj = "Rel", famid = "missing"), + function(obj) { + obj@id1 <- upd_famid_id(id1(obj), famid(obj)) + obj@id2 <- upd_famid_id(id2(obj), famid(obj)) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' id(ped(ped1)) +#' new_fam <- make_famid(id(ped(ped1)), dadid(ped(ped1)), momid(ped(ped1))) +#' id(ped(upd_famid_id(ped1, new_fam))) +setMethod("upd_famid_id", + signature(obj = "Pedigree", famid = "character_OR_integer"), + function(obj, famid) { + old_id <- id(ped(obj)) + obj@ped <- upd_famid_id(ped(obj), famid) + fid1 <- famid[match(id1(rel(obj)), old_id)] + fid2 <- famid[match(id2(rel(obj)), old_id)] + if (any(fid1 != fid2)) { + stop("The two individuals in the relationship", + "are not in the same family" + ) + } + obj@rel <- upd_famid_id(rel(obj), fid1) + validObject(obj) + obj + } +) + +#' @rdname upd_famid_id +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[,-1]) +#' make_famid(ped1) +setMethod("upd_famid_id", + signature(obj = "Pedigree", famid = "missing"), + function(obj) { + obj@ped <- upd_famid_id(ped(obj)) + obj@rel <- upd_famid_id(rel(obj)) + validObject(obj) + obj } ) \ No newline at end of file diff --git a/R/min_dist_inf.R b/R/min_dist_inf.R index 8304b95b..bdd6981b 100644 --- a/R/min_dist_inf.R +++ b/R/min_dist_inf.R @@ -1,7 +1,7 @@ #' @importFrom dplyr %>% NULL -#' @title Minimum distance to the informative individuals +#' Minimum distance to the informative individuals #' #' @description Compute the minimum distance between the informative #' individuals and all the others. @@ -18,11 +18,10 @@ NULL #' by 2, the minimum distance is increased by 1. #' #' -#' -#' @inheritParams sex_to_factor -#' @inheritParams kinship +#' @param ... Additional arguments +#' @param id_inf An identifiers vector of informative individuals. +#' @inheritParams Ped #' @inheritParams is_informative -#' @inheritParams is_parent #' #' @return #' ## When obj is a vector @@ -31,34 +30,36 @@ NULL #' `obj` vector. #' #' ## When obj is a Pedigree -#' The Pedigree object with a new column named 'kin' containing the kinship -#' degree. -#' -#' @examples -#' data(sampleped) -#' ped <- Pedigree(sampleped) -#' min_dist_inf(ped, col_aff = "affection_aff")$ped +#' The Pedigree object with a new slot named 'kin' containing the minimum +#' distance between each individuals and the informative individuals. +#' The `isinf` slot is also updated with the informative individuals. #' #' @seealso [kinship()] #' @include is_informative.R #' @include kinship.R -#' @docType methods #' @export +#' @usage NULL setGeneric("min_dist_inf", signature = "obj", function(obj, ...) standardGeneric("min_dist_inf") ) -#' @export #' @rdname min_dist_inf -#' @aliases min_dist_inf,character -#' @docType methods +#' @examples +#' +#' min_dist_inf( +#' c("A", "B", "C", "D", "E"), +#' c("C", "D", "0", "0", "0"), +#' c("E", "E", "0", "0", "0"), +#' sex = c(1, 2, 1, 2, 1), +#' id_inf = c("D", "E") +#' ) +#' @export setMethod("min_dist_inf", "character", function(obj, - dadid, momid, sex, avail, affected, informative = "AvAf" + dadid, momid, sex, id_inf ) { id <- obj # Selection of all informative individuals depending of the informative # parameter - id_inf <- is_informative(id, avail, affected, informative) if (any(is.na(id_inf)) || length(id_inf) == 0) { stop("No informative individuals detected") } @@ -73,30 +74,50 @@ setMethod("min_dist_inf", "character", function(obj, kin }) -#' @export #' @rdname min_dist_inf -#' @aliases min_dist_inf,Pedigree -#' @docType methods -#' @param reset If TRUE, the `kin` and if `id_inf` columns is reset +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' kin(ped(min_dist_inf(ped, col_aff = "affection_mods"))) +#' @export setMethod("min_dist_inf", "Pedigree", function(obj, - col_aff = NULL, informative = "AvAf", missid = "0", reset = FALSE, ... + col_aff = NULL, informative = "AvAf", reset = FALSE, ... ) { - ped <- is_informative(obj, col_aff, informative = informative, - missid, reset + obj_aff <- is_informative(obj, col_aff, informative = informative, + reset = reset ) - cols_needed <- c("avail", "affected") - check_columns(ped(ped), cols_needed, NULL, NULL, others_cols = TRUE) - - kin <- min_dist_inf( - ped(ped)$id, ped(ped)$dadid, ped(ped)$momid, ped(ped)$sex, - ped(ped)$avail, ped(ped)$affected, informative + new_ped <- min_dist_inf( + ped(obj_aff), + informative = informative, reset = reset ) - if (!reset) { - check_columns(ped(ped), NULL, "kin", NULL) + ped(obj_aff) <- new_ped + validObject(obj_aff) + obj_aff +}) + +#' @rdname min_dist_inf +#' @param reset If TRUE, the `kin` and if `isinf` columns is reset +#' @export +setMethod("min_dist_inf", "Ped", function( + obj, informative = "AvAf", reset = FALSE +) { + + if (!reset & any(!is.na(kin(obj)))) { + stop( + "The kin slot already has values in the Ped object", + " and reset is set to FALSE" + ) } - ped(ped)$kin <- kin - ped + id_inf <- id(obj)[isinf(obj)] + kin <- min_dist_inf( + id(obj), dadid(obj), momid(obj), sex(obj), id_inf + ) + + kin(obj) <- kin + validObject(obj) + obj }) diff --git a/R/norm_data.R b/R/norm_data.R index 628c2952..eb471269 100644 --- a/R/norm_data.R +++ b/R/norm_data.R @@ -4,36 +4,15 @@ #' @importFrom stringr str_remove_all NULL -#' Compute id with family id +#' Normalise a Ped object dataframe #' -#' @description Compute id with family id if the family id available -#' -#' @param family_id The family id -#' @param ind_id The individual id -#' @inheritParams is_parent -#' @keywords internal -#' @return The id with the family id merged -prefix_famid <- function(family_id, ind_id, missid = "0") { - if (length(family_id) > 1 && length(family_id) != length(ind_id)) { - stop("family_id and ind_id must have the same length.") - } - - pre_famid <- ifelse( - is.na(family_id) | is.null(family_id), - "", paste0(as.character(family_id), "_") - ) - ifelse(ind_id == missid, missid, paste0(pre_famid, as.character(ind_id))) -} - -#' Normalise dataframe -#' -#' @description Normalise dataframe for Pedigree object +#' @description Normalise dataframe for a Ped object #' #' @details Normalise a dataframe and check for columns correspondance -#' to be able to use it as an input to create Pedigree object. +#' to be able to use it as an input to create a Ped object. #' Multiple test are done and errors are checked. -#' Sex is calculated based in the `gender` column the following notations -#' are accepted: f, woman, female, 2 and m, man, male, 1. +#' Sex is calculated based on the `gender` column. +#' #' The `steril` column need to be a boolean either TRUE, FALSE or 'NA'. #' Will be considered available any individual with no 'NA' values in the #' `available` column. @@ -41,25 +20,49 @@ prefix_famid <- function(family_id, ind_id, missid = "0") { #' All individuals with errors will be remove from the dataframe and will #' be transfered to the error dataframe. #' +#' A number of checks are done to ensure the dataframe is correct: +#' +#' ## On identifiers: +#' - All ids (id, dadid, momid, famid) are not empty (`!= ""`) +#' - All `id` are unique (no duplicated) +#' - All `dadid` and `momid` are unique in the id column (no duplicated) +#' - id is not the same as dadid or momid +#' - Either have both parents or none +#' +#' ## On sex +#' - All sex code are either `male`, `female`, `terminated` or `unknown`. +#' - No parents are steril +#' - All fathers are male +#' - All mothers are female +#' #' @param ped_df A data.frame with the individuals informations. -#' The minimum columns required are `indID`, `fatherId`, `motherId` and -#' `gender`. -#' The `family` column can also be used to specify the family of the -#' individuals and will be merge to the `id` field separated by an -#' underscore. -#' The following columns are also recognize `sterilisation`, `available`, -#' `vitalStatus`, `affection`. The four of them will be transformed with the -#' [vect_to_binary()] function. -#' They respectively correspond to the sterilisation status, -#' the availability status, the death status and the affection status -#' of the individuals. The values recognized for those columns are `1` or -#' `0`. -#' @param na_strings Vector of strings to be considered as NA values +#' The minimum columns required are: +#' +#' - `indID` individual identifiers -> `id` +#' - `fatherId` biological fathers identifiers -> `dadid` +#' - `motherId` biological mothers identifiers -> `momdid` +#' - `gender` sex of the individual -> `sex` +#' - `family` family identifiers -> `famid` +#' +#' The `family` column, if provided, will be merged to the *ids* field +#' separated by an underscore using the [upd_famid_id()] function. +#' +#' The following columns are also recognize and will be transformed with the +#' [vect_to_binary()] function: +#' +#' - `sterilisation` status -> `steril` +#' - `available` status -> `avail` +#' - `vitalStatus`, is the individual dead -> `status` +#' - `affection` status -> `affected` +#' +#' The values recognized for those columns are `1` or `0`, `TRUE` or `FALSE`. +#' @param na_strings Vector of strings to be considered as NA values. #' @param try_num Boolean defining if the function should try to convert #' all the columns to numeric. -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A dataframe with the errors identified in the `error` column +#' @return A dataframe with different variable correctly standardized +#' and with the errors identified in the `error` column #' #' @include utils.R #' @examples @@ -69,59 +72,72 @@ prefix_famid <- function(family_id, ind_id, missid = "0") { #' motherId = c(0, 0, 2, 2, 0, 5, 2, 0, 8, 8), #' gender = c(1, 2, "m", "man", "f", "male", "m", "m", "f", "f"), #' available = c("A", "1", 0, NA, 1, 0, 1, 0, 1, 0), -#' family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), +#' famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2), #' sterilisation = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, "TRUE"), #' vitalStatus = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0), #' affection = c("TRUE", "FALSE", TRUE, FALSE, 1, 0, 1, 0, 1, 0) #' ) -#' norm_ped(df) +#' tryCatch( +#' norm_ped(df), +#' error = function(e) print(e) +#' ) +#' +#' @seealso [Ped()], [Ped-class], [Pedigree()] #' @export norm_ped <- function( - ped_df, na_strings = c("NA", ""), missid = "0", try_num = FALSE + ped_df, na_strings = c("NA", ""), missid = NA_character_, try_num = FALSE ) { err_cols <- c( "sexErrMoFa", "sexErrFa", "sexErrMo", "sexErrTer", "sexNA", - "sexError", "idErrFa", "idErrMo", "idErrSelf", "idErrOwnParent", - "idErrBothParent", "idError", "error" + "sexError", "idErr", "idErrFa", "idErrMo", "idErrSelf", + "idErrOwnParent", "idErrBothParent", "idError", "error" ) err <- data.frame(matrix(NA, nrow = nrow(ped_df), ncol = length(err_cols))) colnames(err) <- err_cols cols_need <- c("indId", "fatherId", "motherId", "gender") - cols_used <- c("sex", "steril", "avail", "id", "dadid", "momid", "error", - "affected", "status" + cols_used <- c( + "sex", "steril", "status", "avail", "id", "dadid", "momid", "famid", + "error", "affected" ) - cols_to_use <- c("sterilisation", "available", "family", - "vitalStatus", "affection" + cols_to_use <- c( + "available", "family", "sterilisation", "vitalStatus", "affection" ) ped_df <- check_columns( ped_df, cols_need, cols_used, cols_to_use, others_cols = TRUE, cols_to_use_init = TRUE, cols_used_init = TRUE ) + + ped_df$family[is.na(ped_df$family)] <- missid + if (nrow(ped_df) > 0) { ped_df <- mutate_if( - ped_df, is.character, ~replace(., . %in% na_strings, NA) + ped_df, is.character, ~replace(., . %in% na_strings, NA_character_) ) #### Id #### Check id type - for (id in c("indId", "fatherId", "motherId", "family")) { - if (!is.numeric(ped_df[[id]])) { - ped_df[[id]] <- as.character(ped_df[[id]]) - if (length(grep("^ *$", ped_df[[id]])) > 0) { - stop( - "A blank or empty string is not allowed as the ", - id, " variable" - ) + for (id in c("indId", "fatherId", "motherId")) { + ped_df[[id]] <- as.character(ped_df[[id]]) + } + err$idErr <- lapply( + as.data.frame(t(ped_df[, c( + "indId", "fatherId", "motherId", "family" + )])), + function(x) { + if (any(x == "" & !is.na(x))) { + "One id is Empty" + } else { + NA_character_ } } - } - + ) ## Make a new id from the family and subject pair - ped_df$id <- prefix_famid(ped_df$family, ped_df$indId, missid) - ped_df$dadid <- prefix_famid(ped_df$family, ped_df$fatherId, missid) - ped_df$momid <- prefix_famid(ped_df$family, ped_df$motherId, missid) + ped_df$famid <- ped_df$family + ped_df$id <- upd_famid_id(ped_df$indId, ped_df$famid, missid) + ped_df$dadid <- upd_famid_id(ped_df$fatherId, ped_df$famid, missid) + ped_df$momid <- upd_famid_id(ped_df$motherId, ped_df$famid, missid) ped_df <- mutate_at(ped_df, c("id", "dadid", "momid"), - ~replace(., . %in% na_strings, missid) + ~replace(., . %in% c(na_strings, missid), NA_character_) ) #### Sex #### @@ -131,12 +147,14 @@ norm_ped <- function( is_mother <- ped_df$id %in% ped_df$momid & !is.na(ped_df$id) ## Add missing sex due to parenthood - ped_df$sex[is.na(ped_df$gender) & is_father] <- "male" - ped_df$sex[is.na(ped_df$gender) & is_mother] <- "female" + ped_df$sex[is_father] <- "male" + ped_df$sex[is_mother] <- "female" ## Add terminated for sterilized individuals that is neither dad nor mom if ("sterilisation" %in% colnames(ped_df)) { - ped_df$steril <- vect_to_binary(ped_df$sterilisation) + ped_df$steril <- vect_to_binary( + ped_df$sterilisation, logical = TRUE + ) ped_df$sex[ ped_df$steril == 1 & !is.na(ped_df$steril) & !is_father & !is_mother @@ -146,7 +164,6 @@ norm_ped <- function( ped_df$steril == 1 & !is.na(ped_df$steril) ) & (is_father | is_mother) ] <- "isSterilButIsParent" - nb_steril_parent <- sum(!is.na(err$sexErrTer)) ped_df$steril[!is.na(err$sexErrTer) & (is_father | is_mother) ] <- FALSE @@ -194,14 +211,14 @@ norm_ped <- function( ] <- "selfIdDuplicated" err$idErrOwnParent[ped_df$id %in% id_own_parent] <- "isItsOwnParent" err$idErrBothParent[ - (ped_df$dadid == missid & ped_df$momid != missid) | - (ped_df$dadid != missid & ped_df$momid == missid) + (ped_df$dadid %in% missid & (!ped_df$momid %in% missid)) | + ((!ped_df$dadid %in% missid) & ped_df$momid %in% missid) ] <- "oneParentMissing" ## Unite all id errors in one column err <- unite( err, "idError", c( - "idErrFa", "idErrMo", "idErrSelf", + "idErr", "idErrFa", "idErrMo", "idErrSelf", "idErrOwnParent", "idErrBothParent" ), na.rm = TRUE, sep = "_", remove = TRUE ) @@ -209,16 +226,15 @@ norm_ped <- function( #### Available #### if ("available" %in% colnames(ped_df)) { - ped_df$avail <- vect_to_binary(ped_df$available) + ped_df$avail <- vect_to_binary(ped_df$available, logical = TRUE) } #### Status #### if ("vitalStatus" %in% colnames(ped_df)) { - ped_df$status <- vect_to_binary(ped_df$vitalStatus) + ped_df$status <- vect_to_binary(ped_df$vitalStatus, logical = TRUE) } - #### Affected #### if ("affection" %in% colnames(ped_df)) { - ped_df$affected <- vect_to_binary(ped_df$affection) + ped_df$affected <- vect_to_binary(ped_df$affection, logical = TRUE) } #### Convert to num #### @@ -244,50 +260,63 @@ norm_ped <- function( ped_df } -#' Normalise relationship dataframe +#' Normalise a Rel object dataframe #' -#' @description Normalise relationship dataframe for Pedigree object +#' @description Normalise a dataframe and check for columns correspondance +#' to be able to use it as an input to create a Ped object. #' -#' @inheritParams norm_ped -#' @param rel_df A data.frame with the special relationships between -#' individuals. -#' The minimum columns required are `id1`, `id2` and `code`. -#' The `family` column can also be used to specify the family -#' of the individuals. -#' The code values are: -#' - `1` = Monozygotic twin -#' - `2` = Dizygotic twin -#' - `3` = twin of unknown zygosity -#' - `4` = Spouse +#' @details +#' The `famid` column, if provided, will be merged to the *ids* field +#' separated by an underscore using the [upd_famid_id()] function. +#' The `code` column will be transformed with the [rel_code_to_factor()]. +#' Multiple test are done and errors are checked. #' -#' The value relation code recognized by the function are the one defined -#' by the [rel_code_to_factor()] function. -#' @inheritParams is_parent +#' A number of checks are done to ensure the dataframe is correct: +#' +#' ## On identifiers: +#' - All ids (id1, id2) are not empty (`!= ""`) +#' - `id1` and `id2` are not the same +#' +#' ## On code +#' - All code are recognised as either "MZ twin", "DZ twin", "UZ twin" or +#' "Spouse" +#' +#' @inheritParams norm_ped +#' @inheritParams Pedigree #' #' @examples #' df <- data.frame( -#' indId1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), -#' indId2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), +#' id1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), +#' id2 = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 1), #' code = c("MZ twin", "DZ twin", "UZ twin", "Spouse", 1, 2, #' 3, 4, "MzTwin", "sp oUse"), -#' family = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) +#' famid = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2) #' ) #' norm_rel(df) #' #' @return A dataframe with the errors identified #' @export -norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = "0") { +norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = NA_character_) { + + if (is.matrix(rel_df)) { + rel_df <- as.data.frame(rel_df) + colnames(rel_df) <- c( + "id1", "id2", "code", "famid" + )[seq_len(ncol(rel_df))] + } + #### Check columns #### err_cols <- c("codeErr", "sameIdErr", "id1Err", "id2Err", "error") err <- data.frame(matrix(NA, nrow = nrow(rel_df), ncol = length(err_cols))) colnames(err) <- err_cols - cols_needed <- c("indId1", "indId2", "code") - cols_used <- c("id1", "id2", "error") - cols_to_use <- c("family") + cols_needed <- c("id1", "id2", "code") + cols_used <- c("error") + cols_to_use <- c("famid") rel_df <- check_columns( rel_df, cols_needed, cols_used, cols_to_use, others_cols = FALSE, cols_to_use_init = TRUE, cols_used_init = TRUE ) + rel_df$famid[is.na(rel_df$famid)] <- missid if (nrow(rel_df) > 0) { rel_df <- mutate_if( rel_df, is.character, @@ -302,17 +331,21 @@ norm_rel <- function(rel_df, na_strings = c("NA", ""), missid = "0") { #### Check for id errors #### Set ids as characters rel_df <- rel_df %>% - mutate(across(c("indId1", "indId2"), as.character)) + mutate(across(c("id1", "id2", "famid"), as.character)) ## Check for non null ids - len1 <- nchar(rel_df$indId1) - len2 <- nchar(rel_df$indId2) - err$id1Err[is.na(len1) | len1 == missid] <- "indId1length0" - err$id2Err[is.na(len2) | len2 == missid] <- "indId2length0" + len1 <- nchar(rel_df$id1) + len2 <- nchar(rel_df$id2) + err$id1Err[is.na(len1) | len1 %in% missid] <- "indId1length0" + err$id2Err[is.na(len2) | len2 %in% missid] <- "indId2length0" ## Compute id with family id - rel_df$id1 <- prefix_famid(rel_df$family, rel_df$indId1, missid) - rel_df$id2 <- prefix_famid(rel_df$family, rel_df$indId2, missid) + rel_df$id1 <- upd_famid_id(rel_df$id1, rel_df$famid, missid) + rel_df$id2 <- upd_famid_id(rel_df$id2, rel_df$famid, missid) + + rel_df <- mutate_at(rel_df, c("id1", "id2", "famid"), + ~replace(., . %in% c(na_strings, missid), NA_character_) + ) err$sameIdErr[rel_df$id1 == rel_df$id2] <- "SameId" diff --git a/R/num_child.R b/R/num_child.R index 18960d49..3ea91147 100644 --- a/R/num_child.R +++ b/R/num_child.R @@ -2,18 +2,17 @@ #' @importFrom tidyr pivot_longer NULL -#' Number of child +#' Number of childs #' -#' @description Compute the number of child per individual +#' @description Compute the number of childs per individual #' #' @details Compute the number of direct child but also the number #' of indirect child given by the ones related with the linked spouses. -#' If a relation ship matrix is given, then even if no children is present +#' If a relation ship dataframe is given, then even if no children is present #' between 2 spouses, the indirect childs will still be added. #' -#' @inheritParams kinship -#' @inheritParams norm_rel -#' @inheritParams is_parent +#' @inheritParams Ped +#' @inheritParams Pedigree #' #' @return #' ## When obj is a vector @@ -25,26 +24,39 @@ NULL #' An updated Pedigree object with the columns `num_child_dir`, #' `num_child_ind` and `num_child_tot` added to the #' Pedigree `ped` slot. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ped1 <- num_child(ped1) -#' summary(ped1$ped) -#' @include pedigreeClass.R +#' @include AllClass.R #' @export +#' @usage NULL setGeneric("num_child", signature = "obj", function(obj, ...) standardGeneric("num_child") ) -#' @export #' @rdname num_child -#' @aliases num_child,character -#' @docType methods -setMethod("num_child", "character", function(obj, dadid, momid, - rel_df = NULL, missid = "0" +#' @examples +#' +#' num_child( +#' obj = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), +#' dadid = c("3", "3", "6", "8", "0", "0", "0", "0", "0", "0"), +#' momid = c("4", "5", "7", "9", "0", "0", "0", "0", "0", "0"), +#' rel_df = data.frame( +#' id1 = "10", +#' id2 = "3", +#' code = "Spouse" +#' ) +#' ) +#' @export +setMethod("num_child", "character_OR_integer", function(obj, dadid, momid, + rel_df = NULL, missid = NA_character_ ) { id <- obj + if (length(dadid) != length(id)) { + stop("The length of dadid should be equal to the length of id") + } + if (length(momid) != length(id)) { + stop("The length of momid should be equal to the length of id") + } + # Create dummy vectors for the check() function child <- NULL num_child_dir <- NULL @@ -57,18 +69,24 @@ setMethod("num_child", "character", function(obj, dadid, momid, childs_all <- NULL df <- data.frame(id, dadid, momid, stringsAsFactors = FALSE) + if (nrow(df) == 0) { + df <- data.frame(id = character(), + num_child_dir = integer(), + num_child_ind = integer(), + num_child_tot = integer(), + stringsAsFactors = FALSE + ) + return(df) + } - spouse_rel <- unique(df[df$dadid != missid & - df$momid != missid, c("dadid", "momid") + spouse_rel <- unique(df[(!df$dadid %in% missid) & + (!df$momid %in% missid), c("dadid", "momid") ] ) colnames(spouse_rel) <- c("id1", "id2") if (!is.null(rel_df)) { - cols_needed <- c("id1", "id2", "code") - rel_df <- check_columns(rel_df, cols_needed, "", "", - others_cols = FALSE - ) + rel_df <- norm_rel(rel_df, missid = missid) spouse_rel <- rbind(spouse_rel, rel_df[rel_df$code == "Spouse", c("id1", "id2")] ) @@ -77,74 +95,85 @@ setMethod("num_child", "character", function(obj, dadid, momid, spouse_rel$idmax <- pmax(spouse_rel$id1, spouse_rel$id2) spouse_rel <- unique(spouse_rel[c("idmin", "idmax")]) - dad_child <- df[df$dadid != missid, c("dadid", "id")] %>% - group_by(dadid) %>% - summarise(child = list(id)) %>% - mutate(num_child_dir = lengths(child)) %>% - rename(id = dadid) - mom_child <- df[df$momid != missid, c("id", "momid")] %>% - group_by(momid) %>% - summarise(child = list(id)) %>% - mutate(num_child_dir = lengths(child)) %>% - rename(id = momid) - id_child <- rbind(dad_child, mom_child) - - # Number of direct child per individual - df$num_child_dir <- id_child$num_child_dir[match(df$id, id_child$id)] - - # Number of total childs per individual - rel_child <- spouse_rel %>% - left_join(id_child, by = c("idmin" = "id")) %>% - left_join(id_child, by = c("idmax" = "id"), - suffix = c("_min", "_max") - ) %>% - rowwise() %>% - mutate(childs = list(unique(unlist( - list(child_min, child_max) - )))) %>% - select(c(idmin, idmax, childs)) %>% - tidyr::pivot_longer(cols = -childs, names_to = "order", - values_to = "id" - ) %>% - group_by(id) %>% - summarise(childs_all = list(unique(unlist(childs)))) %>% - mutate(num_child_tot = lengths(childs_all)) - - df$num_child_tot <- rel_child$num_child_tot[match(df$id, rel_child$id)] - - df <- df %>% - mutate(across(c(num_child_dir, num_child_tot), - ~replace(., is.na(.), 0) - )) - - df$num_child_ind <- df$num_child_tot - df$num_child_dir - - df + if (nrow(spouse_rel) > 0) { + dad_child <- df[(!df$dadid %in% missid), c("dadid", "id")] %>% + group_by(dadid) %>% + summarise(child = list(id)) %>% + mutate(num_child_dir = lengths(child)) %>% + rename(id = dadid) + mom_child <- df[(!df$momid %in% missid), c("id", "momid")] %>% + group_by(momid) %>% + summarise(child = list(id)) %>% + mutate(num_child_dir = lengths(child)) %>% + rename(id = momid) + id_child <- rbind(dad_child, mom_child) + + # Number of direct child per individual + df$num_child_dir <- id_child$num_child_dir[match(df$id, id_child$id)] + + # Number of total childs per individual + spouse_child <- spouse_rel %>% + left_join(id_child, by = c("idmin" = "id")) %>% + left_join(id_child, by = c("idmax" = "id"), + suffix = c("_min", "_max") + ) + rel_child <- spouse_child %>% + rowwise() %>% + mutate(childs = list(unique(unlist( + list(child_min, child_max) + )))) %>% + select(c(idmin, idmax, childs)) %>% + tidyr::pivot_longer(cols = -childs, names_to = "order", + values_to = "id" + ) %>% + group_by(id) %>% + summarise(childs_all = list(unique(unlist(childs)))) %>% + mutate(num_child_tot = lengths(childs_all)) + + df$num_child_tot <- rel_child$num_child_tot[match(df$id, rel_child$id)] + + df <- df %>% + mutate(across(c(num_child_dir, num_child_tot), + ~replace(., is.na(.), 0) + )) + + df$num_child_ind <- df$num_child_tot - df$num_child_dir + + df + } else { + df$num_child_dir <- 0 + df$num_child_ind <- 0 + df$num_child_tot <- 0 + df + } }) -#' @export #' @rdname num_child -#' @aliases num_child,Pedigree -#' @docType methods #' @param reset If TRUE, the `num_child_tot`, `num_child_ind` and #' the `num_child_dir` columns are reset. +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' ped1 <- num_child(ped1, reset = TRUE) +#' summary(ped(ped1)) +#' @export setMethod("num_child", "Pedigree", function(obj, reset = FALSE) { - df <- num_child(obj$ped$id, obj$ped$dadid, obj$ped$momid, - rel_df = obj$rel + df <- num_child(id(ped(obj)), dadid(ped(obj)), momid(ped(obj)), + rel_df = as.data.frame(rel(obj)) ) if (!reset) { - check_columns(obj$ped, NULL, + check_columns(as.data.frame(ped(obj)), NULL, c("num_child_tot", "num_child_ind", "num_child_dir"), NULL, others_cols = TRUE ) } - obj$ped <- merge(obj$ped, - df[c("id", "num_child_tot", "num_child_ind", "num_child_dir")], - by = "id", sort = FALSE - ) + obj@ped@num_child_tot <- df$num_child_tot + obj@ped@num_child_ind <- df$num_child_ind + obj@ped@num_child_dir <- df$num_child_dir + validObject(obj) obj }) -TRUE diff --git a/R/ped_to_legdf.R b/R/ped_to_legdf.R index 0d084e12..4631218a 100644 --- a/R/ped_to_legdf.R +++ b/R/ped_to_legdf.R @@ -1,11 +1,25 @@ -#' Convert a Pedigree to a legend data frame of element to plot +#' Create plotting legend data frame from a Pedigree #' #' @description #' Convert a Pedigree to a legend data frame for it to -#' be plotted with afterwards with [plot_fromdf()]. +#' be plotted afterwards with [plot_fromdf()]. #' -#' @inheritParams align -#' @inheritParams set_plot_area +#' @details The data frame contains the following columns: +#' - `x0`, `y0`, `x1`, `y1`: coordinates of the elements +#' - `type`: type of the elements +#' - `fill`: fill color of the elements +#' - `border`: border color of the elements +#' - `angle`: angle of the shading of the elements +#' - `density`: density of the shading of the elements +#' - `cex`: size of the elements +#' - `label`: label of the elements +#' - `tips`: tips of the elements (used for the tooltips) +#' - `adjx`: horizontal text adjustment of the labels +#' - `adjy`: vertical text adjustment of the labels +#' +#' All those columns are used by [plot_fromdf()] to plot the graph. +#' @param obj A Pedigree object +#' @param cex Character expansion of the text #' @inheritParams plot_fromdf #' @param adjx default=0. Controls the horizontal text adjustment of #' the labels in the legend. @@ -19,11 +33,21 @@ #' data("sampleped") #' ped <- Pedigree(sampleped) #' leg_df <- ped_to_legdf(ped) -#' summary(leg_df$leg_df) -#' plot_fromdf(leg_df$leg_df) +#' summary(leg_df$df) +#' plot_fromdf(leg_df$df, usr = c(-1,15,0,7)) +#' @keywords internal, Pedigree-plot +#' @export +#' @usage NULL +setGeneric( + "ped_to_legdf", signature = "obj", + function(obj, ...) { + standardGeneric("ped_to_legdf") + } +) + +#' @rdname ped_to_legdf #' @export -#' @docType methods -ped_to_legdf <- function(ped, +setMethod("ped_to_legdf", "Pedigree", function(obj, boxh = 1, boxw = 1, cex = 1, adjx = 0, adjy = 0 ) { @@ -37,9 +61,9 @@ ped_to_legdf <- function(ped, adjy = numeric() ) sex_equiv <- c("Male", "Female", "Terminated", "Unknown") - all_lab <- list(sex_equiv, scales(ped)$border$labels) - all_aff <- lapply(unique(scales(ped)$fill$order), function(x) { - scales(ped)$fill$labels[scales(ped)$fill$order == x] + all_lab <- list(sex_equiv, border(obj)$labels) + all_aff <- lapply(unique(fill(obj)$order), function(x) { + fill(obj)$labels[fill(obj)$order == x] }) all_lab <- c(all_lab, all_aff) @@ -63,7 +87,7 @@ ped_to_legdf <- function(ped, posy <- cumsum(posy) - boxh posy <- posy[seq_along(posy) %% 2 == 0] - all_aff <- scales(ped)$fill + all_aff <- fill(obj) n_aff <- length(unique(all_aff$order)) lab_title <- c("Sex", "Border", unique(all_aff$column_values)) @@ -75,9 +99,11 @@ ped_to_legdf <- function(ped, ) plot_df <- rbind.fill(plot_df, titles) + ## Get ped_df + ped_df <- as.data.frame(ped(obj)) # Sex poly1 <- polygons(1) - all_sex <- unique(as.numeric(ped(ped)$sex)) + all_sex <- unique(as.numeric(ped_df$sex)) sex <- data.frame( x0 = posx[1], y0 = posy[all_sex], type = paste(names(poly1)[all_sex], 1, 1, sep = "_"), @@ -98,16 +124,15 @@ ped_to_legdf <- function(ped, plot_df <- rbind.fill(plot_df, sex, sex_label) # Border - bord_df <- scales(ped)$border - border_mods <- unique(ped(ped)[, unique(bord_df[["column"]])]) + border_mods <- unique(ped_df[, unique(border(obj)$column_mods)]) border <- data.frame( x0 = posx[3], y0 = posy[seq_along(border_mods)], type = rep("square_1_1", length(border_mods)), - border = bord_df$border[match(border_mods, bord_df$mods)], + border = border(obj)$border[match(border_mods, border(obj)$mods)], fill = "white", id = "border" ) - lab <- bord_df$labels[match(border_mods, bord_df$mods)] + lab <- border(obj)$labels[match(border_mods, border(obj)$mods)] lab[is.na(lab)] <- "NA" border_label <- data.frame( x0 = posx[4] + adjx, @@ -123,7 +148,7 @@ ped_to_legdf <- function(ped, ## Affected for (aff in seq_len(n_aff)) { aff_df <- all_aff[all_aff$order == aff, ] - aff_mods <- unique(ped(ped)[, unique(aff_df[["column_mods"]])]) + aff_mods <- unique(ped_df[, unique(aff_df[["column_mods"]])]) aff_bkg <- data.frame( x0 = posx[3 + aff * 2], y0 = posy[seq_along(aff_mods)], type = rep(paste("square", 1, 1, sep = "_"), @@ -173,5 +198,6 @@ ped_to_legdf <- function(ped, min(plot_df$x0), max(plot_df$x0), min(plot_df$y0), max(plot_df$y0) ) - list(leg_df = plot_df, par_usr = par_usr) + list(df = plot_df, par_usr = par_usr) } +) diff --git a/R/ped_to_plotdf.R b/R/ped_to_plotdf.R index 0f7ebd58..6da6e4ed 100644 --- a/R/ped_to_plotdf.R +++ b/R/ped_to_plotdf.R @@ -1,43 +1,82 @@ #' @importFrom plyr rbind.fill NULL -#' Convert a Pedigree to a data frame of element to plot +#' Create plotting data frame from a Pedigree +#' +#' @description +#' Convert a Pedigree to a data frame with all the elements and their +#' characteristic for them to be plotted afterwards with [plot_fromdf()]. +#' +#' @details The data frame contains the following columns: +#' - `x0`, `y0`, `x1`, `y1`: coordinates of the elements +#' - `type`: type of the elements +#' - `fill`: fill color of the elements +#' - `border`: border color of the elements +#' - `angle`: angle of the shading of the elements +#' - `density`: density of the shading of the elements +#' - `cex`: size of the elements +#' - `label`: label of the elements +#' - `tips`: tips of the elements (used for the tooltips) +#' - `adjx`: horizontal text adjustment of the labels +#' - `adjy`: vertical text adjustment of the labels +#' +#' All those columns are used by [plot_fromdf()] to plot the graph. #' #' @inheritParams align -#' @param pconnect when connecting parent to children the program will try to +#' @param pconnect When connecting parent to children the program will try to #' make the connecting line as close to vertical as possible, subject to it #' lying inside the endpoints of the line that connects the children by at #' least `pconnect` people. Setting this option to a large number will #' force the line to connect at the midpoint of the children. #' @param branch defines how much angle is used to connect various levels of #' nuclear families. -#' @param aff_mark if TRUE, add a aff_mark to each box corresponding to the +#' @param aff_mark If `TRUE`, add a aff_mark to each box corresponding to the #' value of the affection column for each filling scale. -#' @param label if not NULL, add a label to each box corresponding to the +#' @param label If not `NULL`, add a label to each box corresponding to the #' value of the column given. -#' @inheritParams set_plot_area +#' @param ... Other arguments passed to [par()] #' @inheritParams subregion +#' @inheritParams set_plot_area #' #' @return A list containing the data frame and the user coordinates. +#' #' @examples +#' #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == 1,]) -#' ped_to_plotdf(ped1) -#' @seealso [plot_fromdf()] +#' ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) +#' plot_df <- ped_to_plotdf(ped1) +#' summary(plot_df$df) +#' plot_fromdf(plot_df$df, usr = plot_df$par_usr$usr, +#' boxh = plot_df$par_usr$boxh, boxw = plot_df$par_usr$boxw +#' ) +#' +#' @seealso +#' [plot_fromdf()] #' [ped_to_legdf()] +#' @keywords internal, Pedigree-plot +#' @export +#' @usage NULL +setGeneric( + "ped_to_plotdf", signature = "obj", + function(obj, ...) { + standardGeneric("ped_to_plotdf") + } +) + +#' @rdname ped_to_plotdf #' @export -ped_to_plotdf <- function( - ped, packed = FALSE, width = 10, align = c(1.5, 2), - subreg = NULL, cex = 0.5, symbolsize = cex, pconnect = 0.5, branch = 0.6, +setMethod("ped_to_plotdf", "Pedigree", function( + obj, packed = TRUE, width = 6, align = c(1.5, 2), + subreg = NULL, cex = 1, symbolsize = cex, pconnect = 0.5, branch = 0.6, aff_mark = TRUE, label = NULL, ... ) { - famlist <- unique(ped(ped)$family) + famlist <- unique(famid(obj)) if (length(famlist) > 1) { nfam <- length(famlist) all_df <- vector("list", nfam) for (i_fam in famlist) { - ped_fam <- ped[ped(ped)$family == i_fam] + ped_fam <- obj[famid(obj) == i_fam] all_df[[i_fam]] <- ped_to_plotdf(ped_fam, packed, width, align, subreg, cex, symbolsize, ... ) @@ -53,7 +92,7 @@ ped_to_plotdf <- function( label = character(), tips = character(), adjx = numeric(), adjy = numeric() ) - plist <- align(ped, packed = packed, width = width, align = align) + plist <- align(obj, packed = packed, width = width, align = align) if (!is.null(subreg)) { plist <- subregion(plist, subreg) @@ -62,7 +101,7 @@ ped_to_plotdf <- function( maxlev <- nrow(plist$pos) params_plot <- set_plot_area( - cex, ped(ped)$id, maxlev, xrange, symbolsize, ... + cex, id(ped(obj)), maxlev, xrange, symbolsize, ... ) boxw <- params_plot$boxw @@ -71,7 +110,7 @@ ped_to_plotdf <- function( legh <- params_plot$legh ## Get all boxes to plot - # idx is the index of the boxes in the alignement + # idx is the index of the boxes in the alignment idx <- which(plist$nid > 0) # index value in the ped of each box id <- plist$nid @@ -80,20 +119,20 @@ ped_to_plotdf <- function( # y position i <- (seq_len(length(plist$nid)) - 1) %% length(plist$n) + 1 # sex of each box - sex <- as.numeric(ped(ped)$sex)[id[idx]] + sex <- as.numeric(sex(ped(obj)))[id[idx]] - all_aff <- scales(ped)$fill - bord_df <- scales(ped)$border - n_aff <- length(unique(all_aff$order)) + all_aff <- fill(obj) + n_aff <- length(unique(fill(obj)$order)) polylist <- polygons(max(1, n_aff)) + ped_df <- as.data.frame(ped(obj)) # border mods of each box - border_mods <- ped(ped)[id[idx], unique(bord_df[["column"]])] - border_idx <- match(border_mods, bord_df[["mods"]]) + border_mods <- ped_df[id[idx], unique(border(obj)$column_mods)] + border_idx <- match(border_mods, border(obj)$mods) for (aff in seq_len(n_aff)) { aff_df <- all_aff[all_aff$order == aff, ] - aff_mods <- ped(ped)[id[idx], unique(aff_df[["column_mods"]])] + aff_mods <- ped_df[id[idx], unique(aff_df[["column_mods"]])] aff_idx <- match(aff_mods, aff_df[["mods"]]) @@ -116,7 +155,7 @@ ped_to_plotdf <- function( fill = aff_df[aff_idx, "fill"], density = aff_df[aff_idx, "density"], angle = aff_df[aff_idx, "angle"], - border = bord_df[border_idx, "border"], + border = border(obj)$border[border_idx], id = "polygon" ) plot_df <- rbind.fill(plot_df, ind) @@ -124,7 +163,7 @@ ped_to_plotdf <- function( aff_mark_df <- data.frame( x0 = pos[idx] + poly_aff_x_mr[sex], y0 = i[idx] + boxh / 2, - label = ped(ped)[id[idx], unique(aff_df[["column_values"]])], + label = ped_df[id[idx], unique(aff_df[["column_values"]])], fill = "black", type = "text", cex = cex, id = "aff_mark" @@ -134,7 +173,7 @@ ped_to_plotdf <- function( } ## Add status - status <- ped(ped)[id[idx], "status"] + status <- ped_df[id[idx], "status"] idx_dead <- idx[status == 1 & !is.na(status)] if (length(idx_dead) > 0) { @@ -151,7 +190,7 @@ ped_to_plotdf <- function( ## Add ids id_df <- data.frame( x0 = pos[idx], y0 = i[idx] + boxh + labh * 1.2, - label = ped(ped)[id[idx], "id"], fill = "black", + label = ped_df[id[idx], "id"], fill = "black", type = "text", cex = cex, id = "id" ) @@ -160,10 +199,10 @@ ped_to_plotdf <- function( ## Add a label if given if (!is.null(label)) { - check_columns(ped(ped), label) + check_columns(ped_df, label) label <- data.frame( x0 = pos[idx], y0 = i[idx] + boxh + labh * 2.8, - label = ped(ped)[id[idx], label], + label = ped_df[id[idx], label], fill = "black", type = "text", cex = cex, id = "label" @@ -333,4 +372,4 @@ ped_to_plotdf <- function( } } list(df = plot_df, par_usr = params_plot) -} +}) diff --git a/R/pedigree.R b/R/pedigree.R deleted file mode 100644 index a062d56b..00000000 --- a/R/pedigree.R +++ /dev/null @@ -1,288 +0,0 @@ -#' Create a Pedigree object -#' -#' This constructor help to create a `Pedigree` object from -#' different `data.frame` or a set of vectors. -#' -#' If any errors are found in the data, the function will return -#' the data.frame with the errors for the Pedigree and the relationship -#' data.frame. -#' -#' @inheritParams align -#' @param obj A vector of the individuals identifiers or a data.frame -#' with the individuals informations. -#' The minimum columns required are `indID`, `fatherId`, `motherId` and -#' `gender`. -#' The `family` column can also be used to specify the family of the -#' individuals and will be merge to the `id` field separated by an -#' underscore. -#' The following columns are also recognize `sterilisation`, `available`, -#' `vitalStatus`, `affection`. The four of them will be transformed with the -#' [vect_to_binary()] function when the normalisation is selected and will -#' be set respectively to `steril`, `avail`, -#' `status` and `affected`. -#' If you do not use the normalisation, the columns will be checked to -#' be `0` or `1`. -#' They respectively correspond to the sterilisation status, -#' the availability status, the death status and the affection status -#' of the individuals. The values recognized for those columns are `1` or -#' `0`. -#' @param relation A matrix or a data.frame with 3 required columns -#' (i.e. id1, id2, code) specifying special relationship between pairs -#' of individuals. -#' #' The code values are: -#' - `1` = Monozygotic twin -#' - `2` = Dizygotic twin -#' - `3` = twin of unknown zygosity -#' - `4` = Spouse -#' -#' If `famid` is given in the call to create Pedigrees, then -#' `famid` needs to be in the last column of `relation`. -#' @param cols_ren_ped A named list with the columns to rename for the -#' pedigree dataframe. -#' @param cols_ren_rel A named list with the columns to rename for the -#' relationship matrix. -#' @param scales A list of two data.frame with the scales to use for the -#' affection status and the other one for the border color (e.g availability). -#' @param normalize A logical to know if the data should be normalised. -#' @param ... Other arguments to pass to the function `generate_colors`. -#' @return A Pedigree object. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' @export -setGeneric("Pedigree", signature = "obj", - function(obj, ...) standardGeneric("Pedigree") -) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,numeric -#' @docType methods -setMethod("Pedigree", "numeric", function(obj, ... -) { - Pedigree(as.character(obj), ...) -}) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,character -#' @docType methods -#' @inheritParams is_parent -#' @inheritParams sex_to_factor -#' @inheritParams family_check -#' @inheritParams is_informative -#' @param status A numeric vector of vital status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : alive -#' - `1` : dead -#' - `NA` : vital status not known -#' @param steril A numeric vector of sterilisation status of each individual -#' (e.g., genotyped). The values are: -#' - `0` : not sterilised -#' - `1` : sterilised -#' - `NA` : sterilisation status not known -setMethod("Pedigree", "character", function(obj, dadid, momid, - sex, family = NA, avail = NULL, affected = NULL, status = NULL, - steril = NULL, relation = NULL, - missid = "0", col_aff = "affection", normalize = TRUE, ... -) { - n <- length(obj) - ## Code transferred from noweb to markdown vignette. - ## Sections from the noweb/vignettes are noted here with - ## Doc: Error and Data Checks - ## Doc: Errors1 - if (length(momid) != n) stop("Mismatched lengths, id and momid") - if (length(dadid) != n) stop("Mismatched lengths, id and momid") - if (length(sex) != n) stop("Mismatched lengths, id and sex") - if (length(steril) != n & !is.null(steril)) { - stop("Mismatched lengths, id and steril") - } - - if (length(avail) != n & !is.null(avail)) { - stop("Mismatched lengths, id and avail") - } - if (length(status) != n & !is.null(status)) { - stop("Mismatched lengths, id and status") - } - - - ped_df <- data.frame( - family = family, - id = obj, - dadid = dadid, - momid = momid, - sex = sex - ) - if (any(!is.na(affected))) { - if (is.vector(affected)) { - ped_df$affection <- affected - } else { - ped_df <- cbind(ped_df, affected) - col_aff <- colnames(affected) - } - } - if (any(!is.na(avail))) { - ped_df$available <- avail - } - if (any(!is.na(status))) { - ped_df$vitalStatus <- status - } - if (any(!is.na(steril))) { - ped_df$sterilisation <- steril - } - if (is.null(relation)) { - relation <- data.frame( - id1 = character(), - id2 = character(), - code = numeric(), - family = character() - ) - } - Pedigree(ped_df, relation = relation, - missid = missid, col_aff = col_aff, ... - ) -}) - -#' @export -#' @rdname Pedigree -#' @aliases Pedigree,data.frame -#' @docType methods -setMethod("Pedigree", "data.frame", function( - obj = data.frame( - id = character(), - dadid = character(), - momid = character(), - sex = numeric(), - family = character(), - available = numeric(), - affection = numeric() - ), - relation = data.frame( - id1 = character(), - id2 = character(), - code = numeric(), - family = character() - ), - cols_ren_ped = list( - "indId" = "id", - "fatherId" = "dadid", - "motherId" = "momid", - "gender" = "sex", - "family" = "family", - "sterilisation" = "steril", - "vitalStatus" = "status", - "affection" = "affected" - ), - cols_ren_rel = list( - "indId1" = "id1", - "indId2" = "id2" - ), - scales = list( - fill = data.frame( - order = numeric(), - column_values = character(), - column_mods = character(), - mods = numeric(), - labels = character(), - affected = logical(), - fill = character(), - density = numeric(), - angle = numeric() - ), - border = data.frame( - column = character(), - mods = numeric(), - labels = character(), - border = character() - ) - ), - hints = list( - order = NULL, - spouse = NULL - ), - normalize = TRUE, - missid = "0", - col_aff = "affection", - ... -) { - ped_df <- obj - if (!is.data.frame(ped_df)) { - stop("ped_df must be a data.frame") - } - - if (is.matrix(relation)) { - rel_df <- data.frame( - id1 = relation[, 1], - id2 = relation[, 2], - code = relation[, 3] - ) - if (dim(relation)[2] > 3) { - rel_df$family <- relation[, 4] - } - } else if (is.data.frame(relation)) { - rel_df <- relation - } else { - stop("relation must be a matrix or a data.frame") - } - - ## Rename columns ped - old_cols <- as.vector(unlist(cols_ren_ped)) - new_cols <- names(cols_ren_ped) - cols_to_ren <- match(old_cols, names(ped_df)) - names(ped_df)[cols_to_ren[!is.na(cols_to_ren)]] <- - new_cols[!is.na(cols_to_ren)] - - ## Rename columns rel - old_cols <- as.vector(unlist(cols_ren_rel)) - new_cols <- names(cols_ren_rel) - cols_to_ren <- match(old_cols, names(rel_df)) - names(rel_df)[cols_to_ren[!is.na(cols_to_ren)]] <- - new_cols[!is.na(cols_to_ren)] - - ## Set family, id, dadid and momid to character - to_char <- c("family", "indId", "fatherId", "motherId") - to_char <- colnames(ped_df)[colnames(ped_df) %in% to_char] - ped_df[to_char] <- lapply(ped_df[to_char], as.character) - - ## Normalise the data before creating the object - if (normalize) { - ped_df <- norm_ped(ped_df, missid = missid) - rel_df <- norm_rel(rel_df, missid = missid) - } else { - cols_need <- c("id", "dadid", "momid", "sex") - cols_to_use <- c("steril", "avail", "family", "status", "affected") - ped_df <- check_columns( - ped_df, cols_need, "", cols_to_use, - others_cols = TRUE, cols_to_use_init = TRUE - ) - cols_need <- c("id1", "id2", "code") - cols_to_use <- c("family") - rel_df <- check_columns( - rel_df, cols_need, "", cols_to_use, cols_to_use_init = TRUE - ) - } - if (any(!is.na(ped_df$error))) { - warning("The Pedigree informations are not valid.", - "Here is the normalised Pedigree informations", - "with the identified problems" - ) - return(ped_df) - } - - if (any(!is.na(rel_df$error))) { - warning("The relationship informations are not valid.", - "Here is the normalised relationship informations", - "with the identified problems" - ) - return(rel_df) - } - - rownames(ped_df) <- ped_df$id - ## Create the object - ped <- new("Pedigree", ped = ped_df, rel = rel_df, - scales = scales, hints = hints - ) - - generate_colors(ped, col_aff = col_aff, ...) -} -) diff --git a/R/pedigreeClass.R b/R/pedigreeClass.R deleted file mode 100644 index d7a039d7..00000000 --- a/R/pedigreeClass.R +++ /dev/null @@ -1,377 +0,0 @@ -#' @importFrom methods as new slot slot<- validObject -NULL - -#' S4 class to represent a pedigree. -#' -#' A pedigree is a ensemble of individuals linked to each other into -#' a family tree. -#' -#' They are created from a data.frame containing the individuals informations -#' and a relation ship data.frame for the special links between individuals. -#' A list of scales can be provided to create a legend. -#' To create a Pedigree object, use the function -#' [Pedigree()]. -#' -#' @slot ped A data.frame with the individuals informations. The minimum -#' columns required are 'id', 'dadid', 'momid' and 'sex'. Other columns can be -#' added to the data.frame and will be recognised by the functions. Some -#' errors can be detected by the validity function and some of them can be -#' corrected and others will be added to a dedicated column. -#' @slot rel A data.frame for the special relationship between -#' individuals. -#' The minimum columns required are 'id1', 'id2' and 'code'. -#' @slot scales A data.frame to use for the affection status. -#' This data.frame is generated by the function -#' [generate_aff_inds()] followed by -#' [generate_colors()]. -#' @slot hints List of two elements. -#' - **order** is a numeric vector with one element per subject in the -#' Pedigree. It determines the relative order of subjects within a sibship, as -#' well as the relative order of processing for the founder couples. (For this -#' latter, the female founders are ordered as though they were sisters). -#' - **spouse** is a matrix with one row per hinted marriage, usually -#' only a few marriages in a Pedigree will need an added hint, for -#' instance reverse the plot order of a husband/wife pair. -#' Each row contains the index of the left spouse, the right hand spouse, -#' and the anchor (i.e : `1` = left, `2` = right, `0` = either). -#' -#' @return A Pedigree object. -#' @seealso [Pedigree()] -#' @docType class -#' @name Pedigree-class -#' @rdname Pedigree-class -#' @aliases Pedigree-class -#' @include validity.R -#' @include Pedigree.R -#' @export -setClass( - "Pedigree", - slots = c( - ped = "data.frame", - rel = "data.frame", - scales = "list", - hints = "list" - ) -) - -setValidity("Pedigree", is_valid) - -#### S4 Accessors #### - -#' @title Pedigree ped accessors -#' @param object A Pedigree object. -#' @return The slot `ped` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases ped,Pedigree-method -#' @export -setGeneric("ped", function(object){ - standardGeneric("ped") -}) - -setMethod("ped", signature(object = "Pedigree"), function(object) { - object@ped -}) - -setGeneric("ped<-", function(object, value) { - standardGeneric("ped<-") -}) - -setMethod("ped<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@ped <- value - validObject(object) - object -}) - -#' @description Pedigree rel accessors -#' @param object A Pedigree object. -#' @return The slot `rel` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases rel,Pedigree-method -#' @export -setGeneric("rel", function(object){ - standardGeneric("rel") -}) - -setMethod("rel", signature(object = "Pedigree"), function(object) { - object@rel -}) - -setGeneric("rel<-", function(object, value) { - standardGeneric("rel<-") -}) - -setMethod("rel<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@rel <- value - validObject(object) - object -}) - -#' @description Pedigree scales accessors -#' @param object A Pedigree object. -#' @return The slot `scales` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases scales,Pedigree-method -#' @export -setGeneric("scales", function(object){ - standardGeneric("scales") -}) - -setMethod("scales", signature(object = "Pedigree"), function(object) { - object@scales -}) - -setGeneric("scales<-", function(object, value) { - standardGeneric("scales<-") -}) - -setMethod("scales<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@scales <- value - validObject(object) - object -}) - -#' @description Pedigree hints accessors -#' @param object A Pedigree object. -#' @return The slot `hints` present in the Pedigree object. -#' @rdname extract-methods -#' @aliases hints,Pedigree-method -#' @export -setGeneric("hints", function(object){ - standardGeneric("hints") -}) - -setMethod("hints", signature(object = "Pedigree"), function(object) { - object@hints -}) - -setGeneric("hints<-", function(object, value) { - standardGeneric("hints<-") -}) - -setMethod("hints<-", signature(object = "Pedigree", value = "ANY"), function(object, value) { - object@hints <- value - validObject(object) - object -}) - - -#### S4 methods #### - -#' @title Pedigree methods -#' @description Pedigree show method -#' @param object A Pedigree object. -#' @return A character vector with the informations about the object. -#' @rdname extract-methods -#' @aliases show,Pedigree-method -setMethod("show", signature(object = "Pedigree"), function(object) { - nb_fam <- length(levels(as.factor(object@ped$family))) - cat("Pedigree object with", nrow(object@ped), "individuals and", - nrow(object@rel), "special relationships across", nb_fam, "families", - fill = TRUE) -}) - -#' @description Pedigree summary method. -#' @param object A Pedigree object. -#' @return A character vector with the summary of the object. -#' @rdname extract-methods -#' @aliases summary,Pedigree-method -setMethod("summary", signature(object = "Pedigree"), function(object) { - cat("Pedigree object with", nrow(object@ped), "individuals", fill = TRUE) - print(summary(object@ped, maxsum = 5)) - cat("and", nrow(object@rel), "special relationships.", fill = TRUE) - print(summary(object@rel)) - cat("The filling scales columns are:", - levels(as.factor(object@scales$fill$column_values)), fill = TRUE - ) - cat("The border scale column are:", - levels(as.factor(object@scales$border$column)), fill = TRUE - ) -}) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @param ... Other arguments. -#' @rdname extract-methods -#' @return The slot `i` present in the Pedigree object. -setMethod("[[", c(x = "Pedigree", i = "ANY", j = "missing"), - function(x, i, j, ..., drop = TRUE) { - slot(x, i) - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param name A slot name. -#' @rdname extract-methods -#' @return The slot `name` present in the Pedigree object. -#' -setMethod("$", c(x = "Pedigree"), - function(x, name) { - slot(x, name) - } -) - -#' @description Replace parts of a Pedigree object -#' @param x A Pedigree object. -#' @param name A slot name. -#' @param value A vector of values to replace. -#' @rdname extract-methods -#' @return The Pedigree object with the slot `name` replaced by `value`. -setMethod("$<-", c(x = "Pedigree"), - function(x, name, value) { - slot(x, name) <- value - validObject(x) - x - } -) - -#' @description Subset the hints list based on the index given -#' @param hints A list of hints -#' @param index A vector of index -#' @return A list of hints subsetted -#' @rdname extract-methods -#' @aliases sub_sel_hints,Pedigree-method -#' @keywords internal -sub_sel_hints <- function(hints, index) { - if (!is.null(hints$order)) { - temp <- list(order = hints$order[index]) - } else { - temp <- list(order = NULL) - } - - if (!is.null(hints$spouse)) { - indx1 <- match(hints$spouse[, 1], index, nomatch = 0) - indx2 <- match(hints$spouse[, 2], index, nomatch = 0) - keep <- (indx1 > 0 & indx2 > 0) # keep only if both id's are kept - if (any(keep)) { - temp$spouse <- cbind(indx1[keep], indx2[keep], - hints$spouse[keep, 3] - ) - } - } else { - temp$spouse <- NULL - } - temp -} - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @rdname extract-methods -#' @return A Pedigree object subsetted. -setMethod("[", c(x = "Pedigree", i = "ANY", j = "ANY"), - function(x, i, j, drop = TRUE) { - if (is.factor(i)) { - i <- as.character(i) - } - if (is.character(i)) { - i <- which(x$ped$id %in% i) - } - ped_df <- x$ped[i, j, drop = drop] - allId <- unique(c(ped_df$id, ped_df$dadid, ped_df$momid)) - rel_df <- x$rel[x$rel$id1 %in% allId | x$rel$id2 %in% allId, ] - idx <- match(allId, ped_df$id, nomatch = 0) - sub_hints <- sub_sel_hints(x$hints, idx) - fill <- x$scales$fill[x$scales$fill$column_values %in% names(ped_df),] - border <- x$scales$border[x$scales$border$column %in% names(ped_df),] - scales <- list(fill = fill, border = border) - - new_ped <- Pedigree(ped_df, rel_df, scales, - hints = sub_hints, cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @return A Pedigree object subsetted. -#' @rdname extract-methods -setMethod("[", c(x = "Pedigree", i = "missing", j = "ANY"), - function(x, i, j, drop = TRUE) { - ped_df <- x$ped[, j, drop = drop] - new_ped <- Pedigree(ped_df, x$rel, x$scales, - cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' @description Extract parts of a Pedigree object -#' @param x A Pedigree object. -#' @param i A vector of individuals id or a vector of index. -#' @param j A vector of columns names. -#' @param drop A logical value indicating if the dimensions should be dropped. -#' @return A Pedigree object subsetted. -#' @rdname extract-methods -setMethod("[", c(x = "Pedigree", i = "ANY", j = "missing"), - function(x, i, j, drop = TRUE) { - if (is.factor(i)) { - i <- as.character(i) - } - if (is.character(i)) { - i <- which(x$ped$id %in% i) - } - ped_df <- x$ped[i,] - allId <- unique(c(ped_df$id, ped_df$dadid, ped_df$momid)) - rel_df <- x$rel[x$rel$id1 %in% allId & x$rel$id2 %in% allId, ] - idx <- match(allId, ped_df$id, nomatch = 0) - sub_hints <- sub_sel_hints(x$hints, idx) - new_ped <- Pedigree(ped_df, rel_df, x$scales, - hints = sub_hints, cols_ren_ped = NULL, normalize = FALSE - ) - validObject(new_ped) - new_ped - } -) - -#' Convert a Pedigree object to a data.frame -#' @param x A Pedigree object. -#' @return A data.frame with the individuals informations. -#' @docType methods -#' @aliases as.data.frame,Pedigree-method -#' @export -setMethod("as.data.frame", c(x = "Pedigree"), - function(x) { - x$ped - } -) - -#' Convert a Pedigree object to a list -#' @param x A Pedigree object. -#' @return A list with all the slots of the Pedigree object. -#' @docType methods -#' @aliases as.list,Pedigree-method -#' @export -setMethod("as.list", c(x = "Pedigree"), - function(x) { - list( - ped = x$ped, - rel = x$rel, - scales = x$scales, - hints = x$hints - ) - } -) - -#' Compute the length of a Pedigree object -#' @param x A Pedigree object. -#' @return The number of individuals in the Pedigree object. -#' @docType methods -#' @aliases length,Pedigree-method -#' @export -setMethod("length", c(x = "Pedigree"), - function(x) { - nrow(x$ped) - } -) \ No newline at end of file diff --git a/R/plot.R b/R/plot.R index e6879791..3bc23e65 100644 --- a/R/plot.R +++ b/R/plot.R @@ -8,33 +8,43 @@ NULL #' Plot Pedigrees #' #' @description -#' plot objects created with the Pedigree function +#' This function is used to plot a Pedigree object. +#' +#' It is a wrapper for [plot_fromdf()] and [ped_to_plotdf()] as well as +#' [ped_to_legdf()] if `legend = TRUE`. #' #' @details #' Two important parameters control the looks of the result. One is the user #' specified maximum width. The smallest possible width is the maximum number #' of subjects on a line, if the user's suggestion is too low it is -#' increased to 1 + that amount (to give just a little wiggle room). To make a -#' Pedigree where all children are centered under parents simply make the width -#' large enough, however, the symbols may get very small. +#' increased to 1 + that amount (to give just a little wiggle room). +#' +#' To make a Pedigree where all children are centered under parents simply +#' make the width large enough, however, the symbols may get very small. #' #' The second is `align`, a vector of 2 alignment parameters `a` and #' `b`. #' For each set of siblings at a set of locations `x` and with parents at -#' `p=c(p1,p2)` the alignment penalty is \deqn{(1/k^a)\sum{i=1}{k} [(x_i - -#' (p1+p2)/2)]^2} sum(x- mean(p))^2/(k^a) where k is the number of siblings in -#' the set. +#' `p=c(p1,p2)` the alignment penalty is +#' +#' \deqn{(1/k^a)\sum{i=1}{k} [(x_i - (p1+p2)/2)]^2} +#' +#' \deqn{\sum(x- \overline(p))^2/(k^a)} +#' +#' Where k is the number of siblings in the set. +#' #' When `a = 1` moving a sibship with `k` sibs one unit to the #' left or right of optimal will incur the same cost as moving one with #' only 1 or two sibs out of place. -#' If `a=0` then large sibships are harder to move than small ones, +#' +#' If `a = 0` then large sibships are harder to move than small ones, #' with the default value `a = 1.5` they are slightly easier to move #' than small ones. The rationale for the default is as long as the parents #' are somewhere between the first and last siblings the result looks fairly -#' good, so we are more flexible with the spacing of a large family. By -#' tethering all the sibs to a single spot they are kept close to each other. +#' good, so we are more flexible with the spacing of a large family. +#' By tethering all the sibs to a single spot they are kept close to each other. #' The alignment penalty for spouses is \eqn{b(x_1 - x_2)^2}{b *(x1-x2)^2}, -#' which tends to keep them together. The size of `b` controls the relative +#' which tends to keep them together. The size of `b` controls the relative #' importance of sib-parent and spouse-spouse closeness. #' #' @param x A Pedigree object. @@ -46,7 +56,7 @@ NULL #' @param fam_to_plot default=1. If the Pedigree contains multiple families, #' this parameter can be used to select which family to plot. #' It can be a numeric value or a character value. If numeric, it is the -#' index of the family to plot returned by `unique(x$ped$family)`. +#' index of the family to plot returned by `unique(x$ped$famid)`. #' If character, it is the family id to plot. #' @param legend default=FALSE. If TRUE, a legend will be added to the plot. #' @param leg_cex default=0.8. Controls the size of the legend text. @@ -79,6 +89,7 @@ NULL #' @include plot_fromdf.R #' @aliases plot.Pedigree #' @aliases plot,Pedigree +#' @keywords Pedigree-plot #' @export #' @docType methods setMethod("plot", c(x = "Pedigree", y = "missing"), @@ -89,10 +100,7 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), legend = FALSE, leg_cex = 0.8, leg_symbolsize = 0.5, leg_loc = NULL, leg_adjx = 0, leg_adjy = 0, ... ) { - lst <- ped_to_plotdf(x, packed, width, align, subreg, - cex, symbolsize, pconnect, branch, aff_mark, label, ... - ) - famlist <- unique(x$ped$family) + famlist <- unique(famid(x)) if (length(famlist) > 1) { message("Multiple families present, only plotting family ", fam_to_plot @@ -100,9 +108,13 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), if (is.numeric(fam_to_plot)) { fam_to_plot <- famlist[fam_to_plot] } - lst <- lst[[fam_to_plot]] + x <- x[famid(x) == fam_to_plot] } + lst <- ped_to_plotdf(x, packed, width, align, subreg, + cex, symbolsize, pconnect, branch, aff_mark, label, ... + ) + p <- plot_fromdf(lst$df, usr = lst$par_usr$usr, title = title, ggplot_gen = ggplot_gen, boxw = lst$par_usr$boxw, boxh = lst$par_usr$boxh @@ -121,14 +133,14 @@ setMethod("plot", c(x = "Pedigree", y = "missing"), wh_fr[3] + 0.1, wh_fr[3] + 0.4 ) } - leg$leg_df$x0 <- scales::rescale(leg$leg_df$x0, + leg$df$x0 <- scales::rescale(leg$df$x0, c(leg_loc[1], leg_loc[2]) ) - leg$leg_df$y0 <- scales::rescale(leg$leg_df$y0, + leg$df$y0 <- scales::rescale(leg$df$y0, c(leg_loc[3], leg_loc[4]) ) clip(leg_loc[1] - 1, leg_loc[2] + 1, leg_loc[3] - 1, leg_loc[4] + 1) - plot_fromdf(leg$leg_df, add_to_existing = TRUE, + plot_fromdf(leg$df, add_to_existing = TRUE, boxw = lst$par_usr$boxw * leg_symbolsize, boxh = lst$par_usr$boxh * leg_symbolsize ) diff --git a/R/plot_fct.R b/R/plot_fct.R index 547ec5ac..96729369 100644 --- a/R/plot_fct.R +++ b/R/plot_fct.R @@ -2,16 +2,16 @@ #' @importFrom graphics polygon frame segments NULL -#' Routine to subset a Pedigree +#' Subset a region of a Pedigree #' -#' @param subreg 4-element vector for (min x, max x, min depth, max depth), +#' @param subreg A 4-element vector for (min x, max x, min depth, max depth), #' used to edit away portions of the plot coordinates returned by #' [align()]. #' This is useful for zooming in on a particular region of the Pedigree. #' @inheritParams findspouse #' -#' @return a Pedigree -#' @keywords internal +#' @return A Pedigree structure with the specified region +#' @keywords internal, Pedigree-plot subregion <- function(plist, subreg) { if (subreg[3] < 1 || subreg[4] > length(plist$n)) { stop("Invalid depth indices in subreg") @@ -55,7 +55,9 @@ subregion <- function(plist, subreg) { } n <- max(n2) - out <- list(n = n2[seq_len(n)], nid = nid2[, seq_len(n), drop = FALSE], + out <- list( + n = n2[seq_len(n)], + nid = nid2[, seq_len(n), drop = FALSE], pos = pos2[, seq_len(n), drop = FALSE], spouse = spouse2[, seq_len(n), drop = FALSE], fam = fam2[, seq_len(n), drop = FALSE] @@ -66,14 +68,23 @@ subregion <- function(plist, subreg) { out } # end subregion() -## Plotting function -#' Generate a circular element + +#' Circular element +#' +#' Create a list of x and y coordinates for a circle +#' with a given number of slices. #' -#' @param nslice number of slices in the circle +#' @param nslice Number of slices in the circle #' @param n Total number of points in the circle #' -#' @return a list of x and y coordinates -#' @keywords internal +#' @return A list of x and y coordinates per slice. +#' @keywords internal, Pedigree-plot +#' @examples +#' +#' circfun(1) +#' circfun(1, 10) +#' circfun(4, 50) +#' @export circfun <- function(nslice, n = 50) { nseg <- ceiling(n / nslice) # segments of arc per slice @@ -89,15 +100,25 @@ circfun <- function(nslice, n = 50) { } -## Doc: polyfun -#' Generate a polygonal element +#' Polygonal element #' -#' @param nslice number of slices in the polygon +#' Create a list of x and y coordinates for a polygon +#' with a given number of slices and a list of coordinates +#' for the polygon. +#' +#' @param nslice Number of slices in the polygon #' @param coor Element form which to generate the polygon #' containing x and y coordinates and theta #' #' @return a list of x and y coordinates -#' @keywords internal +#' @keywords internal, Pedigree-plot +#' @examples +#' polyfun(2, list( +#' x = c(-0.5, -0.5, 0.5, 0.5), +#' y = c(-0.5, 0.5, 0.5, -0.5), +#' theta = -c(3, 5, 7, 9) * pi / 4 +#' )) +#' @export polyfun <- function(nslice, coor) { # make the indirect segments view zmat <- matrix(0, ncol = 4, nrow = length(coor$x)) @@ -140,13 +161,22 @@ polyfun <- function(nslice, coor) { out } -#' Create a list of the different polygonal elements +#' List of polygonal elements +#' +#' Create a list of polygonal elements with x, y coordinates +#' and theta for the square, circle, diamond and triangle. +#' The number of slices in each element can be specified. #' -#' @param nslice number of slices in each element +#' @param nslice Number of slices in each element +#' If nslice > 1, the elements are created with [polyfun()]. #' #' @return a list of polygonal elements with x, y coordinates -#' and theta -#' @keywords internal +#' and theta by slice. +#' @keywords internal, Pedigree-plot +#' @examples +#' polygons() +#' polygons(4) +#' @export polygons <- function(nslice = 1) { if (nslice == 1) { polylist <- list( @@ -195,20 +225,21 @@ polygons <- function(nslice = 1) { #'@importFrom ggplot2 geom_polygon aes annotate NULL -#' Draw segments for a Pedigree +#' Draw segments #' #' @param x0 x coordinate of the first point #' @param y0 y coordinate of the first point #' @param x1 x coordinate of the second point #' @param y1 y coordinate of the second point #' @param p ggplot object -#' @param ggplot_gen logical, if TRUE add the segments to the ggplot object -#' @param col line color -#' @param lwd line width -#' @param lty line type +#' @param ggplot_gen If TRUE add the segments to the ggplot object +#' @param col Line color +#' @param lwd Line width +#' @param lty Line type #' -#' @return Plot the segments or add it to a ggplot object -#' @keywords internal +#' @return Plot the segments to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_segment <- function( x0, y0, x1, y1, p, ggplot_gen, @@ -223,62 +254,71 @@ draw_segment <- function( p } -#' Draw a polygon for a Pedigree +#' Draw a polygon #' #' @param x x coordinates #' @param y y coordinates -#' @param fill fill color -#' @param border border color -#' @param density density of shading -#' @param angle angle of shading +#' @param fill Fill color +#' @param border Border color +#' @param density Density of shading +#' @param angle Angle of shading #' @inheritParams draw_segment #' -#' @return Plot the polygon or add it to a ggplot object -#' @keywords internal +#' @return Plot the polygon to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_polygon <- function( x, y, p, ggplot_gen = FALSE, fill = "grey", border = NULL, density = NULL, angle = 45 ) { - polygon(x, y, col = fill, border = border, density = density, angle = angle) + polygon( + x, y, col = fill, border = border, + density = density, angle = angle + ) if (ggplot_gen) { - p <- p + geom_polygon(aes(x = x, y = y), fill = fill, color = border) + p <- p + + geom_polygon( + aes(x = x, y = y), fill = fill, color = border + ) # To add pattern stripes use ggpattern::geom_polygon_pattern # pattern_density = density[i], pattern_angle = angle[i])) } p } -#' Draw text for a Pedigree +#' Draw texts #' -#' @param label text to be displayed -#' @param cex character expansion of the text -#' @param col text color +#' @param label Text to be displayed +#' @param cex Character expansion of the text +#' @param col Text color #' @param adjx x adjustment #' @param adjy y adjustment #' @inheritParams draw_segment #' @inheritParams draw_polygon #' -#' @return Plot the text or add it to a ggplot object -#' @keywords internal +#' @return Plot the text to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_text <- function(x, y, label, p, ggplot_gen = FALSE, cex = 1, col = NULL, adjx = 0, adjy = 0 ) { text(x, y, label, cex = cex, col = col, adj = c(adjx, adjy)) if (ggplot_gen) { p <- p + annotate( - "text", x = x, y = y, label = label, size = cex / 0.3, color = col + "text", x = x, y = y, label = label, + size = cex / 0.3, color = col ) } p } -## Doc: 4 arcs for multiple instances of subj -#' Draw arcs for multiple instances of a subject +#' Draw arcs #' #' @inheritParams draw_segment #' -#' @return Plot the arcs or add it to a ggplot object -#' @keywords internal +#' @return Plot the arcs to the current device +#' or add it to a ggplot object +#' @keywords internal, Pedigree-plot draw_arc <- function(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black" ) { @@ -293,16 +333,17 @@ draw_arc <- function(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, #' Set plotting area #' -#' @param cex character expansion of the text -#' @param maxlev maximum level -#' @param xrange range of x values -#' @param symbolsize size of the symbols -#' @param ... other arguments passed to [par()] -#' @inheritParams is_parent +#' @param id A character vector with the identifiers of each individuals +#' @param cex Character expansion of the text +#' @param maxlev Maximum level +#' @param xrange Range of x values +#' @param symbolsize Size of the symbols +#' @param ... Other arguments passed to [par()] #' -#' @return a list of user coordinates, old par, box width, box height, +#' @return List of user coordinates, old par, box width, box height, #' label height and leg height -#' @keywords internal +#' +#' @keywords internal, Pedigree-plot set_plot_area <- function(cex, id, maxlev, xrange, symbolsize, ...) { old_par <- par(xpd = TRUE, ...) ## took out mar=mar psize <- par("pin") # plot region in inches diff --git a/R/plot_fromdf.R b/R/plot_fromdf.R index a72fa416..3774d7c9 100644 --- a/R/plot_fromdf.R +++ b/R/plot_fromdf.R @@ -6,9 +6,10 @@ NULL #' Create a plot from a data.frame #' #' @description -#' This function is used to create a plot from a data.frame. If ggplot_gen is -#' set to TRUE, the plot will be generated with ggplot2 and will be returned -#' invisibly. +#' This function is used to create a plot from a data.frame. +#' +#' If `ggplot_gen = TRUE`, the plot will be generated with ggplot2 and +#' will be returned invisibly. #' #' @param df A data.frame with the following columns: #' - `type`: The type of element to plot. Can be `text`, @@ -36,17 +37,22 @@ NULL #' @param title The title of the plot. #' @param add_to_existing If `TRUE`, the plot will be added to the current #' plot. -#' @param boxh Height of the legend boxes -#' @param boxw Width of the legend boxes +#' @param boxh Height of the polygons elements +#' @param boxw Width of the polygons elements #' @inheritParams draw_segment #' @inheritParams ped_to_plotdf #' @include plot_fct.R +#' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == 1,]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == 1,]) #' lst <- ped_to_plotdf(ped1) -#' #plot_fromdf(lst$df, lst$usr) +#' #plot_fromdf(lst$df, lst$par_usr$usr, +#' # boxw = lst$par_usr$boxw, boxh = lst$par_usr$boxh +#' #) +#' #' @return an invisible ggplot object and a plot on the current plotting device +#' @keywords internal, Pedigree-plot #' @export plot_fromdf <- function( df, usr = NULL, title = NULL, ggplot_gen = FALSE, boxw = 1, diff --git a/R/shrink.R b/R/shrink.R index 86aadeae..432b4b29 100644 --- a/R/shrink.R +++ b/R/shrink.R @@ -6,6 +6,9 @@ #' Pedigree condensed to a minimally informative size for algorithms or testing #' that are limited by size of the Pedigree. #' +#' If **avail** or **affected** are `NULL`, they are extracted with their +#' corresponding accessors from the Ped object. +#' #' @details #' Iteratively remove subjects from the Pedigree. The random removal of members #' was previously controlled by a seed argument, but we remove this, forcing @@ -14,11 +17,13 @@ #' Next, available terminal subjects with unknown phenotype if both parents #' available. Last, iteratively shrinks Pedigrees by preferentially removing #' individuals (chosen at random if there are multiple of the same status): +#' #' 1. Subjects with unknown affected status #' 2. Subjects with unaffected affected status #' 3. Affected subjects. #' -#' @inheritParams align +#' @param obj A Pedigree or Ped object. +#' @inheritParams Ped #' @inheritParams is_informative #' @param max_bits Optional, the bit size for which to shrink the Pedigree #' @@ -34,119 +39,150 @@ #' #' @examples #' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == '1',]) +#' ped1 <- Pedigree(sampleped[sampleped$famid == '1',]) #' shrink(ped1, max_bits = 12) #' -#' @author Original by Dan Schaid, updated by Jason Sinnwell +#' @author Original by Dan Schaid, +#' updated by Jason Sinnwell and Louis Le Nézet +#' @keywords shrink #' @seealso [Pedigree()], [bit_size()] #' @export -shrink <- function( - ped, avail = ped(ped)$avail, affected = ped(ped)$affected, max_bits = 16 -) { - if (any(is.na(avail))) { - stop("NA values not allowed in avail vector.") +#' @usage NULL +setGeneric("shrink", signature = "obj", + function(obj, ...) standardGeneric("shrink") +) + +#' @rdname shrink +#' @export +setMethod("shrink", "Pedigree", + function(obj, avail = NULL, affected = NULL, max_bits = 16) { + lst_trim <- shrink(ped(obj), + avail = avail, + affected = affected, + max_bits = max_bits + ) + all_ids <- id(lst_trim$pedObj) + lst_trim$pedObj <- subset( + obj, id(ped(obj)) %in% all_ids, del_parents = TRUE + ) + lst_trim } +) - id_trim <- numeric() - id_lst <- list() - n_origin <- length(ped(ped)$id) +#' @rdname shrink +#' @export +setMethod("shrink", "Ped", + function(obj, avail = NULL, affected = NULL, max_bits = 16) { + if (is.null(avail)) { + avail <- avail(obj) + } + if (is.null(affected)) { + affected <- affected(obj) + } + if (any(is.na(avail))) { + stop("NA values not allowed in avail vector.") + } - bitsize_old <- bit_size(ped)$bit_size + id_trim <- numeric() + id_lst <- list() + n_origin <- length(obj) - ## first find unavailable subjects to remove anyone who is not available - ## and does not have an available descendant + bitsize_old <- bit_size(obj)$bit_size - id_trim_unav <- find_unavailable(ped, avail) + ## first find unavailable subjects to remove anyone who is not available + ## and does not have an available descendant + id_trim_unav <- find_unavailable(obj, avail) - if (length(id_trim_unav)) { - ped_trim <- trim(ped, id_trim_unav) - avail <- avail[match(ped_trim$ped$id, ped(ped)$id)] - id_trim <- c(id_trim, id_trim_unav) - id_lst$unavail <- id_trim_unav + if (length(id_trim_unav)) { + ped_trim <- subset( + obj, id_trim_unav, keep = FALSE, del_parents = TRUE + ) + avail <- avail[match(id(ped_trim), id(obj))] + id_trim <- c(id_trim, id_trim_unav) + id_lst$unavail <- id_trim_unav - } else { - ## no trimming, reset to original ped - ped_trim <- ped - } + } else { + ## no trimming, reset to original ped + ped_trim <- obj + } - ## Next trim any available terminal subjects with unknown phenotype but - ## only if both parents are available + ## Next trim any available terminal subjects with unknown phenotype but + ## only if both parents are available - ## added n_new>0 check because no need to trim anymore if empty ped + ## added n_new>0 check because no need to trim anymore if empty ped - n_chg <- 1 - id_lst$noninform <- NULL - n_new <- length(ped_trim$ped$id) + n_chg <- 1 + id_lst$noninform <- NULL + n_new <- length(ped_trim) - while (n_chg > 0 && n_new > 0) { - n_old <- length(ped_trim$ped$id) + while (n_chg > 0 && n_new > 0) { + n_old <- length(ped_trim) - ## find_avail_noninform finds non-informative, - ## but after suggesting their removal, - ## checks for more unavailable subjects before returning - id_trim_noninf <- find_avail_noninform(ped_trim, avail) + ## find_avail_noninform finds non-informative, + ## but after suggesting their removal, + ## checks for more unavailable subjects before returning + id_trim_noninf <- find_avail_noninform(ped_trim, avail) - if (length(id_trim_noninf)) { - ped_new <- trim(ped_trim, id_trim_noninf) - avail <- avail[match(ped_new$ped$id, ped_trim$ped$id)] - id_trim <- c(id_trim, id_trim_noninf) - id_lst$noninform <- c(id_lst$noninform, id_trim_noninf) - ped_trim <- ped_new + if (length(id_trim_noninf)) { + ped_new <- subset(ped_trim, id_trim_noninf, keep = FALSE) + avail <- avail[match(id(ped_new), id(ped_trim))] + id_trim <- c(id_trim, id_trim_noninf) + id_lst$noninform <- c(id_lst$noninform, id_trim_noninf) + ped_trim <- ped_new + } + n_new <- length(ped_trim) + n_chg <- n_old - n_new } - n_new <- length(ped_trim$ped$id) - n_chg <- n_old - n_new - } - - ## Determine number of subjects & bit_size after initial trimming - n_inter <- length(ped_trim$ped$id) - - bit_size <- bit_size(ped_trim)$bit_size - ## Now sequentially shrink to fit bit_size <= max_bits + ## Determine number of subjects & bit_size after initial trimming + n_inter <- length(ped_trim) - bitsize_vec <- c(bitsize_old, bit_size) + bit_size <- bit_size(ped_trim)$bit_size - is_trim <- TRUE - id_lst$affect <- NULL + ## Now sequentially shrink to fit bit_size <= max_bits - while (is_trim && (bit_size > max_bits)) { - ## First, try trimming by unknown status - save <- find_avail_affected(ped_trim, avail, affstatus = NA) - is_trim <- save$is_trim - - ## Second, try trimming by unaffected status if no unknowns to trim - if (!is_trim) { - save <- find_avail_affected(ped_trim, avail, affstatus = 0) - is_trim <- save$is_trim - } + bitsize_vec <- c(bitsize_old, bit_size) + is_trim <- TRUE + id_lst$affect <- NULL - ## Third, try trimming by affected status if no unknowns & no - ## unaffecteds to trim - if (!is_trim) { - save <- find_avail_affected(ped_trim, avail, affstatus = 1) + while (is_trim && (bit_size > max_bits)) { + ## First, try trimming by unknown status + save <- find_avail_affected(ped_trim, avail, affstatus = NA) is_trim <- save$is_trim - } - if (is_trim) { - ped_trim <- save$ped - avail <- save$new_avail - bit_size <- save$bit_size - bitsize_vec <- c(bitsize_vec, bit_size) - id_trim <- c(id_trim, save$id_trim) - id_lst$affect <- c(id_lst$affect, save$id_trim) + ## Second, try trimming by unaffected status if no unknowns to trim + if (!is_trim) { + save <- find_avail_affected(ped_trim, avail, affstatus = 0) + is_trim <- save$is_trim + } + + ## Third, try trimming by affected status if no unknowns & no + ## unaffecteds to trim + if (!is_trim) { + save <- find_avail_affected(ped_trim, avail, affstatus = 1) + is_trim <- save$is_trim + } + + if (is_trim) { + ped_trim <- save$ped + avail <- save$new_avail + bit_size <- save$bit_size + bitsize_vec <- c(bitsize_vec, bit_size) + id_trim <- c(id_trim, save$id_trim) + id_lst$affect <- c(id_lst$affect, save$id_trim) + } } - } - ## end while (is_trim) & (bit_size > max_bits) + ## end while (is_trim) & (bit_size > max_bits) - n_final <- length(ped_trim$ped$id) + n_final <- length(ped_trim) - obj <- list(pedObj = ped_trim, id_trim = id_trim, id_lst = id_lst, - bit_size = bitsize_vec, avail = avail, pedSizeOriginal = n_origin, - pedSizeIntermed = n_inter, pedSizeFinal = n_final - ) + obj <- list( + pedObj = ped_trim, id_trim = id_trim, id_lst = id_lst, + bit_size = bitsize_vec, avail = avail, pedSizeOriginal = n_origin, + pedSizeIntermed = n_inter, pedSizeFinal = n_final + ) - obj -} - -TRUE \ No newline at end of file + obj + } +) diff --git a/R/trim.R b/R/trim.R deleted file mode 100644 index 7f62322e..00000000 --- a/R/trim.R +++ /dev/null @@ -1,30 +0,0 @@ -# Automatically generated from all.nw using noweb TODO add documentation -#' Trim a Pedigree -#' -#' Carries out the removal of the subjects identified from a Pedigree object. -#' -#' @inheritParams align -#' @inheritParams is_parent -#' @param id_rm Vector of ids to remove -#' -#' @return A Pedigree object with the subjects removed -#' -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' trim(ped1, "1_101") -#' @export -trim <- function(ped, id_rm, missid = "0") { - ## trim subjects from a Pedigree who match the removeID trim relation - ## matrix as well - rmidx <- match(id_rm, ped(ped)$id) - if (length(rmidx) > 0) { - ped(ped)[ped(ped)$dadid %in% id_rm | - ped(ped)$momid %in% id_rm, - c("dadid", "momid")] <- missid - ped[-rmidx, ] - } else { - ped - } -} -TRUE diff --git a/R/unrelated.R b/R/unrelated.R index c6e315d4..e631bf57 100644 --- a/R/unrelated.R +++ b/R/unrelated.R @@ -1,10 +1,7 @@ #' @importFrom stats runif NULL -# Automatically generated from all.nw using noweb Authors: Dan Schaid, Shannon -# McDonnell Updated by Jason Sinnwell - -#' Get unrelated subjects +#' Find Unrelated subjects #' #' @description #' Determine set of maximum number of unrelated available subjects from a @@ -13,7 +10,8 @@ NULL #' @details #' Determine set of maximum number of unrelated available subjects from a #' Pedigree, given vectors id, father, and mother for a Pedigree structure, and -#' status vector of T/F for whether each subject is available (e.g. has DNA) +#' status vector of `TRUE` / `FALSE` for whether each subject is +#' available (e.g. has DNA). #' #' This is a greedy algorithm that uses the kinship matrix, sequentially #' removing rows/cols that are non-zero for subjects that have the most number @@ -23,79 +21,101 @@ NULL #' zeros for rows, a random choice is made. Hence, running this function #' multiple times can return different sets of unrelated subjects. #' -#' @inheritParams align -#' @inheritParams is_informative +#' If **avail** is `NULL`, it is extracted with its +#' corresponding accessor from the Ped object. +#' +#' @inheritParams shrink #' #' @return A vector of the ids of subjects that are unrelated. #' @examples -#' data(sampleped) -#' fam1 <- sampleped[sampleped$family == 1, ] -#' #' +#' data(sampleped) +#' fam1 <- sampleped[sampleped$famid == 1, ] #' ped1 <- Pedigree(fam1) -#' -#' ## to see plot: -#' id1 <- unrelated(ped1) -#' -#' id1 +#' unrelated(ped1) #' ## some possible vectors #' ## [1] '110' '113' '133' '109' #' ## [1] '113' '118' '141' '109' #' ## [1] '113' '118' '140' '109' #' ## [1] '110' '113' '116' '109' #' ## [1] '113' '133' '141' '109' +#' +#' @author Dan Schaid and Shannon McDonnell updated by Jason Sinnwell +#' @export +#' @usage NULL +setGeneric("unrelated", signature = "obj", + function(obj, ...) standardGeneric("unrelated") +) + +#' @rdname unrelated #' @export -unrelated <- function(ped, avail = ped(ped)$avail) { - # Requires: kinship function +setMethod("unrelated", "Ped", + function(obj, avail = NULL) { + if (is.null(avail)) { + avail <- avail(obj) + } + # Requires: kinship function - # Given vectors id, father, and mother for a Pedigree structure, and avail - # = vector of T/F or 1/0 for whether each subject (corresponding to id - # vector) is available (e.g., has DNA available), determine set of maximum - # number of unrelated available subjects from a Pedigree. + # Given vectors id, father, and mother for a Pedigree structure, + # and avail = vector of T/F or 1/0 for whether each subject + # (corresponding to id vector) is available + # (e.g., has DNA available), determine set of maximum + # number of unrelated available subjects from a Pedigree. - # This is a greedy algorithm that uses the kinship matrix, sequentially - # removing rows/cols that are non-zero for subjects that have the most - # number of zero kinship coefficients (greedy by choosing a row of kinship - # matrix that has the most number of zeros, and then remove any cols and - # their corresponding rows that are non-zero. To account for ties of the - # count of zeros for rows, a random choice is made. Hence, running this - # function multiple times can return different sets of unrelated subjects. + # This is a greedy algorithm that uses the kinship matrix, sequentially + # removing rows/cols that are non-zero for subjects that have the most + # number of zero kinship coefficients + # (greedy by choosing a row of kinship matrix that has the most number + # of zeros, and then remove any cols and their corresponding rows that + # are non-zero). + # To account for ties of the count of zeros for rows, a random choice + # is made. + # Hence, running this function multiple times can return different + # sets of unrelated subjects. - id <- ped(ped)$id + id <- id(obj) - kin <- kinship(ped) + kin <- kinship(obj) - ord <- order(id) - id <- id[ord] - avail <- as.logical(avail[ord]) - kin <- kin[ord, ][, ord] + ord <- order(id) + id <- id[ord] + avail <- as.logical(avail[ord]) + kin <- kin[ord, ][, ord] - rord <- order(runif(nrow(kin))) + rord <- order(runif(nrow(kin))) - id <- id[rord] - avail <- avail[rord] - kin <- kin[rord, ][, rord] + id <- id[rord] + avail <- avail[rord] + kin <- kin[rord, ][, rord] - kin_avail <- kin[avail, , drop = FALSE][, avail, drop = FALSE] + kin_avail <- kin[avail, , drop = FALSE][, avail, drop = FALSE] - diag(kin_avail) <- 0 + diag(kin_avail) <- 0 - while (any(kin_avail > 0)) { - nr <- nrow(kin_avail) - indx <- seq_len(nrow(kin_avail)) - zero_count <- apply(kin_avail == 0, 1, sum) + while (any(kin_avail > 0)) { + nr <- nrow(kin_avail) + indx <- seq_len(nrow(kin_avail)) + zero_count <- apply(kin_avail == 0, 1, sum) - mx <- max(zero_count[zero_count < nr]) - zero_mx <- indx[zero_count == mx][1] + mx <- max(zero_count[zero_count < nr]) + zero_mx <- indx[zero_count == mx][1] - exclude <- indx[kin_avail[, zero_mx] > 0] + exclude <- indx[kin_avail[, zero_mx] > 0] - kin_avail <- kin_avail[-exclude, , drop = FALSE][ - , -exclude, drop = FALSE - ] + kin_avail <- kin_avail[-exclude, , drop = FALSE][ + , -exclude, drop = FALSE + ] + } + + sort(dimnames(kin_avail)[[1]]) } +) - sort(dimnames(kin_avail)[[1]]) -} -TRUE +#' @rdname unrelated +#' @export +setMethod("unrelated", "Pedigree", + function(obj, avail = NULL) { + unrelated(ped(obj), avail = avail) + } +) diff --git a/R/useful_inds.R b/R/useful_inds.R index aea76f38..948bcc25 100644 --- a/R/useful_inds.R +++ b/R/useful_inds.R @@ -1,65 +1,58 @@ -#' Compute the usefulness of individuals +#' Usefulness of individuals #' -#' @description Check for usefulness of individuals +#' @description Compute the usefulness of individuals #' #' @details Check for the informativeness of the individuals based on the #' informative parameter given, the number of children and the usefulness -#' of their parents. A `useful` column is added to the dataframe with the +#' of their parents. A `useful` slot is added to the Ped object with the #' usefulness of the individual. This boolean is hereditary. #' #' @param num_child_tot A numeric vector of the number of children of each #' individuals #' @param keep_infos Boolean to indicate if individuals with unknown status #' but available or reverse should be kept +#' @inheritParams Ped #' @inheritParams is_informative -#' @inheritParams num_child -#' @inheritParams kinship -#' @inheritParams is_parent #' #' @return #' ## When obj is a vector #' A vector of useful individuals identifiers #' -#' ## When obj is a Pedigree -#' The Pedigree object with a new column named 'useful' containing 1 for -#' useful individuals and 0 otherwise. -#' @examples -#' data(sampleped) -#' ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -#' ped1 <- num_child(ped1) -#' useful_inds(ped1, informative = "AvAf")$ped +#' ## When obj is a Pedigree or Ped object +#' The Pedigree or Ped object with the slot 'useful' containing `TRUE` for +#' useful individuals and `FALSE` otherwise. +#' @keywords shrink #' @export +#' @usage NULL setGeneric("useful_inds", signature = "obj", function(obj, ...) standardGeneric("useful_inds") ) #' @include is_informative.R -#' @export #' @rdname useful_inds -#' @docType methods -#' @aliases useful_inds,character +#' @export setMethod("useful_inds", "character", function(obj, dadid, momid, avail, affected, num_child_tot, - informative = "AvAf", keep_infos = FALSE, missid = "0" + informative = "AvAf", keep_infos = FALSE ) { id <- obj # Get informative individuals id_inf <- is_informative(id, avail, affected, - informative, missid + informative ) - is_inf <- id %in% id_inf + isinf <- id %in% id_inf # Keep individual affected or available if (keep_infos) { - is_inf <- is_inf | + isinf <- isinf | (!is.na(affected) & affected == 1) | (!is.na(avail) & avail == 1) } # Check if parents participate to the Pedigree structure ped_part <- num_child_tot > 1 - to_kept <- is_inf | ped_part + to_kept <- isinf | ped_part num_ind_old <- 0 num_ind_new <- length(id[to_kept]) @@ -80,26 +73,45 @@ setMethod("useful_inds", "character", } ) -#' @docType methods -#' @aliases useful_inds,Pedigree -#' @export #' @rdname useful_inds #' @param reset Boolean to indicate if the `useful` column should be reset +#' @examples +#' +#' data(sampleped) +#' ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) +#' ped(useful_inds(ped1, informative = "AvAf")) +#' @export setMethod("useful_inds", "Pedigree", function(obj, - informative = "AvAf", keep_infos = FALSE, - missid = "0", reset = FALSE + informative = "AvAf", keep_infos = FALSE, reset = FALSE ) { - cols_needed <- c( - "avail", "affected", "num_child_tot" + new_ped <- useful_inds(ped(obj), + informative, keep_infos, reset ) - check_columns(obj$ped, cols_needed, "", "", others_cols = TRUE) - useful <- useful_inds(obj$ped$id, obj$ped$dadid, obj$ped$momid, - obj$ped$avail, obj$ped$affected, obj$ped$num_child_tot, - informative, keep_infos, missid + + obj@ped <- new_ped + validObject(obj) + obj +}) + +#' @rdname useful_inds +#' @export +setMethod("useful_inds", "Ped", function(obj, + informative = "AvAf", keep_infos = FALSE, reset = FALSE +) { + useful <- useful_inds(id(obj), dadid(obj), momid(obj), + avail(obj), affected(obj), obj@num_child_tot, + informative, keep_infos ) - if (!reset) { - check_columns(obj$ped, NULL, "useful", NULL, others_cols = TRUE) + + if (!reset & any(!is.na(useful(obj)))) { + stop( + "The useful slot already has values in the Ped object", + " and reset is set to FALSE" + ) } - obj$ped$useful <- ifelse(obj$ped$id %in% useful, 1, 0) + obj@useful <- vect_to_binary( + ifelse(id(obj) %in% useful, 1, 0), logical = TRUE + ) + validObject(obj) obj }) diff --git a/R/utils.R b/R/utils.R index e26e2659..9cb63e56 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,7 +1,7 @@ #' @importFrom dplyr select one_of %>% NULL -#' Check for columns name usage +#' Check columns presence in a dataframe #' #' @description Check for presence / absence of columns names #' depending on their need @@ -38,8 +38,11 @@ NULL #' ColU1 = 'B', ColU2 = '1', #' ColTU1 = 'A', ColTU2 = 3, #' ColNR1 = 4, ColNR2 = 5) -#' tryCatch(check_columns(df, c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), -#' c('ColTU1', 'ColTU2')), error = function(e) print(e)) +#' tryCatch( +#' check_columns(df, +#' c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), +#' c('ColTU1', 'ColTU2') +#' ), error = function(e) print(e)) #' #' @keywords internal check_columns <- function( @@ -125,18 +128,18 @@ check_columns <- function( #'@importFrom stringr str_detect NULL -#' Check is numeric +#' Is numeric or NA #' #' @description Check if a variable given is numeric or NA #' -#' @details Check if the values in `var` are numeric or if they are -#' NA in the case that `na_as_num` is set to TRUE. +#' @details Check if the values in **var** are numeric or if they are +#' `NA` in the case that `na_as_num` is set to TRUE. #' #' @param var Vector of value to test #' @param na_as_num Boolean defining if the `NA` string should be #' considered as numerical values #' -#' @return A vector of boolean of the same size as `var` +#' @return A vector of boolean of the same size as **var** #' @keywords internal check_num_na <- function(var, na_as_num = TRUE) { # Should the NA value considered as numeric values @@ -150,66 +153,95 @@ check_num_na <- function(var, na_as_num = TRUE) { } -#' Check wich individuals are parents +#' Are individuals parents #' #' @description Check which individuals are parents. #' -#' @param id A vector of each subjects identifiers -#' @param dadid A vector containing for each subject, the identifiers of the -#' biologicals fathers. -#' @param momid vector containing for each subject, the identifiers of the -#' biologicals mothers. -#' @param missid The missing identifier value. Founders are the individuals with -#' no father and no mother in the Pedigree -#' (i.e. `dadid` and `momid` equal to the value of this variable). -#' The default for `missid` is `"0"`. -#' -#' @return A vector of boolean of the same size as `id` -#' with TRUE if the individual is a parent and FALSE otherwise +#' @param obj A vector of each subjects identifiers or a Ped object +#' @inheritParams Ped #' +#' @return A vector of boolean of the same size as **obj** +#' with TRUE if the individual is a parent and FALSE otherwise +#' @inheritParams Ped #' @keywords internal -is_parent <- function(id, dadid, momid, missid = "0") { - # determine subjects who are parents assume input of dadid/momid indices, - # not ids +#' @usage NULL +setGeneric("is_parent", signature = "obj", + function(obj, ...) standardGeneric("is_parent") +) - if (length(id) != length(dadid) | length(id) != length(momid)) { - stop("The length of the vectors are not the same") +#' @rdname is_parent +#' @examples +#' +#' is_parent(c("1", "2", "3", "4"), c("3", "3", NA, NA), c("4", "4", NA, NA)) +#' @export +setMethod("is_parent", "character_OR_integer", + function(obj, dadid, momid, missid = NA_character_) { + # determine subjects who are parents assume input of + # dadid/momid indices, not ids + + if (length(obj) != length(dadid) | length(obj) != length(momid)) { + stop("The length of the vectors are not the same") + } + + is_father <- !is.na(match(obj, unique(dadid[!dadid %in% missid]))) + is_mother <- !is.na(match(obj, unique(momid[!momid %in% missid]))) + is_father | is_mother } +) - is_father <- !is.na(match(id, unique(dadid[dadid != missid]))) - is_mother <- !is.na(match(id, unique(momid[momid != missid]))) - is_father | is_mother -} +#' @rdname is_parent +#' @examples +#' +#' data(sampleped) +#' ped <- Pedigree(sampleped) +#' is_parent(ped(ped)) +#' @export +setMethod("is_parent", "Ped", + function(obj, missid = NA_character_) { + is_parent(id(obj), dadid(obj), momid(obj), missid) + } +) -#' Check wich individuals are founders +#' Are individuals founders #' #' @description Check which individuals are founders. #' -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A vector of boolean of the same size as `dadid` and `momid` -#' with TRUE if the individual has no parents (i.e is a founder) and FALSE -#' otherwise. +#' @return A vector of boolean of the same size as **dadid** and **momid** +#' with `TRUE` if the individual has no parents (i.e is a founder) and +#' `FALSE` otherwise. #' +#' @examples +#' is_founder(c("3", "3", NA, NA), c("4", "4", NA, NA)) #' @keywords internal -is_founder <- function(momid, dadid, missid = "0") { - (dadid == missid) & (momid == missid) +#' @export +is_founder <- function(momid, dadid, missid = NA_character_) { + (dadid %in% missid) & (momid %in% missid) } -#' Check wich individuals are disconnected +#' Are individuals disconnected #' #' @description Check which individuals are disconnected. #' #' @details An individuals is considered disconnected if the kinship with -#' all the other individuals is 0. +#' all the other individuals is `0`. #' -#' @inheritParams is_parent +#' @inheritParams Ped #' -#' @return A vector of boolean of the same size as `id` -#' with TRUE if the individual is disconnected and FALSE otherwise +#' @return A vector of boolean of the same size as **id** +#' with `TRUE` if the individual is disconnected and +#' `FALSE` otherwise #' #' @include kinship.R #' @keywords internal +#' @examples +#' is_disconnected( +#' c("1", "2", "3", "4", "5"), +#' c("3", "3", NA, NA, NA), +#' c("4", "4", NA, NA, NA) +#' ) +#' @export is_disconnected <- function(id, dadid, momid) { # check to see if any subjects are disconnected in Pedigree by checking for # kinship = 0 for all subjects excluding self @@ -221,13 +253,9 @@ is_disconnected <- function(id, dadid, momid) { #' @importFrom plyr revalue NULL -#' Transform a gender variable to an ordered factor +#' Gender variable to ordered factor #' -#' @param sex A character, factor or numeric vector corresponding to -#' the gender of the individuals. The following values are recognized: -#' - character() or factor() : "f", "m", "woman", "man", "male", "female", -#' "unknown", "terminated" -#' - numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +#' @inheritParams Ped #' #' @return an ordered factor vector containing the transformed variable #' "male" < "female" < "unknown" < "terminated" @@ -257,65 +285,61 @@ sex_to_factor <- function(sex) { #' @importFrom stringr str_remove_all NULL -#' Transform a relationship code variable to an ordered factor -#' -#' @param rel_code A character, factor or numeric vector corresponding to -#' the relation code of the individuals: -#' - MZ twin = Monozygotic twin -#' - DZ twin = Dizygotic twin -#' - UZ twin = twin of unknown zygosity -#' - Spouse = Spouse -#' The following values are recognized: -#' - character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with -#' of without space between the words. The case is not important. -#' - numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +#' Relationship code variable to ordered factor +#' +#' @inheritParams Rel #' #' @return an ordered factor vector containing the transformed variable #' "MZ twin" < "DZ twin" < "UZ twin" < "Spouse" #' @examples #' rel_code_to_factor(c(1, 2, 3, 4, "MZ twin", "DZ twin", "UZ twin", "Spouse")) #' @export -rel_code_to_factor <- function(rel_code) { - if (is.factor(rel_code) || is.numeric(rel_code)) { - rel_code <- as.character(rel_code) +rel_code_to_factor <- function(code) { + if (is.factor(code) || is.numeric(code)) { + code <- as.character(code) } ## Normalized difference notations for code code_equiv <- c( mztwin = "MZ twin", dztwin = "DZ twin", uztwin = "UZ twin", spouse = "Spouse", - `1` = "MZ twin", `2` = "DZ twin", `3` = "UZ twin", `4` = "Spouse" + "1" = "MZ twin", "2" = "DZ twin", "3" = "UZ twin", "4" = "Spouse" ) codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") - rel_code <- as.character(revalue(as.factor( + code <- as.character(revalue(as.factor( str_remove_all( - casefold(as.character(rel_code), upper = FALSE), + casefold(as.character(code), upper = FALSE), " " ) ), code_equiv, warn_missing = FALSE)) - rel_code <- factor(rel_code, codes, ordered = TRUE) - rel_code + code <- factor(code, codes, ordered = TRUE) + code } -TRUE -#' Transform a vector variable to binary vector +#' Vector variable to binary vector +#' +#' @description Transform a vector to a binary vector. +#' All values that are not `0`, `1`, `TRUE`, `FALSE`, or `NA` +#' are transformed to `NA`. #' #' @param vect A character, factor, logical or numeric vector corresponding to -#' a binary variable (i.e. 0 or 1). +#' a binary variable (i.e. `0` or `1`). #' The following values are recognized: #' - character() or factor() : "TRUE", "FALSE", "0", "1", "NA" will be -#' respectively transformed to 1, 0, 0, 1, NA. +#' respectively transformed to `1`, `0`, `0`, `1`, `NA`. #' Spaces and case are ignored. #' All other values will be transformed to NA. -#' - numeric() : 0 and 1 are kept, all other values are transformed to NA. -#' - logical() : TRUE and FALSE are tansformed to 1 and 0. -#' -#' @return numeric binary vector of the same size as `vect` with 0 and 1 +#' - numeric() : `0` and `1` are kept, all other values are transformed to NA. +#' - logical() : `TRUE` and `FALSE` are tansformed to `1` and `0`. +#' @param logical Boolean defining if the output should be a logical vector +#' instead of a numeric vector (i.e. `0` and `1` becomes `FALSE` and `TRUE). +#' @return numeric binary vector of the same size as **vect** +#' with `0` and `1` #' @examples #' vect_to_binary( #' c(0, 1, 2, 3.6, "TRUE", "FALSE", "0", "1", "NA", "B", TRUE, FALSE, NA) #' ) #' @export -vect_to_binary <- function(vect) { +vect_to_binary <- function(vect, logical = FALSE) { if (is.factor(vect) || is.numeric(vect) || is.logical(vect)) { vect <- as.character(vect) } @@ -330,6 +354,45 @@ vect_to_binary <- function(vect) { ), code_equiv, warn_missing = FALSE )) vect[!vect %in% c(0, 1)] <- NA - vect + if (logical) { + as.logical(vect) + } else { + vect + } +} + +#' Anchor variable to ordered factor +#' +#' @param anchor A character, factor or numeric vector corresponding to +#' the anchor of the individuals. The following values are recognized: +#' - character() or factor() : "0", "1", "2", "left", "right", "either" +#' - numeric() : 1 = "left", 2 = "right", 0 = "either" +#' +#' @return An ordered factor vector containing the transformed variable +#' "either" < "left" < "right" +#' @examples +#' anchor_to_factor(c(1, 2, 0, "left", "right", "either")) +#' @export +anchor_to_factor <- function(anchor) { + if (is.factor(anchor) || is.numeric(anchor)) { + anchor <- as.character(anchor) + } + ## Normalized difference notations for anchor + anchor_equiv <- c( + "0" = "either", "1" = "left", "2" = "right", + "left" = "left", "right" = "right", "either" = "either" + ) + anchor <- as.character(revalue(as.factor( + casefold(anchor, upper = FALSE) + ), anchor_equiv, warn_missing = FALSE)) + anchor_codes <- c("left", "right", "either") + if (any(!anchor %in% anchor_codes)) { + stop(paste( + "The following values are not recognized :", + paste0(unique(anchor[!anchor %in% anchor_codes]), collapse = ", "), + ".\n" + )) + } + + factor(anchor, anchor_codes, ordered = TRUE) } -TRUE diff --git a/R/validity.R b/R/validity.R deleted file mode 100644 index 19709715..00000000 --- a/R/validity.R +++ /dev/null @@ -1,219 +0,0 @@ -#' Print0 to max -#' -#' Print0 the elements inside a vector until a maximum is reached. -#' -#' @param x A vector. -#' @param max The maximum number of elements to print. -#' @param ... Additional arguments passed to print0 -#' -#' @return The character vector aggregated until the maximum is reached. -#' @keywords internal -paste0max <- function(x, max = 5, ...) { - if (length(x) > max) { - paste(paste0(unique(x[seq_len(max)]), collapse = ", ", ...), "...") - } else { - paste0(unique(x), collapse = ", ", ...) - } -} - -#' Check if the fields are present in an object slot -#' -#' @param obj An object. -#' @param slot A slot of object. -#' @param fields A character vector with the fields to check. -#' -#' @return A character vector with the errors if any. -#' @keywords internal -check_slot_fd <- function(obj, slot = NULL, fields = character()) { - if (is.object(obj)) { - obj <- as.list(obj) - } - if (is.data.frame(obj[[slot]])) { - array_names <- colnames(obj[[slot]]) - } else if (is.list(obj[[slot]])) { - array_names <- names(obj[[slot]]) - } else { - stop( - "Slot ", slot, " is not a data.frame or a list.", - class(obj[[slot]]), " found." - ) - } - if (length(array_names) == 0) { - paste0( - "Missing fields in ", slot, - " slot. See Pedigree documentation." - ) - } else if (any(!fields %in% array_names)) { - paste0( - "`", paste0max(fields[!fields %in% array_names]), - "`", " column(s) is not present in slot ", slot, "." - ) - } -} - -#' Check values in a slot -#' -#' Check if the all the values in a slot are in a vector of values. -#' -#' @param obj An object. -#' @param slot A slot of the object. -#' @param column A column of the slot. -#' @param values A vector of values to check. -#' @param present A logical value indicating if the values should be present -#' or not -#' -#' @return A character vector with the errors if any. -#' @keywords internal -check_values <- function(val, ref, present = TRUE) { - if (length(dim(val)) > 1) { - stop("val must be a vector") - } - - if (present) { - val_abs <- !val %in% ref - should <- " should be in " - } else { - val_abs <- val %in% ref - should <- " should not be in " - } - - if (any(val_abs)) { - paste0( - "Values ", paste0max(val[val_abs]), should, - paste0max(ref), "." - ) - } -} - -#' Pedigree validity method. -#' -#' Check if the Pedigree object is valid. -#' -#' It will check : -#' the fields of the slots -#' the values in the columns of the ped, rel and scale slot -#' @param object A Pedigree object. -#' @return A logical value or a character vector with the errors. -#' @keywords internal -is_valid <- function(object) { - missid <- "0" - errors <- c() - - #### Check that the slots have the right columns #### - ped_cols <- c( - "id", "dadid", "momid", "family", - "sex", "steril", "status", "avail", "affected" - ) - rel_cols <- c("id1", "id2", "code", "family") - fill_cols <- c( - "order", "column_values", "column_mods", "mods", - "labels", "affected", "fill", "density", "angle" - ) - border_cols <- c("column", "mods", "labels", "border") - errors <- c(errors, check_slot_fd(object, "ped", ped_cols)) - errors <- c(errors, check_slot_fd(object, "rel", rel_cols)) - errors <- c(errors, check_slot_fd(object, "scales", c("fill", "border"))) - errors <- c(errors, check_slot_fd(object$scales, "fill", fill_cols)) - errors <- c(errors, check_slot_fd(object$scales, "border", border_cols)) - - - #### Check that the ped columns have the right values #### - # Check for ped$id uniqueness - if (any(duplicated(object$ped$id))) { - errors <- c(errors, "Id in ped slot must be unique") - } - - # Control values for ped - errors <- c(errors, check_values(object$ped$id, missid, present = FALSE)) - errors <- c(errors, check_values( - object$ped$dadid, c(object$ped$id, missid) - )) - errors <- c(errors, check_values( - object$ped$momid, c(object$ped$id, missid) - )) - sex_code <- c("male", "female", "unknown", "terminated") - errors <- c(errors, check_values(object$ped$sex, sex_code)) - errors <- c(errors, check_values(object$ped$steril, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$status, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$avail, c(0, 1, NA))) - errors <- c(errors, check_values(object$ped$affected, c(0, 1, NA))) - - # Control sex for parents - id <- object$ped$id - momid <- object$ped$momid - dadid <- object$ped$dadid - sex <- object$ped$sex - is_dad <- id %in% dadid - is_mom <- id %in% momid - if (any(sex[is_dad] != "male")) { - errors <- c(errors, "Some dad are not male") - } - if (any(sex[is_mom] != "female")) { - errors <- c(errors, "Some mom are not female") - } - if (any( - (dadid %in% missid & (! momid %in% missid)) | - ((! dadid %in% missid) & momid %in% missid) - )) { - errors <- c(errors, "Individuals should have both parents or none") - } - - #### Check that the rel columns have the right values #### - codes <- c("MZ twin", "DZ twin", "UZ twin", "Spouse") - errors <- c(errors, check_values(object$rel$code, codes)) - errors <- c(errors, check_values( - object$rel$family, c(object$ped$family, NA) - )) - errors <- c(errors, check_values(object$rel$id1, object$ped$id)) - errors <- c(errors, check_values(object$rel$id2, object$ped$id)) - - # Check if twins has same parents - code <- object$rel$code - ncode <- as.numeric(code) - id1 <- object$rel$id1 - id2 <- object$rel$id2 - temp1 <- match(id1, id, nomatch = 0) - temp2 <- match(id2, id, nomatch = 0) - if (any(ncode < 3)) { - twins <- (ncode < 3) - if (any(momid[temp1[twins]] != momid[temp2[twins]])) { - errors <- c(errors, "twins found with different mothers") - } - if (any(dadid[temp1[twins]] != dadid[temp2[twins]])) { - errors <- c(errors, "twins found with different fathers") - } - } - - # Check if the monozygote twins has same gender - if (any(ncode == 1)) { - mztwins <- (ncode == 1) - if (any(sex[temp1[mztwins]] != sex[temp2[mztwins]])) { - errors <- c(errors, "MZ twins with different genders") - } - } - - # Check that the scales columns have the right values - errors <- c(errors, check_values( - object$scales$fill$column_values, colnames(object$ped) - )) - errors <- c(errors, check_values( - object$scales$fill$column_mods, colnames(object$ped) - )) - errors <- c(errors, check_values( - object$scales$border$column, colnames(object$ped) - )) - - # Check that all modalities are present in the scales - for (col in unique(object$scales$column)){ - errors <- c(errors, check_values( - object$ped$col, - object$scales$fill[object$scales$fill$column_mods == col, "mods"] - )) - } - - if (length(errors) == 0) { - TRUE - } else { - errors - } -} diff --git a/data/sampleped.rda b/data/sampleped.rda index 56622175..d090493d 100644 Binary files a/data/sampleped.rda and b/data/sampleped.rda differ diff --git a/inst/extdata/sampleped.tab b/inst/extdata/sampleped.tab index 043253e7..d2c91583 100644 --- a/inst/extdata/sampleped.tab +++ b/inst/extdata/sampleped.tab @@ -1,4 +1,4 @@ -"family" "id" "dadid" "momid" "sex" "affected" "available" +"famid" "id" "dadid" "momid" "sex" "affection" "avail" "1" 1 101 0 0 1 0 0 "2" 1 102 0 0 2 1 0 "3" 1 103 135 136 1 1 0 diff --git a/inst/script/dataset.R b/inst/script/dataset.R index 3eacf151..e025ea23 100644 --- a/inst/script/dataset.R +++ b/inst/script/dataset.R @@ -7,8 +7,12 @@ sampleped <- read.delim("inst/extdata/sampleped.tab", header = TRUE, sep = " ", stringsAsFactors = FALSE ) -sampleped[c("family", "id", "dadid", "momid")] <- as.data.frame( - lapply(sampleped[c("family", "id", "dadid", "momid")], as.character) +sampleped[c("famid", "id", "dadid", "momid")] <- as.data.frame( + lapply(sampleped[c("famid", "id", "dadid", "momid")], as.character) +) +sampleped <- mutate_if( + sampleped, is.character, + ~replace(., . %in% "0", NA) ) summary(sampleped) usethis::use_data(sampleped, overwrite = TRUE) diff --git a/man/Hints-class.Rd b/man/Hints-class.Rd new file mode 100644 index 00000000..136d404a --- /dev/null +++ b/man/Hints-class.Rd @@ -0,0 +1,145 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Hints-class} +\alias{Hints-class} +\alias{Hints} +\alias{Hints,Hints,missing_OR_NULL-method} +\alias{Hints,list,missing_OR_NULL-method} +\alias{Hints,numeric,data.frame-method} +\alias{Hints,numeric,missing_OR_NULL-method} +\alias{Hints,missing_OR_NULL,missing_OR_NULL-method} +\alias{horder} +\alias{horder,Hints-method} +\alias{horder<-} +\alias{horder<-,Hints-method} +\alias{spouse} +\alias{spouse,Hints-method} +\alias{spouse<-,Hints,data.frame-method} +\alias{as.list,Hints-method} +\alias{subset,Hints-method} +\title{Hints object} +\usage{ +Hints(horder, spouse) + +\S4method{Hints}{list,missing_OR_NULL}(horder, spouse) + +\S4method{Hints}{numeric,data.frame}(horder, spouse) + +\S4method{Hints}{numeric,missing_OR_NULL}(horder, spouse) +} +\arguments{ +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} + +\item{spouse}{A data.frame with one row per hinted marriage, usually only +a few marriages in a pedigree will need an added hint, for instance reverse +the plot order of a husband/wife pair. +Each row contains the id of the left spouse (i.e. \code{idl}), the id of the +right hand spouse (i.e. \code{idr}), and the anchor (i.e : \code{anchor} : +\code{1} = left, \code{2} = right, \code{0} = either). +Children will preferentially appear under the parents of the anchored spouse.} +} +\value{ +A Hints object. +} +\description{ +The hints are used to specify the order of the individuals in the pedigree +and to specify the order of the spouses. + +\subsection{Constructor :}{ + +You either need to provide \strong{horder} or \strong{spouse} in +the dedicated parameters (together or separately), or inside a list. +} +} +\section{Slots}{ + +\describe{ +\item{\code{horder}}{A numeric named vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within +a sibship, as well as the relative order of processing for the founder +couples. (For this latter, the female founders are ordered as though they +were sisters).} + +\item{\code{spouse}}{A data.frame with one row per hinted marriage, usually +only a few marriages in a Pedigree will need an added hint, for +instance reverse the plot order of a husband/wife pair. +Each row contains the identifiers of the left spouse, the right hand spouse, +and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +}} + +\section{Accessors}{ + +\itemize{ +\item \code{horder(x)} : Get the horder vector +} + + +\itemize{ +\item \code{horder(x) <- value} : Set the horder vector +} + + +\itemize{ +\item \code{spouse(x)} : Get the spouse data.frame +} + + +\itemize{ +\item \code{spouse(x) <- value} : Set the spouse data.frame +} +} + +\section{Generics}{ + +\itemize{ +\item \code{as.list(x)}: Convert a Hints object to a list +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Hints object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +Hints( + list( + horder = c("1" = 1, "2" = 2, "3" = 3), + spouse = data.frame( + idl = c("1", "2"), + idr = c("2", "3"), + anchor = c(1, 2) + ) + ) +) + +Hints( + horder = c("1" = 1, "2" = 2, "3" = 3), + spouse = data.frame( + idl = c("1", "2"), + idr = c("2", "3"), + anchor = c(1, 2) + ) +) + +Hints( + horder = c("1" = 1, "2" = 2, "3" = 3) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Ped-class.Rd b/man/Ped-class.Rd new file mode 100644 index 00000000..1d2a91be --- /dev/null +++ b/man/Ped-class.Rd @@ -0,0 +1,333 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Ped-class} +\alias{Ped-class} +\alias{Ped} +\alias{Ped,data.frame-method} +\alias{Ped,character_OR_integer-method} +\alias{Ped,missing-method} +\alias{id} +\alias{id,Ped-method} +\alias{id<-} +\alias{id<-,Ped,character_OR_integer-method} +\alias{dadid} +\alias{dadid,Ped-method} +\alias{dadid<-} +\alias{dadid<-,Ped,character_OR_integer-method} +\alias{momid} +\alias{momid,Ped-method} +\alias{momid<-} +\alias{momid<-,Ped,character_OR_integer-method} +\alias{famid} +\alias{famid,Ped-method} +\alias{famid<-} +\alias{famid<-,Ped,character_OR_integer-method} +\alias{sex} +\alias{sex,Ped-method} +\alias{sex<-} +\alias{sex<-,Ped,character_OR_integer-method} +\alias{affected} +\alias{affected,Ped-method} +\alias{affected<-} +\alias{affected<-,Ped,numeric_OR_logical-method} +\alias{avail} +\alias{avail,Ped-method} +\alias{avail<-} +\alias{avail<-,Ped,numeric_OR_logical-method} +\alias{status} +\alias{status,Ped-method} +\alias{status<-} +\alias{status<-,Ped,numeric_OR_logical-method} +\alias{isinf} +\alias{isinf,Ped-method} +\alias{isinf<-} +\alias{isinf<-,Ped,numeric_OR_logical-method} +\alias{kin} +\alias{kin,Ped-method} +\alias{kin<-} +\alias{kin<-,Ped,numeric-method} +\alias{useful} +\alias{useful,Ped-method} +\alias{useful<-} +\alias{useful<-,Ped,numeric_OR_logical-method} +\alias{mcols<-,Ped,list-method} +\alias{mcols<-,Ped,data.frame-method} +\alias{summary,Ped-method} +\alias{show,Ped-method} +\alias{as.list,Ped-method} +\alias{as.data.frame,Ped-method} +\alias{subset,Ped-method} +\title{Ped object} +\usage{ +\S4method{Ped}{data.frame}(obj, cols_used_init = FALSE, cols_used_del = FALSE) + +\S4method{Ped}{character_OR_integer}( + obj, + sex, + dadid, + momid, + famid = NA, + steril = NA, + status = NA, + avail = NA, + affected = NA, + missid = NA_character_ +) +} +\arguments{ +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} + +\item{cols_used_init}{Boolean defining if the columns that will be used +should be initialised to NA.} + +\item{cols_used_del}{Boolean defining if the columns that will be used +should be deleted.} + +\item{sex}{A character, factor or numeric vector corresponding to +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: +\itemize{ +\item character() or factor() : "f", "m", "woman", "man", "male", "female", +"unknown", "terminated" +\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +}} + +\item{dadid}{A vector containing for each subject, the identifiers of the +biologicals fathers.} + +\item{momid}{A vector containing for each subject, the identifiers of the +biologicals mothers.} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} + +\item{steril}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{status}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} +} +\value{ +A Ped object. +} +\description{ +S4 class to represent the identity informations of the individuals +in a pedigree. + +\subsection{Constructor :}{ + +You either need to provide a vector of the same size for each slot +or a \code{data.frame} with the corresponding columns. + +The metadata will correspond to the columns that do not correspond +to the Ped slots. +} +} +\details{ +The minimal needed informations are \code{id}, \code{dadid}, \code{momid} and \code{sex}. +The other slots are used to store recognized informations. +Additional columns can be added to the Ped object and will be +stored in the \code{elementMetadata} slot of the Ped object. +} +\section{Slots}{ + +\describe{ +\item{\code{id}}{A character vector with the id of the individuals.} + +\item{\code{dadid}}{A character vector with the id of the father of the individuals.} + +\item{\code{momid}}{A character vector with the id of the mother of the individuals.} + +\item{\code{sex}}{An ordered factor vector for the sex of the individuals +(i.e. \code{male} < \code{female} < \code{unknown} < \code{terminated}).} + +\item{\code{famid}}{A character vector with the family identifiers of the +individuals (optional).} + +\item{\code{steril}}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{\code{status}}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{\code{avail}}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{\code{affected}}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = not affected, \code{TRUE} = affected, \code{NA} = unknown).} + +\item{\code{useful}}{A logical vector with the usefulness status of the +individuals +(i.e. \code{FALSE} = not useful, \code{TRUE} = useful).} + +\item{\code{isinf}}{A logical vector indicating if the individual is informative +or not +(i.e. \code{FALSE} = not informative, \code{TRUE} = informative).} + +\item{\code{kin}}{A numeric vector with minimal kinship value between the +individuals and the useful individuals.} + +\item{\code{num_child_tot}}{A numeric vector with the total number of children +of the individuals.} + +\item{\code{num_child_dir}}{A numeric vector with the number of children +of the individuals.} + +\item{\code{num_child_ind}}{A numeric vector with the number of children +of the individuals.} + +\item{\code{elementMetadata}}{A DataFrame with the additional metadata columns +of the Ped object.} + +\item{\code{metadata}}{Meta informations about the pedigree.} +}} + +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Ped object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. For each getter, you have a setter with the +same name, to be use as \code{slot(x) <- value}. +The \code{value} parameter is a vector of the same length as \code{x}, except +for the \code{mcols()} accessors where \code{value} is a list or a data.frame with +each elements with the same length as \code{x}. + + +\itemize{ +\item \code{id(x)} : Individuals identifiers +} + + +\itemize{ +\item \code{dadid(x)} : Individuals' father identifiers +} + + +\itemize{ +\item \code{momid(x)} : Individuals' mother identifiers +} + + +\itemize{ +\item \code{famid(x)} : Individuals' family identifiers +} + + +\itemize{ +\item \code{sex(x)} : Individuals' gender +} + + +\itemize{ +\item \code{affected(x)} : Individuals' affection status +} + + +\itemize{ +\item \code{avail(x)} : Individuals' availability status +} + + +\itemize{ +\item \code{status(x)} : Individuals' death status +} + + +\itemize{ +\item \code{isinf(x)} : Individuals' informativeness status +} + + +\itemize{ +\item \code{kin(x)} : Individuals' kinship distance to the +informative individuals +} + + +\itemize{ +\item \code{useful(x)} : Individuals' usefullness status +} + + +\itemize{ +\item \code{mcols(x)} : Individuals' metadata +} +} + +\section{Generics}{ + +\itemize{ +\item \code{summary(x)}: Compute the summary of a Ped object +} + + +\itemize{ +\item \code{show(x)}: Convert the Ped object to a data.frame +and print it with its summary. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Ped object to a list with +the metadata columns at the end. +} + + +\itemize{ +\item \code{as.data.frame(x)}: Convert a Ped object to a data.frame with +the metadata columns at the end. +} + + +\itemize{ +\item \code{subset(x, i, del_parents = FALSE, keep = TRUE)}: Subset a Ped object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{del_parents} : A logical value indicating if the parents +of the individuals should be deleted. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +data(sampleped) +Ped(sampleped) + +Ped( + obj = c("1", "2", "3", "4", "5", "6"), + dadid = c("4", "4", "6", "0", "0", "0"), + momid = c("5", "5", "5", "0", "0", "0"), + sex = c(1, 2, 3, 1, 2, 1), + missid = "0" +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Pedigree-class.Rd b/man/Pedigree-class.Rd index 3790495f..215b6b92 100644 --- a/man/Pedigree-class.Rd +++ b/man/Pedigree-class.Rd @@ -1,55 +1,462 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R \docType{class} \name{Pedigree-class} \alias{Pedigree-class} -\title{S4 class to represent a pedigree.} +\alias{Pedigree} +\alias{Pedigree,character_OR_integer-method} +\alias{Pedigree,data.frame-method} +\alias{Pedigree,missing-method} +\alias{famid,Pedigree-method} +\alias{ped} +\alias{ped,Pedigree,ANY-method} +\alias{ped,Pedigree,missing-method} +\alias{ped<-} +\alias{ped<-,Pedigree,ANY,ANY-method} +\alias{ped<-,Pedigree,missing,Ped-method} +\alias{mcols,Pedigree-method} +\alias{mcols<-,Pedigree,ANY-method} +\alias{rel} +\alias{rel,Pedigree,ANY-method} +\alias{rel,Pedigree,missing-method} +\alias{rel<-} +\alias{rel<-,Pedigree,ANY,ANY-method} +\alias{rel<-,Pedigree,missing,Rel-method} +\alias{scales} +\alias{scales,Pedigree-method} +\alias{scales<-} +\alias{scales<-,Pedigree,Scales-method} +\alias{fill,Pedigree-method} +\alias{fill<-,Pedigree,data.frame-method} +\alias{border,Pedigree-method} +\alias{border<-,Pedigree,data.frame-method} +\alias{hints} +\alias{hints,Pedigree-method} +\alias{hints<-} +\alias{hints<-,Pedigree,Hints-method} +\alias{horder,Pedigree-method} +\alias{horder<-,Pedigree-method} +\alias{spouse,Pedigree-method} +\alias{spouse<-} +\alias{spouse<-,Pedigree,data.frame-method} +\alias{length,Pedigree-method} +\alias{show,Pedigree-method} +\alias{summary,Pedigree-method} +\alias{as.list,Pedigree-method} +\alias{subset,Pedigree-method} +\alias{[,Pedigree,ANY,missing,ANY-method} +\title{Pedigree object} +\usage{ +Pedigree(obj, ...) + +\S4method{Pedigree}{character_OR_integer}( + obj, + dadid, + momid, + sex, + famid = NA, + avail = NULL, + affected = NULL, + status = NULL, + steril = NULL, + rel_df = NULL, + missid = NA_character_, + col_aff = "affection", + normalize = TRUE, + ... +) + +\S4method{Pedigree}{data.frame}( + obj = data.frame(indId = character(), fatherId = character(), motherId = character(), + gender = numeric(), family = character(), available = numeric(), vitalStatus = + numeric(), affection = numeric(), sterilisation = numeric()), + rel_df = data.frame(id1 = character(), id2 = character(), code = numeric(), famid = + character()), + cols_ren_ped = list(indId = "id", fatherId = "dadid", motherId = "momid", family = + "famid", gender = "sex", sterilisation = "steril", affection = "affected", available + = "avail", vitalStatus = "status"), + cols_ren_rel = list(id1 = "indId1", id2 = "indId2", famid = "family"), + hints = list(horder = NULL, spouse = NULL), + normalize = TRUE, + missid = NA_character_, + col_aff = "affection", + ... +) +} +\arguments{ +\item{obj}{A vector of the individuals identifiers or a data.frame +with the individuals informations. See \code{\link[=Ped]{Ped()}} for more informations.} + +\item{...}{ + Arguments passed on to \code{\link[=generate_colors]{generate_colors}} + \describe{ + \item{\code{}}{} + }} + +\item{dadid}{A vector containing for each subject, the identifiers of the +biologicals fathers.} + +\item{momid}{A vector containing for each subject, the identifiers of the +biologicals mothers.} + +\item{sex}{A character, factor or numeric vector corresponding to +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: +\itemize{ +\item character() or factor() : "f", "m", "woman", "man", "male", "female", +"unknown", "terminated" +\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" +}} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} + +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown). +Can also be a data.frame with the same length as \code{obj}. If it is a +matrix, it will be converted to a data.frame and the columns will be +named after the \code{col_aff} argument.} + +\item{status}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = alive, \code{TRUE} = dead, \code{NA} = unknown).} + +\item{steril}{A logical vector with the sterilisation status of the +individuals +(i.e. \code{FALSE} = not sterilised, \code{TRUE} = sterilised, \code{NA} = unknown).} + +\item{rel_df}{A data.frame with the special relationships between +individuals. See \code{\link[=Rel]{Rel()}} for more informations. +The minimum columns required are \code{id1}, \code{id2} and \code{code}. +The \code{famid} column can also be used to specify the family +of the individuals. +If a matrix is given, the columns needs to be ordered as +\code{id1}, \code{id2}, \code{code} and \code{famid}. +The code values are: +\itemize{ +\item \code{1} = Monozygotic twin +\item \code{2} = Dizygotic twin +\item \code{3} = twin of unknown zygosity +\item \code{4} = Spouse +} + +The value relation code recognized by the function are the one defined +by the \code{\link[=rel_code_to_factor]{rel_code_to_factor()}} function.} + +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} + +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} + +\item{normalize}{A logical to know if the data should be normalised.} + +\item{cols_ren_ped}{A named list with the columns to rename for the +pedigree dataframe. This is useful if you want to use a dataframe with +different column names. The names of the list should be the new column +names and the values should be the old column names. The default values +are to be used with \code{normalize = TRUE}.} + +\item{cols_ren_rel}{A named list with the columns to rename for the +relationship matrix. This is useful if you want to use a dataframe with +different column names. The names of the list should be the new column +names and the values should be the old column names.} + +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}.} +} \value{ A Pedigree object. } \description{ A pedigree is a ensemble of individuals linked to each other into a family tree. +A Pedigree object store the informations of the individuals and the +special relationships between them. It also permit to store the +informations needed to plot the pedigree (i.e. scales and hints). + +\subsection{Constructor :}{ + +Main constructor of the package. +This constructor help to create a \code{Pedigree} object from +different \code{data.frame} or a set of vectors. + +If any errors are found in the data, the function will return +the data.frame with the errors of the Ped object and the +Rel object. +} } \details{ -They are created from a data.frame containing the individuals informations -and a relation ship data.frame for the special links between individuals. -A list of scales can be provided to create a legend. -To create a Pedigree object, use the function -\code{\link[=Pedigree]{Pedigree()}}. +If the normalization is set to \code{TRUE}, then the data will be +standardized using the function \code{norm_ped()} and \code{norm_rel()}. + +If a data.frame is given, the columns names needed will depend if +the normalization is selected or not. If the normalization is selected, +the columns names needed are as follow and if not the columns names +needed are in parenthesis: +\itemize{ +\item \code{indID}: the individual identifier (\code{id}) +\item \code{fatherId}: the identifier of the biological father (\code{dadid}) +\item \code{motherId}: the identifier of the biological mother (\code{momid}) +\item \code{gender}: the sex of the individual (\code{sex}) +\item \code{family}: the family identifier of the individual (\code{famid}) +\item \code{sterilisation}: the sterilisation status of the individual (\code{steril}) +\item \code{available}: the availability status of the individual (\code{avail}) +\item \code{vitalStatus}: the death status of the individual (\code{status}) +\item \code{affection}: the affection status of the individual (\code{affected}) +\item \code{...}: other columns that will be stored in the \code{elementMetadata} slot +} + +The minimum columns required are : +\itemize{ +\item \code{indID} / \code{id} +\item \code{fatherId} / \code{dadid} +\item \code{motherId} / \code{momid} +\item \code{gender} / \code{sex} +} + +The \code{family} / \code{famid} column can also be used to specify the family of the +individuals and will be merge to the \code{indId} / \code{id} field separated by an +underscore. +The columns \code{sterilisation}, \code{available}, \code{vitalStatus}, \code{affection} +will be transformed with the \code{\link[=vect_to_binary]{vect_to_binary()}} function when the +normalisation is selected. +If you do not use the normalisation, the columns will be checked to +be \code{0} or \code{1}. + +If \code{affected} is a data.frame, \strong{col_aff} will be overwritten by the column +names of the data.frame. } \section{Slots}{ \describe{ -\item{\code{ped}}{A data.frame with the individuals informations. The minimum -columns required are 'id', 'dadid', 'momid' and 'sex'. Other columns can be -added to the data.frame and will be recognised by the functions. Some -errors can be detected by the validity function and some of them can be -corrected and others will be added to a dedicated column.} - -\item{\code{rel}}{A data.frame for the special relationship between -individuals. -The minimum columns required are 'id1', 'id2' and 'code'.} - -\item{\code{scales}}{A data.frame to use for the affection status. -This data.frame is generated by the function -\code{\link[=generate_aff_inds]{generate_aff_inds()}} followed by -\code{\link[=generate_colors]{generate_colors()}}.} - -\item{\code{hints}}{List of two elements. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a Pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse, -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either). -}} +\item{\code{ped}}{A Ped object for the identity informations. See \code{\link[=Ped]{Ped()}} for +more informations.} + +\item{\code{rel}}{A Rel object for the special relationships. See \code{\link[=Rel]{Rel()}} for +more informations.} + +\item{\code{scales}}{A Scales object for the filling and bordering +colors used in the plot. See \code{\link[=Scales]{Scales()}} for more informations.} + +\item{\code{hints}}{A Hints object for the ordering of the +individuals in the plot. See \code{\link[=Hints]{Hints()}} for more informations.} }} +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Pedigree object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. + + +\itemize{ +\item \code{famid(x)} : Get the family identifiers of a Pedigree object. This +function is a wrapper around \code{famid(ped(x))}. +} + + +\itemize{ +\item \code{ped(x, slot)} : Get the value of a specific slot of the Ped object +} + + +\itemize{ +\item \code{ped(x)} : Get the Ped object +} + + +\itemize{ +\item \code{ped(x, slot) <- value} : Set the value of a specific slot of +the Ped object +Wrapper of \code{slot(ped(x)) <- value} +} + + +\itemize{ +\item \code{ped(x) <- value} : Set the Ped object +} + + +\itemize{ +\item \code{mcols(x)} : Get the metadata of a Pedigree object. +This function is a wrapper around \code{mcols(ped(x))}. +} + + +\itemize{ +\item \code{mcols(x) <- value} : Set the metadata of a Pedigree object. +This function is a wrapper around \code{mcols(ped(x)) <- value}. +} + + +\itemize{ +\item \code{rel(x, slot)} : Get the value of a specific slot of the Rel object +} + + +\itemize{ +\item \code{rel(x)} : Get the Rel object +} + + +\itemize{ +\item \code{rel(x, slot) <- value} : Set the value of a specific slot of the +Rel object +Wrapper of \code{slot(rel(x)) <- value} +} + + +\itemize{ +\item \code{rel(x) <- value} : Set the Rel object +} + + +\itemize{ +\item \code{scales(x)} : Get the Scales object +} + + +\itemize{ +\item \code{scales(x) <- value} : Set the Scales object +} + + +\itemize{ +\item \code{fill(x)} : Get the fill data.frame from the Scales object. +Wrapper of \code{fill(scales(x))} +} + + +\itemize{ +\item \code{fill(x) <- value} : Set the fill data.frame from the Scales object. +Wrapper of \code{fill(scales(x)) <- value} +} + + +\itemize{ +\item \code{border(x)} : Get the border data.frame from the Scales object. +Wrapper of \code{border(scales(x))} +} + + +\itemize{ +\item \code{border(x) <- value} : Set the border data.frame from the Scales object. +Wrapper of \code{border(scales(x)) <- value} +} + + +\itemize{ +\item \code{hints(x)} : Get the Hints object +} + + +\itemize{ +\item \code{hints(x) <- value} : Set the Hints object +} + + +\itemize{ +\item \code{horder(x)} : Get the horder vector from the Hints object. +Wrapper of \code{horder(hints(x))} +} + + +\itemize{ +\item \code{horder(x) <- value} : Set the horder vector from the Hints object. +Wrapper of \code{horder(hints(x)) <- value} +} + + +\itemize{ +\item \code{spouse(x)} : Get the spouse data.frame from the Hints object. +Wrapper of \code{spouse(hints(x))}. +} + + +\itemize{ +\item \code{spouse(x) <- value} : Set the spouse data.frame from the Hints object. +Wrapper of \code{spouse(hints(x)) <- value}. +} +} + +\section{Generics}{ + +\itemize{ +\item \code{length(x)}: Get the length of a Pedigree object. +Wrapper of \code{length(ped(x))}. +} + + +\itemize{ +\item \code{show(x)}: Print the information of the Ped and Rel +object inside the Pedigree object. +} + + +\itemize{ +\item \code{summary(x)}: Compute the summary of the Ped and Rel object +inside the Pedigree object. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Pedigree object to a list +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Pedigree object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{del_parents} : A logical value indicating if the parents +of the individuals should be deleted. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} + + +\itemize{ +\item \code{x[i, del_parents, keep]}: Subset a Pedigree object +based on the individuals identifiers given. +} +} + +\examples{ + +Pedigree( + obj = c("1", "2", "3", "4", "5", "6"), + dadid = c("4", "4", "6", "0", "0", "0"), + momid = c("5", "5", "5", "0", "0", "0"), + sex = c(1, 2, 3, 1, 2, 1), + avail = c(0, 1, 0, 1, 0, 1), + affected = matrix(c( + 0, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1 + ), ncol = 2), + col_aff = c("aff1", "aff2"), + missid = "0", + rel_df = matrix(c( + "1", "2", 2 + ), ncol = 3, byrow = TRUE), +) + +data(sampleped) +Pedigree(sampleped) +} \seealso{ -\code{\link[=Pedigree]{Pedigree()}} +\code{\link[=Pedigree]{Pedigree()}}, \code{\link[=Ped]{Ped()}}, \code{\link[=Rel]{Rel()}}, \code{\link[=Scales]{Scales()}}, \code{\link[=Hints]{Hints()}} + +\code{\link[=Ped]{Ped()}}, \code{\link[=Rel]{Rel()}}, \code{\link[=Scales]{Scales()}} } diff --git a/man/Pedigree.Rd b/man/Pedigree.Rd deleted file mode 100644 index 2828092c..00000000 --- a/man/Pedigree.Rd +++ /dev/null @@ -1,190 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Pedigree.R -\docType{methods} -\name{Pedigree} -\alias{Pedigree} -\alias{Pedigree,numeric-method} -\alias{Pedigree,numeric} -\alias{Pedigree,character-method} -\alias{Pedigree,character} -\alias{Pedigree,data.frame-method} -\alias{Pedigree,data.frame} -\title{Create a Pedigree object} -\usage{ -Pedigree(obj, ...) - -\S4method{Pedigree}{numeric}(obj, ...) - -\S4method{Pedigree}{character}( - obj, - dadid, - momid, - sex, - family = NA, - avail = NULL, - affected = NULL, - status = NULL, - steril = NULL, - relation = NULL, - missid = "0", - col_aff = "affection", - normalize = TRUE, - ... -) - -\S4method{Pedigree}{data.frame}( - obj = data.frame(id = character(), dadid = character(), momid = character(), sex = - numeric(), family = character(), available = numeric(), affection = numeric()), - relation = data.frame(id1 = character(), id2 = character(), code = numeric(), family = - character()), - cols_ren_ped = list(indId = "id", fatherId = "dadid", motherId = "momid", gender = - "sex", family = "family", sterilisation = "steril", vitalStatus = "status", affection - = "affected"), - cols_ren_rel = list(indId1 = "id1", indId2 = "id2"), - scales = list(fill = data.frame(order = numeric(), column_values = character(), - column_mods = character(), mods = numeric(), labels = character(), affected = - logical(), fill = character(), density = numeric(), angle = numeric()), border = - data.frame(column = character(), mods = numeric(), labels = character(), border = - character())), - hints = list(order = NULL, spouse = NULL), - normalize = TRUE, - missid = "0", - col_aff = "affection", - ... -) -} -\arguments{ -\item{obj}{A vector of the individuals identifiers or a data.frame -with the individuals informations. -The minimum columns required are \code{indID}, \code{fatherId}, \code{motherId} and -\code{gender}. -The \code{family} column can also be used to specify the family of the -individuals and will be merge to the \code{id} field separated by an -underscore. -The following columns are also recognize \code{sterilisation}, \code{available}, -\code{vitalStatus}, \code{affection}. The four of them will be transformed with the -\code{\link[=vect_to_binary]{vect_to_binary()}} function when the normalisation is selected and will -be set respectively to \code{steril}, \code{avail}, -\code{status} and \code{affected}. -If you do not use the normalisation, the columns will be checked to -be \code{0} or \code{1}. -They respectively correspond to the sterilisation status, -the availability status, the death status and the affection status -of the individuals. The values recognized for those columns are \code{1} or -\code{0}.} - -\item{...}{Other arguments to pass to the function \code{generate_colors}.} - -\item{dadid}{A vector containing for each subject, the identifiers of the -biologicals fathers.} - -\item{momid}{vector containing for each subject, the identifiers of the -biologicals mothers.} - -\item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: -\itemize{ -\item character() or factor() : "f", "m", "woman", "man", "male", "female", -"unknown", "terminated" -\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" -}} - -\item{family}{A vector of family identifiers} - -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} - -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} - -\item{status}{A numeric vector of vital status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : alive -\item \code{1} : dead -\item \code{NA} : vital status not known -}} - -\item{steril}{A numeric vector of sterilisation status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : not sterilised -\item \code{1} : sterilised -\item \code{NA} : sterilisation status not known -}} - -\item{relation}{A matrix or a data.frame with 3 required columns -(i.e. id1, id2, code) specifying special relationship between pairs -of individuals. -#' The code values are: -\itemize{ -\item \code{1} = Monozygotic twin -\item \code{2} = Dizygotic twin -\item \code{3} = twin of unknown zygosity -\item \code{4} = Spouse -} - -If \code{famid} is given in the call to create Pedigrees, then -\code{famid} needs to be in the last column of \code{relation}.} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{col_aff}{A string with the column name to use for the affection status.} - -\item{normalize}{A logical to know if the data should be normalised.} - -\item{cols_ren_ped}{A named list with the columns to rename for the -pedigree dataframe.} - -\item{cols_ren_rel}{A named list with the columns to rename for the -relationship matrix.} - -\item{scales}{A list of two data.frame with the scales to use for the -affection status and the other one for the border color (e.g availability).} - -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} -} -\value{ -A Pedigree object. -} -\description{ -This constructor help to create a \code{Pedigree} object from -different \code{data.frame} or a set of vectors. -} -\details{ -If any errors are found in the data, the function will return -the data.frame with the errors for the Pedigree and the relationship -data.frame. -} -\examples{ -data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -} diff --git a/man/Pedixplorer_package.Rd b/man/Pedixplorer_package.Rd index c138c605..4b3a607d 100644 --- a/man/Pedixplorer_package.Rd +++ b/man/Pedixplorer_package.Rd @@ -16,7 +16,8 @@ additional functionality and bug fixes. } \details{ The package download, NEWS, and README are available on CRAN: -\\url{https://cran.r-project.org/package=kinship2} +\\url{https://cran.r-project.org/package=kinship2} for the +previous version of the package. } \section{Functions}{ @@ -30,12 +31,14 @@ given identifiers, sex, affection status(es), and special relationships probability having an allele sampled from two individuals be the same via IBD. -\code{\link[=ped_to_plotdf]{ped_to_plotdf()}} : Method to transform a Pedigree -object into a dataframe of graphical elements. -Allows extra information to be included in the id under the plot symbol - -\code{\link[=plot_fromdf]{plot_fromdf()}} : Method to plot a Pedigree from a -dataframe of graphical elements. +\code{\link[=plot]{plot()}} : Method to transform a Pedigree +object into a graphical plot. +Allows extra information to be included in the id under the +plot symbol. +This method use the \code{\link[=plot_fromdf]{plot_fromdf()}} function to transform the Pedigree +object into a data frame of graphical elements, the same is done for the +legend with the \code{\link[=ped_to_legdf]{ped_to_legdf()}} function. +When done, the data frames are plotted with the \code{\link[=plot_fromdf]{plot_fromdf()}} function. \code{\link[=shrink]{shrink()}}: Shrink a Pedigree to a specific bit size, removing non-informative members first. @@ -62,6 +65,7 @@ library(Pedixplorer) \seealso{ Useful links: \itemize{ + \item \url{https://github.com/LouisLeNezet/Pedixplorer} \item Report bugs at \url{https://github.com/LouisLeLezet/Pedixplorer/issues} } @@ -83,4 +87,3 @@ Other contributors: } } -\keyword{internal} diff --git a/man/Rel-class.Rd b/man/Rel-class.Rd new file mode 100644 index 00000000..dea6d06a --- /dev/null +++ b/man/Rel-class.Rd @@ -0,0 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Rel-class} +\alias{Rel-class} +\alias{Rel} +\alias{Rel,data.frame-method} +\alias{Rel,character_OR_integer-method} +\alias{Rel,missing-method} +\alias{code} +\alias{code,Rel-method} +\alias{id1} +\alias{id1,Rel-method} +\alias{id2} +\alias{id2,Rel-method} +\alias{famid,Rel-method} +\alias{famid<-,Rel,character_OR_integer-method} +\alias{summary,Rel-method} +\alias{show,Rel-method} +\alias{as.list,Rel-method} +\alias{as.data.frame,Rel-method} +\alias{subset,Rel-method} +\title{Rel object} +\usage{ +\S4method{Rel}{data.frame}(obj) + +\S4method{Rel}{character_OR_integer}(obj, id2, code, famid = NA_character_) +} +\arguments{ +\item{obj}{A character vector with the id of the first individuals of each +pairs or a \code{data.frame} with all the informations in corresponding columns.} + +\item{id2}{A character vector with the id of the second individuals of each +pairs} + +\item{code}{A character, factor or numeric vector corresponding to +the relation code of the individuals: +\itemize{ +\item MZ twin = Monozygotic twin +\item DZ twin = Dizygotic twin +\item UZ twin = twin of unknown zygosity +\item Spouse = Spouse +The following values are recognized: +\item character() or factor() : "MZ twin", "DZ twin", "UZ twin", "Spouse" with +of without space between the words. The case is not important. +\item numeric() : 1 = "MZ twin", 2 = "DZ twin", 3 = "UZ twin", 4 = "Spouse" +}} + +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} +} +\value{ +A Rel object. +} +\description{ +S4 class to represent the special relationships in a Pedigree. + +\subsection{Constructor :}{ + +You either need to provide a vector of the same size for each slot +or a \code{data.frame} with the corresponding columns. +} +} +\details{ +A Rel object is a list of special relationships +between individuals in the pedigree. +It is used to create a Pedigree object. +The minimal needed informations are \code{id1}, \code{id2} and \code{code}. + +If a \code{famid} is provided, the individuals \code{id} will be aggregated +to the \code{famid} character to ensure the uniqueness of the \code{id}. +} +\section{Slots}{ + +\describe{ +\item{\code{id1}}{A character vector with the id of the first individual.} + +\item{\code{id2}}{A character vector with the id of the second individual.} + +\item{\code{code}}{An ordered factor vector with the code of the special +relationship. + +(i.e. \verb{MZ twin} < \verb{DZ twin} < \verb{UZ twin} < \code{Spouse}).} + +\item{\code{famid}}{A character vector with the famid of the individuals.} +}} + +\section{Accessors}{ + +For all the following accessors, the \code{x} parameters is a Rel object. +Each getters return a vector of the same length as \code{x} with the values +of the corresponding slot. + + +\itemize{ +\item \code{code(x)} : Relationships' code +} + + +\itemize{ +\item \code{id1(x)} : Relationships' first individuals' identifier +} + + +\itemize{ +\item \code{id2(x)} : Relationships' second individuals' identifier +} + + +\itemize{ +\item \code{famid(x)} : Relationships' individuals' family identifier +} + + +\itemize{ +\item \code{famid(x) <- value} : Set the relationships' individuals' family +identifier +\itemize{ +\item \code{value} : A character or integer vector of the same length as x +with the family identifiers +} +} +} + +\section{Generics}{ + +\itemize{ +\item \code{summary(x)}: Compute the summary of a Rel object +} + + +\itemize{ +\item \code{show(x)}: Convert the Rel object to a data.frame +and print it with its summary. +} + + +\itemize{ +\item \code{as.list(x)}: Convert a Rel object to a list +} + + +\itemize{ +\item \code{as.data.frame(x)}: Convert a Rel object to a data.frame +} + + +\itemize{ +\item \code{subset(x, i, keep = TRUE)}: Subset a Rel object +based on the individuals identifiers given. +\itemize{ +\item \code{i} : A vector of individuals identifiers to keep. +\item \code{keep} : A logical value indicating if the individuals +should be kept or deleted. +} +} +} + +\examples{ + +rel_df <- data.frame( + id1 = c("1", "2", "3"), + id2 = c("2", "3", "4"), + code = c(1, 2, 3) +) +Rel(rel_df) + +Rel( + obj = c("1", "2", "3"), + id2 = c("2", "3", "4"), + code = c(1, 2, 3) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} +} diff --git a/man/Scales-class.Rd b/man/Scales-class.Rd new file mode 100644 index 00000000..13cc3d70 --- /dev/null +++ b/man/Scales-class.Rd @@ -0,0 +1,162 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClass.R, R/AllConstructor.R, +% R/AllAccessors.R, R/AllGeneric.R +\docType{class} +\name{Scales-class} +\alias{Scales-class} +\alias{Scales} +\alias{Scales,data.frame,data.frame-method} +\alias{Scales,missing,missing-method} +\alias{fill} +\alias{fill,Scales-method} +\alias{fill<-} +\alias{fill<-,Scales,data.frame-method} +\alias{border} +\alias{border,Scales-method} +\alias{border<-} +\alias{border<-,Scales,data.frame-method} +\alias{as.list,Scales-method} +\title{Scales object} +\usage{ +Scales(fill, border) + +\S4method{Scales}{data.frame,data.frame}(fill, border) +} +\arguments{ +\item{fill}{A data.frame with the informations for the affection status. +The columns needed are: +\itemize{ +\item 'order': the order of the affection to be used +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'affected': a logical value indicating if the mod correspond to an affected +individuals +\item 'fill': the color to use for this mods +\item 'density': the density of the shading +\item 'angle': the angle of the shading +}} + +\item{border}{A data.frame with the informations for the availability status. +The columns needed are: +\itemize{ +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'border': the color to use for this mods +}} +} +\value{ +A Scales object. +} +\description{ +A Scales object is a list of two data.frame. +The first one is used to represent the affection status of the individuals +and therefore the filling of the individuals in the pedigree plot. +The second one is used to represent the availability status of the +individuals and therefore the border color of the individuals in the +pedigree plot. + +\subsection{Constructor :}{ + +You need to provide both \strong{fill} and \strong{border} in the dedicated parameters. +However this is usually done using the \code{\link[=generate_colors]{generate_colors()}} function with a +Pedigree object. +} +} +\section{Slots}{ + +\describe{ +\item{\code{fill}}{A data.frame with the informations for the affection status. +The columns needed are: +\itemize{ +\item 'order': the order of the affection to be used +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'affected': a logical value indicating if the mod correspond to an affected +individuals +\item 'fill': the color to use for this mods +\item 'density': the density of the shading +\item 'angle': the angle of the shading +}} + +\item{\code{border}}{A data.frame with the informations for the availability status. +The columns needed are: +\itemize{ +\item 'column_values': name of the column containing the raw values in the +Ped object +\item 'column_mods': name of the column containing the mods of the transformed +values in the Ped object +\item 'mods': all the different mods +\item 'labels': the corresponding labels of each mods +\item 'border': the color to use for this mods +}} +}} + +\section{Accessors}{ + +\itemize{ +\item \code{fill(x)} : Get the fill data.frame +} + + +\itemize{ +\item \code{fill(x) <- value} : Set the fill data.frame +} + + +\itemize{ +\item \code{border(x)} : Get the border data.frame +} + + +\itemize{ +\item \code{border(x) <- value} : Set the border data.frame +} +} + +\section{Generics}{ + +\itemize{ +\item \code{as.list(x)}: Convert a Scales object to a list +} +} + +\examples{ + +Scales( + fill = data.frame( + order = 1, + column_values = "affected", + column_mods = "affected_mods", + mods = c(0, 1), + labels = c("unaffected", "affected"), + affected = c(FALSE, TRUE), + fill = c("white", "red"), + density = c(NA, 20), + angle = c(NA, 45) + ), + border = data.frame( + column_values = "avail", + column_mods = "avail_mods", + mods = c(0, 1), + labels = c("not available", "available"), + border = c("black", "blue") + ) +) +} +\seealso{ +\code{\link[=Pedigree]{Pedigree()}} + +\code{\link[=generate_colors]{generate_colors()}} +} diff --git a/man/align.Rd b/man/align.Rd index af9c6199..a5494a3c 100644 --- a/man/align.Rd +++ b/man/align.Rd @@ -1,77 +1,64 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/align.R +\docType{methods} \name{align} \alias{align} -\title{Generate plotting information for a Pedigree} +\alias{align,Pedigree-method} +\title{Align a Pedigree object} \usage{ -align( - ped, +\S4method{align}{Pedigree}( + obj, packed = TRUE, width = 10, align = TRUE, - hints = ped$hints, - missid = "0" + hints = NULL, + missid = "NA_character_" ) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{width}{for a packed output, the minimum width of the plot, in +\item{width}{For a packed output, the minimum width of the plot, in inches.} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}. If \code{NULL} then the Hints stored in \strong{obj} will be used.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ A list with components \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n}: A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid}: A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos}: A matrix giving the horizontal position of each plot point +\item \code{fam}: A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values +\item \code{spouse}: A matrix with values \itemize{ \item \code{0} = not a spouse \item \code{1} = subject plotted to the immediate right is a spouse \item \code{2} = subject plotted to the immediate right is an inbred spouse } -\item twins Optional matrix which will only be present if the Pedigree +\item \code{twins}: Optional matrix which will only be present if the Pedigree contains twins : \itemize{ \item \code{0} = not a twin @@ -87,16 +74,22 @@ layout of a plot of the Pedigree. } \details{ This is an internal routine, used almost exclusively by -\code{ped_to_plotdf()}. The subservient functions \code{auto_hint()}, -\code{alignped1()}, \code{alignped2()}, -\code{alignped3()}, and \code{alignped4()} +\code{\link[=ped_to_plotdf]{ped_to_plotdf()}}. + +The subservient functions \code{\link[=auto_hint]{auto_hint()}}, +\code{\link[=alignped1]{alignped1()}}, \code{\link[=alignped2]{alignped2()}}, +\code{\link[=alignped3]{alignped3()}}, and \code{\link[=alignped4]{alignped4()}} contain the bulk of the computation. -If the \strong{hints} are missing the \code{auto_hint()} routine is called to + +If the \strong{hints} are missing the \code{\link[=auto_hint]{auto_hint()}} routine is called to supply an initial guess. -If multiple families are present in the Pedigree, this routine is called -once for each family, and the results are combined in the list returned. -For more information you can read the associated vignette:align -\code{vignette("alignement_details")}. + +If multiple families are present in the \strong{obj} Pedigree, this routine +is called once for each family, and the results are combined in the +list returned. + +For more information you can read the associated vignette: +\code{vignette("pedigree_alignment")}. } \examples{ data(sampleped) diff --git a/man/alignped1.Rd b/man/alignped1.Rd index e9ae73cd..161e3b62 100644 --- a/man/alignped1.Rd +++ b/man/alignped1.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped1.R \name{alignped1} \alias{alignped1} -\title{First routine alignement} +\title{Alignment first routine} \usage{ alignped1(idx, dadx, momx, level, horder, packed, spouselist) } @@ -15,58 +15,69 @@ alignped1(idx, dadx, momx, level, horder, packed, spouselist) \item{level}{Vector of the level of each subject} -\item{horder}{Vector of the horizontal order of each subject} +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{spouselist}{Matrix of the spouses with one row per hinted marriage, -usually only a few marriages in a pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +\item{spouselist}{Matrix of spouses with 4 columns: +\itemize{ +\item \code{1}: husband index +\item \code{2}: wife index +\item \code{3}: husband anchor +\item \code{4}: wife anchor +}} } \value{ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ -First alignement routine which create the subtree founded on a single +First alignment routine which create the subtree founded on a single subject as though it were the only tree. } \details{ -\enumerate{ -\item In this routine the \strong{nid} array consists of the final +In this routine the \strong{nid} array consists of the final \verb{nid array + 1/2} of the final spouse array. Note that the \strong{spouselist} matrix will only contain spouse pairs that are not yet processed. The logic for anchoring is slightly tricky. -First, if row 4 of the spouselist matrix is 0, we anchor at the first +\subsection{1. Anchoring:}{ + +First, if col 4 of the spouselist matrix is 0, we anchor at the first opportunity. Also note that if \code{spouselist[, 3] == spouselist[, 4]} it is the husband who is the anchor (just write out the possibilities). -\item Create the set of 3 return structures, which will be matrices +} + +\subsection{2. Return values initialization:}{ + +Create the set of 3 return structures, which will be matrices with \code{1 + nspouse} columns. If there are children then other routines will widen the result. -\item Create the two complimentary lists \strong{lspouse} and \strong{rspouse} -to denote those plotted on the left and on the right. +} + +\subsection{3. Create \strong{lspouse} and \strong{rspouse}:}{ + +This two complimentary lists denote the spouses plotted on the left +and on the right. For someone with lots of spouses we try to split them evenly. If the number of spouses is odd, then men should have more on the right than on the left, women more on the right. @@ -79,11 +90,19 @@ by plotting canine data, lspouse could initially be empty but \code{length(rspouse) > 1}. This caused \code{nleft > length(indx)}. A fix was to not let \strong{indx} to be indexed beyond its length, fix by JPS 5/2013. -\item For each spouse get the list of children. If there are any we +} + +\subsection{4. List the children:}{ + +For each spouse get the list of children. If there are any we call \code{\link[=alignped2]{alignped2()}} to generate their tree and then mark the connection to their parent. If multiple marriages have children we need to join the trees. -\item To finish up we need to splice together the tree made up from +} + +\subsection{5. Splice the tree:}{ + +To finish up we need to splice together the tree made up from all the kids, which only has data from \code{lev + 1} down, with the data here. There are 3 cases: \enumerate{ @@ -103,3 +122,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped2.Rd b/man/alignped2.Rd index ade26d80..45dddde9 100644 --- a/man/alignped2.Rd +++ b/man/alignped2.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped2.R \name{alignped2} \alias{alignped2} -\title{Second routine alignement} +\title{Alignment second routine} \usage{ alignped2(idx, dadx, momx, level, horder, packed, spouselist) } @@ -15,38 +15,40 @@ alignped2(idx, dadx, momx, level, horder, packed, spouselist) \item{level}{Vector of the level of each subject} -\item{horder}{Vector of the horizontal order of each subject} +\item{horder}{A named numeric vector with one element per subject in the +Pedigree. It determines the relative horizontal order of subjects within a +sibship, as well as the relative order of processing for the founder couples. +(For this latter, the female founders are ordered as though +they were sisters). +The names of the vector should be the individual identifiers.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{spouselist}{Matrix of the spouses with one row per hinted marriage, -usually only a few marriages in a pedigree will need an added hint, for -instance reverse the plot order of a husband/wife pair. -Each row contains the index of the left spouse, the right hand spouse -and the anchor (i.e : \code{1} = left, \code{2} = right, \code{0} = either).} +\item{spouselist}{Matrix of spouses with 4 columns: +\itemize{ +\item \code{1}: husband index +\item \code{2}: wife index +\item \code{3}: husband anchor +\item \code{4}: wife anchor +}} } \value{ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ @@ -63,7 +65,7 @@ The code below has one non-obvious special case. Suppose that two sibs marry. When the first sib is processed by \code{alignped1} then both partners (and any children) will be added to the rval structure below. When the second sib is processed they will come back as a 1 element tree -(the marriage will no longer be on the spouselist), which should be added +(the marriage will no longer be on the \strong{spouselist}), which should be added onto rval. The rule thus is to not add any 1 element tree whose value (which must be \code{idx[i]} is already in the rval structure for this level. } @@ -74,5 +76,7 @@ align(ped) } \seealso{ -\code{\link[=align]{align()}}, \code{\link[=alignped1]{alignped1()}} +\code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped3.Rd b/man/alignped3.Rd index f399eb32..f1da616c 100644 --- a/man/alignped3.Rd +++ b/man/alignped3.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/alignped3.R \name{alignped3} \alias{alignped3} -\title{Third routine alignement} +\title{Alignment third routine} \usage{ alignped3(alt1, alt2, packed, space = 1) } \arguments{ -\item{alt1}{Alignement of the first tree} +\item{alt1}{Alignment of the first tree} -\item{alt2}{Alignement of the second tree} +\item{alt2}{Alignment of the second tree} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} \item{space}{Space between two subjects} } @@ -22,21 +22,16 @@ A list containing the elements to plot the Pedigree. It contains a set of matrices along with the spouselist matrix. The latter has marriages removed as they are processed. \itemize{ -\item n A vector giving the number of subjects on each horizonal level of the +\item \code{n} : A vector giving the number of subjects on each horizonal level of the plot -\item nid A matrix with one row for each level, giving the numeric id of +\item \code{nid} : A matrix with one row for each level, giving the numeric id of each subject plotted. (A value of \code{17} means the 17th subject in the Pedigree). -\item pos A matrix giving the horizontal position of each plot point -\item fam A matrix giving the family id of each plot point. +\item \code{pos} : A matrix giving the horizontal position of each plot point +\item \code{fam} : A matrix giving the family id of each plot point. A value of \code{3} would mean that the two subjects in positions 3 and 4, in the row above, are this subject's parents. -\item spouse A matrix with values -\itemize{ -\item \code{0} = not a spouse -\item \code{1} = subject plotted to the immediate right is a spouse -\item \code{2} = subject plotted to the immediate right is an inbred spouse -} +\item \code{spouselist} : Spouse matrix with anchors informations } } \description{ @@ -49,8 +44,8 @@ the left tree is the same as the leftmost person in the right tree; we need not plot two copies of the same person side by side. (When initializing the output structures do not worry about this, there is no harm if they are a column bigger than finally needed.) -Beyond that the work is simple bookkeeping. -\subsection{Slide}{ +Beyond that the work is simple book keeping. +\subsection{1. Slide:}{ For the unpacked case, which is the traditional way to draw a Pedigree when we can assume the paper is infinitely wide, all parents are @@ -59,7 +54,7 @@ merged as solid blocks. On input they both have a left margin of 0. Compute how far over we have to slide the right tree. } -\subsection{Merge}{ +\subsection{2. Merge:}{ Now merge the two trees. Start at the top level and work down. } @@ -73,3 +68,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/alignped4.Rd b/man/alignped4.Rd index b9c797bb..bfa9365d 100644 --- a/man/alignped4.Rd +++ b/man/alignped4.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/alignped4.R \name{alignped4} \alias{alignped4} -\title{Fourth and last routine alignement} +\title{Alignment fourth routine} \usage{ alignped4(rval, spouse, level, width, align) } @@ -15,15 +15,15 @@ the subject is a spouse or not.} \item{level}{Vector of the level of each subject} -\item{width}{for a packed output, the minimum width of the plot, in +\item{width}{For a packed output, the minimum width of the plot, in inches.} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} } \value{ @@ -49,14 +49,14 @@ line, if the user suggestion is too low it is increased to that For each set of siblings \code{x} with parents at \code{p_1} and \code{p_2} the alignment penalty is : -\eqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} +\deqn{(1/k^a)\sum{i=1}{k} (x_i - (p_1 + p_2)^2} where \code{k} is the number of siblings in the set. } Using the fact that when \code{a = 1} : -\eqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} +\deqn{\sum(x_i-c)^2 = \sum(x_i-\mu)^2 + k(c-\mu)^2} then moving a sibship with \code{k} sibs one unit to the left or right of optimal will incur the same cost as moving one with only 1 or @@ -99,3 +99,5 @@ align(ped) \seealso{ \code{\link[=align]{align()}} } +\keyword{alignment} +\keyword{internal,} diff --git a/man/ancestors.Rd b/man/ancestors.Rd index 6293c067..5635f530 100644 --- a/man/ancestors.Rd +++ b/man/ancestors.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/align.R \name{ancestors} \alias{ancestors} -\title{Routine function to get ancestors of a subject} +\title{Ancestors indexes of a subject} \usage{ ancestors(idx, momx, dadx) } @@ -20,11 +20,16 @@ A vector of ancestor indexes Given the index of one or multiple individual(s), this function iterate through the mom and dad indexes to list out all the ancestors of the said individual(s). -This function is use in the \code{align()} function to +This function is use in the \code{\link[=align]{align()}} function to identify which spouse pairs has a common ancestor and therefore if they need to be connected with a double line (i.e. inbred). } +\examples{ +ancestors(c(1), c(3, 4, 5, 6), c(7, 8, 9, 10)) +ancestors(c(1, 2), c(3, 4, 5, 6), c(7, 8, 9, 10)) +} \seealso{ \code{\link[=align]{align()}} } +\keyword{internal} diff --git a/man/anchor_to_factor.Rd b/man/anchor_to_factor.Rd new file mode 100644 index 00000000..b20ea140 --- /dev/null +++ b/man/anchor_to_factor.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{anchor_to_factor} +\alias{anchor_to_factor} +\title{Anchor variable to ordered factor} +\usage{ +anchor_to_factor(anchor) +} +\arguments{ +\item{anchor}{A character, factor or numeric vector corresponding to +the anchor of the individuals. The following values are recognized: +\itemize{ +\item character() or factor() : "0", "1", "2", "left", "right", "either" +\item numeric() : 1 = "left", 2 = "right", 0 = "either" +}} +} +\value{ +An ordered factor vector containing the transformed variable +"either" < "left" < "right" +} +\description{ +Anchor variable to ordered factor +} +\examples{ +anchor_to_factor(c(1, 2, 0, "left", "right", "either")) +} diff --git a/man/as.data.frame-Pedigree-method.Rd b/man/as.data.frame-Pedigree-method.Rd deleted file mode 100644 index 2b6c0754..00000000 --- a/man/as.data.frame-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{as.data.frame,Pedigree-method} -\alias{as.data.frame,Pedigree-method} -\title{Convert a Pedigree object to a data.frame} -\usage{ -\S4method{as.data.frame}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -A data.frame with the individuals informations. -} -\description{ -Convert a Pedigree object to a data.frame -} diff --git a/man/as.list-Pedigree-method.Rd b/man/as.list-Pedigree-method.Rd deleted file mode 100644 index 5a6660dd..00000000 --- a/man/as.list-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{as.list,Pedigree-method} -\alias{as.list,Pedigree-method} -\title{Convert a Pedigree object to a list} -\usage{ -\S4method{as.list}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -A list with all the slots of the Pedigree object. -} -\description{ -Convert a Pedigree object to a list -} diff --git a/man/auto_hint.Rd b/man/auto_hint.Rd index c53a6183..786c966c 100644 --- a/man/auto_hint.Rd +++ b/man/auto_hint.Rd @@ -2,53 +2,40 @@ % Please edit documentation in R/auto_hint.R \name{auto_hint} \alias{auto_hint} -\title{First initial guess for the alignment of a Pedigree} +\alias{auto_hint,Pedigree-method} +\title{Initial hint for a Pedigree alignment} \usage{ -auto_hint(ped, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE) +\S4method{auto_hint}{Pedigree}(obj, hints = NULL, packed = TRUE, align = FALSE, reset = FALSE) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} +\item{hints}{A Hints object or a named list containing \code{horder} and +\code{spouse}. If \code{NULL} then the Hints stored in \strong{obj} will be used.} -\item{packed}{Should the Pedigree be compressed, i.e., allow diagonal -lines connecting parents to children in order to have a smaller overall -width for the plot.} +\item{packed}{Should the Pedigree be compressed. +(i.e. allow diagonal lines connecting parents to children in order +to have a smaller overall width for the plot.)} -\item{align}{for a packed Pedigree, align children under parents \code{TRUE}, +\item{align}{For a packed Pedigree, align children under parents \code{TRUE}, to the extent possible given the page width, or align to to the left margin \code{FALSE}. This argument can be a two element vector, giving the alignment parameters, or a logical value. -If \code{TRUE}, the default is \code{c(1.5, 2)}, or numeric the routine +If \code{TRUE}, the default is \code{c(1.5, 2)}, or if numeric the routine \code{alignped4()} will be called.} -\item{reset}{If \code{TRUE}, then even if \code{ped} object has hints, reset -them to the initial values} +\item{reset}{If \code{TRUE}, then even if the Ped object has Hints, reset +them to the initial values.} } \value{ -The \strong{hints} list containing components \code{order} and \code{spouse} +The initial \linkS4class{Hints} object. } \description{ Compute an initial guess for the alignment of a Pedigree } \details{ -A Pedigree structure can contain a \code{hints} object which helps to +A Pedigree structure can contain a \linkS4class{Hints} object which helps to reorder the Pedigree (e.g. left-to-right order of children within family) so as to plot with minimal distortion. This routine is used to create an initial version of the hints. They can then be modified if desired. @@ -57,22 +44,24 @@ This routine would not normally be called by a user. It moves children within families, so that marriages are on the "edge" of a set children, closest to the spouse. For pedigrees that have only a single connection between two families this simple-minded approach works surprisingly well. -For more complex structures hand-tuning of the hints matrix may be required. +For more complex structures hand-tuning of the hints may be required. -The Pedigree in the example below is one where rearranging the founders -greatly decreases the number of extra connections. When \code{auto_hint()} is -called with a a vector of numbers as the second argument, the values for the -founder females are used to order the founder families left to right across -the plot. The values within a sibship are used as the preliminary order of +When \code{auto_hint()} is called with a a vector of numbers as the \strong{hints} +argument, the values for the founder females are used to order the founder +families left to right across the plot. +The values within a sibship are used as the preliminary order of siblings within a family; this may be changed to move one of them to the edge so as to match up with a spouse. The actual values in the vector are not important, only their order. } \examples{ data(sampleped) -ped <- Pedigree(sampleped[sampleped$family == 1, ]) +ped <- Pedigree(sampleped[sampleped$famid == 1, ]) auto_hint(ped) } \seealso{ -\code{\link[=align]{align()}}, \code{\link[=best_hint]{best_hint()}} +\code{\link[=align]{align()}}, \code{\link[=best_hint]{best_hint()}}, \linkS4class{Hints} } +\keyword{alignment,} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/best_hint.Rd b/man/best_hint.Rd index 0eafb54e..eaf0ea81 100644 --- a/man/best_hint.Rd +++ b/man/best_hint.Rd @@ -2,37 +2,41 @@ % Please edit documentation in R/best_hint.R \name{best_hint} \alias{best_hint} -\title{Best hint for alignement} +\alias{best_hint,Pedigree-method} +\title{Best hint for a Pedigree alignment} \usage{ -best_hint(ped, wt = c(1000, 10, 1), tolerance = 0) +\S4method{best_hint}{Pedigree}(obj, wt = c(1000, 10, 1), tolerance = 0) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} -\item{wt}{A vector of three weights for the three error measures -\itemize{ +\item{wt}{A vector of three weights for the three error measures. +Default is \code{c(1000, 10, 1)}. +\enumerate{ \item The number of duplicate individuals in the plot \item The sum of the absolute values of the differences in the positions of duplicate individuals \item The sum of the absolute values of the differences between -the center of the children and the parents -Default is \code{c(1000, 10, 1)}. +the center of the children and the parents. }} -\item{tolerance}{The maximum stress level to accept. Default is \code{0}} +\item{tolerance}{The maximum stress level to accept. +Default is \code{0}} } \value{ -The best hint object out of all the permutations +The best Hints object out of all the permutations } \description{ -When computer time is cheap, use this routine to get a 'best' Pedigree. +When computer time is cheap, use this routine to get a \emph{best} +Pedigree alignment. This routine will try all possible founder orders, and return the one -with the least 'stress'. +with the least \strong{stress}. } \details{ -The auto_hint routine will rearrange sibling order, but not founder order. -This calls auto_hint with every possible founder order, and finds that -plot with the least 'stress'. +The \code{\link[=auto_hint]{auto_hint()}} routine will rearrange sibling order, but not +founder order. +This calls \code{\link[=auto_hint]{auto_hint()}} with every possible founder order, and finds that +plot with the least "stress". The stress is computed as a weighted sum of three error measures: \itemize{ \item nbArcs The number of duplicate individuals in the plot @@ -42,10 +46,10 @@ positions of duplicate individuals the center of the children and the parents } -\eqn{stress = - wt[1] \times nbArcs + - wt[2] \times lgArcs + - wt[3] \times lgParentsChilds +\deqn{stress = + wt[1] * nbArcs + + wt[2] * lgArcs + + wt[3] * lgParentsChilds } If during the search, a plot is found with a stress level less than @@ -53,9 +57,11 @@ If during the search, a plot is found with a stress level less than } \examples{ data(sampleped) -ped <- Pedigree(sampleped[sampleped$family == 1,]) +ped <- Pedigree(sampleped[sampleped$famid == 1,]) best_hint(ped) } \seealso{ -\code{\link[=auto_hint]{auto_hint()}} +\code{\link[=auto_hint]{auto_hint()}}, \code{\link[=align]{align()}} } +\keyword{alignment,} +\keyword{auto_hint} diff --git a/man/bit_size.Rd b/man/bit_size.Rd index e398c5b4..4c2d1da1 100644 --- a/man/bit_size.Rd +++ b/man/bit_size.Rd @@ -1,51 +1,51 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bit_size.R -\docType{methods} \name{bit_size} \alias{bit_size} -\alias{bit_size,character-method} -\alias{bit_size,character} +\alias{bit_size,character_OR_integer-method} \alias{bit_size,Pedigree-method} -\alias{bit_size,Pedigree} -\title{Get Pedigree bit_size} +\alias{bit_size,Ped-method} +\title{Bit size of a Pedigree} \usage{ -bit_size(obj, ...) +\S4method{bit_size}{character_OR_integer}(obj, momid, missid = NA_character_) -\S4method{bit_size}{character}(obj, momid, missid = "0") +\S4method{bit_size}{Pedigree}(obj) -\S4method{bit_size}{Pedigree}(obj, missid = "0") +\S4method{bit_size}{Ped}(obj) } \arguments{ -\item{obj}{A Pedigree object or a vector of fathers identifierss} +\item{obj}{A Ped or Pedigree object or a vector of fathers identifiers} -\item{...}{Additional arguments passed to methods} - -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ A list with the following components: \itemize{ -\item bit_size The bit_size of input Pedigree +\item bit_size The bit size of the Pedigree \item nFounder The number of founders in the Pedigree -\item nNonFounder The number of nonfounders in the Pedigree +\item nNonFounder The number of non founders in the Pedigree } } \description{ -Calculate Pedigree bit_size, defined as : +Utility function used in the \code{shrink()} function +to calculate the bit size of a Pedigree. +} +\details{ +The bit size of a Pedigree is defined as : -\eqn{ +\deqn{ 2 \times NbNonFounders - NbFounders } -} -\details{ -This is a utility function used in \code{shrink()} -to calculate the bit_size of a Pedigree. + +Where \code{NbNonFounders} is the number of non founders in the Pedigree +(i.e. individuals with identified parents) and +\code{NbFounders} is the number of founders in the Pedigree +(i.e. individuals without identified parents). } \examples{ data(sampleped) @@ -55,4 +55,5 @@ bit_size(ped) \seealso{ \code{\link[=shrink]{shrink()}} } -\keyword{internal} +\keyword{internal,} +\keyword{shrink} diff --git a/man/check_columns.Rd b/man/check_columns.Rd index 89250e3d..0791d4df 100644 --- a/man/check_columns.Rd +++ b/man/check_columns.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{check_columns} \alias{check_columns} -\title{Check for columns name usage} +\title{Check columns presence in a dataframe} \usage{ check_columns( df, @@ -65,8 +65,11 @@ df <- data.frame(ColN1 = c(1, 2), ColN2 = 4, ColU1 = 'B', ColU2 = '1', ColTU1 = 'A', ColTU2 = 3, ColNR1 = 4, ColNR2 = 5) -tryCatch(check_columns(df, c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), - c('ColTU1', 'ColTU2')), error = function(e) print(e)) +tryCatch( + check_columns(df, + c('ColN1', 'ColN2'), c('ColU1', 'ColU2'), + c('ColTU1', 'ColTU2') +), error = function(e) print(e)) } \keyword{internal} diff --git a/man/check_hints.Rd b/man/check_hints.Rd deleted file mode 100644 index 149f2d99..00000000 --- a/man/check_hints.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/check_hints.R -\name{check_hints} -\alias{check_hints} -\title{Detect hints inconsistencies} -\usage{ -check_hints(hints, sex) -} -\arguments{ -\item{hints}{Plotting hints for the Pedigree. -This is a list with components \code{order} and \code{spouse}, the second one -is optional. -\itemize{ -\item \strong{order} is a numeric vector with one element per subject in the -Pedigree. It determines the relative order of subjects within a sibship, as -well as the relative order of processing for the founder couples. (For this -latter, the female founders are ordered as though they were sisters). -\item \strong{spouse} is a matrix with one row per hinted marriage, usually -only a few marriages in a pedigree will need an added hint, for instance -reverse the plot order of a husband/wife pair. Each row contains the -index of the left spouse, the right hand spouse, and the anchor -(i.e : \code{1} = left, \code{2} = right, \code{0} = either). -Children will preferentially appear under the parents of the anchored -spouse. -}} - -\item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: -\itemize{ -\item character() or factor() : "f", "m", "woman", "man", "male", "female", -"unknown", "terminated" -\item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" -}} -} -\value{ -Nothing, but will stop if there is a problem. -} -\description{ -This routine tries to detect inconsistencies in spousal hints. -} -\details{ -These arise in \code{auto_hint()} with complex Pedigrees. -One can have ABA (subject A is on both the left and the right of B), -cycles, etc. -Users can introduce problems as well if they modify the hints. -} -\examples{ -data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) -ht1 <- auto_hint(ped1) -check_hints(ht1, ped1$ped$sex) - -} -\seealso{ -\code{\link[=auto_hint]{auto_hint()}}, \code{\link[=best_hint]{best_hint()}} -} -\keyword{internal} diff --git a/man/check_num_na.Rd b/man/check_num_na.Rd index 3e0627cd..0ee6d4c3 100644 --- a/man/check_num_na.Rd +++ b/man/check_num_na.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{check_num_na} \alias{check_num_na} -\title{Check is numeric} +\title{Is numeric or NA} \usage{ check_num_na(var, na_as_num = TRUE) } @@ -13,13 +13,13 @@ check_num_na(var, na_as_num = TRUE) considered as numerical values} } \value{ -A vector of boolean of the same size as \code{var} +A vector of boolean of the same size as \strong{var} } \description{ Check if a variable given is numeric or NA } \details{ -Check if the values in \code{var} are numeric or if they are -NA in the case that \code{na_as_num} is set to TRUE. +Check if the values in \strong{var} are numeric or if they are +\code{NA} in the case that \code{na_as_num} is set to TRUE. } \keyword{internal} diff --git a/man/check_slot_fd.Rd b/man/check_slot_fd.Rd index 0731ac6b..51ac5eb3 100644 --- a/man/check_slot_fd.Rd +++ b/man/check_slot_fd.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R +% Please edit documentation in R/AllValidity.R \name{check_slot_fd} \alias{check_slot_fd} \title{Check if the fields are present in an object slot} diff --git a/man/check_values.Rd b/man/check_values.Rd index e2eb9b15..c0cc1589 100644 --- a/man/check_values.Rd +++ b/man/check_values.Rd @@ -1,22 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R +% Please edit documentation in R/AllValidity.R \name{check_values} \alias{check_values} \title{Check values in a slot} \usage{ -check_values(val, ref, present = TRUE) +check_values(val, ref, name = NULL, present = TRUE) } \arguments{ -\item{present}{A logical value indicating if the values should be present -or not} - -\item{obj}{An object.} +\item{val}{A vector of values to check.} -\item{slot}{A slot of the object.} +\item{ref}{A vector of reference values.} -\item{column}{A column of the slot.} +\item{name}{A character vector with the name of the values to check.} -\item{values}{A vector of values to check.} +\item{present}{A logical value indicating if the values should be present +or not} } \value{ A character vector with the errors if any. diff --git a/man/circfun.Rd b/man/circfun.Rd index 31819db8..04885b73 100644 --- a/man/circfun.Rd +++ b/man/circfun.Rd @@ -2,19 +2,27 @@ % Please edit documentation in R/plot_fct.R \name{circfun} \alias{circfun} -\title{Generate a circular element} +\title{Circular element} \usage{ circfun(nslice, n = 50) } \arguments{ -\item{nslice}{number of slices in the circle} +\item{nslice}{Number of slices in the circle} \item{n}{Total number of points in the circle} } \value{ -a list of x and y coordinates +A list of x and y coordinates per slice. } \description{ -Generate a circular element +Create a list of x and y coordinates for a circle +with a given number of slices. } -\keyword{internal} +\examples{ + +circfun(1) +circfun(1, 10) +circfun(4, 50) +} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/descendants.Rd b/man/descendants.Rd index 49308efe..420cbb35 100644 --- a/man/descendants.Rd +++ b/man/descendants.Rd @@ -1,31 +1,28 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/descendants.R -\docType{methods} \name{descendants} \alias{descendants} -\alias{descendants,character,character-method} -\alias{descendants,character} -\alias{descendants,character,Pedigree-method} -\alias{descendants,Pedigree} -\title{Find all the descendants} +\alias{descendants,character_OR_integer,character_OR_integer-method} +\alias{descendants,character_OR_integer,Pedigree-method} +\alias{descendants,character_OR_integer,Ped-method} +\title{Descendants of individuals} \usage{ -descendants(idlist, obj, ...) +\S4method{descendants}{character_OR_integer,character_OR_integer}(idlist, obj, dadid, momid) -\S4method{descendants}{character,character}(idlist, obj, dadid, momid) +\S4method{descendants}{character_OR_integer,Pedigree}(idlist, obj) -\S4method{descendants}{character,Pedigree}(idlist, obj) +\S4method{descendants}{character_OR_integer,Ped}(idlist, obj) } \arguments{ \item{idlist}{List of individuals identifiers to be considered} -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A Ped or Pedigree object or a vector of the +individuals identifiers.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -34,7 +31,7 @@ The list is not ordered. } \description{ Find all the descendants of a particular list of individuals -given a Pedigree +given a Pedigree object. } \examples{ data("sampleped") diff --git a/man/draw_arc.Rd b/man/draw_arc.Rd index 1a0be3c5..328d8f0c 100644 --- a/man/draw_arc.Rd +++ b/man/draw_arc.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_arc} \alias{draw_arc} -\title{Draw arcs for multiple instances of a subject} +\title{Draw arcs} \usage{ draw_arc(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black") } @@ -17,16 +17,18 @@ draw_arc(x0, y0, x1, y1, p, ggplot_gen = FALSE, lwd = 1, col = "black") \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{lwd}{line width} +\item{lwd}{Line width} -\item{col}{line color} +\item{col}{Line color} } \value{ -Plot the arcs or add it to a ggplot object +Plot the arcs to the current device +or add it to a ggplot object } \description{ -Draw arcs for multiple instances of a subject +Draw arcs } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_polygon.Rd b/man/draw_polygon.Rd index 9dbdf111..7135bc60 100644 --- a/man/draw_polygon.Rd +++ b/man/draw_polygon.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_polygon} \alias{draw_polygon} -\title{Draw a polygon for a Pedigree} +\title{Draw a polygon} \usage{ draw_polygon( x, @@ -22,20 +22,22 @@ draw_polygon( \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{fill}{fill color} +\item{fill}{Fill color} -\item{border}{border color} +\item{border}{Border color} -\item{density}{density of shading} +\item{density}{Density of shading} -\item{angle}{angle of shading} +\item{angle}{Angle of shading} } \value{ -Plot the polygon or add it to a ggplot object +Plot the polygon to the current device +or add it to a ggplot object } \description{ -Draw a polygon for a Pedigree +Draw a polygon } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_segment.Rd b/man/draw_segment.Rd index 4eeee997..804b836a 100644 --- a/man/draw_segment.Rd +++ b/man/draw_segment.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_segment} \alias{draw_segment} -\title{Draw segments for a Pedigree} +\title{Draw segments} \usage{ draw_segment( x0, @@ -27,18 +27,20 @@ draw_segment( \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{col}{line color} +\item{col}{Line color} -\item{lwd}{line width} +\item{lwd}{Line width} -\item{lty}{line type} +\item{lty}{Line type} } \value{ -Plot the segments or add it to a ggplot object +Plot the segments to the current device +or add it to a ggplot object } \description{ -Draw segments for a Pedigree +Draw segments } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/draw_text.Rd b/man/draw_text.Rd index e6476e2f..4aedd8b3 100644 --- a/man/draw_text.Rd +++ b/man/draw_text.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_fct.R \name{draw_text} \alias{draw_text} -\title{Draw text for a Pedigree} +\title{Draw texts} \usage{ draw_text( x, @@ -21,24 +21,26 @@ draw_text( \item{y}{y coordinates} -\item{label}{text to be displayed} +\item{label}{Text to be displayed} \item{p}{ggplot object} -\item{ggplot_gen}{logical, if TRUE add the segments to the ggplot object} +\item{ggplot_gen}{If TRUE add the segments to the ggplot object} -\item{cex}{character expansion of the text} +\item{cex}{Character expansion of the text} -\item{col}{text color} +\item{col}{Text color} \item{adjx}{x adjustment} \item{adjy}{y adjustment} } \value{ -Plot the text or add it to a ggplot object +Plot the text to the current device +or add it to a ggplot object } \description{ -Draw text for a Pedigree +Draw texts } -\keyword{internal} +\keyword{Pedigree-plot} +\keyword{internal,} diff --git a/man/duporder.Rd b/man/duporder.Rd index 632722bd..a8732d74 100644 --- a/man/duporder.Rd +++ b/man/duporder.Rd @@ -2,39 +2,25 @@ % Please edit documentation in R/auto_hint.R \name{duporder} \alias{duporder} -\title{Routine to find the duplicate pairs of a subject} +\title{Find the duplicate pairs of a subject} \usage{ -duporder(idlist, plist, lev, ped) +duporder(idlist, plist, lev, obj) } \arguments{ \item{idlist}{List of individuals identifiers to be considered} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ A matrix of duplicate pairs } \description{ -Routine to find the duplicate pairs of a subject +Find the duplicate pairs of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -44,4 +30,5 @@ the order they should be plotted. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/exclude_stray_marryin.Rd b/man/exclude_stray_marryin.Rd index 86cb95c2..57dbd3e2 100644 --- a/man/exclude_stray_marryin.Rd +++ b/man/exclude_stray_marryin.Rd @@ -7,12 +7,12 @@ exclude_stray_marryin(id, dadid, momid) } \arguments{ -\item{id}{Vector of subject identifiers} +\item{id}{A character vector with the identifiers of each individuals} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -20,6 +20,10 @@ Returns a data frame of subject identifiers and their parents. The data frame is trimmed of any founders who are not parents. } \description{ -Exclude from a Pedigree any founders who are not parents. +Exclude any founders who are not parents. } -\keyword{internal} +\seealso{ +\code{\link[=shrink]{shrink()}} +} +\keyword{internal,} +\keyword{shrink} diff --git a/man/exclude_unavail_founders.Rd b/man/exclude_unavail_founders.Rd index 5da23d06..0b7856ce 100644 --- a/man/exclude_unavail_founders.Rd +++ b/man/exclude_unavail_founders.Rd @@ -4,26 +4,24 @@ \alias{exclude_unavail_founders} \title{Exclude unavailable founders} \usage{ -exclude_unavail_founders(id, dadid, momid, avail, missid = "0") +exclude_unavail_founders(id, dadid, momid, avail, missid = NA_character_) } \arguments{ -\item{id}{Vector of subject identifiers} +\item{id}{A character vector with the identifiers of each individuals} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{missid}{Character defining the missing ids} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ Returns a list with the following components: @@ -36,6 +34,10 @@ Returns a list with the following components: } } \description{ -Exclude from a Pedigree any unavailable founders. +Exclude any unavailable founders. +} +\seealso{ +\code{\link[=shrink]{shrink()}} } -\keyword{internal} +\keyword{internal,} +\keyword{shrink} diff --git a/man/extract-methods.Rd b/man/extract-methods.Rd deleted file mode 100644 index 2b946732..00000000 --- a/man/extract-methods.Rd +++ /dev/null @@ -1,123 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\name{ped} -\alias{ped} -\alias{ped,Pedigree-method} -\alias{rel} -\alias{rel,Pedigree-method} -\alias{scales} -\alias{scales,Pedigree-method} -\alias{hints} -\alias{hints,Pedigree-method} -\alias{show,Pedigree-method} -\alias{summary,Pedigree-method} -\alias{[[,Pedigree,ANY,missing-method} -\alias{$,Pedigree-method} -\alias{$<-,Pedigree-method} -\alias{sub_sel_hints} -\alias{sub_sel_hints,Pedigree-method} -\alias{[,Pedigree,ANY,ANY,ANY-method} -\alias{[,Pedigree,missing,ANY,ANY-method} -\alias{[,Pedigree,ANY,missing,ANY-method} -\title{Pedigree ped accessors} -\usage{ -ped(object) - -rel(object) - -scales(object) - -hints(object) - -\S4method{show}{Pedigree}(object) - -\S4method{summary}{Pedigree}(object) - -\S4method{[[}{Pedigree,ANY,missing}(x, i, j, ..., drop = TRUE) - -\S4method{$}{Pedigree}(x, name) - -\S4method{$}{Pedigree}(x, name) <- value - -sub_sel_hints(hints, index) - -\S4method{[}{Pedigree,ANY,ANY,ANY}(x, i, j, drop = TRUE) - -\S4method{[}{Pedigree,missing,ANY,ANY}(x, i, j, drop = TRUE) - -\S4method{[}{Pedigree,ANY,missing,ANY}(x, i, j, drop = TRUE) -} -\arguments{ -\item{object}{A Pedigree object.} - -\item{x}{A Pedigree object.} - -\item{i}{A vector of individuals id or a vector of index.} - -\item{j}{A vector of columns names.} - -\item{...}{Other arguments.} - -\item{drop}{A logical value indicating if the dimensions should be dropped.} - -\item{name}{A slot name.} - -\item{value}{A vector of values to replace.} - -\item{hints}{A list of hints} - -\item{index}{A vector of index} -} -\value{ -The slot \code{ped} present in the Pedigree object. - -The slot \code{rel} present in the Pedigree object. - -The slot \code{scales} present in the Pedigree object. - -The slot \code{hints} present in the Pedigree object. - -A character vector with the informations about the object. - -A character vector with the summary of the object. - -The slot \code{i} present in the Pedigree object. - -The slot \code{name} present in the Pedigree object. - -The Pedigree object with the slot \code{name} replaced by \code{value}. - -A list of hints subsetted - -A Pedigree object subsetted. - -A Pedigree object subsetted. - -A Pedigree object subsetted. -} -\description{ -Pedigree rel accessors - -Pedigree scales accessors - -Pedigree hints accessors - -Pedigree show method - -Pedigree summary method. - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object - -Replace parts of a Pedigree object - -Subset the hints list based on the index given - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object - -Extract parts of a Pedigree object -} -\keyword{internal} diff --git a/man/family_check.Rd b/man/family_check.Rd index 41792931..100aaf5d 100644 --- a/man/family_check.Rd +++ b/man/family_check.Rd @@ -1,32 +1,31 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/family_check.R -\docType{methods} \name{family_check} \alias{family_check} -\alias{family_check,character-method} -\alias{family_check,character} +\alias{family_check,character_OR_integer-method} \alias{family_check,Pedigree-method} -\alias{family_check,Pedigree} +\alias{family_check,Ped-method} \title{Check family} \usage{ -family_check(obj, ...) - -\S4method{family_check}{character}(obj, dadid, momid, family, newfam) +\S4method{family_check}{character_OR_integer}(obj, dadid, momid, famid, newfam) \S4method{family_check}{Pedigree}(obj) + +\S4method{family_check}{Ped}(obj) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{family}{A vector of family identifiers} +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} \item{newfam}{The result of a call to \code{make_famid()}. If this has already been computed by the user, adding it as an argument shortens the running @@ -34,10 +33,10 @@ time somewhat.} } \value{ a data frame with one row for each unique family id in the -\code{family} argument or the one detected in the Pedigree object. +\code{famid} argument or the one detected in the Pedigree object. Components of the output are: \itemize{ -\item \code{family} : The family id, as entered into the data set +\item \code{famid} : The family id, as entered into the data set \item \code{n} : Number of subjects in the family \item \code{unrelated} : Number of them that appear to be unrelated to anyone else in the entire Pedigree. This is usually marry-ins with no @@ -60,7 +59,7 @@ Error check for a family classification Given a family id vector, also compute the familial grouping from first principles using the parenting data, and compare the results. -The \code{make_famid} function is used to create a de novo family id from the +The \code{\link[=make_famid]{make_famid()}} function is used to create a de novo family id from the parentage data, and this is compared to the family id given in the data. If there are any joins, then an attribute 'join' is attached. @@ -82,19 +81,8 @@ fcheck.combined <- with(sampleped, family_check(id, dadid, momid, rep(1, nrow(sampleped)))) fcheck.combined -# make person 120's father be her son. -sampleped[20, 3] <- 131 -fcheck1.bad <- try( - { - with(sampleped, family_check(id, father, mother, family)) - }, - silent = FALSE -) - -## fcheck1.bad is a try-error - } \seealso{ -\code{\link[=make_famid]{make_famid()}}, \code{\link[=kinship]{kinship()}} +\code{\link[=make_famid]{make_famid()}} } \keyword{internal} diff --git a/man/find_avail_affected.Rd b/man/find_avail_affected.Rd index 46afc347..f8071489 100644 --- a/man/find_avail_affected.Rd +++ b/man/find_avail_affected.Rd @@ -2,31 +2,35 @@ % Please edit documentation in R/find_avail_affected.R \name{find_avail_affected} \alias{find_avail_affected} -\title{Find a single person to trim from a Pedigree whose is available} +\alias{find_avail_affected,Ped-method} +\alias{find_avail_affected,Pedigree-method} +\title{Find single affected and available individual from a Pedigree} \usage{ -find_avail_affected(ped, avail = ped(ped)$avail, affstatus = NA) +\S4method{find_avail_affected}{Ped}(obj, avail = NULL, affected = NULL, affstatus = NA) + +\S4method{find_avail_affected}{Pedigree}(obj, avail = NULL, affected = NULL, affstatus = NA) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} + +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{affstatus}{Affection status to search for.} } \value{ A list is returned with the following components \itemize{ -\item ped The new Pedigree object +\item ped The new Ped object \item newAvail Vector of availability status of trimmed individuals \item idTrimmed Vector of IDs of trimmed individuals -\item isTrimmed logical value indicating whether Pedigree has been trimmed -\item bit_size Bit size of the trimmed Pedigree +\item isTrimmed logical value indicating whether Ped object has been trimmed +\item bit_size Bit size of the trimmed Ped } } \description{ @@ -34,9 +38,12 @@ Finds one subject from among available non-parents with indicated affection status. } \details{ -When used within pedigree.shrink, this function is called with the first +When used within \code{\link[=shrink]{shrink()}}, this function is called with the first affected indicator, if the affected item in the Pedigree is a matrix of multiple affected indicators. + +If \strong{avail} or \strong{affected} is null, then the function will use the +corresponding Ped accessor. } \examples{ data(sampleped) @@ -46,3 +53,5 @@ find_avail_affected(ped, affstatus = 1) \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/find_avail_noninform.Rd b/man/find_avail_noninform.Rd index 66ab9e05..7570b9b7 100644 --- a/man/find_avail_noninform.Rd +++ b/man/find_avail_noninform.Rd @@ -2,39 +2,40 @@ % Please edit documentation in R/find_avail_noninform.R \name{find_avail_noninform} \alias{find_avail_noninform} +\alias{find_avail_noninform,Ped-method} +\alias{find_avail_noninform,Pedigree-method} \title{Find uninformative but available subject} \usage{ -find_avail_noninform(ped, avail = ped(ped)$avail, missid = "0") +\S4method{find_avail_noninform}{Ped}(obj, avail = NULL, affected = NULL) + +\S4method{find_avail_noninform}{Pedigree}(obj, avail = NULL, affected = NULL) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} } \value{ Vector of subject ids who can be removed by having lowest informativeness. } \description{ -Find uninformative but available subject +Finds subjects from among available non-parents with all affection +equal to \code{0}. } \details{ -Find subjects from a Pedigree who are available and uninformative - Identify subjects to remove from a Pedigree who are available but -non-informative. This is the second step to remove subjects in -pedigree.shrink if the Pedigree does not meet the desired bit size. +non-informative (unaffected). This is the second step to remove subjects in +\code{\link[=shrink]{shrink()}} if the Pedigree does not meet the desired bit size. + +If \strong{avail} or \strong{affected} is null, then the function will use the +corresponding Ped accessor. } \examples{ data(sampleped) @@ -45,3 +46,5 @@ find_avail_noninform(ped) \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/find_unavailable.Rd b/man/find_unavailable.Rd index 7ab7250e..c48630f2 100644 --- a/man/find_unavailable.Rd +++ b/man/find_unavailable.Rd @@ -2,31 +2,34 @@ % Please edit documentation in R/find_unavailable.R \name{find_unavailable} \alias{find_unavailable} +\alias{find_unavailable,Ped-method} +\alias{find_unavailable,Pedigree-method} \title{Find unavailable subjects in a Pedigree} \usage{ -find_unavailable(ped, avail = ped(ped)$avail) +\S4method{find_unavailable}{Ped}(obj, avail = NULL) + +\S4method{find_unavailable}{Pedigree}(obj, avail = NULL) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Ped or Pedigree object.} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} } \value{ Returns a vector of subject ids for who can be removed. } \description{ -Find the ID of subjects in a Pedigree iteratively, as anyone who is not -available and does not have an available descendant by successively removing -unavailable terminal nodes. +Find the identifiers of subjects in a Pedigree iteratively, +as anyone who is not available and does not have an available +descendant by successively removing unavailable terminal nodes. } \details{ +If \strong{avail} is null, then the function will use the +corresponding Ped accessor. + Originally written as pedTrim by Steve Iturria, modified by Dan Schaid 2007, and now split into the two separate functions: \code{find_unavailable()}, and \code{trim()} to do the tasks separately. @@ -38,18 +41,22 @@ isolated after trimming their unavailable offspring, and If the subject ids are character, make sure none of the characters in the ids is a colon (":"), which is a special character used to concatenate and split subjects within the utility. +The \code{trim()} functions is now replaced by the \code{subset()} function. } \section{Side Effects}{ - relation matrix from \code{trim} is trimmed of any + +Relation matrix from subsetting is trimmed of any special relations that include the subjects to trim. } \examples{ data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) find_unavailable(ped1) } \seealso{ \code{\link[=shrink]{shrink()}} } +\keyword{internal,} +\keyword{shrink} diff --git a/man/findsibs.Rd b/man/findsibs.Rd index 3ae0296d..2deb2084 100644 --- a/man/findsibs.Rd +++ b/man/findsibs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/auto_hint.R \name{findsibs} \alias{findsibs} -\title{Routine to find the siblings of a subject} +\title{Find the siblings of a subject} \usage{ findsibs(idpos, plist, lev) } @@ -10,21 +10,7 @@ findsibs(idpos, plist, lev) \item{idpos}{The position of the subject} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} } @@ -32,7 +18,7 @@ It contains the following components: The positions of the siblings } \description{ -Routine to find the siblings of a subject +Find the siblings of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -41,4 +27,5 @@ It finds the siblings of a subject. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/findspouse.Rd b/man/findspouse.Rd index 71a9c3db..047b2149 100644 --- a/man/findspouse.Rd +++ b/man/findspouse.Rd @@ -2,39 +2,25 @@ % Please edit documentation in R/auto_hint.R \name{findspouse} \alias{findspouse} -\title{Routine to find the spouse of a subject} +\title{Find the spouse of a subject} \usage{ -findspouse(idpos, plist, lev, ped) +findspouse(idpos, plist, lev, obj) } \arguments{ \item{idpos}{The position of the subject} \item{plist}{The alignment structure representing the Pedigree layout. -For the differents matrices present in the list, each row represents a -level of the Pedigree and each column a potential subject. -It contains the following components: -\itemize{ -\item n Vector of the number of subjects per level -\item nid Matrix of the subjects indexes -\item pos Matrix of the subjects positions -\item fam Matrix of the siblings family identifiers -\item spouse Matrix of the left spouses -\itemize{ -\item \code{0} = not spouse -\item \code{1} = spouse -\item \code{2} = inbred spouse. -} -}} +See \code{\link[=align]{align()}} for details.} \item{lev}{The generation level of the subject} -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ The position of the spouse } \description{ -Routine to find the spouse of a subject +Find the spouse of a subject } \details{ This routine is used by \code{auto_hint()}. @@ -43,4 +29,5 @@ It finds the spouse of a subject. \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/fix_parents.Rd b/man/fix_parents.Rd index 8465f0a0..7d26e8c7 100644 --- a/man/fix_parents.Rd +++ b/man/fix_parents.Rd @@ -3,44 +3,41 @@ \name{fix_parents} \alias{fix_parents} \alias{fix_parents,character-method} -\alias{fix_parents,character} \alias{fix_parents,data.frame-method} -\title{Fix details on the parents for children of the Pedigree} +\title{Fix parents relationship and gender} \usage{ -fix_parents(obj, ...) +\S4method{fix_parents}{character}(obj, dadid, momid, sex, famid = NULL, missid = NA_character_) -\S4method{fix_parents}{character}(obj, dadid, momid, sex, family = NULL, missid = "0") - -\S4method{fix_parents}{data.frame}(obj, delete = FALSE, filter = NULL, missid = "0") +\S4method{fix_parents}{data.frame}(obj, delete = FALSE, filter = NULL, missid = NA_character_) } \arguments{ \item{obj}{A data.frame or a vector of the individuals identifiers. If a dataframe is given it must contain the columns \code{id}, \code{dadid}, -\code{momid}, \code{sex} and \code{family}. Family is optional.} - -\item{...}{Additional arguments passed to methods} +\code{momid}, \code{sex} and \code{famid} (optional).} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} -\item{family}{Optional family identification set it to NULL to invalidate. -If used it will modify the ids of the individuals by pasting it with an _.} +\item{famid}{A character vector with the family identifiers of the +individuals. If provide, will be aggregated to the individuals +identifiers separated by an underscore.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} \item{delete}{Boolean defining if missing parents needs to be: \itemize{ @@ -56,7 +53,7 @@ A data.frame with id, dadid, momid, sex as columns with the relationships fixed. } \description{ -Fix the sex of parents, add parents that are missing from the Pedigree +Fix the sex of parents, add parents that are missing from the data. Can be used with a dataframe or a vector of the different individuals informations. } @@ -64,16 +61,17 @@ different individuals informations. First look to add parents whose ids are given in momid/dadid. Second, fix sex of parents. Last look to add second parent for children for whom only one parent id is given. -If a family vector is given the family id will be added to the ids of all -individuals (id, dadid, momid) separated by an underscore befor proceeding. +If a \strong{famid} vector is given the family id will be added to the ids of all +individuals (\code{id}, \code{dadid}, \code{momid}) +separated by an underscore before proceeding. \subsection{Special case for dataframe}{ -Check for presence of both parents id in the \code{id} field. -If not both presence behaviour depend of \code{delete} parameter +Check for presence of both parents id in the \strong{id} field. +If not both presence behaviour depend of \strong{delete} parameter \itemize{ -\item If TRUE then use fix_parents function and merge back the other fields +\item If \code{TRUE} then use fix_parents function and merge back the other fields in the dataframe then set availability to O for non available parents. -\item If FALSE then delete the id of missing parents +\item If \code{FALSE} then delete the id of missing parents } } } @@ -93,10 +91,9 @@ test1char <- data.frame( ) test1newmom <- with(test1char, fix_parents(id, father, mother, sex, - missid = '0' + missid = NA_character_ )) -newped <- Pedigree(test1newmom) -as.data.frame(newped) +Pedigree(test1newmom) } \author{ diff --git a/man/generate_aff_inds.Rd b/man/generate_aff_inds.Rd index d61a6f6c..f8779a53 100644 --- a/man/generate_aff_inds.Rd +++ b/man/generate_aff_inds.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/generate_aff_inds.R \name{generate_aff_inds} \alias{generate_aff_inds} -\title{Process the information for affection} +\title{Process the affection informations} \usage{ generate_aff_inds( values, @@ -37,14 +37,31 @@ The different columns are: } } \description{ -Process the information for affection +Perform transformation uppon a vector given as the one +containing the affection status to obtain an \code{affected} binary state. } \details{ -Perform transformation uppon a column given as the one -containing affection status to get an \code{affected} column usable for -the rest of the script +This function helps to configure a binary state from a character or +numeric variable. +\subsection{If the variable is a \code{character} or a \code{factor}:}{ + +In this case the affected state will depend on the modality provided as +an affected status. All individuals with a value corresponding to one of the +element in the vector \strong{mods_aff} will be considered as affected. +} + +\subsection{If the variable is \code{numeric}:}{ + +In this case the affected state will be \code{TRUE} if the value of the individual +is above the \strong{threshold} if \strong{sup_thres_aff} is \code{TRUE} and \code{FALSE} +otherwise. +} } \examples{ generate_aff_inds(c(1, 2, 3, 4, 5), threshold = 3, sup_thres_aff = TRUE) generate_aff_inds(c("A", "B", "C", "A", "V", "B"), mods_aff = c("A", "B")) } +\author{ +Louis Le Nézet +} +\keyword{generate_scales} diff --git a/man/generate_border.Rd b/man/generate_border.Rd index f33ce0b7..3c1fc4a9 100644 --- a/man/generate_border.Rd +++ b/man/generate_border.Rd @@ -2,30 +2,35 @@ % Please edit documentation in R/generate_colors.R \name{generate_border} \alias{generate_border} -\title{Process the colors based on affection and availability} +\title{Process the border colors based on availability} \usage{ -generate_border(avail, colors_avail = c("green", "black")) +generate_border(values, colors_avail = c("green", "black")) } \arguments{ -\item{avail}{The vector containing the availability status. -The values need to be numeric and can only be 0, 1 or NA.} +\item{values}{The vector containing the values to process as available.} \item{colors_avail}{Set of 2 colors to use for the box's border of an -individual. The first color will be used for available individual (avail -== 1) and the second for the unavailable individual (avail == 0).} +individual. The first color will be used for available individual +(\code{avail == 1}) and the second for the unavailable individual +(\code{avail == 0}).} } \value{ -A dataframe containing the scale to use for the availability -status. +A list of three elements +\itemize{ +\item \code{mods} : The processed values column as a numeric factor +\item \code{avail} : A logical vector indicating if the individual is available +\item \code{sc_bord} : A dataframe containing the description of each modality of the +scale } -\description{ -Process the colors based on affection and availability } -\details{ -Perform transformation uppon a column given as the one +\description{ +Perform transformation uppon a vector given as the one containing the availability status to compute the border color. +The vector given will be transformed using the \code{\link[=vect_to_binary]{vect_to_binary()}} +function. } \examples{ generate_border(c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA)) } +\keyword{generate_scales} diff --git a/man/generate_colors.Rd b/man/generate_colors.Rd index aa631296..a6452615 100644 --- a/man/generate_colors.Rd +++ b/man/generate_colors.Rd @@ -1,28 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/generate_colors.R -\docType{methods} \name{generate_colors} \alias{generate_colors} \alias{generate_colors,character-method} -\alias{generate_colors,character} \alias{generate_colors,numeric-method} -\alias{generate_colors,numeric} -\alias{generate_colors,logical-method} -\alias{generate_colors,logical} \alias{generate_colors,Pedigree-method} -\alias{generate_colors,Pedigree} -\title{Process the colors based on affection and availability} +\title{Process the filling and border colors based on affection and availability} \usage{ -generate_colors(obj, ...) - \S4method{generate_colors}{character}( obj, avail, mods_aff = NULL, - threshold = 0.5, - sup_thres_aff = TRUE, keep_full_scale = FALSE, - breaks = 3, colors_aff = c("yellow2", "red"), colors_unaff = c("white", "steelblue4"), colors_avail = c("green", "black") @@ -31,20 +20,6 @@ generate_colors(obj, ...) \S4method{generate_colors}{numeric}( obj, avail, - mods_aff = NULL, - threshold = 0.5, - sup_thres_aff = TRUE, - keep_full_scale = FALSE, - breaks = 3, - colors_aff = c("yellow2", "red"), - colors_unaff = c("white", "steelblue4"), - colors_avail = c("green", "black") -) - -\S4method{generate_colors}{logical}( - obj, - avail, - mods_aff = NULL, threshold = 0.5, sup_thres_aff = TRUE, keep_full_scale = FALSE, @@ -72,21 +47,31 @@ generate_colors(obj, ...) } \arguments{ \item{obj}{A Pedigree object or a vector containing the affection status for -each individuals. The affection status can be numeric, logical or character.} +each individuals. The affection status can be numeric or a character.} -\item{...}{Other parameters to pass to the \code{generate_colors} function} - -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} \item{mods_aff}{Vector of modality to consider as affected in the case where the \code{values} is a factor.} +\item{keep_full_scale}{Boolean defining if the affection values need to +be set as a scale. If \code{values} is numeric the filling scale will be +calculated based on the values and the number of breaks given. +If \code{values} isn't numeric then each levels will get it's own color} + +\item{colors_aff}{Set of increasing colors to use for the filling of the +affected individuls.} + +\item{colors_unaff}{Set of increasing colors to use for the filling of the +unaffected individuls.} + +\item{colors_avail}{Set of 2 colors to use for the box's border of an +individual. The first color will be used for available individual +(\code{avail == 1}) and the second for the unavailable individual +(\code{avail == 0}).} + \item{threshold}{Numeric value separating the affected and healthy subject in the case where the \code{values} is numeric.} @@ -97,31 +82,18 @@ if the value of \code{values} is stricly above the \code{threshold}. If \code{FALSE}, the individuals will be considered affected if the value is stricly under the \code{threshold}.} -\item{keep_full_scale}{Boolean defining if the affection values need to -be set as a scale. If \code{values} is numeric the filling scale will be -calculated based on the values and the number of breaks given. -If \code{values} isn't numeric then each levels will get it's own color} - \item{breaks}{Number of breaks to use when using full scale with numeric values. The same number of breaks will be done for values from affected individuals and unaffected individuals.} -\item{colors_aff}{Set of increasing colors to use for the filling of the -affected individuls.} - -\item{colors_unaff}{Set of increasing colors to use for the filling of the -unaffected individuls.} - -\item{colors_avail}{Set of 2 colors to use for the box's border of an -individual. The first color will be used for available individual (avail -== 1) and the second for the unavailable individual (avail == 0).} - -\item{col_aff}{A string with the column name to use for the affection status.} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} \item{add_to_scale}{Boolean defining if the scales need to be added to the existing scales or if they need to replace the existing scales.} -\item{col_avail}{The name of the column containing the availability status.} +\item{col_avail}{A character vector with the name of the column to be used +for the availability status.} \item{reset}{If \code{TRUE} the scale of the specified column will be reset if already present.} @@ -129,32 +101,44 @@ already present.} \value{ \subsection{When used with a vector}{ -A list of three elements +A list of two elements \itemize{ -\item A vector containing the transformed filling modalities -\item A dataframe containing the description of each filling modalities -\item A dataframe containing the description of the border modalities +\item The list containing the filling colors processed and their description +\item The list containing the border colors processed and their description } } \subsection{When used with a Pedigree object}{ The Pedigree object with the \code{affected} and \code{avail} columns -processed accordingly. - -The Pedigree scales slots are updated +processed accordingly as well as the \code{scales} slot updated. } } \description{ -Process the colors based on affection and availability -} -\details{ Perform transformation uppon a dataframe given to compute the colors for the filling and the border of the individuals based on the affection and availability status. } +\details{ +The colors will be set using the \code{\link[=generate_fill]{generate_fill()}} and the +\code{\link[=generate_border]{generate_border()}} functions respectively for the filling and the border. +} \examples{ + +generate_colors( + c("A", "B", "A", "B", NA, "A", "B", "A", "B", NA), + c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), + mods_aff = "A", +) + +generate_colors( + c(10, 0, 5, 7, NA, 6, 2, 1, 3, NA), + c(1, 0, 1, 0, NA, 1, 0, 1, 0, NA), + threshold = 3, keep_full_scale = TRUE +) data("sampleped") ped <- Pedigree(sampleped) -generate_colors(ped, "affected", add_to_scale=FALSE)$scales +ped <- generate_colors(ped, "affected", add_to_scale=FALSE) +scales(ped) } +\keyword{generate_scales} diff --git a/man/generate_fill.Rd b/man/generate_fill.Rd index 044ab2a4..c72aff9f 100644 --- a/man/generate_fill.Rd +++ b/man/generate_fill.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/generate_colors.R \name{generate_fill} \alias{generate_fill} -\title{Process the colors based on affection} +\title{Process the filling colors based on affection} \usage{ generate_fill( values, @@ -17,7 +17,9 @@ generate_fill( \arguments{ \item{values}{The vector containing the values to process as affection.} -\item{affected}{The vector containing the affection status TRUE/FALSE.} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{labels}{The vector containing the labels to use for the affection.} @@ -37,21 +39,47 @@ affected individuls.} unaffected individuls.} } \value{ -A list of two elements +A list of three elements \itemize{ -\item The processed values column as a numeric factor -\item A dataframe containing the description of each modality of the scale +\item \code{mods} : The processed values column as a numeric factor +\item \code{affected} : A logical vector indicating if the individual is affected +\item \code{sc_fill} : A dataframe containing the description of each modality of the +scale } } \description{ -Process the colors based on affection -} -\details{ Perform transformation uppon a column given as the one containing affection status to compute the filling color. } +\details{ +The colors will be set using the +\code{\link[grDevices:colorRamp]{grDevices::colorRampPalette()}} function +with the colors given as parameters. + +The colors will be set as follow: +\itemize{ +\item If \strong{keep_full_scale} is \code{FALSE}: +Then the affected individuals will get the first color of the +\strong{colors_aff} vector and the unaffected individuals will get the +first color of the \strong{colors_unaff} vector. +\item If \strong{keep_full_scale} is \code{TRUE}: +\itemize{ +\item If \strong{values} isn't numeric: +Each levels of the affected \strong{values} vector will get it's own color from +the \strong{colors_aff} vector using the \code{\link[grDevices:colorRamp]{grDevices::colorRampPalette()}} and +the same will be done for the unaffected individuals using the +\strong{colors_unaff}. +\item If \strong{values} is numeric: +The mean of the affected individuals will be compared to the mean of the +unaffected individuals and the colors will be set up such as the color +gradient follow the direction of the affection. +} +} +} \examples{ aff <- generate_aff_inds(seq_len(5), threshold = 3, sup_thres_aff = TRUE) generate_fill(seq_len(5), aff$affected, aff$labels) generate_fill(seq_len(5), aff$affected, aff$labels, keep_full_scale = TRUE) + } +\keyword{generate_scales} diff --git a/man/get_twin_rel.Rd b/man/get_twin_rel.Rd index a542a631..73373c90 100644 --- a/man/get_twin_rel.Rd +++ b/man/get_twin_rel.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/auto_hint.R \name{get_twin_rel} \alias{get_twin_rel} -\title{Routine to get twin relationships} +\title{Get twin relationships} \usage{ -get_twin_rel(ped) +get_twin_rel(obj) } \arguments{ -\item{ped}{A Pedigree object} +\item{obj}{A Pedigree object} } \value{ A list containing components @@ -18,16 +18,16 @@ A list containing components } } \description{ -Routine to get twin relationships +Get twin relationships } \details{ This routine function determine the twin relationships -in a Pedigree. It complete the missing twin relationships for -triplets, quads, etc. It also determine the order of the twins +in a Pedigree. It determine the order of the twins in the Pedigree. It is used by \code{auto_hint()}. } \seealso{ \code{\link[=auto_hint]{auto_hint()}} } -\keyword{internal} +\keyword{auto_hint} +\keyword{internal,} diff --git a/man/ibd_matrix.Rd b/man/ibd_matrix.Rd index 4b3cb289..55049902 100644 --- a/man/ibd_matrix.Rd +++ b/man/ibd_matrix.Rd @@ -2,14 +2,16 @@ % Please edit documentation in R/ibd_matrix.R \name{ibd_matrix} \alias{ibd_matrix} -\title{Create an IBD matrix} +\title{IBD matrix} \usage{ ibd_matrix(id1, id2, ibd, idmap, diagonal) } \arguments{ -\item{id1}{First subject identifiers} +\item{id1}{A character vector with the id of the first individuals of each +pairs} -\item{id2}{Second subject identifiers} +\item{id2}{A character vector with the id of the second individuals of each +pairs} \item{ibd}{the IBD value for that pair} diff --git a/man/is_disconnected.Rd b/man/is_disconnected.Rd index aeb92c8e..15d8f298 100644 --- a/man/is_disconnected.Rd +++ b/man/is_disconnected.Rd @@ -2,28 +2,34 @@ % Please edit documentation in R/utils.R \name{is_disconnected} \alias{is_disconnected} -\title{Check wich individuals are disconnected} +\title{Are individuals disconnected} \usage{ is_disconnected(id, dadid, momid) } \arguments{ -\item{id}{A vector of each subjects identifiers} - \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ -A vector of boolean of the same size as \code{id} -with TRUE if the individual is disconnected and FALSE otherwise +A vector of boolean of the same size as \strong{id} +with \code{TRUE} if the individual is disconnected and +\code{FALSE} otherwise } \description{ Check which individuals are disconnected. } \details{ An individuals is considered disconnected if the kinship with -all the other individuals is 0. +all the other individuals is \code{0}. +} +\examples{ +is_disconnected( + c("1", "2", "3", "4", "5"), + c("3", "3", NA, NA, NA), + c("4", "4", NA, NA, NA) +) } \keyword{internal} diff --git a/man/is_founder.Rd b/man/is_founder.Rd index e09e09da..c5c5b154 100644 --- a/man/is_founder.Rd +++ b/man/is_founder.Rd @@ -2,28 +2,30 @@ % Please edit documentation in R/utils.R \name{is_founder} \alias{is_founder} -\title{Check wich individuals are founders} +\title{Are individuals founders} \usage{ -is_founder(momid, dadid, missid = "0") +is_founder(momid, dadid, missid = NA_character_) } \arguments{ -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ -A vector of boolean of the same size as \code{dadid} and \code{momid} -with TRUE if the individual has no parents (i.e is a founder) and FALSE -otherwise. +A vector of boolean of the same size as \strong{dadid} and \strong{momid} +with \code{TRUE} if the individual has no parents (i.e is a founder) and +\code{FALSE} otherwise. } \description{ Check which individuals are founders. } +\examples{ +is_founder(c("3", "3", NA, NA), c("4", "4", NA, NA)) +} \keyword{internal} diff --git a/man/is_informative.Rd b/man/is_informative.Rd index 53153e83..006968a4 100644 --- a/man/is_informative.Rd +++ b/man/is_informative.Rd @@ -3,44 +3,25 @@ \docType{methods} \name{is_informative} \alias{is_informative} -\alias{is_informative,character-method} -\alias{is_informative,character} +\alias{is_informative,character_OR_integer-method} \alias{is_informative,Pedigree-method} -\alias{is_informative,Pedigree} -\title{Is informative} +\title{Find informative individuals} \usage{ -is_informative(obj, ...) +\S4method{is_informative}{character_OR_integer}(obj, avail, affected, informative = "AvAf") -\S4method{is_informative}{character}(obj, avail, affected, informative = "AvAf", missid = "0") - -\S4method{is_informative}{Pedigree}( - obj, - col_aff = NULL, - informative = "AvAf", - missid = "0", - reset = FALSE -) +\S4method{is_informative}{Pedigree}(obj, col_aff = NULL, informative = "AvAf", reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{avail}{A logical vector with the availability status of the +individuals +(i.e. \code{FALSE} = not available, \code{TRUE} = available, \code{NA} = unknown).} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} - -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{affected}{A logical vector with the affection status of the +individuals +(i.e. \code{FALSE} = unaffected, \code{TRUE} = affected, \code{NA} = unknown).} \item{informative}{Informative individuals selection can take 5 values: \itemize{ @@ -53,44 +34,49 @@ is_informative(obj, ...) \item A boolean }} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{col_aff}{A string with the column name to use for the affection status.} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} -\item{reset}{If \code{TRUE}, the \code{id_inf} column is reset} +\item{reset}{If \code{TRUE}, the \code{isinf} slot is reset} } \value{ \subsection{When obj is a vector}{ -A vector of individuals informative identifiers +A vector of individuals informative identifiers. } \subsection{When obj is a Pedigree}{ -A list containing the Pedigree object and the vector of individuals -identifiers. -The Pedigree object will have a new column named 'id_inf' containing 1 for -informative individuals and 0 otherwise. +The Pedigree object with its \code{isinf} slot updated. } } \description{ Select the ids of the informative individuals. } \details{ -Depending on the informative parameter, the function will extract -the ids of the informative individuals. In the case of a numeric vector, -the function will return the same vector. In the case of a boolean, the -function will return the ids of the individuals if TRUE, NA otherwise. +Depending on the \strong{informative} parameter, the function will +extract the ids of the informative individuals. In the case of a +numeric vector, the function will return the same vector. +In the case of a boolean, the function will return the ids of the +individuals if TRUE, NA otherwise. In the case of a string, the function will return the ids of the corresponding informative individuals based on the avail and affected columns. } \examples{ + +is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +is_informative(c("A", "B", "C", "D", "E"), informative = c(1, 2)) +is_informative(c("A", "B", "C", "D", "E"), informative = c("A", "B")) +is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), + affected = c(0, 1, 0, 1, 1), informative = "AvAf") +is_informative(c("A", "B", "C", "D", "E"), avail = c(1, 0, 0, 1, 1), + affected = c(0, 1, 0, 1, 1), informative = "AvOrAf") +is_informative(c("A", "B", "C", "D", "E"), + informative = c(TRUE, FALSE, TRUE, FALSE, TRUE)) + data("sampleped") ped <- Pedigree(sampleped) -is_informative(ped, col_aff = "affection_aff") - +ped <- is_informative(ped, col_aff = "affection_mods") +isinf(ped(ped)) } diff --git a/man/is_parent.Rd b/man/is_parent.Rd index 5ad27b20..8a277438 100644 --- a/man/is_parent.Rd +++ b/man/is_parent.Rd @@ -2,29 +2,40 @@ % Please edit documentation in R/utils.R \name{is_parent} \alias{is_parent} -\title{Check wich individuals are parents} +\alias{is_parent,character_OR_integer-method} +\alias{is_parent,Ped-method} +\title{Are individuals parents} \usage{ -is_parent(id, dadid, momid, missid = "0") +\S4method{is_parent}{character_OR_integer}(obj, dadid, momid, missid = NA_character_) + +\S4method{is_parent}{Ped}(obj, missid = NA_character_) } \arguments{ -\item{id}{A vector of each subjects identifiers} +\item{obj}{A vector of each subjects identifiers or a Ped object} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} +\item{missid}{A character vector with the missing values identifiers. +All the id, dadid and momid corresponding to those values will be set +to \code{NA_character_}.} } \value{ -A vector of boolean of the same size as \code{id} +A vector of boolean of the same size as \strong{obj} with TRUE if the individual is a parent and FALSE otherwise } \description{ Check which individuals are parents. } +\examples{ + +is_parent(c("1", "2", "3", "4"), c("3", "3", NA, NA), c("4", "4", NA, NA)) + +data(sampleped) +ped <- Pedigree(sampleped) +is_parent(ped(ped)) +} \keyword{internal} diff --git a/man/is_valid.Rd b/man/is_valid.Rd deleted file mode 100644 index 84d7fc09..00000000 --- a/man/is_valid.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/validity.R -\name{is_valid} -\alias{is_valid} -\title{Pedigree validity method.} -\usage{ -is_valid(object) -} -\arguments{ -\item{object}{A Pedigree object.} -} -\value{ -A logical value or a character vector with the errors. -} -\description{ -Check if the Pedigree object is valid. -} -\details{ -It will check : -the fields of the slots -the values in the columns of the ped, rel and scale slot -} -\keyword{internal} diff --git a/man/is_valid_hints.Rd b/man/is_valid_hints.Rd new file mode 100644 index 00000000..734fe26a --- /dev/null +++ b/man/is_valid_hints.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_hints} +\alias{is_valid_hints} +\title{Check if a Hints object is valid} +\usage{ +is_valid_hints(object) +} +\arguments{ +\item{object}{A Hints object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Check if horder and spouse slots are valid: +\itemize{ +\item horder is named numeric vector +\item spouse is a data.frame +\itemize{ +\item Has the three \code{idr}, \code{idl}, \code{anchor} columns +\item \code{idr} and \code{idl} are different and doesn't contains \code{NA} +\item \code{idr} and \code{idl} couple are unique +\item \code{anchor} column only have \code{right}, \code{left}, \code{either} values +} +\item all ids in spouse needs to be in the names of the horder vector +} +} +\keyword{internal} diff --git a/man/is_valid_ped.Rd b/man/is_valid_ped.Rd new file mode 100644 index 00000000..058d93a0 --- /dev/null +++ b/man/is_valid_ped.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_ped} +\alias{is_valid_ped} +\title{Check if a Ped object is valid} +\usage{ +is_valid_ped(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the ped ids slots have the right values +\item Check that the sex, steril, status, avail and affected slots have the +right values +\item Check that dad are male and mom are female +\item Check that individuals have both parents or none +} +} +\keyword{internal} diff --git a/man/is_valid_pedigree.Rd b/man/is_valid_pedigree.Rd new file mode 100644 index 00000000..9868edba --- /dev/null +++ b/man/is_valid_pedigree.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_pedigree} +\alias{is_valid_pedigree} +\title{Check if a Pedigree object is valid} +\usage{ +is_valid_pedigree(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the all Rel id are in the Ped object +\item Check that twins have same parents +\item Check that MZ twins have same sex +\item Check that all columns used in scales are in the Ped object +\item Check that all fill & border modalities are in the Ped object column +\item Check that all id used in Hints object are in the Ped object +\item Check that all spouse in Hints object are male / female +} +} +\keyword{internal} diff --git a/man/is_valid_rel.Rd b/man/is_valid_rel.Rd new file mode 100644 index 00000000..eb639956 --- /dev/null +++ b/man/is_valid_rel.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_rel} +\alias{is_valid_rel} +\title{Check if a Rel object is valid} +\usage{ +is_valid_rel(object) +} +\arguments{ +\item{object}{A Ped object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Multiple checks are done here +} +\details{ +\enumerate{ +\item Check that the "id1", "id2", "code", "famid" slots exist +\item Check that the "code" slots have the right values +(i.e. "MZ twin", "DZ twin", "UZ twin", "Spouse") +\item Check that all "id1" are different to "id2" +\item Check that all "id1" are smaller than "id2" +\item Check that no duplicate relation are present +} +} +\keyword{internal} diff --git a/man/is_valid_scales.Rd b/man/is_valid_scales.Rd new file mode 100644 index 00000000..49dc2830 --- /dev/null +++ b/man/is_valid_scales.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllValidity.R +\name{is_valid_scales} +\alias{is_valid_scales} +\title{Check if a Scales object is valid} +\usage{ +is_valid_scales(object) +} +\arguments{ +\item{object}{A Scales object.} +} +\value{ +A character vector with the errors or \code{TRUE} if no errors. +} +\description{ +Check if the fill and border slots are valid: +\itemize{ +\item fill slot is a data.frame with "order", "column_values", +"column_mods", "mods", "labels", "affected", "fill", +"density", "angle" columns. +\itemize{ +\item "affected" is logical. +\item "density", "angle", "order", "mods" are numeric. +\item "column_values", "column_mods", "labels", "fill" are +character. +} +\item border slot is a data.frame with "column_values", +"column_mods", "mods", "labels", "border" columns. +\itemize{ +\item "column_values", "column_mods", "labels", "border" are +character. +\item "mods" is numeric. +} +} +} +\keyword{internal} diff --git a/man/kindepth.Rd b/man/kindepth.Rd index 50e9963c..65c65d41 100755 --- a/man/kindepth.Rd +++ b/man/kindepth.Rd @@ -1,59 +1,65 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/kindepth.R -\docType{methods} \name{kindepth} \alias{kindepth} -\alias{kindepth,character-method} -\alias{kindepth,character} +\alias{kindepth,character_OR_integer-method} \alias{kindepth,Pedigree-method} -\alias{kindepth,Pedigree} -\title{Compute the depth of each subject in a Pedigree} +\alias{kindepth,Ped-method} +\title{Individual's depth in a pedigree} \usage{ kindepth(obj, ...) -\S4method{kindepth}{character}(obj, dadid, momid, align_parents = FALSE) +\S4method{kindepth}{character_OR_integer}(obj, dadid, momid, align_parents = FALSE) \S4method{kindepth}{Pedigree}(obj, align_parents = FALSE) + +\S4method{kindepth}{Ped}(obj, align_parents = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{...}{Additional arguments} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} -\item{align_parents}{If \code{align_parents=T}, go one step further and try to -make both parents of each child have the same depth. +\item{align_parents}{If \code{align_parents = TRUE}, go one step further +and try to make both parents of each child have the same depth. (This is not always possible). -It helps the drawing program by lining up pedigrees that 'join in the middle' -via a marriage.} +It helps the drawing program by lining up pedigrees that +'join in the middle' via a marriage.} } \value{ -an integer vector containing the depth for each subject +An integer vector containing the depth for each subject } \description{ Computes the depth of each subject in the Pedigree. } \details{ -Mark each person as to their depth in a Pedigree; 0 for a founder, otherwise +Mark each person as to their depth in a Pedigree; \code{0} for a founder, +otherwise : -\eqn{depth = 1 + \max(fatherDepth, motherDepth)} +\deqn{depth = 1 + \max(fatherDepth, motherDepth)} -In the case of an inbred Pedigree a perfect alignment obeying -\code{extra=TRUE} may not exist. +In the case of an inbred Pedigree a perfect alignment may not exist. } \examples{ +kindepth( + c("A", "B", "C", "D", "E"), + c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0") +) data(sampleped) -ped1 <- Pedigree(sampleped[sampleped$family == "1",]) +ped1 <- Pedigree(sampleped[sampleped$famid == "1",]) kindepth(ped1) } \seealso{ \code{\link[=align]{align()}} } \author{ -Terry Therneau +Terry Therneau, updated by Louis Le Nézet } diff --git a/man/kinship.Rd b/man/kinship.Rd index 93429ced..3c8e8e93 100755 --- a/man/kinship.Rd +++ b/man/kinship.Rd @@ -1,41 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/kinship.R -\docType{methods} \name{kinship} \alias{kinship} +\alias{kinship,Ped-method} \alias{kinship,character-method} -\alias{kinship,character} \alias{kinship,Pedigree-method} -\alias{kinship,Pedigree} -\title{Compute a kinship matrix} +\title{Kinship matrix} \usage{ -kinship(obj, ...) +\S4method{kinship}{Ped}(obj, chrtype = "autosome") \S4method{kinship}{character}(obj, dadid, momid, sex, chrtype = "autosome") \S4method{kinship}{Pedigree}(obj, chrtype = "autosome") } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A Pedigree or Ped object or a vector of subject identifiers.} -\item{...}{Additional arguments passed to methods} +\item{chrtype}{chromosome type. The currently supported types are +'autosome' and 'X' or 'x'.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} - -\item{chrtype}{chromosome type. The currently supported types are -'autosome' and 'X' or 'x'.} } \value{ \subsection{When obj is a vector}{ @@ -43,25 +41,25 @@ the gender of the individuals. The following values are recognized: A matrix of kinship coefficients. } -\subsection{When obj is a pedigree}{ +\subsection{When obj is a Pedigree}{ A matrix of kinship coefficients ordered by families present -in the pedigree. +in the Pedigree object. } } \description{ -Compute the kinship matrix for a set of related autosomal subjects. The -function is generic, and can accept a Pedigree, vector as +Compute the kinship matrix for a set of related autosomal subjects. +The function is generic, and can accept a Pedigree, a Ped or a vector as the first argument. } \details{ -The function will usually be called with a Pedigree the -third form is provided for backwards compatibility with an earlier release -of the library that was less capable. The first argument is named \code{id} -for the same reason. Note that when using the third form any information on +The function will usually be called with a Pedigree. +The call with a Ped or a vector is provided for backwards compatibility +with an earlier release of the library that was less capable. +Note that when using with a Ped or a vector, any information on twins is not available to the function. -When called with a pedigree, the routine +When called with a Pedigree, the routine will create a block-diagonal-symmetric sparse matrix object of class \code{dsCMatrix}. Since the \verb{[i, j]} value of the result is 0 for any two unrelated individuals i and j and a \code{Matrix} utilizes sparse @@ -83,15 +81,23 @@ The computation is based on a recursive algorithm described in Lange, which assumes that the founder alleles are all independent. } \section{References}{ - K Lange, Mathematical and Statistical Methods for + +K Lange, Mathematical and Statistical Methods for Genetic Analysis, Springer-Verlag, New York, 1997. } \examples{ + +kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1)) +kinship(c("A", "B", "C", "D", "E"), c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), sex = c(1, 2, 1, 2, 1), + chrtype = "x" +) + data(sampleped) ped <- Pedigree(sampleped) kinship(ped) - } \seealso{ \code{\link[=make_famid]{make_famid()}}, \code{\link[=kindepth]{kindepth()}} diff --git a/man/length-Pedigree-method.Rd b/man/length-Pedigree-method.Rd deleted file mode 100644 index 66aca38e..00000000 --- a/man/length-Pedigree-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pedigreeClass.R -\docType{methods} -\name{length,Pedigree-method} -\alias{length,Pedigree-method} -\title{Compute the length of a Pedigree object} -\usage{ -\S4method{length}{Pedigree}(x) -} -\arguments{ -\item{x}{A Pedigree object.} -} -\value{ -The number of individuals in the Pedigree object. -} -\description{ -Compute the length of a Pedigree object -} diff --git a/man/make_famid.Rd b/man/make_famid.Rd index ad7439cf..1f9805d5 100644 --- a/man/make_famid.Rd +++ b/man/make_famid.Rd @@ -1,29 +1,23 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/make_famid.R -\docType{methods} \name{make_famid} \alias{make_famid} \alias{make_famid,character-method} -\alias{make_famid,character} \alias{make_famid,Pedigree-method} -\alias{make_famid,Pedigree} -\title{Get family id} +\title{Compute family id} \usage{ -make_famid(obj, ...) - \S4method{make_famid}{character}(obj, dadid, momid) \S4method{make_famid}{Pedigree}(obj) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} - -\item{...}{Additional arguments passed to methods} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} } \value{ @@ -35,10 +29,11 @@ An integer vector giving family groupings \subsection{When used with a Pedigree object}{ An updated Pedigree object with the family id added +and with all ids updated } } \description{ -Construct a family id from Pedigree information +Construct a family identifier from pedigree information } \details{ Create a vector of length n, giving the family 'tree' number of each @@ -47,6 +42,13 @@ tree 1, otherwise the tree numbers represent the disconnected subfamilies. Singleton subjects give a zero for family number. } \examples{ + +make_famid( + c("A", "B", "C", "D", "E", "F"), + c("C", "D", "0", "0", "0", "0"), + c("E", "E", "0", "0", "0", "0") +) + data(sampleped) ped1 <- Pedigree(sampleped[,-1]) make_famid(ped1) diff --git a/man/min_dist_inf.Rd b/man/min_dist_inf.Rd index 485c8c3b..9dac04ed 100644 --- a/man/min_dist_inf.Rd +++ b/man/min_dist_inf.Rd @@ -1,61 +1,44 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/min_dist_inf.R -\docType{methods} \name{min_dist_inf} \alias{min_dist_inf} \alias{min_dist_inf,character-method} -\alias{min_dist_inf,character} \alias{min_dist_inf,Pedigree-method} -\alias{min_dist_inf,Pedigree} +\alias{min_dist_inf,Ped-method} \title{Minimum distance to the informative individuals} \usage{ -min_dist_inf(obj, ...) +\S4method{min_dist_inf}{character}(obj, dadid, momid, sex, id_inf) -\S4method{min_dist_inf}{character}(obj, dadid, momid, sex, avail, affected, informative = "AvAf") +\S4method{min_dist_inf}{Pedigree}(obj, col_aff = NULL, informative = "AvAf", reset = FALSE, ...) -\S4method{min_dist_inf}{Pedigree}( - obj, - col_aff = NULL, - informative = "AvAf", - missid = "0", - reset = FALSE, - ... -) +\S4method{min_dist_inf}{Ped}(obj, informative = "AvAf", reset = FALSE) } \arguments{ -\item{obj}{A pedigree object or a vector of subject identifiers.} +\item{obj}{A character vector with the id of the individuals or a +\code{data.frame} with all the informations in corresponding columns.} -\item{...}{Additional arguments passed to methods} +\item{...}{Additional arguments} \item{dadid}{A vector containing for each subject, the identifiers of the biologicals fathers.} -\item{momid}{vector containing for each subject, the identifiers of the +\item{momid}{A vector containing for each subject, the identifiers of the biologicals mothers.} \item{sex}{A character, factor or numeric vector corresponding to -the gender of the individuals. The following values are recognized: +the gender of the individuals. This will be transformed to an ordered factor +with the following levels: \code{male} < \code{female} < \code{unknown} < `terminated +The following values are recognized: \itemize{ \item character() or factor() : "f", "m", "woman", "man", "male", "female", "unknown", "terminated" \item numeric() : 1 = "male", 2 = "female", 3 = "unknown", 4 = "terminated" }} -\item{avail}{A numeric vector of availability status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unavailable -\item \code{1} : available -\item \code{NA} : availability not known -}} +\item{id_inf}{An identifiers vector of informative individuals.} -\item{affected}{A numeric vector of affection status of each individual -(e.g., genotyped). The values are: -\itemize{ -\item \code{0} : unaffected -\item \code{1} : affected -\item \code{NA} : affection status not known -}} +\item{col_aff}{A character vector with the name of the column to be used +for the affection status.} \item{informative}{Informative individuals selection can take 5 values: \itemize{ @@ -68,14 +51,7 @@ the gender of the individuals. The following values are recognized: \item A boolean }} -\item{col_aff}{A string with the column name to use for the affection status.} - -\item{missid}{The missing identifier value. Founders are the individuals with -no father and no mother in the Pedigree -(i.e. \code{dadid} and \code{momid} equal to the value of this variable). -The default for \code{missid} is \code{"0"}.} - -\item{reset}{If TRUE, the \code{kin} and if \code{id_inf} columns is reset} +\item{reset}{If TRUE, the \code{kin} and if \code{isinf} columns is reset} } \value{ \subsection{When obj is a vector}{ @@ -87,8 +63,9 @@ and all the others corresponding to the order of the individuals in the \subsection{When obj is a Pedigree}{ -The Pedigree object with a new column named 'kin' containing the kinship -degree. +The Pedigree object with a new slot named 'kin' containing the minimum +distance between each individuals and the informative individuals. +The \code{isinf} slot is also updated with the informative individuals. } } \description{ @@ -107,10 +84,18 @@ is 0.5 and the minimum distance is 1. Each time the kinship degree is divided by 2, the minimum distance is increased by 1. } \examples{ + +min_dist_inf( + c("A", "B", "C", "D", "E"), + c("C", "D", "0", "0", "0"), + c("E", "E", "0", "0", "0"), + sex = c(1, 2, 1, 2, 1), + id_inf = c("D", "E") +) + data(sampleped) ped <- Pedigree(sampleped) -min_dist_inf(ped, col_aff = "affection_aff")$ped - +kin(ped(min_dist_inf(ped, col_aff = "affection_mods"))) } \seealso{ \code{\link[=kinship]{kinship()}} diff --git a/man/minnbreast.Rd b/man/minnbreast.Rd index f07282c2..d5eeba2c 100644 --- a/man/minnbreast.Rd +++ b/man/minnbreast.Rd @@ -8,34 +8,31 @@ A data frame with 28081 observations, one line per subject, on the following 14 variables. \itemize{ -\item \code{id} subject identifier -\item \code{proband} if 1, this subject is one of the original +\item \code{id} : Subject identifier +\item \code{proband} : If 1, this subject is one of the original 426 probands -\item \code{fatherid} identifier of the father, if the father is part of +\item \code{fatherid} : Identifier of the father, if the father is part of the data set; zero otherwise -\item \code{motherid} identifier of the mother, if the mother is part of +\item \code{motherid} : Identifier of the mother, if the mother is part of the data set; zero otherwise -\item \code{famid} family identifier -\item \code{endage} age at last follow-up or incident cancer -\item \code{cancer} 1= breast cancer (females) or prostate cancer (males), -0=censored -\item \code{yob} year of birth -\item \code{education} amount of education: 1-8 years, 9-12 years, high -school graduate, vocational education -beyond high school, some college but did not graduate, college graduate, -post-graduate education, refused to -answer on the questionnaire -\item \code{marstat} marital status: married, living with someone in a +\item \code{famid} : Family identifier +\item \code{endage} : Age at last follow-up or incident cancer +\item \code{cancer} : \code{1} = breast cancer (females) or prostate cancer (males), +\code{0} = censored +\item \code{yob} : Year of birth +\item \code{education} : Amount of education: 1-8 years, 9-12 years, high +school graduate, vocational education beyond high school, +some college but did not graduate, college graduate, +post-graduate education, refused to answer on the questionnaire +\item \code{marstat} : Marital status: married, living with someone in a marriage-like relationship, separated or divorced, widowed, never married, refused to answer the questionaire -\item \code{everpreg} ever pregnant: never pregnant at the time of -baseline survey, ever pregnant at the time -of baseline survey -\item \code{parity} number of births -\item \code{nbreast} number of breast biopsies -\item \code{sex} M or F -\item \code{bcpc} part of one of the families in the breast/prostate -cancer substudy: 0=no, 1=yes. +\item \code{everpreg} : Ever pregnant at the time of baseline survey +\item \code{parity} : Number of births +\item \code{nbreast} : Number of breast biopsies +\item \code{sex} : \code{M} or \code{F} +\item \code{bcpc} : Part of one of the families in the breast / prostate +cancer substudy: \code{0} = no, \code{1} = yes. Note that subjects who were recruited to the overall study after the date of the BP substudy are coded as zero. } @@ -46,7 +43,7 @@ data(minnbreast) \description{ Data from the Minnesota Breast Cancer Family Study. This contains extended pedigrees from 426 families, each identified by -a single proband in 1945-52, with follow up for incident breast cancer. +a single proband in 1945-1952, with follow up for incident breast cancer. } \details{ The original study was conducted by Dr. Elving Anderson at the @@ -56,11 +53,13 @@ University Hospital were enrolled, and information gathered on parents, siblings, offspring, aunts / uncles, and grandparents with the goal of understanding possible familial aspects of brest cancer. In 1991 the study was resurrected by Dr Tom Sellers. + Of the original 544 he excluded 58 prevalent cases, along with another 19 who had less than 2 living relatives at the time of Dr Anderson's survey. Of the remaining 462 families 10 had no living members, 23 could not be located and 8 refused, leaving 426 families on whom updated pedigrees were obtained. + This gave a study with 13351 males and 12699 females (5183 marry-ins). Primary questions were the relationship of early life exposures, breast density, and pharmacogenomics on incident breast cancer risk. @@ -77,10 +76,12 @@ breastped <- Pedigree(minnbreast, cols_ren_ped = list( "indId" = "id", "fatherId" = "fatherid", "motherId" = "motherid", "gender" = "sex", "family" = "famid" - ) + ), missid = "0", col_aff = "cancer" ) -print(breastped) -#plot(breastped) #plot family 8, proband is solid, slash for cancers +summary(breastped) +scales(breastped) +#plot family 8, proband is solid, slash for cancers +#plot(breastped[famid(breastped) == "8"]) } \references{ Epidemiologic and genetic follow-up study of 544 Minnesota breast cancer diff --git a/man/na_to_length.Rd b/man/na_to_length.Rd new file mode 100644 index 00000000..e7fb8cbf --- /dev/null +++ b/man/na_to_length.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllConstructor.R +\name{na_to_length} +\alias{na_to_length} +\title{NA to specific length} +\usage{ +na_to_length(x, temp, value) +} +\arguments{ +\item{x}{The vector to check.} + +\item{temp}{A template vector to use to determine the length.} + +\item{value}{The value to use to fill the vector.} +} +\value{ +A vector with the same length as temp. +} +\description{ +Check if all value in a vector is \code{NA}. +If so set all of them to a new value matching the length +of the template. +If not check that the size of the vector is equal to +the template. +} +\examples{ + +na_to_length(NA, rep(0, 4), "NewValue") +na_to_length(c(1, 2, 3, NA), rep(0, 4), "NewValue") +} +\keyword{internal} diff --git a/man/norm_ped.Rd b/man/norm_ped.Rd index 563d1d55..c72f9d19 100644 --- a/man/norm_ped.Rd +++ b/man/norm_ped.Rd @@ -2,53 +2,88 @@ % Please edit documentation in R/norm_data.R \name{norm_ped} \alias{norm_ped} -\title{Normalise dataframe} +\title{Normalise a Ped object dataframe} \usage{ -norm_ped(ped_df, na_strings = c("NA", ""), missid = "0", try_num = FALSE) +norm_ped( + ped_df, + na_strings = c("NA", ""), + missid = NA_character_, + try_num = FALSE +) } \arguments{ \item{ped_df}{A data.frame with the individuals informations. -The minimum columns required are \code{indID}, \code{fatherId}, \code{motherId} and -\code{gender}. -The \code{family} column can also be used to specify the family of the -individuals and will be merge to the \code{id} field separated by an -underscore. -The following columns are also recognize \code{sterilisation}, \code{available}, -\code{vitalStatus}, \code{affection}. The four of them will be transformed with the -\code{\link[=vect_to_binary]{vect_to_binary()}} function. -They respectively correspond to the sterilisation status, -the availability status, the death status and the affection status -of the individuals. The values recognized for those columns are \code{1} or -\code{0}.} +The minimum columns required are: + +\if{html}{\out{