Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue https://github.com/statOmics/msqrob2/issues/63 #64

Merged
merged 13 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Collate: 'msqrob-framework.R' 'allGenerics.R'
Encoding: UTF-8
VignetteBuilder: knitr
Roxygen: list(markdown=TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
biocViews:
Proteomics,
MassSpectrometry,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(getDF)
export(getDfPosterior)
export(getFitMethod)
export(getModel)
export(getReferencePresent)
export(getSigma)
export(getSigmaPosterior)
export(getVar)
Expand Down
16 changes: 13 additions & 3 deletions R/StatModel-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#' the linear model coefficients to be tested equal to zero.
#' The rownames of the matrix should be equal to the names of
#' parameters of the model.
#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
#' parameters with modified reference levels are returned. Watch out putting this
#' parameter to TRUE can change the interpretation of the logFC. Default is FALSE.
#'
#' @examples
#' data(pe)
#' # Aggregate peptide intensities in protein expression values
Expand All @@ -33,11 +37,14 @@

setMethod(
"getContrast", "StatModel",
function(object, L) {
function(object, L, acceptDifferentReference = FALSE) {
if (!is(L, "matrix")) L <- as.matrix(L)
coefs <- getCoef(object)
out <- matrix(rep(NA, ncol(L)))
rownames(out) <- colnames(L)
if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
return(out)
}
coefs <- getCoef(object)
hlp <- try(t(L) %*% coefs[rownames(L)], silent = TRUE)
if (!is(hlp, "try-error")) out[] <- hlp
return(out)
Expand All @@ -49,10 +56,13 @@ setMethod(

setMethod(
"varContrast", "StatModel",
function(object, L) {
function(object, L, acceptDifferentReference = FALSE) {
if (!is(L, "matrix")) L <- as.matrix(L)
out <- matrix(NA, ncol(L), ncol(L))
rownames(out) <- colnames(out) <- colnames(L)
if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
return(out)
}
vcovTmp <- getVcovUnscaled(object) * object@varPosterior
hlp <- try(t(L) %*% vcovTmp[rownames(L), rownames(L)] %*% L, silent = TRUE)
if (!is(hlp, "try-error")) out[] <- hlp
Expand Down
8 changes: 8 additions & 0 deletions R/accessors.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation}
#' \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix
#' of the model parameters}
#' \item{getReferencePresent(object)}{to get the vector of logicals indicating if the
#' reference level associated with a model parameter is present}
#' }
#'
#' @rdname statModelAccessors
Expand Down Expand Up @@ -95,3 +97,9 @@ setMethod("getVcovUnscaled",
signature = "StatModel",
definition = function(object) object@params$vcovUnscaled
)

#' @rdname statModelAccessors
setMethod("getReferencePresent",
signature = "StatModel",
definition = function(object) object@params$referencePresent
)
4 changes: 3 additions & 1 deletion R/allGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ setGeneric("getVarPosterior", function(object) standardGeneric("getVarPosterior"
#' @export
setGeneric("getSigmaPosterior", function(object) standardGeneric("getSigmaPosterior"))
#' @export
setGeneric("getContrast", function(object, L) standardGeneric("getContrast"))
setGeneric("getReferencePresent", function(object) standardGeneric("getReferencePresent"))
#' @export
setGeneric("getContrast", function(object, ...) standardGeneric("getContrast"))
#' @export
setGeneric("getVcovUnscaled", function(object) standardGeneric("getVcovUnscaled"))
#' @export
Expand Down
8 changes: 4 additions & 4 deletions R/hypothesisTest-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,16 @@ setMethod(
adjust.method = "BH",
modelColumn = "msqrobModels",
resultsColumnNamePrefix = "",
overwrite = FALSE) {
overwrite = FALSE,
acceptDifferentReference = FALSE) {
if (!(modelColumn %in% colnames(rowData(object)))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of the SummarizedExperiment object")
if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults"
if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast))
if ((sum(paste0(resultsColumnNamePrefix, colnames(contrast)) %in% colnames(rowData(object))) > 0) & !overwrite) stop("There is/are already column(s) with names starting with\'", resultsColumnNamePrefix, "\' in the rowData of the SummarizedExperiment object, set the argument overwrite=TRUE to replace the column(s) with the new results or use another name for the argument resultsColumnNamePrefix")
for (j in seq_len(ncol(contrast)))
{
contrHlp <- contrast[, j]
names(contrHlp) <- rownames(contrast)
rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
contrHlp <- contrast[, j, drop = FALSE]
rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
}
return(object)
}
Expand Down
118 changes: 118 additions & 0 deletions R/msqrob-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,126 @@ makeContrast <- function(contrasts, parameterNames) {
return(t(.chrlinfct2matrix(contrasts, parameterNames)$K))
}


#' Check for presence of reference levels
#' @description Check if the reference levels of the factors in the model are present for a specific feature.
#'
#' @param y A numeric vector with the feature values for each sample.
#'
#' @param data A `DataFrame` with information on the design. It has
#' the same number of rows as the number of columns (samples) of
#' `y`.
#'
#' @param referenceLevels A named list with the reference levels of the factors (named by the factor name)
#'
#' @return A logical vector indicating for each parameter (that comes from a factor variable) in the model
#' if his reference level has values for the current feature.
#'
#' @rdname checkReference
#'
checkReference <- function(y, data, paramNames, formula) {
referenceLevels <- getReferenceLevels(data, formula)
factor <- names(referenceLevels)
subset <- droplevels(data[!is.na(y), factor, drop = FALSE])
paramSplit <- list()
for (x in factor) {
paramSplit[[x]] <- paramNames[grep(x, paramNames)]
}
factor_status <- unlist(lapply(factor, function(x) {
noIntercept <- all(paste0(x, levels(as.factor(data[, x]))) %in% paramNames)
refPres <- referenceLevels[[x]] %in% levels(subset[, x])
return(noIntercept || refPres)
}))
names(factor_status) <- factor

referencePresent <- propagateFalseStatus(paramSplit, factor_status)

return(unlist(referencePresent))
}

#' Get reference levels factors
#' @description Get the reference levels of the factors of a data frame that are present in a formula
#'
#' @param dataFrame A `data.frame` that contains factors
#'
#' @param formula A formula object associated with the data frame (the variables in the formula should be present in the data frame)
#'
#' @return A named list with the reference levels of the factors (named by the factor name)
#'
#' @rdname getReferenceLevels
#'
getReferenceLevels <- function(dataFrame, formula){
referenceLevels <- list()
vars <- all.vars(formula)
for (x in vars){
if (is.factor(dataFrame[, x]) || is.character(dataFrame[, x])) {
if (is.character(dataFrame[, x])) {
dataFrame[, x] <- as.factor(dataFrame[, x])
}
referenceLevels[[x]] <- levels(dataFrame[, x])[1]
}
}
return(unlist(referenceLevels))
}

#' Check if the reference levels associated with the contrast are present
#' @description Check if the reference levels associated with the contrast are present
#'
#' @param referencePresent A logical vector indicating for each parameter in the model
#' if his reference level has values for the current feature.
#'
#' @param L A numeric contrast matrix with rownames that equal the model parameters
#' that are involved in the contrasts
#'
#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
#' parameters with modified reference levels are returned.
#'
#' @return A boolean
#'
#' @rdname referenceContrast
#'
referenceContrast <- function(referencePresent, L, acceptDifferentReference) {
if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
return(TRUE)
}
return(FALSE)
}

checkFullRank <- function(modelMatrix) {
return(qr(modelMatrix)$rank == ncol(modelMatrix))
}


##### None exported functions from multcomp package is included here to
##### During R and Bioc checks

propagateFalseStatus <- function(vectors, statuses) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice attempt to solve the interaction issue, I however can't fully wrap my head around what you are doing. A few comments on the function:

  • Could you document the internal function using standard comments so to briefly describe what the function does, and especially what are the expected argument inputs.
  • Be very careful with repeat to not allow for infinite loops, I personally prefer a while() loop although there is still is risk of infinite loops.
  • I can't see unit tests, but maybe this is in progress

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added tests, commented the function and changed the code. a97a4e5

repeat {
false_elements <- unique(unlist(vectors[statuses == FALSE]))

new_statuses <- map_lgl(seq_along(vectors), function(i) {
if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) {
return(FALSE)
} else {
return(statuses[i])
}
})

if (all(new_statuses == statuses)) break

statuses <- new_statuses
}

all_elements <- unique(unlist(vectors))
element_status <- setNames(rep(TRUE, length(all_elements)), all_elements)

for (i in seq_along(vectors)) {
element_status[vectors[[i]]] <- statuses[i]
}

return(element_status)
}


.chrlinfct2matrix <- function(ex, var) {
if (!is.character(ex)) {
Expand Down Expand Up @@ -668,3 +785,4 @@ makeContrast <- function(contrasts, parameterNames) {
ex, " to character"
)
}

30 changes: 20 additions & 10 deletions R/msqrob.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,29 @@ msqrobLm <- function(y,
robust = TRUE,
maxitRob = 5) {
myDesign <- model.matrix(formula, data)
missingSamples <- colSums(!is.na(y)) == 0
subsettedDesign <- myDesign[!missingSamples, , drop = FALSE]
stopifnot("The provided model must be full rank" = checkFullRank(subsettedDesign))
paramNames <- colnames(myDesign)
models <- apply(y, 1,
function(y, design) {
## computatability check
obs <- is.finite(y)
type <- "fitError"

referencePresent <- checkReference(y, data, paramNames, formula)
model <- list(
coefficients = NA, vcovUnscaled = NA,
sigma = NA, df.residual = NA, w = NA
referencePresent = referencePresent, coefficients = NA,
vcovUnscaled = NA, sigma = NA,
df.residual = NA, w = NA
)

if (sum(obs) > 0) {
## subset to finite observations, attention with R column switching
X <- design[obs, , drop = FALSE]
X <- X[,colMeans(X == 0) != 1 , drop = FALSE]
qrX <- qr(X)
X <- X[, qrX$pivot[seq_len(qrX$rank)], drop = FALSE]
y <- y[obs]
colnames_orig <- colnames(design)

if (robust) {
## use robust regression from MASS package, "M" estimation is used
Expand Down Expand Up @@ -108,16 +115,17 @@ msqrobLm <- function(y,
}

if (type != "fitError") {
coef <- rep(NA, length(colnames_orig))
names(coef) <- colnames_orig
coef <- rep(NA, length(paramNames))
names(coef) <- paramNames
coef[names(mod$coef)] <- mod$coef
vcovUnscaled <- matrix(NA, nrow =length(colnames_orig), ncol = length(colnames_orig))
rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- colnames_orig
vcovUnscaled <- matrix(NA, nrow =length(paramNames), ncol = length(paramNames))
rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- paramNames
vcovUnscaled[names(mod$coef), names(mod$coef)] <- msqrob2:::.vcovUnscaled(mod)

model <- list(
coefficients = mod$coef,
vcovUnscaled = .vcovUnscaled(mod),
referencePresent = referencePresent,
coefficients = coef,
vcovUnscaled = vcovUnscaled,
sigma = sigma,
cvanderaa marked this conversation as resolved.
Show resolved Hide resolved
df.residual = df.residual,
w = w
Expand Down Expand Up @@ -770,3 +778,5 @@ msqrobGlm <- function(y,

return(models)
}


27 changes: 19 additions & 8 deletions R/topFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#' to statistical significance.
#' @param alpha `numeric` specifying the cutoff value for adjusted p-values.
#' Only features with lower p-values are listed.
#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
#' parameters with modified reference levels are returned. Watch out putting this
#' parameter to TRUE can change the interpretation of the logFC. Default is FALSE.
#'
#' @examples
#' data(pe)
Expand All @@ -43,29 +46,37 @@
#' @author Lieven Clement
#' @export

topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1) {
topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) {
if (is(contrast, "matrix")) {
if (ncol(contrast) > 1) {
stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
}
if (ncol(contrast) > 1) {
stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
}
}

contrast <- contrast[contrast !=0]
contrast <- contrast[contrast != 0, , drop = FALSE]
logFC <- vapply(models,
getContrast,
numeric(1),
L = contrast
L = contrast,
acceptDifferentReference = acceptDifferentReference
)
se <- sqrt(vapply(models,
varContrast,
numeric(1),
L = contrast
L = contrast,
acceptDifferentReference = acceptDifferentReference
))
df <- vapply(models, getDfPosterior, numeric(1))
t <- logFC / se
pval <- pt(-abs(t), df) * 2
adjPval <- p.adjust(pval, method = adjust.method)

out <- data.frame(logFC, se, df, t, pval, adjPval)
if (acceptDifferentReference) {
out[["DifferentReference"]] <- vapply(models, function(y){
any(!getReferencePresent(y)[rownames(contrast)])},
logical(1)
)
}
if (sort) {
if (alpha < 1) {
ids <- adjPval < alpha
Expand Down
26 changes: 26 additions & 0 deletions man/checkReference.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading