From 6063604458222d417ce9715e9db248a350d33bfa Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 22 Aug 2024 11:17:30 +0200
Subject: [PATCH 01/13] [FEAT] - implementation of a referencePresent slot in
 the params of statModel (msqrobLm)

---
 R/msqrob-utils.R | 35 +++++++++++++++++++++++++++++++++++
 R/msqrob.R       | 27 +++++++++++++++++----------
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 9449e32..1f76317 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -86,6 +86,40 @@ makeContrast <- function(contrasts, parameterNames) {
     return(t(.chrlinfct2matrix(contrasts, parameterNames)$K))
 }
 
+
+#' Check for presence of reference levels
+#' @description Check if the reference levels of the factors in the model are present for a specific feature.
+#'
+#' @param yFeature A numeric vector with the feature values for each sample.
+#'
+#' @param data A `DataFrame` with information on the design. It has
+#'        the same number of rows as the number of columns (samples) of
+#'        `y`.
+#' 
+#' @param formula Model formula. The model is built based on the
+#'        covariates in the data object.
+#'
+#' @param paramNames character vector specifying the model parameters that are present in the global model matrix
+#' 
+#' @return A logical vector indicating for each parameter in the model if his reference level has values for the current feature.
+#' 
+#' @rdname checkReference
+#'
+checkReference <- function(yFeature, data, formula, paramNames) {
+  vars <- all.vars(formula)
+  paramRef <- c(rep(NA, length(paramNames)))
+  names(params) <- paramNames
+  subset <- data[!is.na(yFeature), vars, drop = FALSE]
+  for (x in vars){
+    reference <- levels(data[[x]])[1]
+    design <- model.matrix(as.formula(paste("~ -1 +", x)), data = subset)
+    referencePresent <- sum(design[,paste0(x, reference)]) != 0
+    paramRef[grep(x, paramNames, value = FALSE)] <- referencePresent
+  }
+  return(paramRef)
+}
+
+
 ##### None exported functions from multcomp package is included here to
 ##### During R and Bioc checks
 
@@ -668,3 +702,4 @@ makeContrast <- function(contrasts, parameterNames) {
         ex, " to character"
     )
 }
+
diff --git a/R/msqrob.R b/R/msqrob.R
index 91ad805..0b25e78 100644
--- a/R/msqrob.R
+++ b/R/msqrob.R
@@ -57,22 +57,26 @@ msqrobLm <- function(y,
     robust = TRUE,
     maxitRob = 5) {
     myDesign <- model.matrix(formula, data)
+    paramNames <- colnames(myDesign)
     models <- apply(y, 1,
         function(y, design) {
             ## computatability check
             obs <- is.finite(y)
             type <- "fitError"
+
+            referencePresent <- checkReference(y, data, formula, colnames(myDesign))
             model <- list(
-                coefficients = NA, vcovUnscaled = NA,
-                sigma = NA, df.residual = NA, w = NA
+                referencePresent = referencePresent, coefficients = NA,
+                vcovUnscaled = NA, sigma = NA,
+                df.residual = NA, w = NA
             )
 
             if (sum(obs) > 0) {
                 ## subset to finite observations, attention with R column switching
                 X <- design[obs, , drop = FALSE]
-                X <- X[,colMeans(X == 0) != 1 , drop = FALSE]
+                qrX <- qr(X)
+                X <- X[, qrX$pivot[seq_len(qrX$rank)], drop = FALSE]
                 y <- y[obs]
-                colnames_orig <- colnames(design)
 
                 if (robust) {
                     ## use robust regression from MASS package, "M" estimation is used
@@ -108,16 +112,17 @@ msqrobLm <- function(y,
                 }
 
                 if (type != "fitError") {
-                    coef <- rep(NA, length(colnames_orig))
-                    names(coef) <- colnames_orig
+                    coef <- rep(NA, length(paramNames))
+                    names(coef) <- paramNames
                     coef[names(mod$coef)] <- mod$coef
-                    vcovUnscaled <- matrix(NA, nrow =length(colnames_orig), ncol = length(colnames_orig))
-                    rownames(vcovUnscaled) <- colnames(vcovUnscaled) <-  colnames_orig
+                    vcovUnscaled <- matrix(NA, nrow =length(paramNames), ncol = length(paramNames))
+                    rownames(vcovUnscaled) <- colnames(vcovUnscaled) <-  paramNames
                     vcovUnscaled[names(mod$coef), names(mod$coef)] <- msqrob2:::.vcovUnscaled(mod)
 
                     model <- list(
-                        coefficients = mod$coef,
-                        vcovUnscaled = .vcovUnscaled(mod),
+                        referencePresent = referencePresent,
+                        coefficients = coef,
+                        vcovUnscaled = vcovUnscaled,
                         sigma = sigma,
                         df.residual = df.residual,
                         w = w
@@ -770,3 +775,5 @@ msqrobGlm <- function(y,
 
     return(models)
 }
+
+

From 7df1cff51169efd219a6b3ca6fe2c642ac97a446 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 22 Aug 2024 15:53:14 +0200
Subject: [PATCH 02/13] [FEAT] - add option to not return the contrast that
 involve parameters for which the reference level dropped

---
 R/StatModel-methods.R | 16 ++++++++++++++--
 R/accessors.R         |  8 ++++++++
 R/allGenerics.R       |  6 ++++--
 R/msqrob-utils.R      | 10 +++++-----
 R/topFeatures.R       | 14 +++++++++-----
 5 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R
index d2b3d84..9c0c8b7 100644
--- a/R/StatModel-methods.R
+++ b/R/StatModel-methods.R
@@ -11,6 +11,10 @@
 #'        the linear model coefficients to be tested equal to zero.
 #'        The rownames of the matrix should be equal to the names of
 #'        parameters of the model.
+#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
+#'        parameters with modified reference levels are returned. Watch out putting this
+#'        parameter to TRUE can change the interpretation of the logFC. Default is FALSE.
+#'
 #' @examples
 #' data(pe)
 #' # Aggregate peptide intensities in protein expression values
@@ -33,8 +37,12 @@
 
 setMethod(
     "getContrast", "StatModel",
-    function(object, L) {
+    function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
+        referencePresent <- object@params$referencePresent
+        if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
+            return(NA)
+        }
         coefs <- getCoef(object)
         out <- matrix(rep(NA, ncol(L)))
         rownames(out) <- colnames(L)
@@ -49,8 +57,12 @@ setMethod(
 
 setMethod(
     "varContrast", "StatModel",
-    function(object, L) {
+    function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
+        referencePresent <- object@params$referencePresent
+        if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
+            return(NA)
+        }
         out <- matrix(NA, ncol(L), ncol(L))
         rownames(out) <- colnames(out) <- colnames(L)
         vcovTmp <- getVcovUnscaled(object) * object@varPosterior
diff --git a/R/accessors.R b/R/accessors.R
index 5631dd2..a1322ca 100644
--- a/R/accessors.R
+++ b/R/accessors.R
@@ -14,6 +14,8 @@
 #'              \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation}
 #'              \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix
 #'                of the model parameters}
+#'              \item{getReferencePresent(object)}{to get the vector of logicals indicating if the
+#'               reference level associated with a model parameter is present}
 #'              }
 #'
 #' @rdname statModelAccessors
@@ -95,3 +97,9 @@ setMethod("getVcovUnscaled",
     signature = "StatModel",
     definition = function(object) object@params$vcovUnscaled
 )
+
+#' @rdname statModelAccessors
+setMethod("getReferencePresent",
+    signature = "StatModel",
+    definition = function(object) object@params$referencePresent
+)
\ No newline at end of file
diff --git a/R/allGenerics.R b/R/allGenerics.R
index 8da7437..24da472 100644
--- a/R/allGenerics.R
+++ b/R/allGenerics.R
@@ -5,7 +5,7 @@ setGeneric("getFitMethod", function(object) standardGeneric("getFitMethod"))
 #' @export
 setGeneric("getCoef", function(object) standardGeneric("getCoef"))
 #' @export
-setGeneric("varContrast", function(object, ...) standardGeneric("varContrast"))
+setGeneric("varContrast", function(object, L, acceptDifferentReference) standardGeneric("varContrast"))
 #' @export
 setGeneric("getVar", function(object) standardGeneric("getVar"))
 #' @export
@@ -19,7 +19,9 @@ setGeneric("getVarPosterior", function(object) standardGeneric("getVarPosterior"
 #' @export
 setGeneric("getSigmaPosterior", function(object) standardGeneric("getSigmaPosterior"))
 #' @export
-setGeneric("getContrast", function(object, L) standardGeneric("getContrast"))
+setGeneric("getReferencePresent", function(object) standardGeneric("getReferencePresent"))
+#' @export
+setGeneric("getContrast", function(object, L, acceptDifferentReference) standardGeneric("getContrast"))
 #' @export
 setGeneric("getVcovUnscaled", function(object) standardGeneric("getVcovUnscaled"))
 #' @export
diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 1f76317..72eb3ad 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -107,16 +107,16 @@ makeContrast <- function(contrasts, parameterNames) {
 #'
 checkReference <- function(yFeature, data, formula, paramNames) {
   vars <- all.vars(formula)
-  paramRef <- c(rep(NA, length(paramNames)))
-  names(params) <- paramNames
+  referencePresent <- c(rep(NA, length(paramNames)))
+  names(referencePresent) <- paramNames
   subset <- data[!is.na(yFeature), vars, drop = FALSE]
   for (x in vars){
     reference <- levels(data[[x]])[1]
     design <- model.matrix(as.formula(paste("~ -1 +", x)), data = subset)
-    referencePresent <- sum(design[,paste0(x, reference)]) != 0
-    paramRef[grep(x, paramNames, value = FALSE)] <- referencePresent
+    varPres <- sum(design[,paste0(x, reference)]) != 0
+    referencePresent[grep(x, paramNames, value = FALSE)] <- varPres
   }
-  return(paramRef)
+  return(referencePresent)
 }
 
 
diff --git a/R/topFeatures.R b/R/topFeatures.R
index e1f4d6e..f6a69c0 100644
--- a/R/topFeatures.R
+++ b/R/topFeatures.R
@@ -18,6 +18,9 @@
 #'        to statistical significance.
 #' @param alpha `numeric` specifying the cutoff value for adjusted p-values.
 #'        Only features with lower p-values are listed.
+#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
+#'        parameters with modified reference levels are returned. Watch out putting this
+#'        parameter to TRUE can change the interpretation of the logFC. Default is FALSE.
 #'
 #' @examples
 #' data(pe)
@@ -43,23 +46,24 @@
 #' @author Lieven Clement
 #' @export
 
-topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1) {
+topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) {
     if (is(contrast, "matrix")) {
           if (ncol(contrast) > 1) {
                 stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
             }
     }
-  
-    contrast <- contrast[contrast !=0]
+    contrast <- contrast[contrast != 0, , drop = FALSE]
     logFC <- vapply(models,
         getContrast,
         numeric(1),
-        L = contrast
+        L = contrast,
+        acceptDifferentReference = acceptDifferentReference
     )
     se <- sqrt(vapply(models,
         varContrast,
         numeric(1),
-        L = contrast
+        L = contrast,
+        acceptDifferentReference = acceptDifferentReference
     ))
     df <- vapply(models, getDfPosterior, numeric(1))
     t <- logFC / se

From 67cf13a96025927292ab974233f3d8b8ad372111 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Fri, 23 Aug 2024 12:58:24 +0200
Subject: [PATCH 03/13] [FEAT] - changing the logic behind the creation of
 "referencePresent"

---
 R/StatModel-methods.R      |  4 +--
 R/allGenerics.R            |  4 +--
 R/hypothesisTest-methods.R |  8 +++---
 R/msqrob-utils.R           | 53 +++++++++++++++++++++++++++++---------
 R/msqrob.R                 |  3 ++-
 R/topFeatures.R            |  7 ++---
 6 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R
index 9c0c8b7..e1b4c08 100644
--- a/R/StatModel-methods.R
+++ b/R/StatModel-methods.R
@@ -39,7 +39,7 @@ setMethod(
     "getContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
-        referencePresent <- object@params$referencePresent
+        referencePresent <- getReferencePresent(object)
         if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
             return(NA)
         }
@@ -59,7 +59,7 @@ setMethod(
     "varContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
-        referencePresent <- object@params$referencePresent
+        referencePresent <- getReferencePresent(object)
         if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
             return(NA)
         }
diff --git a/R/allGenerics.R b/R/allGenerics.R
index 24da472..d741ef4 100644
--- a/R/allGenerics.R
+++ b/R/allGenerics.R
@@ -5,7 +5,7 @@ setGeneric("getFitMethod", function(object) standardGeneric("getFitMethod"))
 #' @export
 setGeneric("getCoef", function(object) standardGeneric("getCoef"))
 #' @export
-setGeneric("varContrast", function(object, L, acceptDifferentReference) standardGeneric("varContrast"))
+setGeneric("varContrast", function(object, ...) standardGeneric("varContrast"))
 #' @export
 setGeneric("getVar", function(object) standardGeneric("getVar"))
 #' @export
@@ -21,7 +21,7 @@ setGeneric("getSigmaPosterior", function(object) standardGeneric("getSigmaPoster
 #' @export
 setGeneric("getReferencePresent", function(object) standardGeneric("getReferencePresent"))
 #' @export
-setGeneric("getContrast", function(object, L, acceptDifferentReference) standardGeneric("getContrast"))
+setGeneric("getContrast", function(object, ...) standardGeneric("getContrast"))
 #' @export
 setGeneric("getVcovUnscaled", function(object) standardGeneric("getVcovUnscaled"))
 #' @export
diff --git a/R/hypothesisTest-methods.R b/R/hypothesisTest-methods.R
index c79e40c..314756e 100644
--- a/R/hypothesisTest-methods.R
+++ b/R/hypothesisTest-methods.R
@@ -122,16 +122,16 @@ setMethod(
     adjust.method = "BH",
     modelColumn = "msqrobModels",
     resultsColumnNamePrefix = "",
-    overwrite = FALSE) {
+    overwrite = FALSE,
+    acceptDifferentReference = FALSE) {
         if (!(modelColumn %in% colnames(rowData(object)))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of the SummarizedExperiment object")
         if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults"
         if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast))
         if ((sum(paste0(resultsColumnNamePrefix, colnames(contrast)) %in% colnames(rowData(object))) > 0) & !overwrite) stop("There is/are already column(s) with names starting with\'", resultsColumnNamePrefix, "\' in the rowData of the SummarizedExperiment object, set the argument overwrite=TRUE to replace the column(s) with the new results or use another name for the argument resultsColumnNamePrefix")
         for (j in seq_len(ncol(contrast)))
         {
-            contrHlp <- contrast[, j]
-            names(contrHlp) <- rownames(contrast)
-            rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
+            contrHlp <- contrast[, j, drop = FALSE]
+            rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
         }
         return(object)
     }
diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 72eb3ad..f80940c 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -90,7 +90,7 @@ makeContrast <- function(contrasts, parameterNames) {
 #' Check for presence of reference levels
 #' @description Check if the reference levels of the factors in the model are present for a specific feature.
 #'
-#' @param yFeature A numeric vector with the feature values for each sample.
+#' @param y A numeric vector with the feature values for each sample.
 #'
 #' @param data A `DataFrame` with information on the design. It has
 #'        the same number of rows as the number of columns (samples) of
@@ -105,18 +105,47 @@ makeContrast <- function(contrasts, parameterNames) {
 #' 
 #' @rdname checkReference
 #'
-checkReference <- function(yFeature, data, formula, paramNames) {
-  vars <- all.vars(formula)
-  referencePresent <- c(rep(NA, length(paramNames)))
-  names(referencePresent) <- paramNames
-  subset <- data[!is.na(yFeature), vars, drop = FALSE]
-  for (x in vars){
-    reference <- levels(data[[x]])[1]
-    design <- model.matrix(as.formula(paste("~ -1 +", x)), data = subset)
-    varPres <- sum(design[,paste0(x, reference)]) != 0
-    referencePresent[grep(x, paramNames, value = FALSE)] <- varPres
+checkReference <- function(y, data, referenceLevels) {
+  subset <- droplevels(data[!is.na(y), names(referenceLevels), drop = FALSE])
+  referencePresent <- list()
+  paramToVar <- list()
+  for (x in names(referenceLevels)){
+    paramNames <- paste0(x, levels(data[, x])[-1])
+    for (param in paramNames){
+      referencePresent[[param]] <- NA
+    }
+    paramToVar[[x]] <- paramNames
+  }
+  for (x in names(referenceLevels)){
+    if (!(referenceLevels[[x]] %in% levels(subset[, x]))){
+        referencePresent[names(referencePresent) %in% paramToVar[[x]]] <- FALSE
+    } else {
+        referencePresent[names(referencePresent) %in% paramToVar[[x]]] <- TRUE
+    }
   }
-  return(referencePresent)
+  return(unlist(referencePresent))
+}
+
+#' Get reference levels factors
+#' @description Get the reference levels of the factors of a data frame that are present in a formula
+#'
+#' @param dataFrame A `data.frame` that contains factors
+#' 
+#' @param formula A formula object associated with the data frame (the variables in the formula should be present in the data frame)
+#'
+#' @return A named list with the reference levels of the factors (named by the factor name)
+#' 
+#' @rdname getReferenceLevels
+#' 
+getReferenceLevels <- function(dataFrame, formula){
+    referenceLevels <- list()
+    vars <- all.vars(formula)
+    for (x in vars){
+        if (is.factor(dataFrame[, x])){
+            referenceLevels[[x]] <- levels(dataFrame[, x])[1]
+        }
+    }
+    return(unlist(referenceLevels))
 }
 
 
diff --git a/R/msqrob.R b/R/msqrob.R
index 0b25e78..1dead5f 100644
--- a/R/msqrob.R
+++ b/R/msqrob.R
@@ -58,13 +58,14 @@ msqrobLm <- function(y,
     maxitRob = 5) {
     myDesign <- model.matrix(formula, data)
     paramNames <- colnames(myDesign)
+    referenceLevels <- getReferenceLevels(data, formula)
     models <- apply(y, 1,
         function(y, design) {
             ## computatability check
             obs <- is.finite(y)
             type <- "fitError"
 
-            referencePresent <- checkReference(y, data, formula, colnames(myDesign))
+            referencePresent <- checkReference(y, data, referenceLevels)
             model <- list(
                 referencePresent = referencePresent, coefficients = NA,
                 vcovUnscaled = NA, sigma = NA,
diff --git a/R/topFeatures.R b/R/topFeatures.R
index f6a69c0..db0a279 100644
--- a/R/topFeatures.R
+++ b/R/topFeatures.R
@@ -47,10 +47,11 @@
 #' @export
 
 topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) {
+    print(contrast)
     if (is(contrast, "matrix")) {
-          if (ncol(contrast) > 1) {
-                stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
-            }
+        if (ncol(contrast) > 1) {
+            stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
+        }
     }
     contrast <- contrast[contrast != 0, , drop = FALSE]
     logFC <- vapply(models,

From 4910d4afaf5eadec15689c7efe45922d59c394c9 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Fri, 23 Aug 2024 13:23:07 +0200
Subject: [PATCH 04/13] [FEAT] - new column to topFeature output when
 acceptDifferentReference is TRUE

---
 R/StatModel-methods.R |  6 ++----
 R/msqrob-utils.R      | 22 ++++++++++++++++++++++
 R/topFeatures.R       |  7 +++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R
index e1b4c08..85a43e4 100644
--- a/R/StatModel-methods.R
+++ b/R/StatModel-methods.R
@@ -39,8 +39,7 @@ setMethod(
     "getContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
-        referencePresent <- getReferencePresent(object)
-        if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
+        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
             return(NA)
         }
         coefs <- getCoef(object)
@@ -59,8 +58,7 @@ setMethod(
     "varContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
-        referencePresent <- getReferencePresent(object)
-        if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
+        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
             return(NA)
         }
         out <- matrix(NA, ncol(L), ncol(L))
diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index f80940c..1995835 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -148,6 +148,28 @@ getReferenceLevels <- function(dataFrame, formula){
     return(unlist(referenceLevels))
 }
 
+#' Check if the reference levels associated with the contrast are present
+#' @description Check if the reference levels associated with the contrast are present
+#'
+#' @param referencePresent A logical vector indicating for each parameter in the model 
+#' if his reference level has values for the current feature.
+#'
+#' @param L A numeric contrast matrix with rownames that equal the model parameters 
+#' that are involved in the contrasts
+#'
+#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
+#' parameters with modified reference levels are returned.
+#' 
+#' @return A boolean
+#' 
+#' @rdname referenceContrast
+#' 
+referenceContrast <- function(referencePresent, L, acceptDifferentReference) {
+  if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
+    return(TRUE)
+  }
+  return(FALSE)
+}
 
 ##### None exported functions from multcomp package is included here to
 ##### During R and Bioc checks
diff --git a/R/topFeatures.R b/R/topFeatures.R
index db0a279..55d1d67 100644
--- a/R/topFeatures.R
+++ b/R/topFeatures.R
@@ -70,7 +70,14 @@ topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alp
     t <- logFC / se
     pval <- pt(-abs(t), df) * 2
     adjPval <- p.adjust(pval, method = adjust.method)
+
     out <- data.frame(logFC, se, df, t, pval, adjPval)
+    if (acceptDifferentReference) {
+        out[["DifferentReference"]] <- vapply(models, function(y){
+            any(!getReferencePresent(y)[rownames(contrast)])},
+            logical(1)
+        )
+    }
     if (sort) {
         if (alpha < 1) {
             ids <- adjPval < alpha

From 414f5102c7955bc871d48fdfed3f79f18d8d323b Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Fri, 23 Aug 2024 14:31:37 +0200
Subject: [PATCH 05/13] [TEST] - test for msqrob-utils (check reference)

---
 DESCRIPTION                        |  2 +-
 NAMESPACE                          |  1 +
 R/msqrob-utils.R                   |  8 ++--
 man/checkReference.Rd              | 26 ++++++++++++
 man/getReferenceLevels.Rd          | 19 +++++++++
 man/hypothesisTest.Rd              |  3 +-
 man/msqrobLmer.Rd                  |  2 +-
 man/referenceContrast.Rd           | 24 +++++++++++
 man/statModelAccessors.Rd          |  5 +++
 man/statModelMethods.Rd            |  8 +++-
 man/topTable.Rd                    | 13 +++++-
 tests/testthat/test_msqrob-utils.R | 65 ++++++++++++++++++++++++++++++
 12 files changed, 165 insertions(+), 11 deletions(-)
 create mode 100644 man/checkReference.Rd
 create mode 100644 man/getReferenceLevels.Rd
 create mode 100644 man/referenceContrast.Rd
 create mode 100644 tests/testthat/test_msqrob-utils.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 04e8e33..26fe335 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -82,7 +82,7 @@ Collate: 'msqrob-framework.R' 'allGenerics.R'
 Encoding: UTF-8
 VignetteBuilder: knitr
 Roxygen: list(markdown=TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 biocViews:
     Proteomics,
     MassSpectrometry,
diff --git a/NAMESPACE b/NAMESPACE
index 6303259..034bf05 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ export(getDF)
 export(getDfPosterior)
 export(getFitMethod)
 export(getModel)
+export(getReferencePresent)
 export(getSigma)
 export(getSigmaPosterior)
 export(getVar)
diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 1995835..8effebc 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -96,12 +96,10 @@ makeContrast <- function(contrasts, parameterNames) {
 #'        the same number of rows as the number of columns (samples) of
 #'        `y`.
 #' 
-#' @param formula Model formula. The model is built based on the
-#'        covariates in the data object.
-#'
-#' @param paramNames character vector specifying the model parameters that are present in the global model matrix
+#' @param referenceLevels A named list with the reference levels of the factors (named by the factor name)
 #' 
-#' @return A logical vector indicating for each parameter in the model if his reference level has values for the current feature.
+#' @return A logical vector indicating for each parameter (that comes from a factor variable) in the model 
+#' if his reference level has values for the current feature.
 #' 
 #' @rdname checkReference
 #'
diff --git a/man/checkReference.Rd b/man/checkReference.Rd
new file mode 100644
index 0000000..bdc0b13
--- /dev/null
+++ b/man/checkReference.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msqrob-utils.R
+\name{checkReference}
+\alias{checkReference}
+\title{Check for presence of reference levels}
+\usage{
+checkReference(y, data, referenceLevels)
+}
+\arguments{
+\item{y}{A numeric vector with the feature values for each sample.}
+
+\item{data}{A \code{DataFrame} with information on the design. It has
+the same number of rows as the number of columns (samples) of
+\code{y}.}
+
+\item{formula}{Model formula. The model is built based on the
+covariates in the data object.}
+
+\item{paramNames}{character vector specifying the model parameters that are present in the global model matrix}
+}
+\value{
+A logical vector indicating for each parameter in the model if his reference level has values for the current feature.
+}
+\description{
+Check if the reference levels of the factors in the model are present for a specific feature.
+}
diff --git a/man/getReferenceLevels.Rd b/man/getReferenceLevels.Rd
new file mode 100644
index 0000000..ac0a9e7
--- /dev/null
+++ b/man/getReferenceLevels.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msqrob-utils.R
+\name{getReferenceLevels}
+\alias{getReferenceLevels}
+\title{Get reference levels factors}
+\usage{
+getReferenceLevels(dataFrame, formula)
+}
+\arguments{
+\item{dataFrame}{A \code{data.frame} that contains factors}
+
+\item{formula}{A formula object associated with the data frame (the variables in the formula should be present in the data frame)}
+}
+\value{
+A named list with the reference levels of the factors (named by the factor name)
+}
+\description{
+Get the reference levels of the factors of a data frame that are present in a formula
+}
diff --git a/man/hypothesisTest.Rd b/man/hypothesisTest.Rd
index e02d4e1..12ba79e 100644
--- a/man/hypothesisTest.Rd
+++ b/man/hypothesisTest.Rd
@@ -16,7 +16,8 @@ expression analysis}
   adjust.method = "BH",
   modelColumn = "msqrobModels",
   resultsColumnNamePrefix = "",
-  overwrite = FALSE
+  overwrite = FALSE,
+  acceptDifferentReference = FALSE
 )
 
 \S4method{hypothesisTestHurdle}{SummarizedExperiment}(
diff --git a/man/msqrobLmer.Rd b/man/msqrobLmer.Rd
index 7d42f81..c025a3c 100644
--- a/man/msqrobLmer.Rd
+++ b/man/msqrobLmer.Rd
@@ -90,7 +90,7 @@ pe <- aggregateFeatures(pe, i = "peptide", fcol = "Proteins", name = "protein")
 
 # Fit MSqrob model using robust ridge regression upon summarization of
 # peptide intensities into protein expression values
-modelsRidge <- msqrobLmer(assay(pe[["protein"]]), ~condition, data = colData(pe), 
+modelsRidge <- msqrobLmer(assay(pe[["protein"]]), ~condition, data = colData(pe),
                           ridge = TRUE)
 getCoef(modelsRidge[[1]])
 
diff --git a/man/referenceContrast.Rd b/man/referenceContrast.Rd
new file mode 100644
index 0000000..966d2bb
--- /dev/null
+++ b/man/referenceContrast.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msqrob-utils.R
+\name{referenceContrast}
+\alias{referenceContrast}
+\title{Check if the reference levels associated with the contrast are present}
+\usage{
+referenceContrast(referencePresent, L, acceptDifferentReference)
+}
+\arguments{
+\item{referencePresent}{A logical vector indicating for each parameter in the model
+if his reference level has values for the current feature.}
+
+\item{L}{A numeric contrast matrix with rownames that equal the model parameters
+that are involved in the contrasts}
+
+\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve
+parameters with modified reference levels are returned.}
+}
+\value{
+A boolean
+}
+\description{
+Check if the reference levels associated with the contrast are present
+}
diff --git a/man/statModelAccessors.Rd b/man/statModelAccessors.Rd
index 0a2f04e..8568404 100644
--- a/man/statModelAccessors.Rd
+++ b/man/statModelAccessors.Rd
@@ -22,6 +22,7 @@
 \alias{getVar,StatModel-method}
 \alias{getSigma,StatModel-method}
 \alias{getVcovUnscaled,StatModel-method}
+\alias{getReferencePresent,StatModel-method}
 \title{Accessor functions for StatModel class}
 \usage{
 \S4method{getModel}{StatModel}(object)
@@ -43,6 +44,8 @@
 \S4method{getSigma}{StatModel}(object)
 
 \S4method{getVcovUnscaled}{StatModel}(object)
+
+\S4method{getReferencePresent}{StatModel}(object)
 }
 \arguments{
 \item{object}{\code{StatModel} object}
@@ -65,6 +68,8 @@ the empirical Bayes variance estimator}
 \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation}
 \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix
 of the model parameters}
+\item{getReferencePresent(object)}{to get the vector of logicals indicating if the
+reference level associated with a model parameter is present}
 }
 }
 \examples{
diff --git a/man/statModelMethods.Rd b/man/statModelMethods.Rd
index 9eeae08..a4f2449 100644
--- a/man/statModelMethods.Rd
+++ b/man/statModelMethods.Rd
@@ -9,9 +9,9 @@
 \alias{varContrast,StatModel-method}
 \title{Methods for StatModel class}
 \usage{
-\S4method{getContrast}{StatModel}(object, L)
+\S4method{getContrast}{StatModel}(object, L, acceptDifferentReference = FALSE)
 
-\S4method{varContrast}{StatModel}(object, L)
+\S4method{varContrast}{StatModel}(object, L, acceptDifferentReference = FALSE)
 }
 \arguments{
 \item{object}{A list with elements of the class \code{StatModel} that are
@@ -21,6 +21,10 @@ estimated using the \code{\link{msqrob}} function}
 the linear model coefficients to be tested equal to zero.
 The rownames of the matrix should be equal to the names of
 parameters of the model.}
+
+\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve
+parameters with modified reference levels are returned. Watch out putting this
+parameter to TRUE can change the interpretation of the logFC. Default is FALSE.}
 }
 \value{
 A matrix with the calculated contrasts or variance-covariance matrix of contrasts
diff --git a/man/topTable.Rd b/man/topTable.Rd
index 8d3cc76..cc21cc5 100644
--- a/man/topTable.Rd
+++ b/man/topTable.Rd
@@ -4,7 +4,14 @@
 \alias{topFeatures}
 \title{Toplist of DE proteins, peptides or features}
 \usage{
-topFeatures(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1)
+topFeatures(
+  models,
+  contrast,
+  adjust.method = "BH",
+  sort = TRUE,
+  alpha = 1,
+  acceptDifferentReference = FALSE
+)
 }
 \arguments{
 \item{models}{A list with elements of the class \code{StatModel} that are
@@ -27,6 +34,10 @@ to statistical significance.}
 
 \item{alpha}{\code{numeric} specifying the cutoff value for adjusted p-values.
 Only features with lower p-values are listed.}
+
+\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve
+parameters with modified reference levels are returned. Watch out putting this
+parameter to TRUE can change the interpretation of the logFC. Default is FALSE.}
 }
 \value{
 A dataframe with log2 fold changes (logFC), standard errors (se),
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
new file mode 100644
index 0000000..a5d0a94
--- /dev/null
+++ b/tests/testthat/test_msqrob-utils.R
@@ -0,0 +1,65 @@
+.create_minimal_data <- function() {
+    y_no_ref <- c(rep(NA,5), rep(1,10))
+    names(y_no_ref) <- 1:15
+    y_ref <- c(rep(1,5), rep(1,5), rep(1,5))
+    names(y_ref) <- 1:15
+    data_no_ref <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
+        numerical = c(1:5, 1:5, 1:5),
+        row.names = 1:15)
+    form <- formula(~ 1 + condition + numerical, data = data)
+    form_num <- formula(~ 1 + numerical, data = data)
+    return(list(data = data, form = form,
+        form_num = form_num,
+        y_no_ref = y_no_ref,
+        y_ref = y_ref))
+}
+
+test_that("getReferenceLevels", {
+    data <- .create_minimal_data()$data
+    form <- .create_minimal_data()$form
+    form_num <- .create_minimal_data()$form_num
+
+    expect_identical(NULL, msqrob2:::getReferenceLevels(data, form_num))
+
+    referenceCond <- "a"
+    names(referenceCond) <- "condition"
+    expect_identical(referenceCond, msqrob2:::getReferenceLevels(data, form))
+})
+
+test_that("checkReference", {
+    data <- .create_minimal_data()$data
+    y_no_ref <- .create_minimal_data()$y_no_ref
+    y_ref <- .create_minimal_data()$y_ref
+
+    referenceCond <- "a"
+    names(referenceCond) <- "condition"
+
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, referenceCond))
+
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
+    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, referenceCond))
+
+    expect_identical(NULL, msqrob2:::checkReference(data, form, NULL))
+})
+
+test_that("referenceContrast", {
+    L <- matrix(c(1, 0), nrow = 2, byrow = TRUE)
+    colnames(L) <- "conditionb=0"
+    rownames(L) <- c("conditionb", "conditionc")
+
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+
+    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_no_ref, L, FALSE))
+
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
+
+    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_ref, L, FALSE))
+
+    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_no_ref, L, TRUE))
+    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_ref, L, TRUE))
+})
\ No newline at end of file

From dc0795e81edb4dbb7dd471ab198dc13d0e4def68 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Fri, 23 Aug 2024 16:41:41 +0200
Subject: [PATCH 06/13] [TEST] - add tests for msqrobLm and varContrast and
 getContrast

---
 R/StatModel-methods.R                   | 12 ++--
 tests/testthat/test_StatModel-methods.R | 83 +++++++++++++++++++++++++
 tests/testthat/test_msqrob-utils.R      |  4 +-
 tests/testthat/test_msqrob.R            | 28 +++++++++
 4 files changed, 119 insertions(+), 8 deletions(-)
 create mode 100644 tests/testthat/test_StatModel-methods.R
 create mode 100644 tests/testthat/test_msqrob.R

diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R
index 85a43e4..e9fea29 100644
--- a/R/StatModel-methods.R
+++ b/R/StatModel-methods.R
@@ -39,12 +39,12 @@ setMethod(
     "getContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
+        out <- matrix(rep(NA, ncol(L)))
+        rownames(out) <- colnames(L)
         if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
-            return(NA)
+            return(out)
         }
         coefs <- getCoef(object)
-        out <- matrix(rep(NA, ncol(L)))
-        rownames(out) <- colnames(L)
         hlp <- try(t(L) %*% coefs[rownames(L)], silent = TRUE)
         if (!is(hlp, "try-error")) out[] <- hlp
         return(out)
@@ -58,11 +58,11 @@ setMethod(
     "varContrast", "StatModel",
     function(object, L, acceptDifferentReference = FALSE) {
         if (!is(L, "matrix")) L <- as.matrix(L)
-        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
-            return(NA)
-        }
         out <- matrix(NA, ncol(L), ncol(L))
         rownames(out) <- colnames(out) <- colnames(L)
+        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
+            return(out)
+        }
         vcovTmp <- getVcovUnscaled(object) * object@varPosterior
         hlp <- try(t(L) %*% vcovTmp[rownames(L), rownames(L)] %*% L, silent = TRUE)
         if (!is(hlp, "try-error")) out[] <- hlp
diff --git a/tests/testthat/test_StatModel-methods.R b/tests/testthat/test_StatModel-methods.R
new file mode 100644
index 0000000..c74ab05
--- /dev/null
+++ b/tests/testthat/test_StatModel-methods.R
@@ -0,0 +1,83 @@
+.create_minimal_data <- function(){
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+    
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
+
+    vcovu_no_ref <- matrix(c(0.2, -0.2, NA, -0.2, 0.4, NA, NA, NA, NA), nrow = 3, byrow = TRUE)
+    colnames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    rownames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
+
+    vcovu_ref <- matrix(c(0.2, -0.2, -0.2, -0.2, 0.4, -0.2, -0.2, -0.2, 0.6 ), nrow = 3, byrow = TRUE)
+    colnames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    rownames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc")
+
+    stat_model_list <- list(feat1 = StatModel(
+        type = "lm",
+        params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = NA),
+            df.residual = 10, sigma = 0.5,
+            vcovUnscaled = vcovu_no_ref,
+            referencePresent = reference_present_no_ref),
+        varPosterior = 0.1,
+        dfPosterior = 6
+    ), feat2 = StatModel(
+        type = "lm",
+        params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = 3),
+            df.residual = 10, sigma = 0.5,
+            vcovUnscaled = vcovu_ref,
+            referencePresent = reference_present_ref),
+        varPosterior = 0.1,
+        dfPosterior = 6
+    ))
+    return(stat_model_list)
+}
+
+test_that("getContrast", {
+    stat_model_list <- .create_minimal_data()
+    L <- matrix(c(1, 0), nrow = 2, byrow = TRUE)
+    colnames(L) <- "conditionb=0"
+    rownames(L) <- c("conditionb", "conditionc")
+
+    res_NA <- matrix(NA)
+    rownames(res_NA) <- "conditionb=0"
+    res_NA_dbl <- matrix(as.double(NA))
+    rownames(res_NA_dbl) <- "conditionb=0"
+    expect_identical(res_NA, getContrast(stat_model_list$feat1, L))
+    expect_equal(res_NA_dbl, getContrast(stat_model_list$feat1, L, TRUE))
+
+    res_2 <- matrix(2)
+    rownames(res_2) <- "conditionb=0"
+
+    L_no_0 <- L[L != 0, , drop = FALSE] # What occurs in topFeatures
+    expect_identical(res_2, getContrast(stat_model_list$feat1, L_no_0, TRUE))
+
+    expect_identical(res_2, getContrast(stat_model_list$feat2, L))
+    expect_identical(res_2, getContrast(stat_model_list$feat2, L, TRUE))
+})
+
+test_that("varContrast", {
+    stat_model_list <- .create_minimal_data()
+    L <- matrix(c(1, 0), nrow = 2, byrow = TRUE)
+    colnames(L) <- "conditionb=0"
+    rownames(L) <- c("conditionb", "conditionc")
+
+    res_NA <- matrix(NA)
+    rownames(res_NA) <- "conditionb=0"
+    colnames(res_NA) <- "conditionb=0"
+
+    res_NA_dbl <- matrix(as.double(NA))
+    rownames(res_NA_dbl) <- "conditionb=0"
+    colnames(res_NA_dbl) <- "conditionb=0"
+    expect_equal(res_NA, varContrast(stat_model_list$feat1, L))
+    expect_equal(res_NA_dbl, varContrast(stat_model_list$feat1, L, TRUE))
+
+    res_2 <- matrix(0.04)
+    rownames(res_2) <- "conditionb=0"
+    colnames(res_2) <- "conditionb=0"
+    L_no_0 <- L[L != 0, , drop = FALSE] # What occurs in topFeatures
+    expect_equal(res_2, varContrast(stat_model_list$feat1, L_no_0, TRUE))
+
+    expect_equal(res_2, varContrast(stat_model_list$feat2, L))
+    expect_equal(res_2, varContrast(stat_model_list$feat2, L, TRUE))
+})
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index a5d0a94..5016fec 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -3,7 +3,7 @@
     names(y_no_ref) <- 1:15
     y_ref <- c(rep(1,5), rep(1,5), rep(1,5))
     names(y_ref) <- 1:15
-    data_no_ref <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
+    data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
         numerical = c(1:5, 1:5, 1:5),
         row.names = 1:15)
     form <- formula(~ 1 + condition + numerical, data = data)
@@ -42,7 +42,7 @@ test_that("checkReference", {
     names(reference_present_ref) <- c("conditionb", "conditionc")
     expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, referenceCond))
 
-    expect_identical(NULL, msqrob2:::checkReference(data, form, NULL))
+    expect_identical(NULL, msqrob2:::checkReference(data, data, NULL))
 })
 
 test_that("referenceContrast", {
diff --git a/tests/testthat/test_msqrob.R b/tests/testthat/test_msqrob.R
new file mode 100644
index 0000000..85717fb
--- /dev/null
+++ b/tests/testthat/test_msqrob.R
@@ -0,0 +1,28 @@
+.create_minimal_data <- function() {
+    y_no_ref <- c(rep(NA,5), runif(10))
+    y_ref <- c(runif(5), runif(5), runif(5))
+    Y <- matrix(c(y_no_ref, y_ref), nrow = 2, ncol = 15, byrow = TRUE)
+    rownames(Y) <- c("feat1", "feat2")
+    colnames(Y) <- paste0("S", 1:15)
+    data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
+        numerical = c(1:5, 1:5, 1:5),
+        row.names = paste0("S", 1:15))
+    form_cond <- formula(~ 1 + condition, data = data)
+    return(list(data = data, form_cond = form_cond, Y = Y))
+}
+
+test_that("msqrobLm", {
+    set.seed(123)
+    Y <- .create_minimal_data()$Y
+    data <- .create_minimal_data()$data
+    form_cond <- .create_minimal_data()$form_cond
+    msqrobLm_object <- msqrobLm(Y, form_cond, data, robust = FALSE)
+
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+    expect_identical(reference_present_no_ref, msqrobLm_object$feat1@params$referencePresent)
+
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
+    expect_identical(reference_present_ref, msqrobLm_object$feat2@params$referencePresent)
+    })

From 06025a4360b605f8d1d6140d2984cc19188a16fe Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Mon, 26 Aug 2024 09:33:07 +0200
Subject: [PATCH 07/13] [TEST] - add test for topFeatures

---
 R/topFeatures.R                   |  1 -
 tests/testthat/test_topFeatures.R | 60 +++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 tests/testthat/test_topFeatures.R

diff --git a/R/topFeatures.R b/R/topFeatures.R
index 55d1d67..98ead9b 100644
--- a/R/topFeatures.R
+++ b/R/topFeatures.R
@@ -47,7 +47,6 @@
 #' @export
 
 topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) {
-    print(contrast)
     if (is(contrast, "matrix")) {
         if (ncol(contrast) > 1) {
             stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
diff --git a/tests/testthat/test_topFeatures.R b/tests/testthat/test_topFeatures.R
new file mode 100644
index 0000000..a22b57b
--- /dev/null
+++ b/tests/testthat/test_topFeatures.R
@@ -0,0 +1,60 @@
+.create_minimal_data <- function(){
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+    
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
+
+    vcovu_no_ref <- matrix(c(0.2, -0.2, NA, -0.2, 0.4, NA, NA, NA, NA), nrow = 3, byrow = TRUE)
+    colnames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    rownames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
+
+    vcovu_ref <- matrix(c(0.2, -0.2, -0.2, -0.2, 0.4, -0.2, -0.2, -0.2, 0.6 ), nrow = 3, byrow = TRUE)
+    colnames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    rownames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc")
+
+    stat_model_list <- list(feat1 = StatModel(
+        type = "lm",
+        params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = NA),
+            df.residual = 10, sigma = 0.5,
+            vcovUnscaled = vcovu_no_ref,
+            referencePresent = reference_present_no_ref),
+        varPosterior = 0.1,
+        dfPosterior = 6
+    ), feat2 = StatModel(
+        type = "lm",
+        params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = 3),
+            df.residual = 10, sigma = 0.5,
+            vcovUnscaled = vcovu_ref,
+            referencePresent = reference_present_ref),
+        varPosterior = 0.1,
+        dfPosterior = 6
+    ))
+    return(stat_model_list)
+}
+
+
+testthat("topFeatures", {
+    stat_model_list <- .create_minimal_data()
+    L <- matrix(c(1, 0), nrow = 2, byrow = TRUE)
+    colnames(L) <- "conditionb=0"
+    rownames(L) <- c("conditionb", "conditionc")
+
+    base_df <- data.frame("logFC" = c(2,2), "se" = c(0.2, 0.2),
+        "df" = c(6,6), "t" = c(10,10),
+        "pval" = c(5.791983e-05, 5.791983e-05),
+        "adjPval" = c(5.791983e-05, 5.791983e-05),
+        row.names = c("feat1", "feat2"))
+    accept_df <- base_df
+    accept_df$DifferentReference <- c(TRUE, FALSE)
+    no_accept_df <- base_df[c(2,1),]
+    no_accept_df["feat1", ] <- NA
+    no_accept_df["feat1", "df"] <- 6
+
+    expect_equal(no_accept_df,
+        topFeatures(stat_model_list, L),
+        tolerance = 1e-3)
+    expect_equal(accept_df,
+        topFeatures(stat_model_list, L, acceptDifferentReference = TRUE),
+        tolerance = 1e-3)
+})
\ No newline at end of file

From b018bb0b6b7e87c510fa6c02d6a81f2f960424d1 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Wed, 28 Aug 2024 17:42:19 +0200
Subject: [PATCH 08/13] [FEAT] - Check that global design matrix is full rank +
 handle case in which the model has no intercept

---
 R/msqrob-utils.R                   | 47 +++++++++++++++++++-----------
 R/msqrob.R                         |  6 ++--
 tests/testthat/test_msqrob-utils.R | 27 +++++++++++------
 tests/testthat/test_msqrob.R       | 17 +++++++----
 tests/testthat/test_topFeatures.R  |  2 +-
 5 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 8effebc..2d22614 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -103,25 +103,31 @@ makeContrast <- function(contrasts, parameterNames) {
 #' 
 #' @rdname checkReference
 #'
-checkReference <- function(y, data, referenceLevels) {
-  subset <- droplevels(data[!is.na(y), names(referenceLevels), drop = FALSE])
-  referencePresent <- list()
-  paramToVar <- list()
-  for (x in names(referenceLevels)){
-    paramNames <- paste0(x, levels(data[, x])[-1])
-    for (param in paramNames){
+checkReference <- function(y, data, paramNames, formula) {
+    referenceLevels <- getReferenceLevels(data, formula)
+    factor <- names(referenceLevels)
+    subset <- droplevels(data[!is.na(y), factor, drop = FALSE])
+    referencePresent <- list()
+    for (param in paramNames) {
       referencePresent[[param]] <- NA
     }
-    paramToVar[[x]] <- paramNames
-  }
-  for (x in names(referenceLevels)){
-    if (!(referenceLevels[[x]] %in% levels(subset[, x]))){
-        referencePresent[names(referencePresent) %in% paramToVar[[x]]] <- FALSE
-    } else {
-        referencePresent[names(referencePresent) %in% paramToVar[[x]]] <- TRUE
+    paramSplit <- list()
+    for (x in factor) {
+        paramSplit[[x]] <- paramNames[grep(x, paramNames)]
     }
-  }
-  return(unlist(referencePresent))
+    for (x in factor){
+        noIntercept <- all(paste0(x, levels(as.factor(data[, x]))) %in% paramNames)
+        if (noIntercept || (referenceLevels[[x]] %in% levels(subset[, x]))) {
+            for (param in paramSplit[[x]]) {
+                referencePresent[[param]] <- TRUE
+            }
+        } else {
+            for (param in paramSplit[[x]]) {
+                referencePresent[[param]] <- FALSE
+            }
+        }
+    }
+    return(unlist(referencePresent))
 }
 
 #' Get reference levels factors
@@ -139,7 +145,10 @@ getReferenceLevels <- function(dataFrame, formula){
     referenceLevels <- list()
     vars <- all.vars(formula)
     for (x in vars){
-        if (is.factor(dataFrame[, x])){
+        if (is.factor(dataFrame[, x]) || is.character(dataFrame[, x])) {
+            if (is.character(dataFrame[, x])) {
+                dataFrame[, x] <- as.factor(dataFrame[, x])
+            }
             referenceLevels[[x]] <- levels(dataFrame[, x])[1]
         }
     }
@@ -169,6 +178,10 @@ referenceContrast <- function(referencePresent, L, acceptDifferentReference) {
   return(FALSE)
 }
 
+checkFullRank <- function(modelMatrix) {
+    return(qr(modelMatrix)$rank == ncol(modelMatrix))
+}
+
 ##### None exported functions from multcomp package is included here to
 ##### During R and Bioc checks
 
diff --git a/R/msqrob.R b/R/msqrob.R
index 1dead5f..a7e17b8 100644
--- a/R/msqrob.R
+++ b/R/msqrob.R
@@ -57,15 +57,17 @@ msqrobLm <- function(y,
     robust = TRUE,
     maxitRob = 5) {
     myDesign <- model.matrix(formula, data)
+    missingSamples <- colSums(!is.na(y)) == 0
+    subsettedDesign <- myDesign[!missingSamples, , drop = FALSE]
+    stopifnot("The provided model must be full rank" = checkFullRank(subsettedDesign))
     paramNames <- colnames(myDesign)
-    referenceLevels <- getReferenceLevels(data, formula)
     models <- apply(y, 1,
         function(y, design) {
             ## computatability check
             obs <- is.finite(y)
             type <- "fitError"
 
-            referencePresent <- checkReference(y, data, referenceLevels)
+            referencePresent <- checkReference(y, data, paramNames, formula)
             model <- list(
                 referencePresent = referencePresent, coefficients = NA,
                 vcovUnscaled = NA, sigma = NA,
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index 5016fec..ffcfba9 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -8,8 +8,10 @@
         row.names = 1:15)
     form <- formula(~ 1 + condition + numerical, data = data)
     form_num <- formula(~ 1 + numerical, data = data)
+    form_no_intercept <- formula(~ -1 + condition + numerical, data = data)
     return(list(data = data, form = form,
         form_num = form_num,
+        form_no_intercept = form_no_intercept,
         y_no_ref = y_no_ref,
         y_ref = y_ref))
 }
@@ -30,19 +32,26 @@ test_that("checkReference", {
     data <- .create_minimal_data()$data
     y_no_ref <- .create_minimal_data()$y_no_ref
     y_ref <- .create_minimal_data()$y_ref
+    formula <- .create_minimal_data()$form
+    formula_no_intercept <- .create_minimal_data()$form_no_intercept
+    paramNames <- colnames(model.matrix(formula, data = data))
+    paramNames_no_intercept <- colnames(model.matrix(formula_no_intercept, data = data))
 
-    referenceCond <- "a"
-    names(referenceCond) <- "condition"
+    reference_present_no_ref <- c(NA, FALSE, FALSE, NA)
+    names(reference_present_no_ref) <- c("(Intercept)", "conditionb", "conditionc", "numerical")
+    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, paramNames, formula))
 
-    reference_present_no_ref <- c(FALSE, FALSE)
-    names(reference_present_no_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, referenceCond))
+    reference_present_no_int <- c(TRUE, TRUE, TRUE, NA)
+    names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc", "numerical")
+    expect_identical(reference_present_no_int, msqrob2:::checkReference(y_no_ref, data, paramNames_no_intercept, formula_no_intercept))
 
-    reference_present_ref <- c(TRUE, TRUE)
-    names(reference_present_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, referenceCond))
+    reference_present_ref <- c(NA, TRUE, TRUE, NA)
+    names(reference_present_ref) <- c("(Intercept)", "conditionb", "conditionc", "numerical")
+    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, paramNames, formula))
 
-    expect_identical(NULL, msqrob2:::checkReference(data, data, NULL))
+    reference_no_var <- c(NA)
+    names(reference_no_var) <- c("(Intercept)")
+    expect_identical(reference_no_var, msqrob2:::checkReference(y_ref, data, c("(Intercept)"), as.formula(~1)))
 })
 
 test_that("referenceContrast", {
diff --git a/tests/testthat/test_msqrob.R b/tests/testthat/test_msqrob.R
index 85717fb..441e354 100644
--- a/tests/testthat/test_msqrob.R
+++ b/tests/testthat/test_msqrob.R
@@ -2,27 +2,32 @@
     y_no_ref <- c(rep(NA,5), runif(10))
     y_ref <- c(runif(5), runif(5), runif(5))
     Y <- matrix(c(y_no_ref, y_ref), nrow = 2, ncol = 15, byrow = TRUE)
+    Y_defficient <- matrix(c(y_no_ref, y_no_ref), nrow = 2, ncol = 15, byrow = TRUE)
     rownames(Y) <- c("feat1", "feat2")
     colnames(Y) <- paste0("S", 1:15)
     data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
         numerical = c(1:5, 1:5, 1:5),
         row.names = paste0("S", 1:15))
     form_cond <- formula(~ 1 + condition, data = data)
-    return(list(data = data, form_cond = form_cond, Y = Y))
+    return(list(data = data, form_cond = form_cond, Y = Y, Y_defficient = Y_defficient))
 }
 
 test_that("msqrobLm", {
     set.seed(123)
-    Y <- .create_minimal_data()$Y
+    Y_defficient <- .create_minimal_data()$Y_defficient
     data <- .create_minimal_data()$data
     form_cond <- .create_minimal_data()$form_cond
+    expect_error(msqrobLm(Y_defficient, form_cond, data, robust = FALSE),
+        "The provided model must be full rank")
+
+    Y <- .create_minimal_data()$Y
     msqrobLm_object <- msqrobLm(Y, form_cond, data, robust = FALSE)
 
-    reference_present_no_ref <- c(FALSE, FALSE)
-    names(reference_present_no_ref) <- c("conditionb", "conditionc")
+    reference_present_no_ref <- c(NA, FALSE, FALSE)
+    names(reference_present_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
     expect_identical(reference_present_no_ref, msqrobLm_object$feat1@params$referencePresent)
 
-    reference_present_ref <- c(TRUE, TRUE)
-    names(reference_present_ref) <- c("conditionb", "conditionc")
+    reference_present_ref <- c(NA, TRUE, TRUE)
+    names(reference_present_ref) <- c("(Intercept)", "conditionb", "conditionc")
     expect_identical(reference_present_ref, msqrobLm_object$feat2@params$referencePresent)
     })
diff --git a/tests/testthat/test_topFeatures.R b/tests/testthat/test_topFeatures.R
index a22b57b..79b9827 100644
--- a/tests/testthat/test_topFeatures.R
+++ b/tests/testthat/test_topFeatures.R
@@ -34,7 +34,7 @@
 }
 
 
-testthat("topFeatures", {
+test_that("topFeatures", {
     stat_model_list <- .create_minimal_data()
     L <- matrix(c(1, 0), nrow = 2, byrow = TRUE)
     colnames(L) <- "conditionb=0"

From 6a41212f5e7c1c6ca0846eb05d7bb551cd610b7d Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 29 Aug 2024 12:24:13 +0200
Subject: [PATCH 09/13] [FEAT] - handling of interaction models

---
 R/msqrob-utils.R                   | 51 +++++++++++++++++++++---------
 tests/testthat/test_msqrob-utils.R | 25 ++++++++++-----
 tests/testthat/test_msqrob.R       |  8 ++---
 3 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 2d22614..770f280 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -107,26 +107,19 @@ checkReference <- function(y, data, paramNames, formula) {
     referenceLevels <- getReferenceLevels(data, formula)
     factor <- names(referenceLevels)
     subset <- droplevels(data[!is.na(y), factor, drop = FALSE])
-    referencePresent <- list()
-    for (param in paramNames) {
-      referencePresent[[param]] <- NA
-    }
     paramSplit <- list()
     for (x in factor) {
         paramSplit[[x]] <- paramNames[grep(x, paramNames)]
     }
-    for (x in factor){
+    factor_status <- unlist(lapply(factor, function(x) {
         noIntercept <- all(paste0(x, levels(as.factor(data[, x]))) %in% paramNames)
-        if (noIntercept || (referenceLevels[[x]] %in% levels(subset[, x]))) {
-            for (param in paramSplit[[x]]) {
-                referencePresent[[param]] <- TRUE
-            }
-        } else {
-            for (param in paramSplit[[x]]) {
-                referencePresent[[param]] <- FALSE
-            }
-        }
-    }
+        refPres <- referenceLevels[[x]] %in% levels(subset[, x])
+        return(noIntercept || refPres)
+    }))
+    names(factor_status) <- factor
+
+    referencePresent <- propagateFalseStatus(paramSplit, factor_status)
+
     return(unlist(referencePresent))
 }
 
@@ -182,9 +175,37 @@ checkFullRank <- function(modelMatrix) {
     return(qr(modelMatrix)$rank == ncol(modelMatrix))
 }
 
+
 ##### None exported functions from multcomp package is included here to
 ##### During R and Bioc checks
 
+propagateFalseStatus <- function(vectors, statuses) {
+    repeat {
+        false_elements <- unique(unlist(vectors[statuses == FALSE]))
+
+        new_statuses <- map_lgl(seq_along(vectors), function(i) {
+            if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) {
+                return(FALSE)
+            } else {
+                return(statuses[i])
+            }
+        })
+
+        if (all(new_statuses == statuses)) break
+
+        statuses <- new_statuses
+    }
+
+    all_elements <- unique(unlist(vectors))
+    element_status <- setNames(rep(TRUE, length(all_elements)), all_elements)
+
+    for (i in seq_along(vectors)) {
+        element_status[vectors[[i]]] <- statuses[i]
+    }
+
+    return(element_status)
+}
+
 
 .chrlinfct2matrix <- function(ex, var) {
     if (!is.character(ex)) {
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index ffcfba9..5f218f0 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -4,14 +4,17 @@
     y_ref <- c(rep(1,5), rep(1,5), rep(1,5))
     names(y_ref) <- 1:15
     data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))),
+        condition2 = as.factor(rep(letters[1:5], 3)),
         numerical = c(1:5, 1:5, 1:5),
         row.names = 1:15)
     form <- formula(~ 1 + condition + numerical, data = data)
     form_num <- formula(~ 1 + numerical, data = data)
     form_no_intercept <- formula(~ -1 + condition + numerical, data = data)
+    form_interaction <- formula(~ 1 + condition*condition2, data = data)
     return(list(data = data, form = form,
         form_num = form_num,
         form_no_intercept = form_no_intercept,
+        form_interaction = form_interaction,
         y_no_ref = y_no_ref,
         y_ref = y_ref))
 }
@@ -34,24 +37,30 @@ test_that("checkReference", {
     y_ref <- .create_minimal_data()$y_ref
     formula <- .create_minimal_data()$form
     formula_no_intercept <- .create_minimal_data()$form_no_intercept
+    formula_interaction <- .create_minimal_data()$form_interaction
+
     paramNames <- colnames(model.matrix(formula, data = data))
     paramNames_no_intercept <- colnames(model.matrix(formula_no_intercept, data = data))
+    paramNames_interaction <- colnames(model.matrix(formula_interaction, data = data))
 
-    reference_present_no_ref <- c(NA, FALSE, FALSE, NA)
-    names(reference_present_no_ref) <- c("(Intercept)", "conditionb", "conditionc", "numerical")
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
     expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, paramNames, formula))
 
-    reference_present_no_int <- c(TRUE, TRUE, TRUE, NA)
-    names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc", "numerical")
+    reference_present_no_int <- c(TRUE, TRUE, TRUE)
+    names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc")
     expect_identical(reference_present_no_int, msqrob2:::checkReference(y_no_ref, data, paramNames_no_intercept, formula_no_intercept))
 
-    reference_present_ref <- c(NA, TRUE, TRUE, NA)
-    names(reference_present_ref) <- c("(Intercept)", "conditionb", "conditionc", "numerical")
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
     expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, paramNames, formula))
 
-    reference_no_var <- c(NA)
-    names(reference_no_var) <- c("(Intercept)")
+    reference_no_var <- logical(0)
     expect_identical(reference_no_var, msqrob2:::checkReference(y_ref, data, c("(Intercept)"), as.formula(~1)))
+
+    reference_interaction <- rep(FALSE, length(paramNames_interaction[-1]))
+    names(reference_interaction) <- paramNames_interaction[-1]
+    expect_identical(reference_interaction, msqrob2:::checkReference(y_no_ref, data, paramNames_interaction, formula_interaction))
 })
 
 test_that("referenceContrast", {
diff --git a/tests/testthat/test_msqrob.R b/tests/testthat/test_msqrob.R
index 441e354..b8fc6f6 100644
--- a/tests/testthat/test_msqrob.R
+++ b/tests/testthat/test_msqrob.R
@@ -23,11 +23,11 @@ test_that("msqrobLm", {
     Y <- .create_minimal_data()$Y
     msqrobLm_object <- msqrobLm(Y, form_cond, data, robust = FALSE)
 
-    reference_present_no_ref <- c(NA, FALSE, FALSE)
-    names(reference_present_no_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    reference_present_no_ref <- c(FALSE, FALSE)
+    names(reference_present_no_ref) <- c("conditionb", "conditionc")
     expect_identical(reference_present_no_ref, msqrobLm_object$feat1@params$referencePresent)
 
-    reference_present_ref <- c(NA, TRUE, TRUE)
-    names(reference_present_ref) <- c("(Intercept)", "conditionb", "conditionc")
+    reference_present_ref <- c(TRUE, TRUE)
+    names(reference_present_ref) <- c("conditionb", "conditionc")
     expect_identical(reference_present_ref, msqrobLm_object$feat2@params$referencePresent)
     })

From f3c691fae23535d38e35c656b1f36b3f3ab6ed51 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 29 Aug 2024 14:29:53 +0200
Subject: [PATCH 10/13] [FIX] - fixing some issues revealed by the cptac
 vignette

---
 R/StatModel-methods.R                   |  4 ++--
 R/hypothesisTest-methods.R              | 19 +++++++++++--------
 R/msqrob-utils.R                        |  5 +----
 R/topFeatures.R                         |  5 ++---
 tests/testthat/test_StatModel-methods.R |  2 +-
 tests/testthat/test_msqrob-utils.R      |  8 ++++----
 6 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R
index e9fea29..80c9995 100644
--- a/R/StatModel-methods.R
+++ b/R/StatModel-methods.R
@@ -41,7 +41,7 @@ setMethod(
         if (!is(L, "matrix")) L <- as.matrix(L)
         out <- matrix(rep(NA, ncol(L)))
         rownames(out) <- colnames(L)
-        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
+        if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
             return(out)
         }
         coefs <- getCoef(object)
@@ -60,7 +60,7 @@ setMethod(
         if (!is(L, "matrix")) L <- as.matrix(L)
         out <- matrix(NA, ncol(L), ncol(L))
         rownames(out) <- colnames(out) <- colnames(L)
-        if (referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
+        if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
             return(out)
         }
         vcovTmp <- getVcovUnscaled(object) * object@varPosterior
diff --git a/R/hypothesisTest-methods.R b/R/hypothesisTest-methods.R
index 314756e..bd95f47 100644
--- a/R/hypothesisTest-methods.R
+++ b/R/hypothesisTest-methods.R
@@ -149,7 +149,8 @@ setMethod(
     adjust.method = "BH",
     modelColumn = "msqrobHurdle",
     resultsColumnNamePrefix = "hurdle_",
-    overwrite = FALSE) {
+    overwrite = FALSE,
+    acceptDifferentReference = FALSE) {
         if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of the SummarizedExperiment")
         if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults"
         if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast))
@@ -158,8 +159,8 @@ setMethod(
         {
             contrHlp <- contrast[, j]
             names(contrHlp) <- rownames(contrast)
-            intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
-            countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
+            intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
+            countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
 
             sam <- cbind(
                 intensityComponent[, seq_len(5)],
@@ -205,7 +206,8 @@ setMethod(
     adjust.method = "BH",
     modelColumn = "msqrobModels",
     resultsColumnNamePrefix = "",
-    overwrite = FALSE) {
+    overwrite = FALSE,
+    acceptDifferentReference = FALSE) {
         if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i)
         if (!(modelColumn %in% colnames(rowData(object[[i]])))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of assay ", i, "of the QFeatures object.")
         if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults"
@@ -215,7 +217,7 @@ setMethod(
         {
             contrHlp <- contrast[, j]
             names(contrHlp) <- rownames(contrast)
-            rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
+            rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
         }
         return(object)
     }
@@ -232,7 +234,8 @@ setMethod(
     adjust.method = "BH",
     modelColumn = "msqrobHurdle",
     resultsColumnNamePrefix = "hurdle_",
-    overwrite = FALSE) {
+    overwrite = FALSE,
+    acceptDifferentReference = FALSE) {
         if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i)
         if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object[[i]]))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of assay ", i, "of the QFeatures object.")
         if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults"
@@ -242,8 +245,8 @@ setMethod(
         {
             contrHlp <- contrast[, j]
             names(contrHlp) <- rownames(contrast)
-            intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
-            countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
+            intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
+            countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
 
             sam <- cbind(
                 intensityComponent[, seq_len(5)],
diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 770f280..6247790 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -165,10 +165,7 @@ getReferenceLevels <- function(dataFrame, formula){
 #' @rdname referenceContrast
 #' 
 referenceContrast <- function(referencePresent, L, acceptDifferentReference) {
-  if (!acceptDifferentReference && any(!referencePresent[rownames(L)])) {
-    return(TRUE)
-  }
-  return(FALSE)
+  acceptDifferentReference || is.null(referencePresent) || !any(!referencePresent[rownames(L)])
 }
 
 checkFullRank <- function(modelMatrix) {
diff --git a/R/topFeatures.R b/R/topFeatures.R
index 98ead9b..2bb4131 100644
--- a/R/topFeatures.R
+++ b/R/topFeatures.R
@@ -47,10 +47,9 @@
 #' @export
 
 topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) {
-    if (is(contrast, "matrix")) {
-        if (ncol(contrast) > 1) {
+    contrast  <- as.matrix(contrast)
+    if (ncol(contrast) > 1) {
             stop("Argument contrast is matrix with more than one column, only one contrast is allowed")
-        }
     }
     contrast <- contrast[contrast != 0, , drop = FALSE]
     logFC <- vapply(models,
diff --git a/tests/testthat/test_StatModel-methods.R b/tests/testthat/test_StatModel-methods.R
index c74ab05..cb32cfd 100644
--- a/tests/testthat/test_StatModel-methods.R
+++ b/tests/testthat/test_StatModel-methods.R
@@ -43,7 +43,7 @@ test_that("getContrast", {
     rownames(res_NA) <- "conditionb=0"
     res_NA_dbl <- matrix(as.double(NA))
     rownames(res_NA_dbl) <- "conditionb=0"
-    expect_identical(res_NA, getContrast(stat_model_list$feat1, L))
+    expect_equal(res_NA, getContrast(stat_model_list$feat1, L))
     expect_equal(res_NA_dbl, getContrast(stat_model_list$feat1, L, TRUE))
 
     res_2 <- matrix(2)
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index 5f218f0..5aa6721 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -71,13 +71,13 @@ test_that("referenceContrast", {
     reference_present_no_ref <- c(FALSE, FALSE)
     names(reference_present_no_ref) <- c("conditionb", "conditionc")
 
-    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_no_ref, L, FALSE))
+    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_no_ref, L, FALSE))
 
     reference_present_ref <- c(TRUE, TRUE)
     names(reference_present_ref) <- c("conditionb", "conditionc")
 
-    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_ref, L, FALSE))
+    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_ref, L, FALSE))
 
-    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_no_ref, L, TRUE))
-    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_ref, L, TRUE))
+    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_no_ref, L, TRUE))
+    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_ref, L, TRUE))
 })
\ No newline at end of file

From a97a4e5021bf49807b307cf9134156a32b261c0a Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 29 Aug 2024 14:54:16 +0200
Subject: [PATCH 11/13] [TEST] - test for propagateFalseStatus + refactoring

---
 R/msqrob-utils.R                   | 40 +++++++++++++++++++-----------
 tests/testthat/test_msqrob-utils.R | 23 +++++++++++++++++
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 6247790..71be021 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -177,29 +177,39 @@ checkFullRank <- function(modelMatrix) {
 ##### During R and Bioc checks
 
 propagateFalseStatus <- function(vectors, statuses) {
-    repeat {
+    # Initialize a flag to track if any status has changed
+    has_changed <- TRUE
+    
+    # Use a while loop that runs until no statuses change
+    while (has_changed) {
+        # Collect all elements from vectors marked as FALSE
         false_elements <- unique(unlist(vectors[statuses == FALSE]))
-
-        new_statuses <- map_lgl(seq_along(vectors), function(i) {
-            if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) {
-                return(FALSE)
-            } else {
-                return(statuses[i])
-            }
-        })
-
-        if (all(new_statuses == statuses)) break
-
+        
+        # Initialize new statuses as the current statuses
+        new_statuses <- statuses
+        
+        # Check each vector and update status if it contains any false element
+        for (i in seq_along(vectors)) {
+        if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) {
+            new_statuses[i] <- FALSE  # Change status to FALSE
+        }
+        }
+        
+        # Check if any status has changed
+        has_changed <- !all(new_statuses == statuses)
+        
+        # Update statuses with new values
         statuses <- new_statuses
     }
-
+    # Create a named vector of unique elements with their final statuses
     all_elements <- unique(unlist(vectors))
     element_status <- setNames(rep(TRUE, length(all_elements)), all_elements)
-
+    
+    # Set the final status of each element based on the propagated statuses
     for (i in seq_along(vectors)) {
         element_status[vectors[[i]]] <- statuses[i]
     }
-
+    
     return(element_status)
 }
 
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index 5aa6721..9d34ce5 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -80,4 +80,27 @@ test_that("referenceContrast", {
 
     expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_no_ref, L, TRUE))
     expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_ref, L, TRUE))
+})
+
+test_that("propagateFalseStatus", {
+    vector1 <- c("A", "B", "C")
+    vector2 <- c("B", "D", "E")
+    vector3 <- c("E", "F", "G")
+    vector4 <- c("X", "Y", "Z")
+
+    statuses1 <- c(TRUE, FALSE, TRUE, TRUE)
+    statuses2 <- c(TRUE, TRUE, TRUE, TRUE)
+    statuses3 <- c(FALSE, TRUE, TRUE, FALSE)
+
+    vectors <- list(vector1, vector2, vector3, vector4)
+    nam <- c("A", "B", "C", "D", "E", "F", "G", "X", "Y", "Z")
+    exp_status1 <- c(rep(FALSE, 7), rep(TRUE, 3))
+    names(exp_status1) <- nam
+    exp_status2 <- c(rep(TRUE, 10))
+    names(exp_status2) <- nam
+    exp_status3 <- c(rep(FALSE, 10))
+    names(exp_status3) <- nam
+    expect_identical(exp_status1, msqrob2:::propagateFalseStatus(vectors, statuses1))
+    expect_identical(exp_status2, msqrob2:::propagateFalseStatus(vectors, statuses2))
+    expect_identical(exp_status3, msqrob2:::propagateFalseStatus(vectors, statuses3))
 })
\ No newline at end of file

From bed5c34ba1115bf1f24511977a9d4343da26e58e Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 29 Aug 2024 16:01:19 +0200
Subject: [PATCH 12/13] [OPTI] - optimization of checkReference

---
 R/msqrob-utils.R                   | 35 ++++++++++++++++++++++--------
 R/msqrob.R                         |  4 ++--
 tests/testthat/test_msqrob-utils.R | 12 +++++-----
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index 71be021..dd41ebd 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -103,20 +103,24 @@ makeContrast <- function(contrasts, parameterNames) {
 #' 
 #' @rdname checkReference
 #'
-checkReference <- function(y, data, paramNames, formula) {
-    referenceLevels <- getReferenceLevels(data, formula)
-    factor <- names(referenceLevels)
-    subset <- droplevels(data[!is.na(y), factor, drop = FALSE])
+checkReference <- function(y, data, paramNames, factorReference) {
+    if (length(factorReference) == 0) {
+        return(logical(0))
+    }
+    factors <- factorReference
+    subset <- droplevels(data[!is.na(y), names(factors), drop = FALSE])
     paramSplit <- list()
-    for (x in factor) {
+    for (x in names(factors)) {
         paramSplit[[x]] <- paramNames[grep(x, paramNames)]
     }
-    factor_status <- unlist(lapply(factor, function(x) {
-        noIntercept <- all(paste0(x, levels(as.factor(data[, x]))) %in% paramNames)
-        refPres <- referenceLevels[[x]] %in% levels(subset[, x])
+    factor_status <- unlist(lapply(names(factors), function(x) {
+        factorLevels <- factors[[x]]
+        referenceRef <- factorLevels[1]
+        noIntercept <- all(paste0(x, factorLevels) %in% paramNames)
+        refPres <- referenceRef %in% levels(subset[, x])
         return(noIntercept || refPres)
     }))
-    names(factor_status) <- factor
+    names(factor_status) <- names(factors)
 
     referencePresent <- propagateFalseStatus(paramSplit, factor_status)
 
@@ -148,6 +152,19 @@ getReferenceLevels <- function(dataFrame, formula){
     return(unlist(referenceLevels))
 }
 
+getFormulaFactors <- function(formula, dataFrame) {
+    vars <- all.vars(formula)
+    if(length(vars) == 0) {
+        return(character(0))
+    }
+    nam <- vars[sapply(vars, function(x) is.factor(dataFrame[, x]) || is.character(dataFrame[, x]) )]
+    res <- lapply(nam, function(x) {
+        levels(as.factor(dataFrame[, x]))
+    })
+    names(res) <- nam
+    return(res)
+}
+
 #' Check if the reference levels associated with the contrast are present
 #' @description Check if the reference levels associated with the contrast are present
 #'
diff --git a/R/msqrob.R b/R/msqrob.R
index a7e17b8..6a844bb 100644
--- a/R/msqrob.R
+++ b/R/msqrob.R
@@ -61,13 +61,13 @@ msqrobLm <- function(y,
     subsettedDesign <- myDesign[!missingSamples, , drop = FALSE]
     stopifnot("The provided model must be full rank" = checkFullRank(subsettedDesign))
     paramNames <- colnames(myDesign)
+    factorLevels <- getFormulaFactors(formula, data)
     models <- apply(y, 1,
         function(y, design) {
             ## computatability check
             obs <- is.finite(y)
             type <- "fitError"
-
-            referencePresent <- checkReference(y, data, paramNames, formula)
+            referencePresent <- checkReference(y, data, paramNames, factorLevels)
             model <- list(
                 referencePresent = referencePresent, coefficients = NA,
                 vcovUnscaled = NA, sigma = NA,
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index 9d34ce5..76b36b9 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -43,24 +43,26 @@ test_that("checkReference", {
     paramNames_no_intercept <- colnames(model.matrix(formula_no_intercept, data = data))
     paramNames_interaction <- colnames(model.matrix(formula_interaction, data = data))
 
+    factorVars <- list(condition = c("a", "b", "c"))
+
     reference_present_no_ref <- c(FALSE, FALSE)
     names(reference_present_no_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, paramNames, formula))
+    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, paramNames, factorVars))
 
     reference_present_no_int <- c(TRUE, TRUE, TRUE)
     names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc")
-    expect_identical(reference_present_no_int, msqrob2:::checkReference(y_no_ref, data, paramNames_no_intercept, formula_no_intercept))
+    expect_identical(reference_present_no_int, msqrob2:::checkReference(y_no_ref, data, paramNames_no_intercept, factorVars))
 
     reference_present_ref <- c(TRUE, TRUE)
     names(reference_present_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, paramNames, formula))
+    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, paramNames, factorVars))
 
     reference_no_var <- logical(0)
-    expect_identical(reference_no_var, msqrob2:::checkReference(y_ref, data, c("(Intercept)"), as.formula(~1)))
+    expect_identical(reference_no_var, msqrob2:::checkReference(y_ref, data, c("(Intercept)"), character(0)))
 
     reference_interaction <- rep(FALSE, length(paramNames_interaction[-1]))
     names(reference_interaction) <- paramNames_interaction[-1]
-    expect_identical(reference_interaction, msqrob2:::checkReference(y_no_ref, data, paramNames_interaction, formula_interaction))
+    expect_identical(reference_interaction, msqrob2:::checkReference(y_no_ref, data, paramNames_interaction, factorVars))
 })
 
 test_that("referenceContrast", {

From bddff7931036eacec1b9b320b46274d14bc97349 Mon Sep 17 00:00:00 2001
From: leopoldguyot <leopold.guyot13@gmail.com>
Date: Thu, 29 Aug 2024 16:36:13 +0200
Subject: [PATCH 13/13] [TEST + DOC] - tests and doc for getFormulaFactors

---
 R/msqrob-utils.R                   | 44 ++++++++++++------------------
 man/checkReference.Rd              | 11 ++++----
 man/getFormulaFactors.Rd           | 21 ++++++++++++++
 man/getReferenceLevels.Rd          | 19 -------------
 man/hypothesisTest.Rd              |  9 ++++--
 tests/testthat/test_msqrob-utils.R | 42 +++++++++++++++-------------
 6 files changed, 74 insertions(+), 72 deletions(-)
 create mode 100644 man/getFormulaFactors.Rd
 delete mode 100644 man/getReferenceLevels.Rd

diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R
index dd41ebd..d29b751 100644
--- a/R/msqrob-utils.R
+++ b/R/msqrob-utils.R
@@ -96,18 +96,21 @@ makeContrast <- function(contrasts, parameterNames) {
 #'        the same number of rows as the number of columns (samples) of
 #'        `y`.
 #' 
-#' @param referenceLevels A named list with the reference levels of the factors (named by the factor name)
+#' @param paramNames A `character` vector with the names of the parameters in the model.
+#' 
+#' @param factorLevels A list with the levels of the factors in the model. The names of the list
+#'       should correspond to the factor variables present in the model.
 #' 
 #' @return A logical vector indicating for each parameter (that comes from a factor variable) in the model 
 #' if his reference level has values for the current feature.
 #' 
 #' @rdname checkReference
 #'
-checkReference <- function(y, data, paramNames, factorReference) {
-    if (length(factorReference) == 0) {
+checkReference <- function(y, data, paramNames, factorLevels) {
+    if (length(factorLevels) == 0) {
         return(logical(0))
     }
-    factors <- factorReference
+    factors <- factorLevels
     subset <- droplevels(data[!is.na(y), names(factors), drop = FALSE])
     paramSplit <- list()
     for (x in names(factors)) {
@@ -127,31 +130,20 @@ checkReference <- function(y, data, paramNames, factorReference) {
     return(unlist(referencePresent))
 }
 
-#' Get reference levels factors
-#' @description Get the reference levels of the factors of a data frame that are present in a formula
-#'
-#' @param dataFrame A `data.frame` that contains factors
+#' Get the levels of the factors in the model
 #' 
-#' @param formula A formula object associated with the data frame (the variables in the formula should be present in the data frame)
-#'
-#' @return A named list with the reference levels of the factors (named by the factor name)
+#' @description Get the levels of the factors in the model associated with the factors
+#'            names.
 #' 
-#' @rdname getReferenceLevels
+#' @param data A `DataFrame` with information on the design. Contains the variables that are used in the model.
+#' 
+#' @param formula A `formula` object that specifies the model.
+#' 
+#' @return A list with the levels of the factors in the model. The names of the list
+#'        correspond to the factor variables present in the model.
+#' 
+#' @rdname getFormulaFactors
 #' 
-getReferenceLevels <- function(dataFrame, formula){
-    referenceLevels <- list()
-    vars <- all.vars(formula)
-    for (x in vars){
-        if (is.factor(dataFrame[, x]) || is.character(dataFrame[, x])) {
-            if (is.character(dataFrame[, x])) {
-                dataFrame[, x] <- as.factor(dataFrame[, x])
-            }
-            referenceLevels[[x]] <- levels(dataFrame[, x])[1]
-        }
-    }
-    return(unlist(referenceLevels))
-}
-
 getFormulaFactors <- function(formula, dataFrame) {
     vars <- all.vars(formula)
     if(length(vars) == 0) {
diff --git a/man/checkReference.Rd b/man/checkReference.Rd
index bdc0b13..7317e68 100644
--- a/man/checkReference.Rd
+++ b/man/checkReference.Rd
@@ -4,7 +4,7 @@
 \alias{checkReference}
 \title{Check for presence of reference levels}
 \usage{
-checkReference(y, data, referenceLevels)
+checkReference(y, data, paramNames, factorLevels)
 }
 \arguments{
 \item{y}{A numeric vector with the feature values for each sample.}
@@ -13,13 +13,14 @@ checkReference(y, data, referenceLevels)
 the same number of rows as the number of columns (samples) of
 \code{y}.}
 
-\item{formula}{Model formula. The model is built based on the
-covariates in the data object.}
+\item{paramNames}{A \code{character} vector with the names of the parameters in the model.}
 
-\item{paramNames}{character vector specifying the model parameters that are present in the global model matrix}
+\item{factorLevels}{A list with the levels of the factors in the model. The names of the list
+should correspond to the factor variables present in the model.}
 }
 \value{
-A logical vector indicating for each parameter in the model if his reference level has values for the current feature.
+A logical vector indicating for each parameter (that comes from a factor variable) in the model
+if his reference level has values for the current feature.
 }
 \description{
 Check if the reference levels of the factors in the model are present for a specific feature.
diff --git a/man/getFormulaFactors.Rd b/man/getFormulaFactors.Rd
new file mode 100644
index 0000000..5ba30e3
--- /dev/null
+++ b/man/getFormulaFactors.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msqrob-utils.R
+\name{getFormulaFactors}
+\alias{getFormulaFactors}
+\title{Get the levels of the factors in the model}
+\usage{
+getFormulaFactors(formula, dataFrame)
+}
+\arguments{
+\item{formula}{A \code{formula} object that specifies the model.}
+
+\item{data}{A \code{DataFrame} with information on the design. Contains the variables that are used in the model.}
+}
+\value{
+A list with the levels of the factors in the model. The names of the list
+correspond to the factor variables present in the model.
+}
+\description{
+Get the levels of the factors in the model associated with the factors
+names.
+}
diff --git a/man/getReferenceLevels.Rd b/man/getReferenceLevels.Rd
deleted file mode 100644
index ac0a9e7..0000000
--- a/man/getReferenceLevels.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/msqrob-utils.R
-\name{getReferenceLevels}
-\alias{getReferenceLevels}
-\title{Get reference levels factors}
-\usage{
-getReferenceLevels(dataFrame, formula)
-}
-\arguments{
-\item{dataFrame}{A \code{data.frame} that contains factors}
-
-\item{formula}{A formula object associated with the data frame (the variables in the formula should be present in the data frame)}
-}
-\value{
-A named list with the reference levels of the factors (named by the factor name)
-}
-\description{
-Get the reference levels of the factors of a data frame that are present in a formula
-}
diff --git a/man/hypothesisTest.Rd b/man/hypothesisTest.Rd
index 12ba79e..6422bba 100644
--- a/man/hypothesisTest.Rd
+++ b/man/hypothesisTest.Rd
@@ -26,7 +26,8 @@ expression analysis}
   adjust.method = "BH",
   modelColumn = "msqrobHurdle",
   resultsColumnNamePrefix = "hurdle_",
-  overwrite = FALSE
+  overwrite = FALSE,
+  acceptDifferentReference = FALSE
 )
 
 \S4method{hypothesisTest}{QFeatures}(
@@ -36,7 +37,8 @@ expression analysis}
   adjust.method = "BH",
   modelColumn = "msqrobModels",
   resultsColumnNamePrefix = "",
-  overwrite = FALSE
+  overwrite = FALSE,
+  acceptDifferentReference = FALSE
 )
 
 \S4method{hypothesisTestHurdle}{QFeatures}(
@@ -46,7 +48,8 @@ expression analysis}
   adjust.method = "BH",
   modelColumn = "msqrobHurdle",
   resultsColumnNamePrefix = "hurdle_",
-  overwrite = FALSE
+  overwrite = FALSE,
+  acceptDifferentReference = FALSE
 )
 }
 \arguments{
diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R
index 76b36b9..608cd7e 100644
--- a/tests/testthat/test_msqrob-utils.R
+++ b/tests/testthat/test_msqrob-utils.R
@@ -19,16 +19,20 @@
         y_ref = y_ref))
 }
 
-test_that("getReferenceLevels", {
+test_that("getFormulaFactors", {
     data <- .create_minimal_data()$data
     form <- .create_minimal_data()$form
     form_num <- .create_minimal_data()$form_num
-
-    expect_identical(NULL, msqrob2:::getReferenceLevels(data, form_num))
-
-    referenceCond <- "a"
-    names(referenceCond) <- "condition"
-    expect_identical(referenceCond, msqrob2:::getReferenceLevels(data, form))
+    form_no_intercept <- .create_minimal_data()$form_no_intercept
+    form_interaction <- .create_minimal_data()$form_interaction
+
+    res <- list(condition = c("a", "b", "c"))
+    res_num <- setNames(list(), character(0))
+    res_interaction <- list(condition = c("a", "b", "c"), condition2 = c("a", "b", "c", "d", "e"))
+    expect_identical(res, getFormulaFactors(form, data))
+    expect_identical(res_num, getFormulaFactors(form_num, data))
+    expect_identical(res, getFormulaFactors(form_no_intercept, data))
+    expect_identical(res_interaction, getFormulaFactors(form_interaction, data))
 })
 
 test_that("checkReference", {
@@ -47,22 +51,22 @@ test_that("checkReference", {
 
     reference_present_no_ref <- c(FALSE, FALSE)
     names(reference_present_no_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_no_ref, msqrob2:::checkReference(y_no_ref, data, paramNames, factorVars))
+    expect_identical(reference_present_no_ref, checkReference(y_no_ref, data, paramNames, factorVars))
 
     reference_present_no_int <- c(TRUE, TRUE, TRUE)
     names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc")
-    expect_identical(reference_present_no_int, msqrob2:::checkReference(y_no_ref, data, paramNames_no_intercept, factorVars))
+    expect_identical(reference_present_no_int, checkReference(y_no_ref, data, paramNames_no_intercept, factorVars))
 
     reference_present_ref <- c(TRUE, TRUE)
     names(reference_present_ref) <- c("conditionb", "conditionc")
-    expect_identical(reference_present_ref, msqrob2:::checkReference(y_ref, data, paramNames, factorVars))
+    expect_identical(reference_present_ref, checkReference(y_ref, data, paramNames, factorVars))
 
     reference_no_var <- logical(0)
-    expect_identical(reference_no_var, msqrob2:::checkReference(y_ref, data, c("(Intercept)"), character(0)))
+    expect_identical(reference_no_var, checkReference(y_ref, data, c("(Intercept)"), character(0)))
 
     reference_interaction <- rep(FALSE, length(paramNames_interaction[-1]))
     names(reference_interaction) <- paramNames_interaction[-1]
-    expect_identical(reference_interaction, msqrob2:::checkReference(y_no_ref, data, paramNames_interaction, factorVars))
+    expect_identical(reference_interaction, checkReference(y_no_ref, data, paramNames_interaction, factorVars))
 })
 
 test_that("referenceContrast", {
@@ -73,15 +77,15 @@ test_that("referenceContrast", {
     reference_present_no_ref <- c(FALSE, FALSE)
     names(reference_present_no_ref) <- c("conditionb", "conditionc")
 
-    expect_identical(FALSE, msqrob2:::referenceContrast(reference_present_no_ref, L, FALSE))
+    expect_identical(FALSE, referenceContrast(reference_present_no_ref, L, FALSE))
 
     reference_present_ref <- c(TRUE, TRUE)
     names(reference_present_ref) <- c("conditionb", "conditionc")
 
-    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_ref, L, FALSE))
+    expect_identical(TRUE, referenceContrast(reference_present_ref, L, FALSE))
 
-    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_no_ref, L, TRUE))
-    expect_identical(TRUE, msqrob2:::referenceContrast(reference_present_ref, L, TRUE))
+    expect_identical(TRUE, referenceContrast(reference_present_no_ref, L, TRUE))
+    expect_identical(TRUE, referenceContrast(reference_present_ref, L, TRUE))
 })
 
 test_that("propagateFalseStatus", {
@@ -102,7 +106,7 @@ test_that("propagateFalseStatus", {
     names(exp_status2) <- nam
     exp_status3 <- c(rep(FALSE, 10))
     names(exp_status3) <- nam
-    expect_identical(exp_status1, msqrob2:::propagateFalseStatus(vectors, statuses1))
-    expect_identical(exp_status2, msqrob2:::propagateFalseStatus(vectors, statuses2))
-    expect_identical(exp_status3, msqrob2:::propagateFalseStatus(vectors, statuses3))
+    expect_identical(exp_status1, propagateFalseStatus(vectors, statuses1))
+    expect_identical(exp_status2, propagateFalseStatus(vectors, statuses2))
+    expect_identical(exp_status3, propagateFalseStatus(vectors, statuses3))
 })
\ No newline at end of file