update version and docs

uio-bmi · Mar 14, 2022 · 5df87c0 · 5df87c0
1 parent 9c5e5ba
commit 5df87c0
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 6 deletions.
diff --git a/docs/source/usecases/emerson_reproduction.rst b/docs/source/usecases/emerson_reproduction.rst
@@ -37,6 +37,12 @@ The statistical model `ProbabilisticBinaryClassifier` relies on `SequenceAbundan
 and {:math:`\alpha_1`, :math:`\beta_1`}) to describe beta-distributed prior for CMV-negative and CMV-positive subjects. These parameters are then used
 to create log-posterior odds ratio for class assignment for new subjects.
 
+.. note::
+
+  When used on large datasets, the 'SequenceAbundance' encoder might be slow. If you want to reproduce the analysis faster and you are using
+  immuneML version 2.2.0 or later, use the :ref:`CompAIRRSequenceAbundance` encoder instead. The `p_value_threshold` parameter is the same, and by default
+  the analysis is performed using the amino acid sequence, and V and J genes. This can be turned off by setting `ignore_genes` to True (it is False by default).
+
 To find the optimal p-value threshold we used 10-fold cross-validation on the cohort 1 and chose the one minimizing the cross-entropy loss (also
 called logarithmic loss). We then tested the performance of the optimal model (optimal p-value and the classifier fitted on resulting data representation)
 on the cohort 2 (as it was done in the original study).

diff --git a/immuneML/encodings/abundance_encoding/CompAIRRSequenceAbundanceEncoder.py b/immuneML/encodings/abundance_encoding/CompAIRRSequenceAbundanceEncoder.py
@@ -34,8 +34,8 @@ class CompAIRRSequenceAbundanceEncoder(DatasetEncoder):
     - the first element corresponds to the number of label-associated clonotypes
     - the second element is the total number of unique clonotypes
 
-    To determine what clonotypes (with or without matching V/J genes) are label-associated
-    based on a statistical test. The statistical test used is Fisher's exact test (one-sided).
+    To determine what clonotypes (amino acid sequences with or without matching V/J genes) are label-associated, Fisher's exact test (one-sided)
+    is used.
 
     The encoder also writes out files containing the contingency table used for Fisher's exact test,
     the resulting p-values, and the significantly abundant sequences

diff --git a/immuneML/encodings/abundance_encoding/SequenceAbundanceEncoder.py b/immuneML/encodings/abundance_encoding/SequenceAbundanceEncoder.py
@@ -24,10 +24,9 @@ class SequenceAbundanceEncoder(DatasetEncoder):
     - the first element corresponds to the number of label-associated clonotypes
     - the second element is the total number of unique clonotypes
 
-    To determine what clonotypes (with features defined by comparison_attributes) are label-associated
-    based on a statistical test. The statistical test used is Fisher's exact test (one-sided).
+    To determine what clonotypes (with features defined by comparison_attributes) are label-associated, one-sided Fisher's exact test is used.
 
-    The encoder also writes out files containing the contingency table used for fisher's exact test,
+    The encoder also writes out files containing the contingency table used for Fisher's exact test,
     the resulting p-values, and the significantly abundant sequences
     (use :py:obj:`~immuneML.reports.encoding_reports.RelevantSequenceExporter.RelevantSequenceExporter` to export these sequences in AIRR format).
 

diff --git a/immuneML/environment/Constants.py b/immuneML/environment/Constants.py
@@ -1,6 +1,6 @@
 class Constants:
 
-    VERSION = "2.1.2"
+    VERSION = "2.2.0"
 
     # encoding constants
     FEATURE_DELIMITER = "-"