test pass for SE

BiocPy · Dec 21, 2023 · 0dbbbe9 · 0dbbbe9
1 parent b3eb0c1
commit 0dbbbe9
Show file tree

Hide file tree

Showing 8 changed files with 181 additions and 150 deletions.
diff --git a/src/summarizedexperiment/BaseSE.py b/src/summarizedexperiment/BaseSE.py
@@ -99,8 +99,8 @@ class BaseSE:
     def __init__(
         self,
         assays: Dict[str, Any],
-        rows: Optional[BiocFrame] = None,
-        cols: Optional[BiocFrame] = None,
+        row_data: Optional[BiocFrame] = None,
+        col_data: Optional[BiocFrame] = None,
         metadata: Optional[Dict] = None,
         validate: bool = True,
     ) -> None:
@@ -142,9 +142,9 @@ def __init__(
         """
         self._assays = assays
 
-        self._shape = _guess_assay_shape(assays, rows, cols)
-        self._rows = _sanitize_frame(rows, self._shape[0])
-        self._cols = _sanitize_frame(cols, self._shape[1])
+        self._shape = _guess_assay_shape(assays, row_data, col_data)
+        self._rows = _sanitize_frame(row_data, self._shape[0])
+        self._cols = _sanitize_frame(col_data, self._shape[1])
         self._metadata = metadata if metadata is not None else {}
 
         if validate:
@@ -182,8 +182,8 @@ def __deepcopy__(self, memo=None, _nil=[]):
         current_class_const = type(self)
         return current_class_const(
             assays=_assays_copy,
-            rows=_rows_copy,
-            cols=_cols_copy,
+            row_data=_rows_copy,
+            col_data=_cols_copy,
             metadata=_metadata_copy,
         )
 
@@ -195,8 +195,8 @@ def __copy__(self):
         current_class_const = type(self)
         return current_class_const(
             assays=self._assays,
-            rows=self._rows,
-            cols=self._cols,
+            row_data=self._rows,
+            col_data=self._cols,
             metadata=self._metadata,
         )
 
@@ -245,8 +245,8 @@ def __repr__(self) -> str:
         pattern = (
             f"Class BaseSE with {self.shape[0]} features and {self.shape[1]} samples \n"
             f"  assays: {', '.join(list(self.assays.keys()))} \n"
-            f"  features: {self.rowdata.columns if self.rowdata is not None else None} \n"
-            f"  sample data: {self.coldata.columns if self.coldata is not None else None}"
+            f"  row_data: {self._rows.names if self._rows is not None else None} \n"
+            f"  col_data: {self._cols.names if self._cols is not None else None}"
         )
         return pattern
 
@@ -716,8 +716,8 @@ def get_slice(
 
         return current_class_const(
             assays=slicer.assays,
-            rows=slicer.rows,
-            columns=slicer.columns,
+            row_data=slicer.rows,
+            col_data=slicer.columns,
             metadata=self._metadata,
         )
 
@@ -801,20 +801,18 @@ def to_anndata(self):
         """Transform :py:class:`summarizedexperiment.BaseSE`-like into a :py:class:`~anndata.AnnData` representation.
 
         Returns:
-            AnnData: An `AnnData` representation of the experiment.
+            An `AnnData` representation of the experiment.
         """
         from anndata import AnnData
 
         layers = OrderedDict()
         for asy, mat in self.assays.items():
             layers[asy] = mat.transpose()
 
-        trows = self.row_data
-        if isinstance(self.row_data, GenomicRanges):
-            trows = self.row_data.to_pandas()
+        trows = self._rows.to_pandas()
 
         obj = AnnData(
-            obs=self.col_data,
+            obs=self._cols.to_pandas(),
             var=trows,
             uns=self.metadata,
             layers=layers,

diff --git a/src/summarizedexperiment/RangedSummarizedExperiment.py b/src/summarizedexperiment/RangedSummarizedExperiment.py
@@ -4,7 +4,7 @@
 from genomicranges import GenomicRanges, GenomicRangesList, SeqInfo
 
 from .SummarizedExperiment import SummarizedExperiment
-from .types import BiocOrPandasFrame, MatrixTypes, SlicerArgTypes
+from .types import MatrixTypes, SlicerArgTypes
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
@@ -129,7 +129,7 @@ def __init__(
             metadata (Dict, optional): Additional experimental metadata describing the
                 methods. Defaults to None.
         """
-        super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata)
+        super().__init__(assays, row_data=row_data, col_data=col_data, metadata=metadata)
 
         if row_ranges is None:
             row_ranges = GenomicRangesList.empty(n=self._shape[0])

diff --git a/src/summarizedexperiment/SummarizedExperiment.py b/src/summarizedexperiment/SummarizedExperiment.py
@@ -1,44 +1,23 @@
-from typing import Dict, Optional
+from typing import Dict, Optional, Union, Sequence
 from warnings import warn
 
 from genomicranges import GenomicRanges
 from biocframe import BiocFrame
 
 from .BaseSE import BaseSE
-from .types import MatrixTypes, SlicerArgTypes
+from .types import MatrixTypes
 
 __author__ = "jkanche"
 __copyright__ = "jkanche"
 __license__ = "MIT"
 
 
 class SummarizedExperiment(BaseSE):
-    """Container to represents genomic experiment data (`assays`), features (`row_data`), sample data (`col_data`) and
-    any other `metadata`.
+    """Container to represent genomic experiment data (`assays`),
+    features (`row_data`), sample data (`col_data`) and any other `metadata`.
 
-    SummarizedExperiment follows the R/Bioconductor specification; rows are features, columns
-    are samples.
-
-    Attributes:
-        assays (Dict[str, MatrixTypes]): A dictionary containing matrices, with assay names as keys
-            and 2-dimensional matrices represented as either
-            :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`.
-
-            Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and
-            implements the slice operation using the ``__getitem__`` dunder method.
-
-            All matrices in assays must be 2-dimensional and have the same shape
-            (number of rows, number of columns).
-
-        row_data (BiocFrame, optional): Features, which must be of the same length as the rows of
-            the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or
-            :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
-
-        col_data (BiocFrame, optional): Sample data, which must be of the same length as the
-            columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame`
-            or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
-
-        metadata (Dict, optional): Additional experimental metadata describing the methods. Defaults to None.
+    SummarizedExperiment follows the R/Bioconductor specification; 
+    rows are features, columns are samples.
     """
 
     def __init__(
@@ -47,65 +26,83 @@ def __init__(
         row_data: Optional[BiocFrame] = None,
         col_data: Optional[BiocFrame] = None,
         metadata: Optional[Dict] = None,
+        validate: bool = True,
     ) -> None:
         """Initialize a Summarized Experiment (SE).
 
         Args:
-            assays (Dict[str, MatrixTypes]): A dictionary containing matrices, with assay names as keys
+            assays:
+                A dictionary containing matrices, with assay names as keys
                 and 2-dimensional matrices represented as either
                 :py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`.
 
-                Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and
-                implements the slice operation using the ``__getitem__`` dunder method.
+                Alternatively, you may use any 2-dimensional matrix that has
+                the ``shape`` property and implements the slice operation
+                using the ``__getitem__`` dunder method.
+
+                All matrices in assays must be 2-dimensional and have the
+                same shape (number of rows, number of columns).
+
+            row_data:
+                Features, must be the same length as the number of rows of
+                the matrices in assays.
+
+                Feature information is coerced to a
+                :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
 
-                All matrices in assays must be 2-dimensional and have the same shape
-                (number of rows, number of columns).
+            col_data:
+                Sample data, must be the same length as the number of
+                columns of the matrices in assays.
 
-            row_data (BiocFrame, optional): Features, which must be of the same length as the rows of
-                the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or
+                Sample information is coerced to a
                 :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
 
-            col_data (BiocFrame, optional): Sample data, which must be of the same length as the
-                columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame`
-                or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
+            metadata:
+                Additional experimental metadata describing the methods.
+                Defaults to None.
 
-            metadata (Dict, optional): Additional experimental metadata describing the methods. Defaults to None.
+            validate:
+                Internal use only.
         """
 
         if isinstance(row_data, GenomicRanges):
             warn(
                 "`row_data` is `GenomicRanges`, consider using `RangeSummarizedExperiment`."
             )
 
-        super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata)
-
-    def __getitem__(
-        self,
-        args: SlicerArgTypes,
-    ) -> "SummarizedExperiment":
-        """Subset a `SummarizedExperiment`.
-
-        Args:
-            args (SlicerArgTypes): Indices or names to slice. The tuple contains
-                slices along dimensions (rows, cols).
-
-                Each element in the tuple, might be either a integer vector (integer positions),
-                boolean vector or :py:class:`~slice` object. Defaults to None.
-
-        Raises:
-            ValueError: If too many or too few slices provided.
-
-        Returns:
-            SummarizedExperiment: Sliced `SummarizedExperiment` object.
-        """
-        sliced_objs = self._slice(args)
-        return SummarizedExperiment(
-            assays=sliced_objs.assays,
-            row_data=sliced_objs.row_data,
-            col_data=sliced_objs.col_data,
-            metadata=self.metadata,
+        super().__init__(
+            assays, row_data=row_data, col_data=col_data, metadata=metadata, validate=validate
         )
 
+    # def __getitem__(
+    #     self,
+    #     args: Union[int, str, Sequence, tuple],
+    # ) -> "SummarizedExperiment":
+    #     """Subset a `SummarizedExperiment`.
+
+    #     Args:
+    #         args: 
+    #             Indices or names to slice. The tuple contains
+    #             slices along dimensions (rows, cols).
+
+    #             Each element in the tuple may be either an integer vector (integer positions),
+    #             boolean vector or :py:class:`~slice` object. Defaults to None.
+
+    #     Raises:
+    #         ValueError: 
+    #             If too many or too few slices provided.
+
+    #     Returns:
+    #         Sliced `SummarizedExperiment` object.
+    #     """
+    #     sliced_objs = self._generic_slice(args)
+    #     return SummarizedExperiment(
+    #         assays=sliced_objs.assays,
+    #         row_data=sliced_objs.rows,
+    #         col_data=sliced_objs.cols,
+    #         metadata=self.metadata,
+    #     )
+
     def __repr__(self) -> str:
         pattern = (
             f"Class SummarizedExperiment with {self.shape[0]} features and {self.shape[1]} "

diff --git a/tests/test_RSE.py b/tests/test_RSE.py
@@ -37,32 +37,32 @@
     }
 )
 
-gr = genomicranges.from_pandas(df_gr)
+gr = genomicranges.GenomicRanges.from_pandas(df_gr)
 
 col_data = pd.DataFrame(
     {
         "treatment": ["ChIP", "Input"] * 3,
     }
 )
 
-a = genomicranges.GenomicRanges(
-    {
+a = genomicranges.GenomicRanges.from_pandas(
+    pd.DataFrame({
         "seqnames": ["chr1", "chr2", "chr1", "chr3"],
         "starts": [1, 3, 2, 4],
         "ends": [10, 30, 50, 60],
         "strand": ["-", "+", "*", "+"],
         "score": [1, 2, 3, 4],
-    }
+    })
 )
 
-b = genomicranges.GenomicRanges(
-    {
+b = genomicranges.GenomicRanges.from_pandas(
+    pd.DataFrame({
         "seqnames": ["chr2", "chr4", "chr5"],
         "starts": [3, 6, 4],
         "ends": [30, 50, 60],
         "strand": ["-", "+", "*"],
         "score": [2, 3, 4],
-    }
+    })
 )
 
 grl = genomicranges.GenomicRangesList(ranges=[a, b], names=["a", "b"])

diff --git a/tests/test_RSE_methods.py b/tests/test_RSE_methods.py
@@ -37,7 +37,7 @@
     }
 )
 
-gr = genomicranges.from_pandas(df_gr)
+gr = genomicranges.GenomicRanges.from_pandas(df_gr)
 
 col_data = pd.DataFrame(
     {
-Original file line number
+Diff line change
@@ Expand Up / @@ -37,7 +37,7 @@ @@
         }
     )
-    gr = genomicranges.from_pandas(df_gr)
+    gr = genomicranges.GenomicRanges.from_pandas(df_gr)
     col_data = pd.DataFrame(
         {
@@ Expand Down @@