Skip to content

Commit

Permalink
test pass for SE
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Dec 21, 2023
1 parent b3eb0c1 commit 0dbbbe9
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 150 deletions.
34 changes: 16 additions & 18 deletions src/summarizedexperiment/BaseSE.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ class BaseSE:
def __init__(
self,
assays: Dict[str, Any],
rows: Optional[BiocFrame] = None,
cols: Optional[BiocFrame] = None,
row_data: Optional[BiocFrame] = None,
col_data: Optional[BiocFrame] = None,
metadata: Optional[Dict] = None,
validate: bool = True,
) -> None:
Expand Down Expand Up @@ -142,9 +142,9 @@ def __init__(
"""
self._assays = assays

self._shape = _guess_assay_shape(assays, rows, cols)
self._rows = _sanitize_frame(rows, self._shape[0])
self._cols = _sanitize_frame(cols, self._shape[1])
self._shape = _guess_assay_shape(assays, row_data, col_data)
self._rows = _sanitize_frame(row_data, self._shape[0])
self._cols = _sanitize_frame(col_data, self._shape[1])
self._metadata = metadata if metadata is not None else {}

if validate:
Expand Down Expand Up @@ -182,8 +182,8 @@ def __deepcopy__(self, memo=None, _nil=[]):
current_class_const = type(self)
return current_class_const(
assays=_assays_copy,
rows=_rows_copy,
cols=_cols_copy,
row_data=_rows_copy,
col_data=_cols_copy,
metadata=_metadata_copy,
)

Expand All @@ -195,8 +195,8 @@ def __copy__(self):
current_class_const = type(self)
return current_class_const(
assays=self._assays,
rows=self._rows,
cols=self._cols,
row_data=self._rows,
col_data=self._cols,
metadata=self._metadata,
)

Expand Down Expand Up @@ -245,8 +245,8 @@ def __repr__(self) -> str:
pattern = (
f"Class BaseSE with {self.shape[0]} features and {self.shape[1]} samples \n"
f" assays: {', '.join(list(self.assays.keys()))} \n"
f" features: {self.rowdata.columns if self.rowdata is not None else None} \n"
f" sample data: {self.coldata.columns if self.coldata is not None else None}"
f" row_data: {self._rows.names if self._rows is not None else None} \n"
f" col_data: {self._cols.names if self._cols is not None else None}"
)
return pattern

Expand Down Expand Up @@ -716,8 +716,8 @@ def get_slice(

return current_class_const(
assays=slicer.assays,
rows=slicer.rows,
columns=slicer.columns,
row_data=slicer.rows,
col_data=slicer.columns,
metadata=self._metadata,
)

Expand Down Expand Up @@ -801,20 +801,18 @@ def to_anndata(self):
"""Transform :py:class:`summarizedexperiment.BaseSE`-like into a :py:class:`~anndata.AnnData` representation.
Returns:
AnnData: An `AnnData` representation of the experiment.
An `AnnData` representation of the experiment.
"""
from anndata import AnnData

layers = OrderedDict()
for asy, mat in self.assays.items():
layers[asy] = mat.transpose()

trows = self.row_data
if isinstance(self.row_data, GenomicRanges):
trows = self.row_data.to_pandas()
trows = self._rows.to_pandas()

obj = AnnData(
obs=self.col_data,
obs=self._cols.to_pandas(),
var=trows,
uns=self.metadata,
layers=layers,
Expand Down
4 changes: 2 additions & 2 deletions src/summarizedexperiment/RangedSummarizedExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from genomicranges import GenomicRanges, GenomicRangesList, SeqInfo

from .SummarizedExperiment import SummarizedExperiment
from .types import BiocOrPandasFrame, MatrixTypes, SlicerArgTypes
from .types import MatrixTypes, SlicerArgTypes

__author__ = "jkanche"
__copyright__ = "jkanche"
Expand Down Expand Up @@ -129,7 +129,7 @@ def __init__(
metadata (Dict, optional): Additional experimental metadata describing the
methods. Defaults to None.
"""
super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata)
super().__init__(assays, row_data=row_data, col_data=col_data, metadata=metadata)

if row_ranges is None:
row_ranges = GenomicRangesList.empty(n=self._shape[0])
Expand Down
127 changes: 62 additions & 65 deletions src/summarizedexperiment/SummarizedExperiment.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,23 @@
from typing import Dict, Optional
from typing import Dict, Optional, Union, Sequence
from warnings import warn

from genomicranges import GenomicRanges
from biocframe import BiocFrame

from .BaseSE import BaseSE
from .types import MatrixTypes, SlicerArgTypes
from .types import MatrixTypes

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"


class SummarizedExperiment(BaseSE):
"""Container to represents genomic experiment data (`assays`), features (`row_data`), sample data (`col_data`) and
any other `metadata`.
"""Container to represents genomic experiment data (`assays`),
features (`row_data`), sample data (`col_data`) and any other `metadata`.
SummarizedExperiment follows the R/Bioconductor specification; rows are features, columns
are samples.
Attributes:
assays (Dict[str, MatrixTypes]): A dictionary containing matrices, with assay names as keys
and 2-dimensional matrices represented as either
:py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`.
Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and
implements the slice operation using the ``__getitem__`` dunder method.
All matrices in assays must be 2-dimensional and have the same shape
(number of rows, number of columns).
row_data (BiocFrame, optional): Features, which must be of the same length as the rows of
the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or
:py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
col_data (BiocFrame, optional): Sample data, which must be of the same length as the
columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame`
or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
metadata (Dict, optional): Additional experimental metadata describing the methods. Defaults to None.
SummarizedExperiment follows the R/Bioconductor specification;
rows are features, columns are samples.
"""

def __init__(
Expand All @@ -47,65 +26,83 @@ def __init__(
row_data: Optional[BiocFrame] = None,
col_data: Optional[BiocFrame] = None,
metadata: Optional[Dict] = None,
validate: bool = True,
) -> None:
"""Initialize a Summarized Experiment (SE).
Args:
assays (Dict[str, MatrixTypes]): A dictionary containing matrices, with assay names as keys
assays:
A dictionary containing matrices, with assay names as keys
and 2-dimensional matrices represented as either
:py:class:`~numpy.ndarray` or :py:class:`~scipy.sparse.spmatrix`.
Alternatively, you may use any 2-dimensional matrix that has the ``shape`` property and
implements the slice operation using the ``__getitem__`` dunder method.
Alternatively, you may use any 2-dimensional matrix that has
the ``shape`` property and implements the slice operation
using the ``__getitem__`` dunder method.
All matrices in assays must be 2-dimensional and have the
same shape (number of rows, number of columns).
row_data:
Features, must be the same length as the number of rows of
the matrices in assays.
Feature information is coerced to a
:py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
All matrices in assays must be 2-dimensional and have the same shape
(number of rows, number of columns).
col_data:
Sample data, must be the same length as the number of
columns of the matrices in assays.
row_data (BiocFrame, optional): Features, which must be of the same length as the rows of
the matrices in assays. Features can be either a :py:class:`~pandas.DataFrame` or
Sample information is coerced to a
:py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
col_data (BiocFrame, optional): Sample data, which must be of the same length as the
columns of the matrices in assays. Sample Information can be either a :py:class:`~pandas.DataFrame`
or :py:class:`~biocframe.BiocFrame.BiocFrame`. Defaults to None.
metadata:
Additional experimental metadata describing the methods.
Defaults to None.
metadata (Dict, optional): Additional experimental metadata describing the methods. Defaults to None.
validate:
Internal use only.
"""

if isinstance(row_data, GenomicRanges):
warn(
"`row_data` is `GenomicRanges`, consider using `RangeSummarizedExperiment`."
)

super().__init__(assays, rows=row_data, cols=col_data, metadata=metadata)

def __getitem__(
self,
args: SlicerArgTypes,
) -> "SummarizedExperiment":
"""Subset a `SummarizedExperiment`.
Args:
args (SlicerArgTypes): Indices or names to slice. The tuple contains
slices along dimensions (rows, cols).
Each element in the tuple might be either an integer vector (integer positions),
a boolean vector or a :py:class:`~slice` object. Defaults to None.
Raises:
ValueError: If too many or too few slices provided.
Returns:
SummarizedExperiment: Sliced `SummarizedExperiment` object.
"""
sliced_objs = self._slice(args)
return SummarizedExperiment(
assays=sliced_objs.assays,
row_data=sliced_objs.row_data,
col_data=sliced_objs.col_data,
metadata=self.metadata,
super().__init__(
assays, row_data=row_data, col_data=col_data, metadata=metadata, validate=validate
)

# def __getitem__(
# self,
# args: Union[int, str, Sequence, tuple],
# ) -> "SummarizedExperiment":
# """Subset a `SummarizedExperiment`.

# Args:
# args:
# Indices or names to slice. The tuple contains
# slices along dimensions (rows, cols).

# Each element in the tuple might be either an integer vector (integer positions),
# a boolean vector or a :py:class:`~slice` object. Defaults to None.

# Raises:
# ValueError:
# If too many or too few slices provided.

# Returns:
# Sliced `SummarizedExperiment` object.
# """
# sliced_objs = self._generic_slice(args)
# return SummarizedExperiment(
# assays=sliced_objs.assays,
# row_data=sliced_objs.rows,
# col_data=sliced_objs.cols,
# metadata=self.metadata,
# )

def __repr__(self) -> str:
pattern = (
f"Class SummarizedExperiment with {self.shape[0]} features and {self.shape[1]} "
Expand Down
14 changes: 7 additions & 7 deletions tests/test_RSE.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,32 +37,32 @@
}
)

gr = genomicranges.from_pandas(df_gr)
gr = genomicranges.GenomicRanges.from_pandas(df_gr)

col_data = pd.DataFrame(
{
"treatment": ["ChIP", "Input"] * 3,
}
)

a = genomicranges.GenomicRanges(
{
a = genomicranges.GenomicRanges.from_pandas(
pd.DataFrame({
"seqnames": ["chr1", "chr2", "chr1", "chr3"],
"starts": [1, 3, 2, 4],
"ends": [10, 30, 50, 60],
"strand": ["-", "+", "*", "+"],
"score": [1, 2, 3, 4],
}
})
)

b = genomicranges.GenomicRanges(
{
b = genomicranges.GenomicRanges.from_pandas(
pd.DataFrame({
"seqnames": ["chr2", "chr4", "chr5"],
"starts": [3, 6, 4],
"ends": [30, 50, 60],
"strand": ["-", "+", "*"],
"score": [2, 3, 4],
}
})
)

grl = genomicranges.GenomicRangesList(ranges=[a, b], names=["a", "b"])
Expand Down
2 changes: 1 addition & 1 deletion tests/test_RSE_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
}
)

gr = genomicranges.from_pandas(df_gr)
gr = genomicranges.GenomicRanges.from_pandas(df_gr)

col_data = pd.DataFrame(
{
Expand Down
Loading

0 comments on commit 0dbbbe9

Please sign in to comment.