Skip to content

Commit

Permalink
Add mcols support for completeness. (#49)
Browse files Browse the repository at this point in the history
Also update tests and docstrings

---------

Co-authored-by: jkanche <[email protected]>
  • Loading branch information
LTLA and jkanche authored Oct 26, 2023
1 parent e9f56ff commit a6ea858
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 17 deletions.
86 changes: 70 additions & 16 deletions src/biocframe/BiocFrame.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,12 @@ class BiocFrame:
Attributes:
data (Dict[str, Any], optional): Dictionary of column names as `keys` and
their values. All columns must have the same length. Defaults to {}.
number_of_rows (int, optional): Number of rows. If not specified, inferred from ``data``.
their values.
number_of_rows (int, optional): Number of rows.
row_names (list, optional): Row names.
column_names (list, optional): Column names. If not provided,
inferred from the ``data``.
metadata (dict): Additional metadata. Defaults to {}.
column_names (list, optional): Column names.
mcols (BiocFrame, optional): Metadata about columns.
metadata (dict): Additional metadata.
Raises:
ValueError: If there is a mismatch in the number of rows or columns in the data.
Expand All @@ -153,6 +153,7 @@ def __init__(
number_of_rows: Optional[int] = None,
row_names: Optional[List] = None,
column_names: Optional[List[str]] = None,
mcols: Optional["BiocFrame"] = None,
metadata: Optional[dict] = None,
) -> None:
"""Initialize a `BiocFrame` object.
Expand All @@ -164,13 +165,16 @@ def __init__(
row_names (list, optional): Row names.
column_names (list, optional): Column names. If not provided,
inferred from the ``data``.
mcols (BiocFrame, optional): Metadata about columns. Its number of rows must equal the
number of columns in this frame. Defaults to None.
metadata (dict): Additional metadata. Defaults to {}.
"""
self._number_of_rows = number_of_rows
self._row_names = row_names
self._data = {} if data is None else data
self._column_names = column_names
self._metadata = {} if metadata is None else metadata
self._mcols = mcols

self._validate()

Expand All @@ -188,18 +192,20 @@ def _validate(self):
self._number_of_rows = validate_rows(
self._data, self._number_of_rows, self._row_names
)
self._column_names, self._data = validate_cols(self._column_names, self._data)
self._column_names, self._data, self._mcols = validate_cols(
self._column_names, self._data, self._mcols
)

if self._number_of_rows is None:
self._number_of_rows = 0

def __repr__(self) -> str:
if self.row_names is None:
if self.dims[0] == 0:
return f"Empty BiocFrame with no rows & {self.dims[1]} column{'s' if self.dims[1] != 1 else ''}."
if self.shape[0] == 0:
return f"Empty BiocFrame with no rows & {self.shape[1]} column{'s' if self.shape[1] != 1 else ''}."

if self.dims[1] == 0:
return f"Empty BiocFrame with {self.dims[0]} row{'s' if self.dims[0] != 1 else ''} & no columns."
if self.shape[1] == 0:
return f"Empty BiocFrame with {self.shape[0]} row{'s' if self.shape[0] != 1 else ''} & no columns."

from io import StringIO

Expand All @@ -208,8 +214,8 @@ def __repr__(self) -> str:

table = Table(
title=(
f"BiocFrame with {self.dims[0]} row{'s' if self.dims[0] != 1 else ''}"
" & {self.dims[1]} column{'s' if self.dims[1] != 1 else ''}"
f"BiocFrame with {self.dims[0]} row{'s' if self.shape[0] != 1 else ''}"
f" & {self.dims[1]} column{'s' if self.dims[1] != 1 else ''}"
),
show_header=True,
)
Expand Down Expand Up @@ -244,11 +250,11 @@ def __repr__(self) -> str:
_rows.append(_dots)

_last = self.shape[0] - _top
if _last <= rows_to_show:
_last = self.shape[0] - _top
if _last < rows_to_show:
_last += 1

# last set of rows
for r in range(_last + 1, len(self)):
for r in range(_last, len(self)):
_row = self.row(r)
vals = list(_row.values())
res = [str(v) for v in vals]
Expand Down Expand Up @@ -358,6 +364,23 @@ def column_names(self, names: List[str]):
self._column_names = names
self._data = new_data

@property
def mcols(self) -> Union[None, "BiocFrame"]:
    """Get the column annotations.

    Returns:
        The stored ``mcols`` object containing annotation on the columns,
        or None if none is stored.
    """
    # TODO: need to attach row names.
    return self._mcols

@mcols.setter
def mcols(self, mcols: Union[None, "BiocFrame"]):
    """Replace the column annotations.

    Args:
        mcols: New annotations, or None. When not None, its number of
            rows (``mcols.shape[0]``) must match the number of columns of
            this object (``self.shape[1]``).

    Raises:
        ValueError: If ``mcols`` is not None and the two counts differ.
    """
    # Short-circuits on None so that ``self.shape`` is only consulted when
    # there is something to validate.
    row_count_mismatch = mcols is not None and mcols.shape[0] != self.shape[1]
    if row_count_mismatch:
        raise ValueError(
            "Number of rows in `mcols` should be equal to the number of columns."
        )

    self._mcols = mcols

@property
def metadata(self) -> dict:
"""Access metadata.
Expand Down Expand Up @@ -516,12 +539,19 @@ def _slice(
elif is_col_scalar is True:
return new_data[new_column_names[0]]

mcols = self._mcols
if mcols is not None:
if column_indices_or_names is not None:
mcols = mcols._slice(new_column_indices, None)

current_class_const = type(self)
return current_class_const(
data=new_data,
number_of_rows=new_number_of_rows,
row_names=new_row_names,
column_names=new_column_names,
metadata=self._metadata,
mcols=mcols,
)

# TODO: implement in-place or views
Expand Down Expand Up @@ -675,9 +705,11 @@ def __setitem__(self, name: str, value: List):
if name not in self.column_names:
self._column_names.append(name)

if self._mcols is not None:
self._mcols = self._mcols.combine(BiocFrame({}, number_of_rows=1))

self._data[name] = value

# TODO: implement in-place or view
def __delitem__(self, name: str):
"""Remove a column.
Expand Down Expand Up @@ -708,7 +740,14 @@ def __delitem__(self, name: str):
raise ValueError(f"Column: '{name}' does not exist.")

del self._data[name]
_col_idx = self._column_names.index(name)

# TODO: do something better later!
_indices = [i for i in range(len(self._column_names)) if i != _col_idx]

self._column_names.remove(name)
if self._mcols is not None:
self._mcols = self._mcols[_indices, :]

def __len__(self) -> int:
"""Number of rows.
Expand Down Expand Up @@ -888,12 +927,24 @@ def combine(self, *other):
if all([x is None for x in all_row_names]) or len(all_row_names) == 0:
all_row_names = None

combined_mcols = None
if self.mcols is not None:
combined_mcols = self.mcols
if len(all_unique_columns) > len(self.mcols):
combined_mcols = self.mcols.combine(
BiocFrame(
{}, number_of_rows=len(all_unique_columns) - len(self.mcols)
)
)

current_class_const = type(self)
return current_class_const(
all_data,
number_of_rows=all_num_rows,
row_names=all_row_names,
column_names=all_unique_columns,
metadata=self._metadata,
mcols=combined_mcols,
)

def __deepcopy__(self, memo=None, _nil=[]):
Expand All @@ -911,6 +962,7 @@ def __deepcopy__(self, memo=None, _nil=[]):
_num_rows_copy = deepcopy(self._number_of_rows)
_rownames_copy = deepcopy(self.row_names)
_metadata_copy = deepcopy(self.metadata)
_mcols_copy = deepcopy(self._mcols) if self._mcols is not None else None

# copy dictionary first
_data_copy = OrderedDict()
Expand All @@ -929,6 +981,7 @@ def __deepcopy__(self, memo=None, _nil=[]):
row_names=_rownames_copy,
column_names=_colnames_copy,
metadata=_metadata_copy,
mcols=_mcols_copy,
)

def __copy__(self):
Expand All @@ -946,6 +999,7 @@ def __copy__(self):
row_names=self._row_names,
column_names=self._column_names,
metadata=self._metadata,
mcols=self._mcols,
)

return new_instance
Expand Down
9 changes: 8 additions & 1 deletion src/biocframe/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def validate_rows(
def validate_cols(
column_names: List[str],
data: Dict[str, Any],
mcols: Optional["BiocFrame"],
) -> Tuple[List[str], Dict[str, Any]]:
"""Validate columns of a :py:class:`biocframe.BiocFrame` object.
Expand Down Expand Up @@ -115,7 +116,13 @@ def validate_cols(
f"{', '.join(incorrect_types)}."
)

return column_names, data
if mcols is not None:
if mcols.shape[0] != len(column_names):
raise ValueError(
"Number of rows in `mcols` should be equal to the number of columns."
)

return column_names, data, mcols


def validate_unique_list(values: List) -> bool:
Expand Down
22 changes: 22 additions & 0 deletions tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,25 @@ def test_combine_generic_preserve_types():
assert isinstance(merged, BiocFrame)
assert isinstance(merged.column("odd"), np.ndarray)
assert isinstance(merged.column("even"), list)


def test_combine_with_extras():
    """Combining two frames keeps the first frame's metadata and mcols shape."""
    column_annotations = BiocFrame({"foo": [-1, -2], "bar": ["A", "B"]})
    annotated = BiocFrame(
        {"column1": [1, 2, 3], "column2": [4, 5, 6]},
        mcols=column_annotations,
        metadata={"YAY": 2},
    )

    # Second frame has the same columns but carries no mcols or metadata.
    plain = BiocFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]})

    merged = combine(annotated, plain)

    assert merged.metadata == annotated.metadata
    assert merged.mcols.shape == annotated.mcols.shape
48 changes: 48 additions & 0 deletions tests/test_initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,51 @@ def test_nested_biocFrame():
assert isinstance(nested_col, BiocFrame)
assert nested_col.row_names is None
assert len(nested_col.column_names) == 3


def test_extra_bits():
    """mcols and metadata are accepted at construction and replaceable via setters."""
    initial_annotations = BiocFrame({"foo": [1], "bar": ["A"]})
    bframe = BiocFrame(
        {"column1": [1, 2, 3]},
        mcols=initial_annotations,
        metadata={"YAY": 2},
    )

    assert isinstance(bframe.mcols, BiocFrame)
    assert bframe.metadata["YAY"] == 2

    # Assigning through the setters should replace the stored values.
    replacement_annotations = BiocFrame({"STUFF": [2.5]})
    bframe.mcols = replacement_annotations
    assert bframe.mcols.column_names == ["STUFF"]

    bframe.metadata = {"FOO": "A"}
    assert bframe.metadata["FOO"] == "A"


def test_with_add_deletions():
    """mcols keeps one row per column as columns are added, overwritten and deleted."""
    obj1 = BiocFrame(
        {
            "column1": [1, 2, 3],
            "column2": [4, 5, 6],
        },
        mcols=BiocFrame({"foo": [-1, -2], "bar": ["A", "B"]}),
        metadata={"YAY": 2},
    )

    assert isinstance(obj1.mcols, BiocFrame)

    # Adding a new column must grow mcols by one row.
    obj1["new_column"] = [10, 11, "12"]
    assert obj1.shape == (3, 3)
    assert len(obj1.mcols) == 3

    # Overwriting an existing column must leave mcols untouched.
    obj1["new_column"] = [10, 11, 12]
    assert obj1.shape == (3, 3)
    assert len(obj1.mcols) == 3

    # Deleting a column must drop the matching mcols row.
    # (A leftover debugging print of obj1.mcols was removed here.)
    del obj1["new_column"]
    assert obj1.shape == (3, 2)
    assert len(obj1.mcols) == 2
20 changes: 20 additions & 0 deletions tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,26 @@ def test_bframe_slice():
assert sliced_list.dims == (2, 2)


def test_bframe_slice_with_extras():
    """Slicing keeps metadata and subsets mcols only along the column axis."""
    bframe = BiocFrame(
        {
            "column1": [1, 2, 3],
            "column2": [4, 5, 6],
        },
        mcols=BiocFrame({"foo": [-1, -2], "bar": ["A", "B"]}),
        metadata={"YAY": 2},
    )

    # Row-only slice: every column is kept, so mcols must keep all its rows.
    # Compare row count with row count; the previous check against
    # ``bframe.mcols.shape[1]`` only passed because this mcols is 2x2.
    subframe = bframe[0:2, :]
    assert subframe.mcols.shape[0] == bframe.mcols.shape[0]
    assert subframe.metadata == bframe.metadata

    # Column slice: mcols must be reduced to the row of the selected column.
    subframe = bframe[:, [1]]
    assert subframe.mcols.shape[0] == 1
    assert subframe.mcols.column("foo") == [-2]
    assert subframe.metadata == bframe.metadata


def test_bframe_unary_slice():
obj = {
"column1": [1, 2, 3],
Expand Down

0 comments on commit a6ea858

Please sign in to comment.