Skip to content

Commit

Permalink
Initialize from a NamedList & wrappers for combine and merge operati…
Browse files Browse the repository at this point in the history
…ons (#118)

* Initialize from a `NamedList`
* Update tests and changelog
  • Loading branch information
jkanche authored Jan 2, 2025
1 parent 852013d commit 5ea8600
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:

steps:
- uses: actions/checkout@v4

- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
Expand All @@ -21,36 +22,31 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest tox
# - name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
pip install tox
- name: Test with tox
run: tox
run: |
tox
- name: Build docs
run: tox -e docs
run: |
tox -e docs
- name: Add nojekyll file
run: touch ./docs/_build/html/.nojekyll
- run: touch ./docs/_build/html/.nojekyll

- name: GH Pages Deployment
uses: JamesIves/github-pages-deploy-action@4.1.3
uses: JamesIves/github-pages-deploy-action@v4
with:
branch: gh-pages # The branch the action should deploy to.
folder: ./docs/_build/html
clean: true # Automatically remove deleted files from the deploy branch

- name: Build Project and Publish
run: python -m tox -e clean,build
run: |
python -m tox -e clean,build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
uses: pypa/gh-action-pypi-publish@v1.12.2
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
42 changes: 0 additions & 42 deletions .github/workflows/pypi-test.yml

This file was deleted.

32 changes: 32 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Runs the tox test suite on every supported Python version.
name: Run tests

# Triggered by pushes to master and by any pull request.
on:
  push:
    branches: [master]
  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job per interpreter version listed here.
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    name: Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"

      # tox is the only tool installed directly; the remaining dependencies
      # are presumably resolved by tox itself (confirm against tox.ini).
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox
      - name: Test with tox
        run: |
          tox
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Version 0.6.1

- Implement wrapper methods within the `BiocFrame` class for merge and combine operations.
- Initialize `BiocFrame` from a `NamedList`.
- Rename GitHub Actions workflows for consistency with other packages.

## Version 0.6.0

- chore: Remove Python 3.8 (EOL)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)
[![PyPI-Server](https://img.shields.io/pypi/v/BiocFrame.svg)](https://pypi.org/project/BiocFrame/)
![Unit tests](https://github.com/BiocPy/BiocFrame/actions/workflows/pypi-test.yml/badge.svg)
![Unit tests](https://github.com/BiocPy/BiocFrame/actions/workflows/run-tests.yml/badge.svg)

# Bioconductor-like data frames

Expand Down
61 changes: 53 additions & 8 deletions src/biocframe/BiocFrame.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import OrderedDict
from collections import OrderedDict, abc
from copy import copy
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union
from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence, Tuple, Union
from warnings import warn

import biocutils as ut
Expand Down Expand Up @@ -122,7 +122,7 @@ class BiocFrame:

def __init__(
self,
data: Optional[Dict[str, Any]] = None,
data: Mapping = None,
number_of_rows: Optional[int] = None,
row_names: Optional[List] = None,
column_names: Optional[List[str]] = None,
Expand All @@ -138,6 +138,10 @@ def __init__(
columns must have the same length. Defaults to an empty
dictionary.
Alternatively may provide a `Mapping` object, for example
a :py:class:`~biocutils.NamedList` that can be coerced into
a dictionary.
number_of_rows:
Number of rows. If not specified, inferred from ``data``. This
needs to be provided if ``data`` is empty and ``row_names`` are
Expand All @@ -161,7 +165,18 @@ def __init__(
validate:
Internal use only.
"""
self._data = {} if data is None else data
if data is None:
data = {}

if isinstance(data, ut.NamedList):
data = data.as_dict()
# making sure all column values are lists
for k, v in data.items():
if not isinstance(v, list):
# if it's a scalar, wrap it in a list; otherwise coerce it to a list
data[k] = list(v) if isinstance(v, abc.Sequence) else [v]

self._data = data
if row_names is not None and not isinstance(row_names, ut.Names):
row_names = ut.Names(row_names)
self._row_names = row_names
Expand Down Expand Up @@ -1243,10 +1258,6 @@ def flatten(self, as_type: Literal["dict", "biocframe"] = "dict", delim: str = "

return _data_copy

def combine(self, *other):
    """Wrapper around :py:func:`~relaxed_combine_rows`, provided for back-compatibility only.

    Args:
        *other:
            Additional objects to combine with this one; they are forwarded
            positionally after ``self``.

    Returns:
        The result of :py:func:`~relaxed_combine_rows` for ``self`` followed
        by every object in ``other``.
    """
    # ``other`` is a tuple, so ``[self] + other`` raised a TypeError; unpack
    # the varargs into the variadic helper instead.
    return relaxed_combine_rows(self, *other)

# TODO: very primitive implementation, needs very robust testing
# TODO: implement in-place, view
def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":
Expand Down Expand Up @@ -1274,6 +1285,40 @@ def __array_ufunc__(self, func, method, *inputs, **kwargs) -> "BiocFrame":

return input

#############################
######>> Combine Ops <<######
#############################

def combine(self, *other):
    """Legacy alias that forwards to :py:func:`~relaxed_combine_rows`.

    Provided for back-compatibility only.

    Args:
        *other:
            Additional objects, forwarded positionally after ``self``.

    Returns:
        Whatever :py:func:`~relaxed_combine_rows` returns for these inputs.
    """
    frames = (self, *other)
    return relaxed_combine_rows(*frames)

def relaxed_combine_rows(self, *other):
    """Method form of the module-level :py:func:`~relaxed_combine_rows`.

    Args:
        *other:
            Additional objects, forwarded positionally after ``self``.

    Returns:
        Whatever :py:func:`~relaxed_combine_rows` returns for these inputs.
    """
    # Inside a method body this name resolves to the module-level function,
    # not to this method, so the call does not recurse.
    frames = (self, *other)
    return relaxed_combine_rows(*frames)

def relaxed_combine_columns(self, *other):
    """Method form of the module-level :py:func:`~relaxed_combine_columns`.

    Args:
        *other:
            Additional objects, forwarded positionally after ``self``.

    Returns:
        Whatever :py:func:`~relaxed_combine_columns` returns for these inputs.
    """
    # Inside a method body this name resolves to the module-level function,
    # not to this method, so the call does not recurse.
    frames = (self, *other)
    return relaxed_combine_columns(*frames)

def combine_rows(self, *other):
    """Method form of :py:func:`~biocutils.combine_rows`.

    Delegates to the module's ``_combine_rows_bframes`` helper.

    Args:
        *other:
            Additional objects, forwarded positionally after ``self``.

    Returns:
        Whatever ``_combine_rows_bframes`` returns for these inputs.
    """
    frames = (self, *other)
    return _combine_rows_bframes(*frames)

def combine_columns(self, *other):
    """Method form of :py:func:`~biocutils.combine_columns`.

    Delegates to the module's ``_combine_cols_bframes`` helper.

    Args:
        *other:
            Additional objects, forwarded positionally after ``self``.

    Returns:
        Whatever ``_combine_cols_bframes`` returns for these inputs.
    """
    frames = (self, *other)
    return _combine_cols_bframes(*frames)

def merge(
    self,
    *other: "BiocFrame",
    by: Union[None, str, Sequence] = None,
    join: Literal["inner", "left", "right", "outer"] = "left",
    rename_duplicate_columns: bool = False,
):
    """Wrapper around :py:func:`merge`.

    Builds the list ``[self, *other]`` and hands it to the module-level
    :py:func:`merge`, so this frame is always the first one in the list.

    Args:
        *other:
            Additional frames to merge with this one. Each positional
            argument is a single frame, not a sequence of frames.

        by:
            Forwarded unchanged to :py:func:`merge`.

        join:
            One of ``"inner"``, ``"left"``, ``"right"`` or ``"outer"``;
            forwarded unchanged to :py:func:`merge`.

        rename_duplicate_columns:
            Forwarded unchanged to :py:func:`merge`.

    Returns:
        Whatever :py:func:`merge` returns for these inputs.
    """
    # Inside a method body ``merge`` resolves to the module-level function,
    # not to this method, so the call does not recurse.
    frames = [self, *other]
    return merge(frames, by=by, join=join, rename_duplicate_columns=rename_duplicate_columns)


############################

Expand Down
46 changes: 46 additions & 0 deletions tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ def test_basic():
assert merged.shape[0] == obj1.shape[0] + obj2.shape[0]
assert merged.shape[1] == 2

merged2 = obj1.combine(obj2)

assert isinstance(merged2, BiocFrame)
assert merged2.shape[0] == obj1.shape[0] + obj2.shape[0]
assert merged2.shape[1] == 2


def test_multiple():
merged = combine(obj1, obj2, obj1)
Expand All @@ -38,6 +44,12 @@ def test_multiple():
assert merged.shape[0] == 2 * obj1.shape[0] + obj2.shape[0]
assert merged.shape[1] == 2

merged2 = obj1.combine(obj2, obj1)

assert isinstance(merged2, BiocFrame)
assert merged2.shape[0] == 2 * obj1.shape[0] + obj2.shape[0]
assert merged2.shape[1] == 2


def test_empty():
o1 = BiocFrame(number_of_rows=10)
Expand All @@ -49,6 +61,11 @@ def test_empty():
assert merged.shape[0] == 15
assert merged.shape[1] == 0

merged2 = o1.combine(o2)

assert isinstance(merged2, BiocFrame)
assert merged2.shape[0] == 15
assert merged2.shape[1] == 0

def test_with_rownames():
obj1.row_names = ["a", "b", "c", "d", "e"]
Expand Down Expand Up @@ -155,6 +172,29 @@ def test_relaxed_combine_rows():
None,
]

merged2 = obj1.relaxed_combine_rows(obj2, obj3)

assert merged2.get_column_names().as_list() == ["column1", "column2", "column3"]
assert merged2.column("column1") == [1, 2, 3, -1, -2, -3, None, None, None]
assert (
merged2.column("column2").mask
== np.ma.array([False, False, False, True, True, True, False, False, False])
).all()
assert (
merged2.column("column2").data == np.ma.array([4, 5, 6, 0, 0, 0, -4, -5, -6])
).all()
assert merged2.column("column3") == [
None,
None,
None,
"A",
"B",
"C",
None,
None,
None,
]


def test_combine_columns_basic():
obj1 = BiocFrame(
Expand Down Expand Up @@ -185,6 +225,12 @@ def test_combine_columns_basic():
combine_columns(obj1, obj2[1:4, :])
assert str(ex.value).find("same number of rows") >= 0

merged2= obj1.combine_columns(obj2)
assert isinstance(merged2, BiocFrame)
assert merged2.get_column_names().as_list() == ["odd", "even", "foo", "bar"]
assert merged2.get_column("odd") == [1, 3, 5, 7, 9]
assert merged2.get_column("bar") == [0, 22, 44, 66, 88]


def test_combine_columns_with_column_data():
obj1 = BiocFrame(
Expand Down
11 changes: 10 additions & 1 deletion tests/test_initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import biocframe
from biocframe import BiocFrame
from biocutils import Names
from biocutils import Names, NamedList

__author__ = "jkanche"
__copyright__ = "jkanche"
Expand Down Expand Up @@ -164,3 +164,12 @@ def test_with_add_deletions():
del obj1["new_column"]
assert obj1.shape == (3, 2)
assert len(obj1.column_data) == 2

def test_NamedList():
    """A ``NamedList`` of scalars initializes a one-row frame, one column per name."""
    expected_names = ["A", "B", "C", "D"]
    named = NamedList([1, 2, 3, 4], names=["A", "B", "C", "D"])

    result = BiocFrame(named)

    assert result is not None
    assert isinstance(result, BiocFrame)
    assert result.shape == (1, 4)
    assert list(result.get_column_names()) == expected_names
31 changes: 31 additions & 0 deletions tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,34 @@ def test_merge_BiocFrame_column_data():
combined = merge([obj1, obj2], by="A", join="left")
comcol = combined.get_column_data()
assert comcol.column("foo") == [True, False, False]

def test_merge_class_method():
    """``BiocFrame.merge`` must agree with the module-level ``merge``."""
    # A simple case.
    obj1 = BiocFrame({"B": [3, 4, 5, 6]}, row_names=[1, 2, 3, 4])
    obj2 = BiocFrame(
        {"C": ["A", "B"]},
        row_names=[1, 3],
    )

    combined = merge([obj1, obj2], by=None, join="left")
    assert combined.get_column_data() is None

    obj1.set_column_data(BiocFrame({"foo": [True]}), in_place=True)
    combined = merge([obj1, obj2], by=None, join="left")
    comcol = combined.get_column_data()
    assert combined.get_column_names().as_list() == ["B", "C"]
    assert comcol.column("foo") == [True, None]

    combined2 = obj1.merge(obj2, by=None, join="left")
    comcol2 = combined2.get_column_data()
    assert combined2.get_column_names().as_list() == ["B", "C"]
    assert comcol2.column("foo") == [True, None]

    obj2.set_column_data(BiocFrame({"foo": [False]}), in_place=True)
    combined = merge([obj1, obj2], by=None, join="left")
    comcol = combined.get_column_data()
    assert comcol.column("foo") == [True, False]

    combined2 = obj1.merge(obj2, by=None, join="left")
    comcol2 = combined2.get_column_data()
    # Check the method's own result; asserting on ``comcol`` here would only
    # re-test the module-level function.
    assert comcol2.column("foo") == [True, False]

0 comments on commit 5ea8600

Please sign in to comment.