-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* move no_integration_batch to new folder structure * modify script to work with opsca-v2 API * add global no integration for embed and graph * add global no integration for feature * refactor global random integration * allow to reuse existing kNN * move utils to top-level and reuse precomputed kNN graph in no_integration graph * fix naming * add per batch random integrations * add random cell type integrations * refactor perfect cell type integrations (one-hot encoding) * update images to 1.0.4 * update Changelog * flatten control method folder structure * add control methods to nextflow workflow * fix namespaces * missing control methods from nextflow workflow * fix utils path --------- Co-authored-by: Robrecht Cannoodt <[email protected]>
- Loading branch information
Showing
40 changed files
with
903 additions
and
226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
src/tasks/batch_integration/api/comp_control_method_feature.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# API spec shared by control methods that output a batch-corrected feature
# space. Concrete components pull this in through `__merge__`.
functionality:
  namespace: batch_integration/control_methods
  info:
    type: control_method
    subtype: feature
    type_info:
      label: Control method (feature)
      summary: A batch integration feature control method.
      description: |
        A batch integration control method which outputs a batch-corrected feature space.
  arguments:
    - name: --input
      __merge__: file_dataset.yaml
      direction: input
      required: true
    - name: --output
      direction: output
      __merge__: file_integrated_feature.yaml
      required: true
  # Generic component tests plus the dataset they run against.
  test_resources:
    - type: python_script
      path: /src/common/comp_tests/check_method_config.py
    - type: python_script
      path: /src/common/comp_tests/run_and_check_adata.py
    - path: /resources_test/batch_integration/pancreas
      dest: resources_test/batch_integration/pancreas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
src/tasks/batch_integration/control_methods/no_integration/batch_embed/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import scanpy as sc
import numpy as np

## VIASH START
# Placeholder values used when the script is run directly for debugging
# (viash is expected to substitute this section when building the component).

par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
}

meta = {
    'functionality': 'foo',
    # Read when storing `method_id` below; without it a local run fails
    # with a KeyError.
    'functionality_name': 'foo',
    'config': 'bar',
}

## VIASH END

# Width of the output embedding and upper bound on the number of principal
# components computed per batch.
N_COMPS_MAX = 50

print('Read input', flush=True)
adata = sc.read_h5ad(par['input'])
adata.X = adata.layers["normalized"]
adata.var["highly_variable"] = adata.var["hvg"]

print("Process dataset", flush=True)
# PCA is run separately inside every batch, so nothing is shared across
# batches. Batches with fewer than N_COMPS_MAX cells fill only their leading
# columns; the remaining columns stay zero.
embedding = np.zeros((adata.shape[0], N_COMPS_MAX), dtype=float)
for batch in adata.obs["batch"].unique():
    # Plain boolean numpy mask: usable both for AnnData subsetting and for
    # indexing the numpy embedding array.
    batch_idx = (adata.obs["batch"] == batch).to_numpy()
    n_cells = int(batch_idx.sum())
    n_comps = min(N_COMPS_MAX, n_cells)
    # "arpack" needs strictly fewer components than samples, so fall back to
    # the "full" solver when every cell of the batch gets its own component.
    solver = "full" if n_comps == n_cells else "arpack"
    embedding[batch_idx, :n_comps] = sc.tl.pca(
        adata[batch_idx],
        n_comps=n_comps,
        use_highly_variable=True,
        svd_solver=solver,
        copy=True,
    ).obsm["X_pca"]
adata.obsm["X_emb"] = embedding

print("Store outputs", flush=True)
adata.uns['method_id'] = meta['functionality_name']
adata.write_h5ad(par['output'], compression='gzip')
22 changes: 22 additions & 0 deletions
22
src/tasks/batch_integration/control_methods/no_integration/global_embed/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# use method api spec
__merge__: ../../../api/comp_control_method_embedding.yaml
functionality:
  name: global_embed
  namespace: batch_integration/control_methods/no_integration
  info:
    label: No integration
    summary: "Cells are embedded by PCA on the unintegrated data"
    description: "Cells are embedded by PCA on the unintegrated data"
    # Location of the corresponding implementation in the v1 repository.
    v1:
      path: openproblems/tasks/_batch_integration/_common/methods/baseline.py
      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
    preferred_normalization: log_cp10k
  resources:
    - type: python_script
      path: script.py
platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
  - type: nextflow
    directives:
      label: [ "midtime", "lowmem", "lowcpu"]
26 changes: 26 additions & 0 deletions
26
src/tasks/batch_integration/control_methods/no_integration/global_embed/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import scanpy as sc

## VIASH START
# Placeholder values used when the script is run directly for debugging
# (viash is expected to substitute this section when building the component).

par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
}

meta = {
    'functionality': 'foo',
    # Read when storing `method_id` below; without it a local run fails
    # with a KeyError.
    'functionality_name': 'foo',
    'config': 'bar',
    "resources_dir": "src/tasks/batch_integration/control_methods/"
}

## VIASH END

print('Read input', flush=True)
adata = sc.read_h5ad(par['input'])

print("process dataset", flush=True)
# "No integration" control: the PCA already stored in the input is reported
# unchanged as the embedding.
adata.obsm["X_emb"] = adata.obsm["X_pca"]

print("Store outputs", flush=True)
adata.uns['method_id'] = meta['functionality_name']
adata.write_h5ad(par['output'], compression='gzip')
22 changes: 22 additions & 0 deletions
22
src/tasks/batch_integration/control_methods/no_integration/global_feature/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# use method api spec
__merge__: ../../../api/comp_control_method_feature.yaml
functionality:
  name: global_feature
  namespace: batch_integration/control_methods/no_integration
  info:
    label: No integration
    summary: "Original feature space is not modified"
    description: "Original feature space is not modified"
    # Location of the corresponding implementation in the v1 repository.
    v1:
      path: openproblems/tasks/_batch_integration/_common/methods/baseline.py
      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
    preferred_normalization: log_cp10k
  resources:
    - type: python_script
      path: script.py
platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
  - type: nextflow
    directives:
      label: [ "midtime", "lowmem", "lowcpu"]
27 changes: 27 additions & 0 deletions
27
src/tasks/batch_integration/control_methods/no_integration/global_feature/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import scanpy as sc

## VIASH START
# Placeholder values used when the script is run directly for debugging
# (viash is expected to substitute this section when building the component).

par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
}

meta = {
    'functionality': 'foo',
    # Read when storing `method_id` below; without it a local run fails
    # with a KeyError.
    'functionality_name': 'foo',
    'config': 'bar',
    "resources_dir": "src/tasks/batch_integration/control_methods/"
}

## VIASH END

print('Read input', flush=True)
adata = sc.read_h5ad(par['input'])

# no processing, subset matrix to highly variable genes
# The subset is applied to the object that gets written: a layer must have
# the same shape as the AnnData that holds it, so attaching an HVG-only
# matrix to the un-subset object fails as soon as any gene is not an HVG.
# When every gene is flagged as HVG the output is the same as before.
adata = adata[:, adata.var["hvg"]].copy()
adata.layers['corrected_counts'] = adata.layers["normalized"].copy()

print("Store outputs", flush=True)
adata.uns['method_id'] = meta['functionality_name']
adata.write_h5ad(par['output'], compression='gzip')
23 changes: 23 additions & 0 deletions
23
src/tasks/batch_integration/control_methods/no_integration/global_graph/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# use method api spec
__merge__: ../../../api/comp_control_method_graph.yaml
functionality:
  name: global_graph
  namespace: batch_integration/control_methods/no_integration
  info:
    label: No integration
    summary: "kNN graph is built on the PCA of the unintegrated data"
    description: "Cells are embedded by PCA on the unintegrated data. A kNN graph is built on this PCA."
    # Location of the corresponding implementation in the v1 repository.
    v1:
      path: openproblems/tasks/_batch_integration/_common/methods/baseline.py
      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
    preferred_normalization: log_cp10k
  resources:
    - type: python_script
      path: script.py
    # Helper module imported by script.py (`from utils import _set_uns`).
    - path: ../../utils.py
platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
  - type: nextflow
    directives:
      label: [ "midtime", "lowmem", "lowcpu"]
35 changes: 35 additions & 0 deletions
35
src/tasks/batch_integration/control_methods/no_integration/global_graph/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import scanpy as sc
import sys

## VIASH START
# Placeholder values used when the script is run directly for debugging
# (viash is expected to substitute this section when building the component).

par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
}

meta = {
    'functionality': 'foo',
    # Read when storing `method_id` below; without it a local run fails
    # with a KeyError.
    'functionality_name': 'foo',
    'config': 'bar',
    "resources_dir": "src/tasks/batch_integration/control_methods/"
}

## VIASH END

# add helper scripts to path
sys.path.append(meta["resources_dir"])
from utils import _set_uns

print('Read input', flush=True)
adata = sc.read_h5ad(par['input'])

print("process dataset", flush=True)
# Reuse the precomputed kNN graph: `uns['knn']` names the obsp entries that
# hold it, and those matrices are exposed under the standard
# 'connectivities' / 'distances' keys.
neighbors_map = adata.uns['knn']
adata.obsp['connectivities'] = adata.obsp[neighbors_map['connectivities_key']]
adata.obsp['distances'] = adata.obsp[neighbors_map['distances_key']]
# NOTE(review): `_set_uns` lives in utils.py (not visible here); presumably it
# records the neighbor metadata in `adata.uns` -- confirm against the helper.
_set_uns(adata, neighbors_key='knn')

print("Store outputs", flush=True)
adata.uns['method_id'] = meta['functionality_name']
adata.write_h5ad(par['output'], compression='gzip')
38 changes: 0 additions & 38 deletions
38
src/tasks/batch_integration/control_methods/no_integration_batch/script.py
This file was deleted.
Oops, something went wrong.
14 changes: 6 additions & 8 deletions
14
...methods/random_embed_cell/config.vsh.yaml → ...ntegration/celltype_embed/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,23 @@ | ||
# use method api spec | ||
__merge__: ../../api/comp_control_method_embedding.yaml | ||
__merge__: ../../../api/comp_control_method_embedding.yaml | ||
functionality: | ||
name: random_embed_cell | ||
name: celltype_embed | ||
namespace: batch_integration/control_methods/perfect_integration | ||
info: | ||
label: Random Embedding by Celltype | ||
label: Perfect embedding by cell type | ||
summary: "Cells are embedded as a one-hot encoding of celltype labels" | ||
description: "Cells are embedded as a one-hot encoding of celltype labels" | ||
v1: | ||
path: openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py | ||
path: openproblems/tasks/_batch_integration/_common/methods/baseline.py | ||
commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 | ||
preferred_normalization: log_cp10k | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
- path: ../../utils.py | ||
platforms: | ||
- type: docker | ||
image: ghcr.io/openproblems-bio/base_python:1.0.4 | ||
setup: | ||
- type: python | ||
pypi: | ||
- scikit-learn | ||
- type: nextflow | ||
directives: | ||
label: [midtime, lowmem, lowcpu] |
29 changes: 29 additions & 0 deletions
29
src/tasks/batch_integration/control_methods/perfect_integration/celltype_embed/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import anndata as ad
import sys

## VIASH START
# Placeholder values used when the script is run directly for debugging
# (viash is expected to substitute this section when building the component).

par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
}

meta = {
    'functionality': 'foo',
    # Read when storing `method_id` below; without it a local run fails
    # with a KeyError.
    'functionality_name': 'foo',
    'config': 'bar',
    # Needed to import utils.py below; same location the sibling control
    # method scripts use.
    "resources_dir": "src/tasks/batch_integration/control_methods/"
}

## VIASH END

# add helper scripts to path
sys.path.append(meta["resources_dir"])
from utils import _perfect_embedding

print('Read input', flush=True)
adata = ad.read_h5ad(par['input'])

print('Process data...', flush=True)
# "Perfect integration" control: the embedding is derived purely from the
# cell type labels (one-hot encoding per the component description), so it
# carries no batch information.
adata.obsm["X_emb"] = _perfect_embedding(partition=adata.obs["label"])

print("Store outputs", flush=True)
adata.uns['method_id'] = meta['functionality_name']
adata.write_h5ad(par['output'], compression='gzip')
Oops, something went wrong.