From 9dd40a13812d7179ebc4d791c83cf574aaf05d7b Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 07:41:34 -0800 Subject: [PATCH 01/17] include R step in gh actions --- .github/workflows/publish.yml | 7 ++++++- requirements.txt => python_requirements.txt | 0 r_requirements.r | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) rename requirements.txt => python_requirements.txt (100%) create mode 100644 r_requirements.r diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3534baf..7d60593 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -27,9 +27,14 @@ jobs: with: python-version: '3.9' cache: 'pip' - - run: pip install jupyter - run: pip install -r requirements.txt + - name: Install R + uses: r-lib/actions/setup-r@v2 + with: + r-version: '4.3.2' # The R version to download (if necessary) and use. + - run: Rscript r_requirements.r + - name: Render uses: quarto-dev/quarto-actions/render@v2 with: diff --git a/requirements.txt b/python_requirements.txt similarity index 100% rename from requirements.txt rename to python_requirements.txt diff --git a/r_requirements.r b/r_requirements.r new file mode 100644 index 0000000..5af6131 --- /dev/null +++ b/r_requirements.r @@ -0,0 +1,5 @@ +install.packages(c()"BiocManager", "devtools"), repos='http://cran.us.r-project.org') +BiocManager::install(version = "3.18") + +# install alabaster +BiocManager::install(c("alabaster")) \ No newline at end of file From c1e06a27024228b4ccdf70d85e5e53d13910eae9 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 07:43:25 -0800 Subject: [PATCH 02/17] fix name of python reqs --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7d60593..fe18d0e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -27,7 +27,7 @@ jobs: with: python-version: '3.9' cache: 
'pip' - - run: pip install -r requirements.txt + - run: pip install -r python_requirements.txt - name: Install R uses: r-lib/actions/setup-r@v2 From 9075541650080e321a7886c812b0399009e1ae04 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 07:49:50 -0800 Subject: [PATCH 03/17] rename python reqs --- .github/workflows/publish.yml | 2 +- python_requirements.txt => requirements.txt | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename python_requirements.txt => requirements.txt (100%) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fe18d0e..7d60593 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -27,7 +27,7 @@ jobs: with: python-version: '3.9' cache: 'pip' - - run: pip install -r python_requirements.txt + - run: pip install -r requirements.txt - name: Install R uses: r-lib/actions/setup-r@v2 diff --git a/python_requirements.txt b/requirements.txt similarity index 100% rename from python_requirements.txt rename to requirements.txt From 8dbf2036b6926a10079cac36263a03d56352ece0 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 07:55:03 -0800 Subject: [PATCH 04/17] use r-dep action --- .github/workflows/publish.yml | 5 +++++ r_requirements.r | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7d60593..925107b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -33,6 +33,11 @@ jobs: uses: r-lib/actions/setup-r@v2 with: r-version: '4.3.2' # The R version to download (if necessary) and use. 
+ - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 2 + extra-packages: | + any::BiocManager - run: Rscript r_requirements.r - name: Render diff --git a/r_requirements.r b/r_requirements.r index 5af6131..d871089 100644 --- a/r_requirements.r +++ b/r_requirements.r @@ -1,4 +1,4 @@ -install.packages(c()"BiocManager", "devtools"), repos='http://cran.us.r-project.org') +# install.packages(c("BiocManager", "devtools"), repos='http://cran.us.r-project.org') BiocManager::install(version = "3.18") # install alabaster From 3ecb9dce69bfa15c73f7ebbaf2c84207057340d5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 08:05:48 -0800 Subject: [PATCH 05/17] yet another r package dep --- .github/workflows/publish.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 925107b..017dc7c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -15,7 +15,7 @@ jobs: contents: write steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 @@ -36,9 +36,10 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: 2 - extra-packages: | + packages: | any::BiocManager - - run: Rscript r_requirements.r + any::alabaster + # - run: Rscript r_requirements.r - name: Render uses: quarto-dev/quarto-actions/render@v2 From 896747645a706633aafa18c1a3cdcde139603dd7 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 09:02:05 -0800 Subject: [PATCH 06/17] test the R workflow --- .github/workflows/publish.yml | 1 + chapters/interop.qmd | 2 +- chapters/workflow.qmd | 17 +++++++++++++++++ r_requirements.r | 7 ++++--- 4 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 chapters/workflow.qmd diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 017dc7c..e108ea9 100644 --- 
a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -39,6 +39,7 @@ jobs: packages: | any::BiocManager any::alabaster + any::scRNAseq # - run: Rscript r_requirements.r - name: Render diff --git a/chapters/interop.qmd b/chapters/interop.qmd index bf54cc8..2152eb8 100644 --- a/chapters/interop.qmd +++ b/chapters/interop.qmd @@ -1,6 +1,6 @@ # Interop with R -The [rds2py](https://github.com/BiocPy/rds2py) package serves as a Python interface to the [rds2cpp](https://github.com/LTLA/rds2cpp) library, enabling direct reading of RDS files within Python. This eliminates the need for additional data conversion tools or intermediate formats, streamlining the transition between Python and R for seamless analysis. +The [rds2py](https://github.com/BiocPy/rds2py) package provides Python bindings to the [rds2cpp](https://github.com/LTLA/rds2cpp) library, enabling direct reading of RDS files within Python. This eliminates the need for additional data conversion tools or intermediate formats, streamlining the transition between Python and R for seamless analysis. One notable feature is the use of memory views (excluding strings) to access the same memory from C++ in Python, facilitated through Cython. This approach is particularly advantageous for handling large datasets, as it avoids unnecessary duplication of data. 
diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd new file mode 100644 index 0000000..54c41b8 --- /dev/null +++ b/chapters/workflow.qmd @@ -0,0 +1,17 @@ +```{r} +library(scRNA-seq) + +sce <- ZilionisLungData() + +library(alabaster) +dir_path <- paste(getwd(), "datasets", sep="/") +saveObject(sce, path=paste(dir_path, "zilinoislung", sep="/")) +``` + + +```{python} +from dolomite_base import read_object + +obj = read_object("./datasets/zilinoislung") +print(obj) +``` \ No newline at end of file diff --git a/r_requirements.r b/r_requirements.r index d871089..422e9c6 100644 --- a/r_requirements.r +++ b/r_requirements.r @@ -1,5 +1,6 @@ -# install.packages(c("BiocManager", "devtools"), repos='http://cran.us.r-project.org') -BiocManager::install(version = "3.18") +install.packages(c("BiocManager", "devtools"), repos='http://cran.us.r-project.org') +BiocManager::install(version = "3.18", ask=FALSE) # install alabaster -BiocManager::install(c("alabaster")) \ No newline at end of file +BiocManager::install(c("alabaster", "scRNAseq")) + From 89d26181d1abbbfc9a88d613cf3122165c70ec7d Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 09:22:46 -0800 Subject: [PATCH 07/17] include combined notebook --- _quarto.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_quarto.yml b/_quarto.yml index b8b3a3a..f8e4dae 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -43,6 +43,7 @@ book: - chapters/experiments/extending_se.qmd - chapters/experiments/multiassay_expt.qmd - chapters/interop.qmd + - chapters/workflow.qmd - part: chapters/extras/index.qmd chapters: - chapters/extras/iranges.qmd From 6ff29c7061ffbc2553b73fdcae49fcfbc69f9ca5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 09:37:41 -0800 Subject: [PATCH 08/17] fix package name --- chapters/workflow.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 54c41b8..26cf774 100644 --- a/chapters/workflow.qmd +++ 
b/chapters/workflow.qmd @@ -1,5 +1,5 @@ ```{r} -library(scRNA-seq) +library(scRNAseq) sce <- ZilionisLungData() From ee3bae03b8bdd056c032dc908ad883eb7f1940d5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 11:26:32 -0800 Subject: [PATCH 09/17] use bioc devel --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e108ea9..87286e3 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -32,7 +32,7 @@ jobs: - name: Install R uses: r-lib/actions/setup-r@v2 with: - r-version: '4.3.2' # The R version to download (if necessary) and use. + r-version: 'devel' - uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: 2 From aa27e6355cc201e8293e7f0f86cb76969c457d2d Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 12:37:48 -0800 Subject: [PATCH 10/17] install knitr and markdown --- .github/workflows/publish.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 87286e3..8109da5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -40,6 +40,8 @@ jobs: any::BiocManager any::alabaster any::scRNAseq + any::rmarkdown + any::knitr # - run: Rscript r_requirements.r - name: Render From d9feeb409cf7919b03f825e683f7f2c46b3bda43 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 15:25:33 -0800 Subject: [PATCH 11/17] use current working dir --- chapters/workflow.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 26cf774..9b36ff9 100644 --- a/chapters/workflow.qmd +++ b/chapters/workflow.qmd @@ -4,8 +4,8 @@ library(scRNAseq) sce <- ZilionisLungData() library(alabaster) -dir_path <- paste(getwd(), "datasets", sep="/") -saveObject(sce, path=paste(dir_path, "zilinoislung", sep="/")) +# dir_path <- 
paste(getwd(), "datasets", sep="/") +saveObject(sce, path=paste(getwd(), "zilinoislung", sep="/")) ``` From ba256d37acbf298f2b7ab79ca77aee654207022d Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 18:25:45 -0800 Subject: [PATCH 12/17] lets see if this works --- .github/workflows/publish.yml | 1 + chapters/workflow.qmd | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8109da5..6d5a8fb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,6 +42,7 @@ jobs: any::scRNAseq any::rmarkdown any::knitr + any::reticulate # - run: Rscript r_requirements.r - name: Render diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 9b36ff9..42655d9 100644 --- a/chapters/workflow.qmd +++ b/chapters/workflow.qmd @@ -1,3 +1,7 @@ +--- +engine: knitr +--- + ```{r} library(scRNAseq) From 301dd439943f30cf73b5bfcf6fcbb23ae6c1a77c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Tue, 20 Feb 2024 19:04:14 -0800 Subject: [PATCH 13/17] set correct path for file in python chunk --- chapters/workflow.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 42655d9..85eaa6b 100644 --- a/chapters/workflow.qmd +++ b/chapters/workflow.qmd @@ -16,6 +16,6 @@ saveObject(sce, path=paste(getwd(), "zilinoislung", sep="/")) ```{python} from dolomite_base import read_object -obj = read_object("./datasets/zilinoislung") +obj = read_object("./zilinoislung") print(obj) ``` \ No newline at end of file From a2ce3a9767d48a2bbc60222231fd4c0ac01a0021 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 21 Feb 2024 07:42:07 -0800 Subject: [PATCH 14/17] use bioc devel container --- .github/workflows/publish.yml | 45 +++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6d5a8fb..3a00509 100644 --- 
a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,8 +11,11 @@ name: Quarto Publish jobs: build-deploy: runs-on: ubuntu-latest + container: bioconductor/bioconductor_docker:devel + permissions: contents: write + steps: - name: Check out repository uses: actions/checkout@v4 @@ -29,20 +32,36 @@ jobs: cache: 'pip' - run: pip install -r requirements.txt - - name: Install R - uses: r-lib/actions/setup-r@v2 - with: - r-version: 'devel' - - uses: r-lib/actions/setup-r-dependencies@v2 + - name: Set directories + run: | + echo "R_PKG_DIR=${R_HOME}/site-library" >> $GITHUB_ENV + + - name: Restore the package directory + uses: actions/cache@v3 with: - cache-version: 2 - packages: | - any::BiocManager - any::alabaster - any::scRNAseq - any::rmarkdown - any::knitr - any::reticulate + path: ${{ env.R_PKG_DIR }} + key: check-packages + + - name: Install dependencies + shell: Rscript {0} + run: | + BiocManager::install(c("knitr", "reticulate", "alabaster", "scRNAseq", "rmarkdown")) + + + # - name: Install R + # uses: r-lib/actions/setup-r@v2 + # with: + # r-version: 'devel' + # - uses: r-lib/actions/setup-r-dependencies@v2 + # with: + # cache-version: 2 + # packages: | + # any::BiocManager + # any::alabaster + # any::scRNAseq + # any::rmarkdown + # any::knitr + # any::reticulate # - run: Rscript r_requirements.r - name: Render From e1ed8b1714dce4d5aa4fb09b1586cecf597158bd Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 21 Feb 2024 07:44:22 -0800 Subject: [PATCH 15/17] nvm revert --- .github/workflows/publish.yml | 45 ++++++++++------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3a00509..6d5a8fb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,11 +11,8 @@ name: Quarto Publish jobs: build-deploy: runs-on: ubuntu-latest - container: bioconductor/bioconductor_docker:devel - permissions: contents: write - 
steps: - name: Check out repository uses: actions/checkout@v4 @@ -32,36 +29,20 @@ jobs: cache: 'pip' - run: pip install -r requirements.txt - - name: Set directories - run: | - echo "R_PKG_DIR=${R_HOME}/site-library" >> $GITHUB_ENV - - - name: Restore the package directory - uses: actions/cache@v3 + - name: Install R + uses: r-lib/actions/setup-r@v2 with: - path: ${{ env.R_PKG_DIR }} - key: check-packages - - - name: Install dependencies - shell: Rscript {0} - run: | - BiocManager::install(c("knitr", "reticulate", "alabaster", "scRNAseq", "rmarkdown")) - - - # - name: Install R - # uses: r-lib/actions/setup-r@v2 - # with: - # r-version: 'devel' - # - uses: r-lib/actions/setup-r-dependencies@v2 - # with: - # cache-version: 2 - # packages: | - # any::BiocManager - # any::alabaster - # any::scRNAseq - # any::rmarkdown - # any::knitr - # any::reticulate + r-version: 'devel' + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 2 + packages: | + any::BiocManager + any::alabaster + any::scRNAseq + any::rmarkdown + any::knitr + any::reticulate # - run: Rscript r_requirements.r - name: Render From ff3578a95b375a5e645efba7fea3d9d338be3fa5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 21 Feb 2024 08:22:36 -0800 Subject: [PATCH 16/17] setup python reqs and finish section on workflow --- chapters/workflow.qmd | 88 ++++++++++++++++++++++++++++++++++++++++--- requirements.txt | 4 +- 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 85eaa6b..2c8040d 100644 --- a/chapters/workflow.qmd +++ b/chapters/workflow.qmd @@ -2,20 +2,96 @@ engine: knitr --- +# Interchange data between Python and R + +In this section, we will demonstrate a workflow that stores genomic data in language agnostic formats to provide easy access to datasets and analysis results across multiple programming frameworks such as R and Python. 
This functionality is provided by the [ArtifactDB](https://github.com/artifactdb) framework. + +To get started, we will download the zilionis lung dataset from the [scRNAseq](https://bioconductor.org/packages/release/data/experiment/html/scRNAseq.html) package. Then, we will store this dataset in language agnostic format using the [alabaster suite](https://github.com/ArtifactDB/alabaster.base) of R packages. + ```{r} library(scRNAseq) +library(alabaster) sce <- ZilionisLungData() - -library(alabaster) -# dir_path <- paste(getwd(), "datasets", sep="/") saveObject(sce, path=paste(getwd(), "zilinoislung", sep="/")) ``` +:::{.callout-note} +You can also save this dataset as an RDS object and access it in Python. Check out the [interop with R](./interop.qmd) section for more details. +::: + +We can now load this dataset in Python using the [dolomite suite](https://github.com/ArtifactDB/dolomite-base) of Python packages. Both dolomite and alabaster are part of the ArtifactDB ecosystem to read artifacts stored in language agnostic formats. ```{python} from dolomite_base import read_object -obj = read_object("./zilinoislung") -print(obj) -``` \ No newline at end of file +data = read_object("./zilinoislung") +print(data) +``` + +To illustrate this workflow, we will use the [CellTypist](https://github.com/Teichlab/celltypist) model to annotate cell types for this dataset. CellTypist works on an `AnnData` representation. + +```{python} +adata = data.to_anndata() +``` + +Before we annotate, lets download the human lung atlas model from celltypist. 
+ +```{python} +import celltypist +from celltypist import models + +models.download_models() +model_name = "Human_Lung_Atlas.pkl" +model = models.Model.load(model = model_name) +print(model) +``` + +Now lets annotate our dataset + +```{python} +predictions = celltypist.annotate(adata, model = model_name, majority_voting = True) +print(predictions.predicted_labels) +``` + +:::{.callout-note} +The celltypist workflow is based on the tutorial described [here](https://colab.research.google.com/github/Teichlab/celltypist/blob/main/docs/notebook/celltypist_tutorial.ipynb#scrollTo=postal-chicken). +::: + +Now lets get the `AnnData` object with the predicted labels embedded into the `obs` dataframe. + +```{python} +adata = predictions.to_adata() +``` + +We can now reverse the workflow and save this object into an Artifactdb format from Python. But first the object needs to be converted to a `SingleCellExperiment` class. + +```{python} +from singlecellexperiment import SingleCellExperiment + +sce = SingleCellExperiment.from_anndata(adata) +print(sce) +``` + +We now use the dolomite package to save it into language agnostic format. + +```{python} +import dolomite_base + +dolomite_base.save_object(sce, "./zilinoislung_with_celltypist") +``` + +Finally read the object back in R. + +```{r} +sce_with_celltypist = readObject(path=paste(getwd(), "zilinoislung_with_celltypist", sep="/")) +sce_with_celltypist +``` + +and that's it. Using these two generics read: `readObject`(R), `read_object`(Python), and save: `saveObject`(R), `save_object`(Python), you can save most Bioconductor objects into language agnostic formats. + +---- + +## Further reading + +- ArtifactDB GitHub organization - https://github.com/ArtifactDB. 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1896194..d823c5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,6 @@ mudata delayedarray[dask] joblib dolomite -hdf5array \ No newline at end of file +hdf5array +celltypist +rpy2 \ No newline at end of file From 46b82856d147d5b2d8fd68e259ce4a8b4c2cc759 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 21 Feb 2024 08:29:05 -0800 Subject: [PATCH 17/17] edits to the text --- chapters/workflow.qmd | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/chapters/workflow.qmd b/chapters/workflow.qmd index 2c8040d..a9dae4d 100644 --- a/chapters/workflow.qmd +++ b/chapters/workflow.qmd @@ -4,9 +4,9 @@ engine: knitr # Interchange data between Python and R -In this section, we will demonstrate a workflow that stores genomic data in language agnostic formats to provide easy access to datasets and analysis results across multiple programming frameworks such as R and Python. This functionality is provided by the [ArtifactDB](https://github.com/artifactdb) framework. +In this section, we will illustrate a workflow that utilizes language-agnostic representations for storing genomic data, facilitating seamless access to datasets and analysis results across multiple programming frameworks such as R and Python. The [ArtifactDB](https://github.com/artifactdb) framework supports this functionality. -To get started, we will download the zilionis lung dataset from the [scRNAseq](https://bioconductor.org/packages/release/data/experiment/html/scRNAseq.html) package. Then, we will store this dataset in language agnostic format using the [alabaster suite](https://github.com/ArtifactDB/alabaster.base) of R packages. +To begin, we will download the "zilionis lung" dataset from the [scRNAseq](https://bioconductor.org/packages/release/data/experiment/html/scRNAseq.html) package. 
Subsequently, we will store this dataset in a language-agnostic format using the [alabaster suite](https://github.com/ArtifactDB/alabaster.base) of R packages. ```{r} library(scRNAseq) @@ -17,10 +17,10 @@ saveObject(sce, path=paste(getwd(), "zilinoislung", sep="/")) ``` :::{.callout-note} -You can also save this dataset as an RDS object and access it in Python. Check out the [interop with R](./interop.qmd) section for more details. +Additionally, you can save this dataset as an RDS object for access in Python. Refer to the [interop with R](./interop.qmd) section for more details. ::: -We can now load this dataset in Python using the [dolomite suite](https://github.com/ArtifactDB/dolomite-base) of Python packages. Both dolomite and alabaster are part of the ArtifactDB ecosystem to read artifacts stored in language agnostic formats. +We can now load this dataset in Python using the [dolomite suite](https://github.com/ArtifactDB/dolomite-base) of Python packages. Both dolomite and alabaster are integral parts of the ArtifactDB ecosystem designed to read artifacts stored in language-agnostic formats. ```{python} from dolomite_base import read_object @@ -29,13 +29,13 @@ data = read_object("./zilinoislung") print(data) ``` -To illustrate this workflow, we will use the [CellTypist](https://github.com/Teichlab/celltypist) model to annotate cell types for this dataset. CellTypist works on an `AnnData` representation. +To demonstrate this workflow, we will employ the [CellTypist](https://github.com/Teichlab/celltypist) model to annotate cell types for this dataset. CellTypist operates on an AnnData representation. ```{python} adata = data.to_anndata() ``` -Before we annotate, lets download the human lung atlas model from celltypist. +Before annotation, let's download the "human lung atlas" model from celltypist. 
```{python} import celltypist @@ -47,7 +47,7 @@ model = models.Model.load(model = model_name) print(model) ``` -Now lets annotate our dataset +Now, let's annotate our dataset. ```{python} predictions = celltypist.annotate(adata, model = model_name, majority_voting = True) @@ -58,13 +58,13 @@ print(predictions.predicted_labels) The celltypist workflow is based on the tutorial described [here](https://colab.research.google.com/github/Teichlab/celltypist/blob/main/docs/notebook/celltypist_tutorial.ipynb#scrollTo=postal-chicken). ::: -Now lets get the `AnnData` object with the predicted labels embedded into the `obs` dataframe. +Next, let's retrieve the `AnnData` object with the predicted labels embedded into the `obs` dataframe. ```{python} adata = predictions.to_adata() ``` -We can now reverse the workflow and save this object into an Artifactdb format from Python. But first the object needs to be converted to a `SingleCellExperiment` class. +We can now reverse the workflow and save this object into an Artifactdb format from Python. However, the object needs to be converted to a `SingleCellExperiment` class first. Read more about our experiment representations [here](./experiments/singlecell_expt.qmd). ```{python} from singlecellexperiment import SingleCellExperiment @@ -73,22 +73,20 @@ sce = SingleCellExperiment.from_anndata(adata) print(sce) ``` -We now use the dolomite package to save it into language agnostic format. - +We use the dolomite package to save it into a language-agnostic format. ```{python} import dolomite_base dolomite_base.save_object(sce, "./zilinoislung_with_celltypist") ``` -Finally read the object back in R. - +Finally, read the object back in R. ```{r} sce_with_celltypist = readObject(path=paste(getwd(), "zilinoislung_with_celltypist", sep="/")) sce_with_celltypist ``` -and that's it. 
Using these two generics read: `readObject`(R), `read_object`(Python), and save: `saveObject`(R), `save_object`(Python), you can save most Bioconductor objects into language agnostic formats. +And that concludes the workflow. Leveraging the generic **read** functions `readObject` (R) and `read_object` (Python), along with the **save** functions `saveObject` (R) and `save_object` (Python), you can seamlessly store most Bioconductor objects in language-agnostic formats. ----