Skip to content

Commit

Permalink
Add clustering and spatial plot in integration report
Browse files Browse the repository at this point in the history
  • Loading branch information
cavenel committed Dec 16, 2024
1 parent 138ba83 commit c9f3dec
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 17 deletions.
105 changes: 96 additions & 9 deletions bin/integration.qmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "Data Pre-processing and Integration with Quality Controls"
title: "Data integration and batch correction"
format:
nf-core-html: default
jupyter: python3
Expand All @@ -17,20 +17,23 @@ before merging them into a single integrated dataset for downstream analysis.
# Import necessary packages
import scanpy as sc
import anndata as an
import math
import matplotlib.pyplot as plt
import seaborn as sns
import scanorama
import spatialdata
import os
import glob
import scipy.sparse as sp
from matplotlib.legend import Legend
```

```{python}
# Parameter cell
#| tags: [parameters]
#| echo: false
input_sdata = "merged_sdata.zarr" # Input: SpatialData file
cluster_resolution = 1 # Resolution for Leiden clustering
n_hvgs = 2000 # Parameter: Number of HVGs to use for analyses
artifact_dir = "artifacts" # Parameter: artifact directory
output_sdata = "integrated_sdata.zarr" # Output: Integrated SpatialData file
Expand Down Expand Up @@ -99,24 +102,25 @@ adata_integrated

# Dimensionality Reduction

Finally, we perform dimensionality reduction to visualize the data.
We perform dimensionality reduction to visualize the data.
We compare the UMAP plots before and after batch correction.

```{python}
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
fig, axes = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)
adata_integrated_copy = adata_integrated.copy()
# Compute neighbors and UMAP before integration
sc.pp.neighbors(adata_integrated)
sc.tl.umap(adata_integrated)
sc.pp.neighbors(adata_integrated_copy)
sc.tl.umap(adata_integrated_copy)
# Plot UMAP before integration
sc.pl.umap(
adata_integrated,
adata_integrated_copy,
color=["library_id"],
palette=sc.pl.palettes.default_20,
ax=axes[0],
show=False
show=False,
legend_loc=None, # Disable the individual legend
)
axes[0].set_title("Before integration")
Expand All @@ -130,10 +134,93 @@ sc.pl.umap(
color=["library_id"],
palette=sc.pl.palettes.default_20,
ax=axes[1],
show=False
show=False,
legend_loc=None, # Disable the individual legend
)
axes[1].set_title("After integration")
# Add a shared legend at the bottom
# Extract unique categories and colors from "library_id"
library_ids = adata_integrated.obs["library_id"].cat.categories
colors = sc.pl.palettes.default_20[:len(library_ids)]
# Create custom legend elements
legend_elements = [
plt.Line2D([0], [0], marker="o", color=color, linestyle="", markersize=8, label=lib_id)
for lib_id, color in zip(library_ids, colors)
]
# Add the legend to the figure
fig.legend(
handles=legend_elements,
loc="lower center",
ncol=len(library_ids) // 2, # Adjust columns based on the number of categories
bbox_to_anchor=(0.5, -0.05), # Position the legend outside the plot
frameon=False,
)
# Adjust layout and display the figure
plt.tight_layout(rect=[0, 0.1, 1, 1]) # Leave space for the legend
plt.show()
```

# Clustering

We perform clustering on the integrated dataset. Principal
Component Analysis (PCA) is applied to reduce dimensionality. The Leiden
algorithm is employed for clustering with a given resolution.

```{python}
sc.pp.pca(adata_integrated)
sc.pp.neighbors(adata_integrated)
sc.tl.leiden(adata_integrated, key_added="clusters", resolution=cluster_resolution)
```

We compare the clusters of the integrated samples before and after batch correction.

```{python}
# Plot UMAP after integration
sc.pl.umap(
adata_integrated,
color=["clusters"],
palette=sc.pl.palettes.godsnot_102
)
```

# Spatial Visualization

Finally, we visualize the spatial distribution of clusters on the tissue image of all samples.

```{python}
# We have N samples, we will create a 2xN/2 grid of plots
n_samples = len(sdata.tables)
n_cols = math.ceil(n_samples / 2)
n_rows = 2
# Create a figure with subplots
fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
# Flatten the axes for easier indexing
axes = axes.flatten()
# Plot spatial distribution of clusters for each sample
for i, (table_name, adata) in enumerate(sdata.tables.items()):
sample_name = table_name.replace("_table", "")
sc.pl.spatial(
adata,
img_key="hires",
library_id=f"{sample_name}_hires_image",
color="clusters",
size=1.25,
ax=axes[i],
show=False,
)
axes[i].set_title(f"Sample: {sample_name}")
# Hide any remaining axes
for j in range(i + 1, len(axes)):
axes[j].axis("off")
# Adjust layout and display the figure
plt.tight_layout()
plt.show()
Expand Down
7 changes: 4 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ params {
n_top_svgs = 14

// Data aggregation
merge_sdata = false
integrate_sdata = false
integration_n_hvgs = 2000
merge_sdata = false
integrate_sdata = false
integration_cluster_resolution = 1.0
integration_n_hvgs = 2000

// MultiQC options
multiqc_config = null
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,13 @@
"description": "Integrate per-sample SpatialData objects into one and output a integration report.",
"fa_icon": "fas fa-arrows-to-dot"
},
"integration_cluster_resolution": {
"type": "number",
"default": 1,
"description": "The resolution for the clustering of the spots after integration of samples.",
"help_text": "The resolution controls the coarseness of the clustering, where a higher resolution leads to more clusters.",
"fa_icon": "fas fa-circle-nodes"
},
"integration_n_hvgs": {
"type": "integer",
"default": 2000,
Expand Down
11 changes: 6 additions & 5 deletions subworkflows/local/aggregation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ workflow AGGREGATION {
ch_integrated_adata = Channel.empty()
if (params.integrate_sdata) {
integration_params = [
input_sdata: "merged_sdata.zarr",
n_hvgs: params.integration_n_hvgs,
artifact_dir: "artifacts",
output_adata: "integrated_adata.h5ad",
output_sdata: "integrated_sdata.zarr"
input_sdata: "merged_sdata.zarr",
cluster_resolution: params.integration_cluster_resolution,
n_hvgs: params.integration_n_hvgs,
artifact_dir: "artifacts",
output_adata: "integrated_adata.h5ad",
output_sdata: "integrated_sdata.zarr"
]
INTEGRATE_SDATA (
[[id:"integration"], integration_notebook],
Expand Down

0 comments on commit c9f3dec

Please sign in to comment.