Skip to content

Commit

Permalink
Added analysis scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Rade committed Sep 3, 2023
1 parent 707d86f commit a46d1be
Show file tree
Hide file tree
Showing 84 changed files with 68,360 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
15 changes: 15 additions & 0 deletions Grieb-et-al-2023.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

StripTrailingWhitespace: Yes
129 changes: 129 additions & 0 deletions code/01_cellranger.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
library(dplyr)
library(yaml)

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# scRNA-Seq: CellRanger count
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
nodes = 1
ntasks = 1
ttime = "44:00:00"
mail = "FAIL"
mem = 190000
cpu = 25

manifest = yaml.load_file("manifest.yaml")

# FASTQ files: NCBI GEO GSE234261
fastqs = manifest$grieb$fastq

# The FASTA sequences can be found in the supplementary material of the publication
ref.gex.cilta = paste0(manifest$base$workdata, "references/GRCh38-CiltaCel/")
ref.gex.ide = paste0(manifest$base$workdata, "references/GRCh38-IdeCel/")

# VDJ reference: https://support.10xgenomics.com/single-cell-vdj/software/downloads/latest
ref.vdj = paste0(manifest$base$workdata, "references/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.1.0/")

# ADT reference can be found in the supplementary material of the publication or in "data/" of the repo
ref.features = "data/feature_reference.csv"
out.dir = paste0(manifest$base$workdata, "cohorts/grieb/cellranger/")

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# sbatch
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
write_subscript = function(path, job_id, csv.out){
file.create(path)

write("#!/bin/bash", file = path, append = TRUE)
write("", file = path, append = TRUE)

write(paste("#SBATCH -J", job_id), file = path, append = TRUE)
write(paste("#SBATCH --nodes", nodes), file = path, append = TRUE)
write(paste("#SBATCH --ntasks", ntasks), file = path, append = TRUE)
write(paste("#SBATCH --time", ttime), file = path, append = TRUE)
write(paste("#SBATCH --cpus-per-task", cpu), file = path, append = TRUE)
write(paste("#SBATCH --mem", mem), file = path, append = TRUE)
write(paste("#SBATCH --exclude=ribnode[009,020,006,007]"), file = path, append = TRUE)
write(paste("#SBATCH -e", paste0(job_id, ".e")), file = path, append = TRUE)
write(paste("#SBATCH -o", paste0(job_id, ".o")), file = path, append = TRUE)
write("#SBATCH --mail-type=END,FAIL", file = path, append = TRUE)

write("", file = path, append = TRUE)

write(
paste0(
"cellranger multi",
" --id ", job_id,
" --csv ", csv.out,
" --localcores=", cpu,
" --localmem=", mem/1000
), file = path, append = TRUE)

write("", file = path, append = TRUE)
}

write_multi_csv = function(sample.paths, csv.out){

rna = sample.paths[sample.paths$SOURCE == "Gene Expression", , drop = F]
adt = sample.paths[sample.paths$SOURCE == "Antibody Capture", , drop = F]
tcr = sample.paths[sample.paths$SOURCE == "VDJ-T", , drop = F]
bcr = sample.paths[sample.paths$SOURCE == "VDJ-B", , drop = F]

file.create(csv.out)

write(paste0("[gene-expression]"), file = csv.out, append = TRUE)
write(paste0("reference,", rna$GEX_REF), file = csv.out, append = TRUE)
write(paste0("[vdj]"), file = csv.out, append = TRUE)
write(paste0("reference,", ref.vdj), file = csv.out, append = TRUE)
write(paste0("[feature]"), file = csv.out, append = TRUE)
write(paste0("reference,", ref.features), file = csv.out, append = TRUE)
write(paste0("[libraries]"), file = csv.out, append = TRUE)
write(paste0("fastq_id,fastqs,feature_types"), file = csv.out, append = TRUE)
write(paste0(rna$SAMPLE, ",", rna$PATH, ",", rna$SOURCE), file = csv.out, append = TRUE)
write(paste0(adt$SAMPLE, ",", adt$PATH, ",", adt$SOURCE), file = csv.out, append = TRUE)
write(paste0(tcr$SAMPLE, ",", tcr$PATH, ",", tcr$SOURCE), file = csv.out, append = TRUE)
write(paste0(bcr$SAMPLE, ",", bcr$PATH, ",", bcr$SOURCE), file = csv.out, append = TRUE)

}

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# sample paths
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
fastq.files = list.files(path = fastqs, full.names = T, recursive = T)
df = data.frame(
SAMPLE = basename(fastq.files),
PATH = dirname(fastq.files)
)
df$SAMPLE = gsub("_S.+", "", df$SAMPLE)
df = df %>% dplyr::mutate(
SOURCE = dplyr::case_when(
grepl("_R$", SAMPLE) ~ "Gene Expression",
grepl("_A$", SAMPLE) ~ "Antibody Capture",
grepl("_T$", SAMPLE) ~ "VDJ-T",
grepl("_B$", SAMPLE) ~ "VDJ-B"
)
)
df$SAMPLE_SHORT = gsub("_R|_A|_T|_B", "", df$SAMPLE)

# cilta-cel samples
cilta.samples = c("MXMERZ002A_03", "MXMERZ002A_19", "MXMERZ002A_20", "MXMERZ002A_04", "MXMERZ002A_08")
df$GEX_REF = gsub("_R|_A|_T|_B", "", df$SAMPLE_SHORT) %in% cilta.samples
df$GEX_REF = ifelse(df$GEX_REF == T, ref.gex.cilta, ref.gex.ide)

df = df[!duplicated(df$SAMPLE), ]

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# submit to Slurm
# There is a link with this script under this path: "out.dir".
# The script was executed there.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
for (sample in unique(df$SAMPLE_SHORT)) {
job_id = paste0("multi_", sample)
print(job_id)
csv.out = paste0(out.dir, job_id, ".csv")
sample.paths = subset(df, SAMPLE_SHORT == sample)
write_multi_csv(sample.paths, csv.out)
write_subscript(paste0(out.dir, job_id, ".slurm"), job_id, csv.out)

cmd = paste0("sbatch ", paste0(out.dir, job_id, ".slurm"))
system(cmd)
}
99 changes: 99 additions & 0 deletions code/02_cellranger_to_seurat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
print("# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
print("Grieb")
print("# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Libraries
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
.cran_packages = c("Seurat", "yaml", "dplyr", "doParallel", "parallel", "data.table")
.bioc_packages = c("biomaRt", "scDblFinder", "SingleCellExperiment")

# Install CRAN packages (if not already installed)
.inst = .cran_packages %in% installed.packages()
if (any(!.inst)) {
install.packages(.cran_packages[!.inst], repos = "http://cran.rstudio.com/")
}

# Install bioconductor packages (if not already installed)
.inst <- .bioc_packages %in% installed.packages()
if (any(!.inst)) {
library(BiocManager)
BiocManager::install(.bioc_packages[!.inst], ask = T)
}

list.of.packages = c(.cran_packages, .bioc_packages)

## Loading library
for (pack in list.of.packages) {
suppressMessages(library(
pack,
quietly = TRUE,
verbose = FALSE,
character.only = TRUE
))
}

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Load Rawcounts and create a merged Seurat object
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
manifest = yaml.load_file("manifest.yaml")
work.path = manifest$grieb$data_dl
seurat.path = manifest$grieb$seurat
dir.create(seurat.path, recursive = T)

obj.path = paste0(paste0(seurat.path, "seurat_ori_pub.Rds"))
obj.sub.path = paste0(paste0(seurat.path, "seurat_sub_ori_pub.Rds"))

cellranger.dirs = list.dirs(path = paste0(work.path, "cellranger"), full.names = T, recursive = T)

fltrd.dirs = cellranger.dirs[grepl("sample_filtered_feature_bc_matrix", cellranger.dirs)]
tmp2 = gsub(paste0(work.path, "cellranger/"), "", fltrd.dirs)
tmp2 = gsub("/", "", gsub("out.*", "", tmp2))
tmp2 = gsub("multi_", "", tmp2)
names(fltrd.dirs) = tmp2
fltrd.dirs = fltrd.dirs[!grepl("^bck", names(fltrd.dirs))]

# i = names(fltrd.dirs)[1]
bpparam = BiocParallel::MulticoreParam(workers = 5)
seurat.l = BiocParallel::bplapply(names(fltrd.dirs), function(i) {

id = i
fltrd.counts = Read10X(data.dir = fltrd.dirs[names(fltrd.dirs) == id], gene.column = 2)

seu.obj = CreateSeuratObject(counts = fltrd.counts[[1]], project = id)
seu.obj[["ADT"]] = CreateAssayObject(counts = fltrd.counts[[2]])
seu.obj@meta.data$orig.ident = id
seu.obj@meta.data$orig.ident = factor(seu.obj@meta.data$orig.ident)

# scDblFinder
sce = scDblFinder(GetAssayData(seu.obj, slot="counts"))
stopifnot(identical(colnames(seu.obj), colnames(sce)))
seu.obj@meta.data$scDblFinder_score = sce$scDblFinder.score
seu.obj@meta.data$scDblFinder_class = sce$scDblFinder.class

seu.obj

}, BPPARAM = bpparam)

seurat = merge(seurat.l[[1]], y = seurat.l[2:length(seurat.l)], add.cell.ids = names(seurat.l), project = "Grieb")
seurat@meta.data$orig.ident = factor(seurat@meta.data$orig.ident)


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Assay ADT
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
DefaultAssay(seurat) = "ADT"
adt.ftrs = gsub("-proteona", "", rownames(seurat))

rownames(seurat@assays$ADT@counts) = toupper(adt.ftrs)
seurat@assays$ADT@data = seurat@assays$ADT@counts

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Save
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
DefaultAssay(seurat) = "RNA"
Idents(seurat) = "orig.ident"

saveRDS(seurat, file = obj.path)
saveRDS(subset(x = seurat, downsample = 200), file = obj.sub.path)

Loading

0 comments on commit a46d1be

Please sign in to comment.