-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsigTools_runthrough.R
executable file
·166 lines (126 loc) · 6.69 KB
/
sigTools_runthrough.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#! /usr/bin/env Rscript
library(optparse)
# Calling call_hrdetect.R
basedir <- paste(Sys.getenv(c("BASE_DIR")), sep='/')
source(paste0(basedir, "/call_hrdetect.R"))
option_list = list(
make_option(c("-s", "--sampleName"), type="character", default=NULL, help="sample name", metavar="character"),
make_option(c("-r", "--SVrefSigs"), type="character", default=NULL, help="structural variant reference signatures", metavar="character"),
make_option(c("-R", "--SNVrefSigs"), type="character", default=NULL, help="single nucleotide polymorphism reference signatures", metavar="character"),
make_option(c("-S", "--snvFile"), type="character", default=NULL, help="SNV vcf file", metavar="character"),
make_option(c("-I", "--indelFile"), type="character", default=NULL, help="indel vcf file", metavar="character"),
make_option(c("-V", "--SVFile"), type="character", default=NULL, help="Structural variant file", metavar="character"),
make_option(c("-L", "--LOHFile"), type="character", default=NULL, help="LOH file", metavar="character"),
make_option(c("-b", "--bootstraps"), type="numeric", default=2500, help="number of bootstraps for signature detection", metavar="numeric"),
make_option(c("-g", "--genomeVersion"), type="character", default="hg38", help="genome version", metavar="character"),
make_option(c("-i", "--indelCutoff"), type="numeric", default=50, help="minimum number of indels for analysis", metavar="numeric"),
make_option(c("-c", "--snvCutoff"), type="numeric", default=50, help="minimum number of snvs for analysis", metavar="numeric"),
make_option(c("-v", "--svCutoff"), type="numeric", default=1, help="minimum number of SVs for analysis", metavar="numeric")
)
opt_parser <- OptionParser(option_list=option_list, add_help_option=FALSE)
opt <- parse_args(opt_parser)
boots <- opt$bootstraps
genomeVersion <- opt$genomeVersion
indelCutoff <- opt$indelCutoff
snvCutoff <- opt$snvCutoff
svCutoff <- opt$svCutoff
sample_name <- opt$sampleName
SVrefSigs <- opt$SVrefSigs
SNVrefSigs <- opt$SNVrefSigs
snv_vcf_location <- opt$snvFile
indel_vcf_location <- opt$indelFile
SV_vcf_location <- opt$SVFile
LOH_seg_file <- opt$LOHFile
## import signatures
SV_sigs <- read.table(SVrefSigs, sep = "\t", header = TRUE)
SNV_sigs <- read.table(SNVrefSigs, sep = "\t", header = TRUE)
#### PREPROCESS ALL FOUR DATA TYPES ####
missing_data = FALSE
#### pre-process SEGMENT data ####
cat('pre-processing LOH data\n')
seg.data <- read.table(LOH_seg_file, sep="\t", header=TRUE)
#### pre-process In/Del data ####
cat('pre-processing in/del data\n')
indel_vcf <- try(read.table(indel_vcf_location, sep = "\t", header = TRUE))
if("try-error" %in% class(indel_vcf)){
ID.catalog.JSON <- jsonlite::toJSON(list("QC"="PASS","count"=0,"Results"="NA"),pretty=TRUE,auto_unbox=TRUE)
indels_class = NULL
}else{
indels_class <- vcfToIndelsClassification(indel_vcf_location, sample_name, genome.v = genomeVersion)
ID.catalog.JSON <- jsonlite::toJSON(list("QC"="PASS","count"=nrow(indel_vcf),"Results"=indels_class$count_proportion),pretty=TRUE,auto_unbox=TRUE)
if( indels_class$count_proportion$all.del < indelCutoff ){indels_class = NULL}
}
write(ID.catalog.JSON, file = paste0(sample_name,".catalog.ID.json"))
#### pre-process Structural Variant data ####
cat('pre-processing SV data\n')
SV_vcf <- try(read.table(SV_vcf_location, sep = "\t", header = TRUE))
if("try-error" %in% class(SV_vcf)){
SV.JSON <- jsonlite::toJSON(list("QC"="PASS","count"=0,"catalog"="NA","exposures"="NA"),pretty=TRUE,auto_unbox=TRUE)
SV_catalogue = NULL
}else{
SV_catalogue <- SV_vcf_cataloger(SV_vcf_location, sample_name)
SV_ls <- summarize_SVs(SV_catalogue, SV_sigs, sample_name, nboot = 2000)
SV_ls.preJSON <- list("QC"="PASS",
"count"= nrow(SV_vcf),
"catalog"=as.data.frame(t(SV_ls$SV_sigtools_catalog[1])),
"exposures"=SV_ls$SV_sigtools_exposures)
SV.JSON <- jsonlite::toJSON(SV_ls.preJSON,pretty=TRUE,auto_unbox=TRUE)
if( nrow(SV_vcf) < svCutoff ){SV_catalogue = NULL}
}
write(SV.JSON, file = paste0(sample_name,".exposures.SV.json"))
#### pre-process SNV data ####
cat('pre-processing SNV data\n')
snv_df <- try(read.table(snv_vcf_location, sep = "\t", header = FALSE))
if("try-error" %in% class(snv_df)){
SBS.JSON <- jsonlite::toJSON(list("QC"="PASS",
"count"=0,
"catalog"="NA",
"exposures"="NA")
,pretty=TRUE,auto_unbox=TRUE)
snv_df = NULL
}else{
names(snv_df)[c(1,2,4,5)] <- c("Chromosome", "Start_Position", "Reference_Allele", "Allele")
snv_df$Tumor_Sample_Barcode <- sample_name
snv_catalogue <- mut.to.sigs.input(mut.ref=snv_df,
sample.id="Tumor_Sample_Barcode",
chr="Chromosome",
pos="Start_Position",
ref="Reference_Allele",
alt="Allele",
bsg=BSgenome.Hsapiens.UCSC.hg38)
SNV_ls <- summarize_SNVs_dcSigs(snv_df, SNV_sigs, sample_name, subsample_proportion=1)
SNV_ls.preJSON <- list("QC"="PASS",
"count"= nrow(snv_df),
"catalog"=snv_catalogue,
"exposures"=SNV_ls
)
SBS.JSON <- jsonlite::toJSON(SNV_ls.preJSON,pretty=TRUE,auto_unbox=TRUE)
if( nrow(snv_df) < snvCutoff ){snv_df = NULL}
}
write(SBS.JSON, file = paste0(sample_name,".exposures.SBS.json"))
#### run HRDetect ####
if(missing_data == TRUE) {
cat("some data missing, no HRD call!\n")
hrdetect.json <- jsonlite::toJSON(
list("sample_name"=sample_name,
"QC"="FAIL",
"hrdetect_call"="NA"
),pretty=TRUE,auto_unbox=TRUE)
} else {
quantiles <-
call_hrdetect(boots ,
sample_name ,
SV_sigs ,
SNV_sigs ,
snv_df ,
indels_class ,
SV_catalogue ,
seg.data
)
hrdetect_call <- list("sample_name"=sample_name,
"QC"="PASS",
"hrdetect_call"=quantiles
)
hrdetect.json <- jsonlite::toJSON(hrdetect_call,pretty=TRUE,auto_unbox=TRUE)
}
write(hrdetect.json, file = paste0(sample_name,".signatures.json"))