-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.nf
164 lines (140 loc) · 6.19 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env nextflow
/*
========================================================================================
UCT-SRST2 P I P E L I N E
========================================================================================
MLST NEXTFLOW PIPELINE USING SRST2
----------------------------------------------------------------------------------------
*/
/**
 * Logs the pipeline usage text through log.info.
 *
 * The text is a multi-line GString that interpolates params.version and is
 * passed through stripIndent(), so the uniform indentation used below is
 * removed before the message is logged.
 */
def helpMessage() {
    log.info"""
    ===================================
    uct-srst2 ~ version ${params.version}
    ===================================
    Usage:
    The typical command for running the pipeline is as follows:
    nextflow run uct-cbio/uct-srst2 --reads '*_R{1,2}.fastq.gz' -profile uct_hex
    Mandatory arguments:
    --reads Path to input data (must be surrounded with quotes)
    -profile Hardware config to use. uct_hex OR standard
    --mlst_db Specify fasta-formatted mlst database for srst2
    --mlst_definitions Definitions for MLST scheme (required if mlst_db
    supplied and you want to calculate STs)
    Other srst2 options:
    --mlst_delimiter Character(s) separating gene name from allele number
    in MLST database (default "-", as in arcc-1)
    --mlst_max_mismatch Maximum number of mismatches per read for MLST allele calling (default 10)
    --gene_db Fasta-formatted gene databases for resistance OR virulence factor analysis (optional)
    --min_gene_cov Minimum %coverage cutoff for gene reporting (default 90)
    --max_gene_divergence Maximum %divergence cutoff for gene reporting (default 10)
    General options:
    --outdir The output directory where the results will be saved
    --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
    -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
    Help:
    --help Will print out summary above when executing nextflow run uct-cbio/uct-srst2 --help
    """.stripIndent()
}
/*
 * SET UP CONFIGURATION VARIABLES
 */
// Defaults for the general-purpose parameters; each stays false unless the
// user (or a config file) supplies a value.
params.name            = false
//params.project = false
params.email           = false
params.plaintext_email = false
params.help            = false

// Show the help message and stop with exit status 0 when --help is given
if( params.help ) {
    helpMessage()
    exit 0
}
// Validate inputs

// Resolve the run name. When workflow.runName does not have the
// lowercase `word_word` shape it is taken to be a user-supplied name and is
// used directly; otherwise the value of params.name (false by default) is kept.
// This has the bonus effect of catching both -name and --name.
custom_runName = (workflow.runName ==~ /[a-z]+_[a-z]+/) ? params.name : workflow.runName

// Validate user inputs: every optional database/definitions path that was
// supplied must point to an existing file, otherwise the run stops with
// exit status 1. The resolved file objects are kept in script-level variables
// (mlst_db, mlst_definitions, gene_db) for later use.
if( params.mlst_db ) {
    mlst_db = file(params.mlst_db)
    if( !mlst_db.exists() ) {
        exit 1, "MLST DB file could not be found: ${params.mlst_db}"
    }
}

if( params.mlst_definitions ) {
    mlst_definitions = file(params.mlst_definitions)
    if( !mlst_definitions.exists() ) {
        exit 1, "MLST definitions file could not be found: ${params.mlst_definitions}"
    }
}

if( params.gene_db ) {
    gene_db = file(params.gene_db)
    if( !gene_db.exists() ) {
        exit 1, "Gene DB file could not be found: ${params.gene_db}"
    }
}
// Build the channel of paired-end reads matched by params.reads.
// Each emitted item is consumed downstream as a two-element tuple:
// the sample id (the prefix shared by the two paired FASTQ files)
// followed by the read files of that pair.
// The run is aborted with exit status 1 when the pattern matches nothing.
Channel
    .fromFilePairs( params.reads )
    .ifEmpty {
        exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!"
    }
    .set { ReadPairsToSrst2 }
// Header log info: banner followed by a key/value summary of the run
log.info "==================================="
log.info " uct-srst2 ~ version ${params.version}"
log.info "==================================="

// Entries are added in display order; the map keeps insertion order when it
// is rendered at the end of this section.
def summary = [:]
summary << [
    'Run Name': (custom_runName ?: workflow.runName),
    'Reads'   : params.reads
]

// Database entries are listed only when the corresponding parameter is set
if( params.mlst_db ) {
    summary['MLST DB'] = params.mlst_db
}
if( params.gene_db ) {
    summary['Gene DB'] = params.gene_db
}

summary << [
    'Min gene coverage'  : params.min_gene_cov,
    'Max gene divergence': params.max_gene_divergence,
    'OS'                 : System.getProperty("os.name"),
    'OS.arch'            : System.getProperty("os.arch"),
    'OS.version'         : System.getProperty("os.version"),
    'javaversion'        : System.getProperty("java.version"),    // Java Runtime Environment version
    'javaVMname'         : System.getProperty("java.vm.name"),    // Java Virtual Machine implementation name
    'javaVMVersion'      : System.getProperty("java.vm.version")  // Java Virtual Machine implementation version
]

// Gets starting time
sysdate = new java.util.Date()

summary << [
    'User'       : System.getProperty("user.name"),  // User's account name
    'Max Memory' : params.max_memory,
    'Max CPUs'   : params.max_cpus,
    'Max Time'   : params.max_time,
    'Output dir' : params.outdir,
    'Working dir': workflow.workDir,
    'Container'  : workflow.container
]

if( workflow.revision ) {
    summary['Pipeline Release'] = workflow.revision
}

summary << [
    'Current home'  : "$HOME",
    'Current user'  : "$USER",
    'Current path'  : "$PWD",
    'Script dir'    : workflow.projectDir,
    'Config Profile': workflow.profile
]

if( params.email ) {
    summary['E-mail Address'] = params.email
}

// One "key: value" line per entry, keys padded to at least 15 characters
log.info( summary.collect { entry -> "${entry.key.padRight(15)}: ${entry.value}" }.join("\n") )
log.info "========================================="
/*
 *
 * Step 1: srst2 (run per sample)
 *
 * Runs srst2 once for every read pair received from ReadPairsToSrst2 and
 * copies every output file whose name starts with "<pairId>_srst2" into
 * "<params.outdir>/srst2".
 *
 * Optional srst2 arguments are added to the command line only when the
 * parameter that backs them has been supplied:
 *   --gene_db            when params.gene_db is set
 *   --mlst_db            when params.mlst_db is set
 *   --mlst_delimiter     when params.mlst_db is set
 *   --mlst_definitions   when both params.mlst_db and params.mlst_definitions are set
 *   --mlst_max_mismatch  when params.mlst_db is set and params.mlst_max_mismatch has a value
 *
 */
process srst2 {
    tag { "srst2.${pairId}" }
    publishDir "${params.outdir}/srst2", mode: "copy"

    input:
    set pairId, file(reads) from ReadPairsToSrst2

    output:
    file("${pairId}_srst2*")

    script:
    def geneDB = params.gene_db ? "--gene_db $gene_db" : ''
    def mlstDB = params.mlst_db ? "--mlst_db $mlst_db" : ''
    // The mlst_definitions file variable only exists when params.mlst_definitions
    // was supplied, so the flag must be guarded by that parameter as well;
    // guarding on params.mlst_db alone fails when no definitions file is given.
    def mlstdef = (params.mlst_db && params.mlst_definitions) ? "--mlst_definitions $mlst_definitions" : ''
    def mlstdelim = params.mlst_db ? "--mlst_delimiter $params.mlst_delimiter" : ''
    // Forward the documented --mlst_max_mismatch option; an explicit 0 is a
    // valid value and must not be dropped by the truthiness check.
    def maxMismatch = params.mlst_max_mismatch
    def mlstmismatch = (params.mlst_db && (maxMismatch || maxMismatch == 0)) ? "--mlst_max_mismatch $maxMismatch" : ''
    """
    srst2 --input_pe $reads --output ${pairId}_srst2 --min_coverage $params.min_gene_cov --max_divergence $params.max_gene_divergence $mlstDB $mlstdef $mlstdelim $mlstmismatch $geneDB
    """
}