forked from tgen/phoenix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.jst
executable file
·167 lines (137 loc) · 6.56 KB
/
main.jst
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Phoenix Human GRCh38 pipeline
#
# The config variables include an array of dataFiles. Here we
# need to group the data files into read groups, and then group
# the read groups into samples. This will make it easier to add tasks
# later in the pipeline. The resulting samples dict will look
# like this:
#
# sampleMergeKey/sampleName:
#   glPrep: RNA
#   glType: RNA
#   ...
#   read_groups:
#     C140KACXX_7_TAGCTT:
#       glPrep: RNA
#       glType: RNA
#       ...
#       data_files:
#         -
#           fastqPath: .../MMRF_1157_1_BM_CD138pos_T2_TSMRU_K03096_C140KACXX_TAGCTT_L007_R1_001.fastq.gz
#           fastqCode: R1
#           glPrep: RNA
#           glType: RNA
#           ...
#         -
#           fastqPath: .../MMRF_1157_1_BM_CD138pos_T2_TSMRU_K03096_C140KACXX_TAGCTT_L007_R2_001.fastq.gz
#           fastqCode: R2
#           glPrep: RNA
#           glType: RNA
#           ...
{# Optional settings: each one falls back to a default when the caller did not supply it,
   so the rest of the template can reference it unconditionally. #}
{# debug: setting it to True retains many of the intermediate files #}
{% set debug = debug | default(False) %}
{# tasks: contains switches for toggling many of the features #}
{% set tasks = tasks | default({}) %}
{# submissionSource: toggles specific tasks based on how they were submitted #}
{% set submissionSource = submissionSource | default("unknown") %}
{% from 'utilities/copy_fastq.jst' import copy_fastq with context %}
{% from 'utilities/finalize.jst' import finalize with context %}
{% from 'modules/dna_alignment/main.jst' import dna_alignment with context %}
{% from 'modules/qc/bam_qc_snpsniffer.jst' import snpsniffer_summary with context %}
{% from 'modules/constitutional/main.jst' import constitutional_variant_calling with context %}
{% from 'modules/somatic/main.jst' import somatic_variant_calling with context %}
{% from 'modules/rna/main.jst' import rna_quant with context %}
{% from 'modules/rna/main.jst' import rna_fusion_detection with context %}
{% from 'modules/single_cell/main.jst' import single_cell_rna with context %}
{% from 'modules/CHIP/main.jst' import chip_workflow with context %}
{% from 'modules/tumor_only/main.jst' import tumor_only_variant_calling with context %}
{# Accumulators filled while walking dataFiles:
   samples          - one entry per sample name, seeded from the first data file seen for that name
   fastq_validation - every fastq basename, collected for the duplicate-name check further down #}
{% set samples = {} %}
{% set fastq_validation = [] %}
{% for file in dataFiles %}
{% set fastq_basename = file.fastqPath | basename %}
{# Prefer sampleMergeKey as the sample name; fall back to sampleName #}
{% set name = file.sampleMergeKey if 'sampleMergeKey' in file else file.sampleName %}
{# Annotate the data file in place with its sample name, basename and lower-cased gl* keys #}
{% do file.update({
'name': name,
'basename': fastq_basename,
'gltype': file.glType.lower(),
'glprep': file.glPrep.lower()
}) %}
{% do fastq_validation.append(fastq_basename) %}
{# Data files without an explicit subGroup are labelled 'constitutional' #}
{% do file.setdefault('subGroup', 'constitutional') %}
{# Only the first file seen for a name defines the sample-level properties (copied, not shared) #}
{% if name not in samples %}
{% do samples.update({name: dict(file, name=name)}) %}
{% endif %}
{% endfor %}
{# Group the data files by read group id and attach each read group to its sample.
   The first data file of a group supplies the read-group-level properties, and the
   full list of files in the group is stored under 'data_files'. #}
{% for rgid, rg_files in dataFiles | groupby('rgid') %}
{% set first_file = rg_files | first %}
{# Same naming rule as for samples: sampleMergeKey wins over sampleName #}
{% set sample_name = first_file.sampleMergeKey if 'sampleMergeKey' in first_file else first_file.sampleName %}
{# Create the sample's read_groups dict on first use #}
{% set sample_read_groups = samples[sample_name].setdefault('read_groups', {}) %}
{% do sample_read_groups.update({rgid: dict(first_file, data_files=rg_files)}) %}
{% endfor %}
{# Calculated properties for exome samples: the library and capture kit codes are sliced
   out of assayCode, and the capture kit resource paths are derived from the kit code. #}
{% for sample in samples.values() if sample.gltype == 'exome' %}
{% set kit_code = sample.assayCode[2:5] | upper %}
{# All capture kit resources share this path stem and differ only in their suffix #}
{% set kit_stem = constants.phoenix.capture_kit_path ~ '/' ~ kit_code ~ '/' ~ kit_code ~ '_' ~ constants.phoenix.genome_subversion_name ~ '_' ~ constants.phoenix.gene_model_name %}
{% do sample.update({
'library_code': sample.assayCode[0:2] | upper,
'capture_kit_code': kit_code,
'capture_kit': {
'targets_interval_list': kit_stem ~ '.targets.interval_list',
'no_header_targets_interval_list': kit_stem ~ '.no.header.targets.interval_list',
'baits_interval_list': kit_stem ~ '.baits.interval_list',
'extended_bed': kit_stem ~ '.extended.bed'
}
}) %}
{% endfor %}
{# Calculated property for RNA samples: fold the three library settings into a single
   "readOrientation-rnaStrandType-rnaStrandDirection" key, lower-cased, with a default
   for any setting the sample does not provide. #}
{% for sample in samples.values() if sample.gltype == 'rna' %}
{% set strand_parts = [
sample.readOrientation | default('inward') | lower,
sample.rnaStrandType | default('unstranded') | lower,
sample.rnaStrandDirection | default('notapplicable') | lower
] %}
{% do sample.update({'strandedness': strand_parts | join('-')}) %}
{% endfor %}
{# When debug is set, dump the assembled samples structure to the log as indented JSON.
   NOTE(review): level='CRITICAL' is presumably chosen so the dump is always shown - confirm. #}
{% if debug %}
{{ log(samples|tojson(indent=4), level='CRITICAL') }}
{% endif %}
{# Unique fastq name validation: there must be as many distinct basenames as data files.
   The variable referenced below is never defined; the reference appears to be a deliberate
   way to abort rendering with a descriptive name. NOTE(review): this presumably relies on
   the environment treating undefined variables as errors - confirm. #}
{% if (fastq_validation | unique | list | length) != (dataFiles | length) %}
{{ one_or_more_of_the_fastqs_have_the_same_name }}
{% endif %}
{# Copy fastqs to project dir: one copy_fastq call per entry in dataFiles #}
{% for fastq in dataFiles %}
{{- copy_fastq(fastq) }}
{% endfor %}
{# Project level quality control: the snpsniffer summary is only emitted when more than one sample exists #}
{% if samples|length > 1 %}
{{- snpsniffer_summary(samples) }}
{% endif %}
{# Finalize after running all other tasks.
   NOTE(review): finalize() is called here, before the module calls below, so "after"
   presumably refers to ordering handled inside the macro rather than to template order - confirm. #}
{{- finalize() }}
{# Start of module calls. Every macro receives the samples dict built above, except
   single_cell_rna, which is handed the raw dataFiles list. #}
{{- dna_alignment(samples) }}
{{- constitutional_variant_calling(samples) }}
{{- somatic_variant_calling(samples) }}
{{- rna_quant(samples) }}
{{- rna_fusion_detection(samples) }}
{{- single_cell_rna(dataFiles) }}
{{- chip_workflow(samples) }}
{{- tumor_only_variant_calling(samples) }}