forked from jamesanthonygill3/Herring-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathph_oil_18_trim.txt
418 lines (323 loc) · 15.4 KB
/
ph_oil_18_trim.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
##what's been trimmed
plate 5 -> 4 lanes
/home/jagill/results/oil18/prjct1_plate5/rawdata
plate 4 -> 4 lanes
/home/jagill/results/oil18/prjct1_plate4/raw_data
plate 3 -> 4 lanes
/home/jagill/results/oil18/prjct1_plate3/trim_files
plate 2 -> 4 lanes
/home/jagill/results/oil18/prjct1_plate2_4lanes/trim_files
##what needs trimmed
##all of plate 1
## trim plate 1
## trimmomatic job
# filepath to plate 1
/home/jagill/results/oil18/prjct1_plate1/rawdata
# make a logs file in output filepath
/home/jagill/results/oil18/prjct1_plate1/trim
mkdir logs
nano trim_prjct1plate1.sh
#!/bin/bash
#
# trim_prjct1plate1.sh
# Adapter- and quality-trim every raw read pair of plate 1 with Trimmomatic
# (PE mode). For each *_R1_001.fastq.gz the matching *_R2_001.fastq.gz is
# derived from the file name; the four outputs (*_paired / *_unpaired for R1
# and R2) are written next to the inputs in rawdata/.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate1/rawdata
#SBATCH -t 24:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=4G
#SBATCH -J plate1_trim
module load trimmomatic/0.36

adapters=/home/jagill/sample_info/illumina_adapters_bradseq.fa
log=/home/jagill/results/oil18/prjct1_plate1/trim/logs/prjct1plate1.cmds

# Match raw R1 inputs only. A bare *R1* glob also matches the
# *_R1_001_paired/_unpaired.fastq.gz outputs of an earlier (or timed-out) run,
# because those are written into this same directory.
for R1 in /home/jagill/results/oil18/prjct1_plate1/rawdata/*_R1_001.fastq.gz
do
  [[ -e "$R1" ]] || continue   # glob matched nothing
  stem=${R1%_R1_001.fastq.gz}
  R2=${stem}_R2_001.fastq.gz
  R1paired=${stem}_R1_001_paired.fastq.gz
  R1unpaired=${stem}_R1_001_unpaired.fastq.gz
  R2paired=${stem}_R2_001_paired.fastq.gz
  R2unpaired=${stem}_R2_001_unpaired.fastq.gz
  # -threads: use the cores requested with "#SBATCH -c" (falls back to 8).
  # 2>&1: Trimmomatic prints its per-sample report on stderr, so stdout alone
  # leaves the log file empty.
  java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE \
    -threads "${SLURM_CPUS_PER_TASK:-8}" -phred33 \
    "$R1" "$R2" "$R1paired" "$R1unpaired" "$R2paired" "$R2unpaired" \
    "ILLUMINACLIP:${adapters}:2:30:10" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 \
    >> "$log" 2>&1
done
## Trim job timed out after the second-to-last sample. Now trim the last sample by hand:
18048FL-23-01-96_S96_L004_R1_001.fastq.gz
18048FL-23-01-96_S96_L004_R2_001.fastq.gz
# One-off Trimmomatic PE run for the single plate 1 sample (96, lane L004) that
# the batch job did not reach. Run from the rawdata directory; the report is
# appended to the same log as the batch job.
module load trimmomatic/0.36
# 2>&1: Trimmomatic writes its report to stderr, so it must be redirected too.
java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE -phred33 \
18048FL-23-01-96_S96_L004_R1_001.fastq.gz 18048FL-23-01-96_S96_L004_R2_001.fastq.gz \
18048FL-23-01-96_S96_L004_R1_001_paired.fastq.gz 18048FL-23-01-96_S96_L004_R1_001_unpaired.fastq.gz 18048FL-23-01-96_S96_L004_R2_001_paired.fastq.gz 18048FL-23-01-96_S96_L004_R2_001_unpaired.fastq.gz \
ILLUMINACLIP:/home/jagill/sample_info/illumina_adapters_bradseq.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 >> /home/jagill/results/oil18/prjct1_plate1/trim/logs/prjct1plate1.cmds 2>&1
----------------------------------
## move all trimmed paired files from
/home/jagill/results/oil18/prjct1_plate1/rawdata/
## to
/home/jagill/results/oil18/prjct1_plate1/trim/
mv -v *001_paired.fastq.gz /home/jagill/results/oil18/prjct1_plate1/trim/
----------------------------
## re-trim all of plate 5
nano trim_prjct1plate5.sh
#!/bin/bash
#
# trim_prjct1plate5.sh
# Re-trim every raw read pair of plate 5 with Trimmomatic (PE mode). Outputs
# (*_paired / *_unpaired for R1 and R2) are written next to the inputs in
# rawdata/ and are moved to trimfiles/ afterwards.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate5/rawdata
#SBATCH -t 48:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=4G
#SBATCH -J plate5_trim
module load trimmomatic/0.36

adapters=/home/jagill/sample_info/illumina_adapters_bradseq.fa
log=/home/jagill/results/oil18/prjct1_plate5/trimfiles/logs/plate5trim.cmds

# Match raw R1 inputs only. A bare *R1* glob also matches the
# *_R1_001_paired/_unpaired.fastq.gz outputs of an earlier run, which live in
# this same directory.
for R1 in /home/jagill/results/oil18/prjct1_plate5/rawdata/*_R1_001.fastq.gz
do
  [[ -e "$R1" ]] || continue   # glob matched nothing
  stem=${R1%_R1_001.fastq.gz}
  R2=${stem}_R2_001.fastq.gz
  R1paired=${stem}_R1_001_paired.fastq.gz
  R1unpaired=${stem}_R1_001_unpaired.fastq.gz
  R2paired=${stem}_R2_001_paired.fastq.gz
  R2unpaired=${stem}_R2_001_unpaired.fastq.gz
  # -threads: use the cores requested with "#SBATCH -c" (falls back to 8).
  # 2>&1: Trimmomatic prints its per-sample report on stderr, so stdout alone
  # leaves the log file empty.
  java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE \
    -threads "${SLURM_CPUS_PER_TASK:-8}" -phred33 \
    "$R1" "$R2" "$R1paired" "$R1unpaired" "$R2paired" "$R2unpaired" \
    "ILLUMINACLIP:${adapters}:2:30:10" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 \
    >> "$log" 2>&1
done
## move all paired files from /home/jagill/results/oil18/prjct1_plate5/rawdata
## to /home/jagill/results/oil18/prjct1_plate5/trimfiles
# Move only the *_paired outputs: a bare *paired* glob also matches the
# *_unpaired files.
mv -v -- *_paired.fastq.gz /home/jagill/results/oil18/prjct1_plate5/trimfiles/
-------------------------------
## build indexed ref transcriptome from pacbio transcriptome using salmon/1.3.0
## filepath to transcriptome
/home/jagill/CP_ref_transcriptome/isoseq_2020/polished.hq.fasta
## job title
pacherring4_pcbio_index.sh
#!/bin/bash
#
# pacherring4_pcbio_index.sh
# Build a salmon index from the PacBio transcriptome (polished.hq.fasta).
# The index directory name is relative, so it is created inside the job's
# working directory (the -D path below).
#
#SBATCH -D /home/jagill/CP_ref_transcriptome/
#SBATCH -t 24:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
#SBATCH -J transcript_index
module load salmon/1.3.0
# -p: index with the cores requested via "#SBATCH -c" (falls back to 8)
# instead of salmon's built-in default thread count.
salmon index \
  -t /home/jagill/CP_ref_transcriptome/isoseq_2020/polished.hq.fasta \
  -i pacherring4_pcbio_idx \
  -p "${SLURM_CPUS_PER_TASK:-8}"
## output and filepath of the transcriptome index
/home/jagill/CP_ref_transcriptome/pacherring4_pcbio_idx
---------------------------
## start with plate 1
## map the selected samples using salmon quant command
##filepath to paired reads
/home/jagill/results/oil18/prjct1_plate1/trim
##filepath to indexed transcriptome
/home/jagill/CP_ref_transcriptome/pacherring4_pcbio_idx
## output filepath
/home/jagill/results/oil18/prjct1_plate1/quant
## job title
PW-D4-L2A_quant.sh
#!/bin/bash
#
# PW-D4-L2A_quant.sh
# Quantify sample PW-D4-L2A (library 18048FL-23-01-01_S1) with salmon quant,
# feeding the trimmed paired reads of lanes L001-L004 as one run.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate1/trim
#SBATCH -t 24:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
#SBATCH -J PW-D4-L2A
module load salmon/1.3.0

library=18048FL-23-01-01_S1
mates1=()
mates2=()
# Collect the R1/R2 files lane by lane so both lists stay in the same order.
for lane in L001 L002 L003 L004; do
  mates1+=("${library}_${lane}_R1_001_paired.fastq.gz")
  mates2+=("${library}_${lane}_R2_001_paired.fastq.gz")
done

salmon quant \
  -i /home/jagill/CP_ref_transcriptome/pacherring4_pcbio_idx \
  -l IU \
  -1 "${mates1[@]}" \
  -2 "${mates2[@]}" \
  --allowDovetail --validateMappings --seqBias \
  -o /home/jagill/results/oil18/prjct1_plate1/quant/PW-D4-L2A
slurm-28792631.out
[2020-12-22 13:19:53.175] [jointLog] [info] Number of mappings discarded because of alignment score : 44,604,977
[2020-12-22 13:19:53.175] [jointLog] [info] Number of fragments entirely discarded because of alignment score : 744,895
[2020-12-22 13:19:53.175] [jointLog] [info] Number of fragments discarded because they are best-mapped to decoys : 0
[2020-12-22 13:19:53.193] [jointLog] [warning] Only 4802227 fragments were mapped, but the number of burn-in fragments was set to 5000000.
The effective lengths have been computed using the observed mappings.
[2020-12-22 13:19:53.193] [jointLog] [info] Mapping rate = 70.5868%
## took 9 minutes to run. Next time set the -t flag to 2 hours.
## job title
nano PW-D2-MH1A_quant.sh
#!/bin/bash
#
# PW-D2-MH1A_quant.sh
# Quantify sample PW-D2-MH1A (library 18048FL-23-01-02_S2) with salmon quant,
# feeding the trimmed paired reads of lanes L001-L004 as one run.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate1/trim
#SBATCH -t 2:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
module load salmon/1.3.0

library=18048FL-23-01-02_S2
mates1=()
mates2=()
# Collect the R1/R2 files lane by lane so both lists stay in the same order.
for lane in L001 L002 L003 L004; do
  mates1+=("${library}_${lane}_R1_001_paired.fastq.gz")
  mates2+=("${library}_${lane}_R2_001_paired.fastq.gz")
done

salmon quant \
  -i /home/jagill/CP_ref_transcriptome/pacherring4_pcbio_idx \
  -l IU \
  -1 "${mates1[@]}" \
  -2 "${mates2[@]}" \
  --allowDovetail --validateMappings --seqBias \
  -o /home/jagill/results/oil18/prjct1_plate1/quant/PW-D2-MH1A
slurm-28804278.out
[2020-12-23 12:40:06.869] [jointLog] [info] Number of mappings discarded because of alignment score : 64,164,917
[2020-12-23 12:40:06.869] [jointLog] [info] Number of fragments entirely discarded because of alignment score : 1,118,916
[2020-12-23 12:40:06.869] [jointLog] [info] Number of fragments discarded because they are best-mapped to decoys : 0
[2020-12-23 12:40:06.869] [jointLog] [info] Mapping rate = 70.2858%
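## [redacted: a password-like string had been pasted here twice; purge it from the repository history and rotate the credential]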
### generic sbatch script for quant plate5
#!/bin/bash
#
# Template header for the plate 5 salmon quant jobs: it only sets the working
# directory to the plate 5 trimmed reads, requests resources and loads salmon.
# The per-sample "salmon quant ..." command has to be appended below.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate5/trimfiles
#SBATCH -t 2:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
module load salmon/1.3.0
--------------
##plate5 stuff
## move paired trimmed files from
/home/jagill/results/oil18/prjct1_plate5/rawdata
## to
/home/jagill/results/oil18/prjct1_plate5/trimfiles/
# Move only the *_paired outputs: a bare *paired* glob also matches the
# *_unpaired files.
mv -- *_paired.fastq.gz /home/jagill/results/oil18/prjct1_plate5/trimfiles/
--------------------
## plate4 stuff
## move paired trimmed files from
/home/jagill/results/oil18/prjct1_plate4/raw_data
## to
/home/jagill/results/oil18/prjct1_plate4/trim_files
# Move only the *_paired outputs: a bare *paired* glob also matches the
# *_unpaired files.
mv -- *_paired.fastq.gz /home/jagill/results/oil18/prjct1_plate4/trim_files/
-------------------------------------------------------
## plate 3 stuff
92_S113_L003_R2_001.fastq.gz
91_S112_L003_R2_001.fastq.gz
## some missing trim files from plate 3: the first lane of samples 5-9 and 60-96
## move these samples to temp folder "temp_trim" in "rawdata" directory
nano trim_plate3_remainders.sh
#!/bin/bash
#
# trim_plate3_remainders.sh
# Trim the plate 3 read pairs that were staged in rawdata/temp_trim with
# Trimmomatic (PE mode). Outputs (*_paired / *_unpaired for R1 and R2) are
# written into temp_trim/ next to the inputs.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate3/rawdata/temp_trim
#SBATCH -t 12:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=4G
#SBATCH -J plate3_trim
module load trimmomatic/0.36

adapters=/home/jagill/sample_info/illumina_adapters_bradseq.fa
log=/home/jagill/results/oil18/prjct1_plate3/trim_files/logs/prjct1plate3.cmds

# Match raw R1 inputs only. A bare *R1* glob also matches
# *_R1_001_paired/_unpaired.fastq.gz outputs already present in this directory.
for R1 in /home/jagill/results/oil18/prjct1_plate3/rawdata/temp_trim/*_R1_001.fastq.gz
do
  [[ -e "$R1" ]] || continue   # glob matched nothing
  stem=${R1%_R1_001.fastq.gz}
  R2=${stem}_R2_001.fastq.gz
  R1paired=${stem}_R1_001_paired.fastq.gz
  R1unpaired=${stem}_R1_001_unpaired.fastq.gz
  R2paired=${stem}_R2_001_paired.fastq.gz
  R2unpaired=${stem}_R2_001_unpaired.fastq.gz
  # -threads: use the cores requested with "#SBATCH -c" (falls back to 8).
  # 2>&1: Trimmomatic prints its per-sample report on stderr, so stdout alone
  # leaves the log file empty.
  java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE \
    -threads "${SLURM_CPUS_PER_TASK:-8}" -phred33 \
    "$R1" "$R2" "$R1paired" "$R1unpaired" "$R2paired" "$R2unpaired" \
    "ILLUMINACLIP:${adapters}:2:30:10" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 \
    >> "$log" 2>&1
done
## when job's done mv all the paired files into trim folder and recommence salmon
## starting with sample 5
# Move only the *_paired outputs: a bare *paired* glob also matches the
# *_unpaired files.
mv -- *_paired.fastq.gz /home/jagill/results/oil18/prjct1_plate3/trim_files/
------------------------------
## some missing trim files from plate 2
well73_S193_L005_R1_001.fastq.gz
well73_S193_L005_R2_001.fastq.gz
## these files are corrupted (not in gzip format) and can't be trimmed, so they had to be left out of this sample's salmon run
well71_S191_L006_R1_001.fastq.gz
well71_S191_L006_R2_001.fastq.gz
# One-off Trimmomatic PE run for the plate 2 pair well73 / lane L005 that was
# missing from the trimmed set. Run from the directory holding the raw files.
module load trimmomatic/0.36
# 2>&1: Trimmomatic writes its report to stderr, so it must be redirected too.
java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE -phred33 \
well73_S193_L005_R1_001.fastq.gz well73_S193_L005_R2_001.fastq.gz \
well73_S193_L005_R1_001_paired.fastq.gz well73_S193_L005_R1_001_unpaired.fastq.gz well73_S193_L005_R2_001_paired.fastq.gz well73_S193_L005_R2_001_unpaired.fastq.gz \
ILLUMINACLIP:/home/jagill/sample_info/illumina_adapters_bradseq.fa:2:30:10 \
LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 >> /home/jagill/results/oil18/prjct1_plate2_4lanes/trim_files/logs/ph18_trim.cmds 2>&1
nano trim_well71_L006
#!/bin/bash
#
# trim_well71_L006
# Trim the single plate 2 read pair well71 / lane L006 with Trimmomatic
# (PE mode); outputs are written into the rawdata working directory.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate2_4lanes/rawdata
#SBATCH -t 2:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
module load trimmomatic/0.36
# -threads: use the cores requested with "#SBATCH -c" (falls back to 8).
# 2>&1: Trimmomatic reports (including input-format errors) on stderr, so
# stdout alone leaves the log file empty.
java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE \
-threads "${SLURM_CPUS_PER_TASK:-8}" -phred33 \
well71_S191_L006_R1_001.fastq.gz well71_S191_L006_R2_001.fastq.gz \
well71_S191_L006_R1_001_paired.fastq.gz well71_S191_L006_R1_001_unpaired.fastq.gz well71_S191_L006_R2_001_paired.fastq.gz well71_S191_L006_R2_001_unpaired.fastq.gz \
ILLUMINACLIP:/home/jagill/sample_info/illumina_adapters_bradseq.fa:2:30:10 \
LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 >> /home/jagill/results/oil18/prjct1_plate2_4lanes/trim_files/logs/ph18_trim.cmds 2>&1
well71_S191_L004_R1_001.fastq.gz
well71_S191_L004_R2_001.fastq.gz
well71_S191_L005_R1_001.fastq.gz
well71_S191_L005_R2_001.fastq.gz
well71_S191_L006_R1_001.fastq.gz
well71_S191_L006_R2_001.fastq.gz
# Move all four Trimmomatic outputs of well73 / lane L005 (R1 and R2, paired
# and unpaired) into the plate 2 trim_files directory in one command.
mv well73_S193_L005_R{1,2}_001_{paired,unpaired}.fastq.gz \
  /home/jagill/results/oil18/prjct1_plate2_4lanes/trim_files/
------------------------------------------------------
nano plate1trim.sh
#!/bin/bash
#
# plate1trim.sh
# Trim every raw read pair of plate 1 with Trimmomatic (PE mode), using the
# adapter file under /group/awhitehegrp and ILLUMINACLIP thresholds 2:40:15.
# Outputs (*_paired / *_unpaired for R1 and R2) are written next to the inputs
# in rawdata/.
#
#SBATCH -D /home/jagill/results/oil18/prjct1_plate1/rawdata
#SBATCH -t 10:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
#SBATCH -J plate1trim
module load trimmomatic/0.36

adapters=/group/awhitehegrp/rnaseq/ph_oil18/samp_info/illumina_adapters_bradseq.fa
log=/home/jagill/results/oil18/prjct1_plate1/trim/prjct1_plate1.cmds

# Match raw R1 inputs only. A bare *R1* glob also matches leftover
# *_R1_001_paired/_unpaired.fastq.gz files from an earlier trim in this
# directory; for those the R1->R2 substitution finds nothing to replace, so
# the file would be "paired" with itself.
for R1 in /home/jagill/results/oil18/prjct1_plate1/rawdata/*_R1_001.fastq.gz
do
  [[ -e "$R1" ]] || continue   # glob matched nothing
  stem=${R1%_R1_001.fastq.gz}
  R2=${stem}_R2_001.fastq.gz
  R1paired=${stem}_R1_001_paired.fastq.gz
  R1unpaired=${stem}_R1_001_unpaired.fastq.gz
  R2paired=${stem}_R2_001_paired.fastq.gz
  R2unpaired=${stem}_R2_001_unpaired.fastq.gz
  # -threads: use the cores requested with "#SBATCH -c" (falls back to 8).
  # 2>&1: Trimmomatic prints its per-sample report on stderr, so stdout alone
  # leaves the log file empty.
  java -jar /share/apps/Trimmomatic-0.36/trimmomatic.jar PE \
    -threads "${SLURM_CPUS_PER_TASK:-8}" -phred33 \
    "$R1" "$R2" "$R1paired" "$R1unpaired" "$R2paired" "$R2unpaired" \
    "ILLUMINACLIP:${adapters}:2:40:15" LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:25 \
    >> "$log" 2>&1
done
---------------------------------------------------------------------------
## start with plate1
## job title
nano PW-D4-L2A_quant.sh
#!/bin/bash
#
# salmon quant job for the sample labelled PW-D10-C2a (job name and output
# directory below), run from /home/jagill/ph_oil18_testsamples. It combines one
# "18048FL-09-01-61" lane (L002) with three "well61" lanes (L004-L006).
#
# NOTE(review): the note just above this script names the file
#   PW-D4-L2A_quant.sh, but the job name and output here are PW-D10-C2a —
#   confirm which sample this script is meant for.
# NOTE(review): this script loads salmon/0.14.1 and uses the pacherring3 index,
#   while the other quant scripts in these notes use salmon/1.3.0 with the
#   pacherring4 index — presumably an older test run; confirm before reusing.
#
#SBATCH -D /home/jagill/ph_oil18_testsamples
#SBATCH -t 24:00:00
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -p high
#SBATCH -c 8
#SBATCH --mem=10G
#SBATCH -J PW-D10-C2a
module load salmon/0.14.1
# The output directory "pw-d10-c2a" is relative to the -D working directory.
salmon quant -i /home/jagill/CP_ref_transcriptome/pacherring3_pcbio_idx -l IU -1 18048FL-09-01-61_S105_L002_R1_001_paired.fastq.gz well61_S181_L004_R1_001_paired.fastq.gz well61_S181_L005_R1_001_paired.fastq.gz well61_S181_L006_R1_001_paired.fastq.gz -2 18048FL-09-01-61_S105_L002_R2_001_paired.fastq.gz well61_S181_L004_R2_001_paired.fastq.gz well61_S181_L005_R2_001_paired.fastq.gz well61_S181_L006_R2_001_paired.fastq.gz --allowDovetail --validateMappings --seqBias -o pw-d10-c2a
-------------------------------------
### move salmon output files from all plates onto harddrive
## transfer salmon output files for each sample from farm to harddrive
## origination file path for
plate 1
/home/jagill/results/oil18/prjct1_plate1/quant
plate 2
/home/jagill/results/oil18/prjct1_plate2_4lanes/quant2
plate 3
/home/jagill/results/oil18/prjct1_plate3/quant3
plate 4
/home/jagill/results/oil18/prjct1_plate4/quant4
plate 5
/home/jagill/results/oil18/prjct1_plate5/quant
## destination file path
/Volumes/herring2/herring_project/bradseq/ph_oil_2018/prjct1/quantfiles
# Copy the plate 5 salmon output directory from the cluster to the external
# drive (run on the local machine).
# NOTE: the user@host part of this command was mangled to "[email protected]"
# by web e-mail obfuscation, so it cannot be run as pasted. Set FARM_LOGIN to
# the real login first (presumably jagill@<farm login host> — confirm).
scp -P 2022 -r "${FARM_LOGIN:?set FARM_LOGIN to user@host of the cluster}:/home/jagill/results/oil18/prjct1_plate5/quant/" /Volumes/herring2/herring_project/bradseq/ph_oil_2018/prjct1/quantfiles/
## to view the total size of each first-level subdirectory of the current directory
# -d takes the depth as its argument, so "-d1h" is read as depth "1h";
# pass -h (human-readable sizes) separately.
du -h -d 1
## Missing salmon quant files: will have to go back at a later date to figure out why salmon didn't work on these samples.
## In the meantime, delete them from the tximport R script:
S-D10-C2A
CP-D4-ML3A
CP-D4-MH4A
PW-D4-H4A
libtype test
IU -
[2021-01-29 15:35:36.526] [jointLog] [info] Number of mappings discarded because of alignment score : 88,524,771
[2021-01-29 15:35:36.526] [jointLog] [info] Number of fragments entirely discarded because of alignment score : 1,363,221
[2021-01-29 15:35:36.526] [jointLog] [info] Number of fragments discarded because they are best-mapped to decoys : 0
[2021-01-29 15:35:36.526] [jointLog] [info] Mapping rate = 64.0038%
ISF -
[2021-01-29 14:32:19.421] [jointLog] [info] Number of mappings discarded because of alignment score : 88,524,771
[2021-01-29 14:32:19.421] [jointLog] [info] Number of fragments entirely discarded because of alignment score : 1,363,221
[2021-01-29 14:32:19.421] [jointLog] [info] Number of fragments discarded because they are best-mapped to decoys : 0
[2021-01-29 14:32:19.421] [jointLog] [info] Mapping rate = 63.3312%
A -
[2021-01-29 16:11:42.471] [jointLog] [info] Number of mappings discarded because of alignment score : 88,524,771
[2021-01-29 16:11:42.471] [jointLog] [info] Number of fragments entirely discarded because of alignment score : 1,363,221
[2021-01-29 16:11:42.471] [jointLog] [info] Number of fragments discarded because they are best-mapped to decoys : 0
[2021-01-29 16:11:42.471] [jointLog] [info] Mapping rate = 63.3318%