-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbwa_mem_paired_array.sh
59 lines (38 loc) · 1.4 KB
/
bwa_mem_paired_array.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/bash -l
#SBATCH -J array_job
#SBATCH -o array_job_out_%A_%a.txt
#SBATCH -e array_job_err_%A_%a.txt
#SBATCH --array=1-728
#SBATCH -p hi
#SBATCH --cpus-per-task=6
#SBATCH --mem=10000
#load software for this job
module load boost/1.55.0
module load samtools/1.2
module load bowtie2/2.2.5
module load tophat/2.1.0
module load bwa/0.7.9a
#tell bowtie where the indexed genome is
export BOWTIE2_INDEXES=/home/nreid/popgen/kfish3
#various relevant directories and files
genomebase=/home/nreid/popgen/kfish3/killifish20130322asm
genome=/home/nreid/popgen/kfish3/killifish20130322asm.fa
outdir1=/home/nreid/rnaseq/alignments
#paired reads (unpaired leftovers run in another file)
fq1=$(find ~/rnaseq/ -type f -name "*.gz" | grep ".*R1.*pt" | sed -n $(echo $SLURM_ARRAY_TASK_ID)p)
fq2=$(echo $fq1 | sed 's/R1/R2/')
run=$(echo $fq1 | grep -oP '(?<=rawdata/)[^/]+')
lib=$(echo $fq1 | grep -oP '(?<=Sample_)[^_]+')
samp=$(echo $fq1 | grep -oP "(?<=/AWJRDD00._)[^/]+(?=_[ACGT]+)")
lane=$(echo $fq1 | grep -oP "L00.")
sub=$(echo $fq1 | grep -oP "(?<=L00._R._)...")
rg=$(echo \@RG\\tID:$samp.$lib.$run.$lane.$sub\\tPL:Illumina\\tPU:x\\tLB:$lib\\tSM:$samp)
outdir2=$outdir1/$samp
outfile=$samp.$lib.$run.$lane.$sub.bam
mkdir -p $outdir2
echo $outdir2
echo $outfile
bwa mem -R $rg $genome $fq1 $fq2 | \
samtools view -b - | \
samtools fixmate -O bam - - | \
samtools sort -O bam -T /scratch/$outfile.temp >$outdir2/$outfile