-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_reads.sh
executable file
·40 lines (17 loc) · 1.02 KB
/
extract_reads.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
#
# Extract short reads from sorted BAM files.
#
# For every "*_R1.sorted.bam" file in BAM_DIR, writes a new BAM file into the
# current working directory, named "<input basename>_less_1k.bam", that holds
# the SAM header lines plus every record whose SEQ field (column 10) is
# shorter than 1000 characters.
#
# Requires: samtools and awk on PATH.
# Exit status: 0 when every input was processed (or none was found),
#              1 as soon as one extraction pipeline fails.

# Abort on unhandled errors and unset variables, and make a pipeline fail when
# any of its stages fails (not only the last one).
set -euo pipefail

# Directory containing BAM files
BAM_DIR="/home/storage/DataLake/WIP/Nanopore/outdir_pilot_samples/minimap2"

# Collect the inputs with nullglob enabled so that an unmatched pattern expands
# to nothing instead of being handed to samtools as a literal
# "*_R1.sorted.bam" path (which would also create a junk output file).
shopt -s nullglob
bam_files=("${BAM_DIR}"/*_R1.sorted.bam)
shopt -u nullglob

if (( ${#bam_files[@]} == 0 )); then
  # Nothing to do; report it on stderr but keep the exit status at 0.
  printf 'No *_R1.sorted.bam files found in %s\n' "${BAM_DIR}" >&2
else
  # Loop through each BAM file in the directory
  for BAM_FILE in "${bam_files[@]}"; do
    # Extract the filename without directory and without the .bam extension
    FILENAME=$(basename -- "${BAM_FILE}" .bam)

    # Define the output BAM filename (relative, so it lands in the current
    # working directory). Alternative length window used previously:
    #OUTPUT_BAM="${FILENAME}_3k_10k.bam"
    OUTPUT_BAM="${FILENAME}_less_1k.bam"

    # Run the pipeline to extract reads: decode to SAM with header, keep header
    # lines (starting with "@") and records passing the length filter, then
    # re-encode to BAM. SAM is tab-delimited, hence -F'\t'.
    # NOTE(review): a record whose SEQ is "*" (sequence not stored) has
    # length 1 and therefore passes the "< 1000" filter -- confirm this is
    # intended.
    # Alternative filters used previously (3000-10000 bases, >= 10000 bases):
    #samtools view -h "${BAM_FILE}" | awk '{ if (substr($0,1,1) == "@" || (length($10) >= 3000 && length($10) <= 10000)) {print $0} }' | samtools view -b > "${OUTPUT_BAM}"
    #samtools view -h "${BAM_FILE}" | awk '{ if (substr($0,1,1) == "@" || (length($10) >= 10000)) {print $0} }' | samtools view -b > "${OUTPUT_BAM}"
    if ! samtools view -h "${BAM_FILE}" \
      | awk -F'\t' 'substr($0, 1, 1) == "@" || length($10) < 1000' \
      | samtools view -b > "${OUTPUT_BAM}"; then
      # Do not leave a truncated or empty BAM behind that could be mistaken
      # for a valid result.
      rm -f -- "${OUTPUT_BAM}"
      printf 'Failed to extract reads from: %s\n' "${BAM_FILE}" >&2
      exit 1
    fi

    # Print the processed filename
    echo "Processed: ${BAM_FILE}"
  done
fi