forked from bowmanjeffs/shared_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdada2.r
66 lines (46 loc) · 1.87 KB
/
dada2.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/Rscript
# See tutorial at https://benjjneb.github.io/dada2/tutorial.html. This script will
# QC and identify valid unique reads, then assemble. It will execute on all files in
# the directory "multiplexed", and create the directories "filtered", and "merged".
# The Python script deunique_dada2.py should be used to inflate all of the output tables
# into redundant fasta files for paprica.
library(dada2)
path <- 'demultiplexed'
fnFs <- sort(list.files(path, pattern = '-R1.fastq', full.names = T))
fnRs <- sort(list.files(path, pattern = '-R2.fastq', full.names = T))
sample.names <- sapply(strsplit(basename(fnFs), "-"), `[`, 1)
pdf('quality_profiles.pdf', width = 6, height = 6)
for(i in 1:length(fnFs)){
plotQualityProfile(fnFs[i])
plotQualityProfile(fnRs[i])
}
dev.off()
file_path <- file.path("filtered") # Place filtered files in filtered/ subdirectory
filtFs <- file.path(file_path, paste0(sample.names, "_R1.filt.fastq"))
filtRs <- file.path(file_path, paste0(sample.names, "_R2.filt.fastq"))
## multithreading only useful if multiple fastq files
out <- filterAndTrim(fnFs,
filtFs,
fnRs,
filtRs,
#multithread = T,
minQ = 20,
verbose = T)
errF <- learnErrors(filtFs, multithread=TRUE)
errR <- learnErrors(filtRs, multithread=TRUE)
pdf('error_rates.pdf', width = 6, height = 6)
plotErrors(errF, nominalQ = T)
plotErrors(errR, nominalQ = T)
dev.off()
derepFs <- derepFastq(filtFs, verbose = T)
derepRs <- derepFastq(filtRs, verbose = T)
## wouldn't it make more sense to assemble here?
names(derepFs) <- sample.names
names(derepRs) <- sample.names
dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
dadaRs <- dada(derepRs, err=errF, multithread=TRUE)
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
dir.create('merged')
for(name in names(mergers)){
write.csv(mergers[[name]], paste0('merged/', name, '.csv'), quote = F, row.names = F)
}