Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Awesome Workshop! #3

Open
aaiezza opened this issue Oct 4, 2016 · 4 comments
Open

Awesome Workshop! #3

aaiezza opened this issue Oct 4, 2016 · 4 comments

Comments

@aaiezza
Copy link

aaiezza commented Oct 4, 2016

This was a very well done workshop! Found it super helpful in understanding heteroscedasticity.
I have to ask though if/when you are planning on doing more of these?

Thank you,
Alex

PS: Here is the script I ended up creating during the workshop:

# DE Analysis of RNA-seq data

# Difference between library and require?
# Load libraries
library( edgeR )
library( DESeq2 )
library( limma )
library( Biobase )
library( gplots )

# Load Data
load( 'bottomly_eset.RData' )

# Plot 2 replicate dataset
plot2rep <- function()
{
    eset <- bottomly.2reps
    cpm.mat <- log(cpm(exprs(eset)))
    mean.vec <- apply(cpm.mat, 1, mean)
    sdvec <- apply(cpm.mat, 1, sd)
    plot(mean.vec, sdvec, pch=".", main="2 replicates", ylab="sd", xlab="Average logCPM")
}
# Plot 5 replicate dataset
plot5rep <- function()
{
    eset <- bottomly.5reps
    cpm.mat <- log(cpm(exprs(eset)))
    mean.vec <- apply(cpm.mat, 1, mean)
    sdvec <- apply(cpm.mat, 1, sd)
    plot(mean.vec, sdvec, pch=".", main="5 replicates", ylab="sd", xlab="Average logCPM")
}
# Plot 10 replicate dataset
plot10rep <- function()
{
    eset <- bottomly.eset
    cpm.mat <- log(cpm(exprs(eset)))
    mean.vec <- apply(cpm.mat, 1, mean)
    sdvec <- apply(cpm.mat, 1, sd)
    plot(mean.vec, sdvec, pch=".", main="10 replicates", ylab="sd", xlab="Average logCPM")
}
# Plot all datasets
plotAll <- function( filename = 'heterscedasticity_mean-variance.png', toFile = FALSE )
{
    if( toFile )
        png( filename,
            height = 1920, width = 1080, pointsize = 12.5, res = 100 )

    layout( matrix( c(1:12, rep(c(rep(13,2),14),2), rep(c(rep(15,2),16),2)), ncol = 3, byrow = TRUE ) )
    plot2rep()
    plot5rep()
    plot10rep()

    deSeq2.analysis( TRUE )
    edgeR.analysis( TRUE )
    limma.voom.analysis( TRUE )

    deAnalysis()
    deAnalysis.2reps()

    if( toFile )
        graphics.off()
}

# # # # # # #
# Create DESeq2 datasets
deSeq2.analysis <- function( plot = FALSE )
{
    #2 Replicate
    if( !exists( 'dds.2rep' ) )
    {
        dds.2rep <<- DESeqDataSetFromMatrix(countData = exprs(bottomly.2reps), colData = pData(bottomly.2reps), design = ~ strain )
        dds.2rep <<- DESeq(dds.2rep)
    }

    #5 replicate
    if( !exists( 'dds.5rep' ) )
    {
        dds.5rep <<- DESeqDataSetFromMatrix(countData = exprs(bottomly.5reps), colData = pData(bottomly.5reps), design = ~ strain )
        dds.5rep <<- DESeq(dds.5rep)
    }

    #10 replicate
    if( !exists( 'dds' ) )
    {
        dds <<- DESeqDataSetFromMatrix(countData = exprs(bottomly.eset), colData = pData(bottomly.eset), design = ~ strain )
        dds <<- DESeq(dds)
    }

    # Plot dispersion estimates & show separate by strain
    if ( plot )
    {
        # par( mfrow = c(3,1) )
        # par( mfrow = c(3,2) )
        plotDispEsts(dds.2rep, main = '2 replicates' )
        # plotPCA(rlogTransformation(dds.2rep), intgroup='strain' )
        plotDispEsts(dds.5rep, main = '5 replicates' )
        # plotPCA(rlogTransformation(dds.5rep), intgroup='strain' )
        plotDispEsts(dds, main = '10 replicates' )
        # plotPCA(rlogTransformation(dds), intgroup='strain' )
    }
}


# # # # # # #
# Use edgeR
edgeR.analysis <- function( plot = FALSE )
{
    if( !exists( 'dge' ) )
    {
        dge <<- DGEList( counts=exprs(bottomly.eset), group=pData(bottomly.eset)$strain )
        # Normalize by total count
        dge <<- calcNormFactors( dge )

        # Create the contrast matrix
        design.mat <<- model.matrix(~ 0 + dge$samples$group)
        colnames(design.mat) <<- levels(dge$samples$group)

        # Estimate dispersion parameter for GLM
        dge <<- estimateGLMCommonDisp(dge, design.mat)
        dge <<- estimateGLMTrendedDisp(dge, design.mat, method="power")
        dge <<- estimateGLMTagwiseDisp(dge,design.mat)
    }

    # Do it all over again for 5 replicates
    if( !exists( 'dge.5reps' ) )
    {
        dge.5reps <<- DGEList(counts=exprs(bottomly.5reps), group=pData(bottomly.5reps)$strain)
        dge.5reps <<- calcNormFactors(dge.5reps)
        design.mat <<- model.matrix(~ 0 + dge.5reps$samples$group)
        colnames(design.mat) <<- levels(dge.5reps$samples$group)

        dge.5reps <<- estimateGLMCommonDisp(dge.5reps, design.mat)
        dge.5reps <<- estimateGLMTrendedDisp(dge.5reps, design.mat, method="power")
        dge.5reps <<- estimateGLMTagwiseDisp(dge.5reps,design.mat)
    }

    # Do it all over again for 2 replicates
    if( !exists( 'dge.2reps' ) )
    {
        dge.2reps <<- DGEList(counts=exprs(bottomly.2reps), group=pData(bottomly.2reps)$strain)
        dge.2reps <<- calcNormFactors(dge.2reps)
        design.mat <<- model.matrix(~ 0 + dge.2reps$samples$group)
        colnames(design.mat) <<- levels(dge.2reps$samples$group)

        dge.2reps <<- estimateGLMCommonDisp(dge.2reps, design.mat)
        dge.2reps <<- estimateGLMTrendedDisp(dge.2reps, design.mat, method="power")
        dge.2reps <<- estimateGLMTagwiseDisp(dge.2reps,design.mat)
    }

    # Plot mean-variance
    if ( plot )
    {
        # par( mfrow = c(3,1) )
        plotBCV(dge.2reps, main = '2 replicates' )
        plotBCV(dge.5reps, main = '5 replicates' )
        plotBCV(dge, main = '10 replicates' )
    }
}

# # # # # # #
# Use limma-voom
limma.voom.analysis <- function( plot = FALSE )
{
    # par( mfrow = c(3,1) )
    # Create design matrix
    design.mat <<- model.matrix(~ pData(bottomly.2reps)$strain)
    colnames(design.mat) <<- levels(pData(bottomly.2reps)$strain)

    # Apply voom transformation
    nf <<- calcNormFactors(bottomly.2reps)
    v.2reps <<- voom(exprs(bottomly.2reps), design.mat,
        lib.size=colSums(exprs(bottomly.2reps))*nf,
            normalize.method="quantile", plot = plot )

    # Do same for 5 replicate dataset
    design.mat <<- model.matrix(~ pData(bottomly.5reps)$strain)
    colnames(design.mat) <<- levels(pData(bottomly.5reps)$strain)
    nf <<- calcNormFactors(bottomly.5reps)
    v.5reps <<- voom(exprs(bottomly.5reps), design.mat,
        lib.size=colSums(exprs(bottomly.5reps))*nf,
            normalize.method="quantile", plot = plot )

    # Do same for 2 replicates dataset
    design.mat <<- model.matrix(~ pData(bottomly.eset)$strain)
    colnames(design.mat) <<- levels(pData(bottomly.eset)$strain)
    nf <<- calcNormFactors(bottomly.eset)
    v <<- voom(exprs(bottomly.eset), design.mat,
        lib.size=colSums(exprs(bottomly.eset))*nf,
            normalize.method="quantile", plot = plot )

}


### DEG analysis using different tools

# Set threshold
deAnalysis <- function( p.threshold = 5e-2 )
{
    ###########
    ## edgeR ##
    # Design matrix
    design.mat <- model.matrix(~ 0 + dge$samples$group)
    colnames(design.mat) <- c("C57BL", "DBA")

    # Model fitting
    fit.edgeR <- glmFit(dge, design.mat)

    # Differential expression
    contrasts.edgeR <- makeContrasts(C57BL - DBA, levels=design.mat)
    lrt.edgeR <- glmLRT(fit.edgeR, contrast=contrasts.edgeR)

    # Access results tables
    edgeR_results <- lrt.edgeR$table
    sig.edgeR <- decideTestsDGE(lrt.edgeR, adjust.method="BH", p.value = p.threshold)
    genes.edgeR <- row.names(edgeR_results)[which(sig.edgeR != 0)]

    ############
    ## DESeq2 ##
    contrast.deseq2 <- as.list(resultsNames(dds)[2:3])
    # contrast.deseq2 <- list("strainC57BL.6J", "strainDBA.2J")
    deseq2_results <- results(dds, contrast=contrast.deseq2)
    deseq2_results$threshold <- as.logical(deseq2_results$padj < p.threshold)
    genes.deseq <- row.names(deseq2_results)[which(deseq2_results$threshold)]
    summary(deseq2_results, alpha = p.threshold)

    ################
    ## voom-limma ##
    # Create design matrix
    design <- model.matrix(~ pData(bottomly.eset)$strain)

    # Usual limma pipeline
    fit.voom <- lmFit(v, design)
    fit.voom <- eBayes(fit.voom)

    voom_results <- topTable(fit.voom, coef=2,  adjust="BH", number = nrow(exprs(bottomly.eset)))
    voom_results$threshold <- as.logical(voom_results$adj.P.Val < p.threshold)
    genes.voom <- row.names(voom_results)[which(voom_results$threshold)]

    ## Plot that stuff!
    ## Check out lengths
    textplot( sprintf( 'edgeR:%d deseq:%d voom:%d\nSDE genes comparison | 10 replicates',
        length( genes.edgeR ),
        length( genes.deseq ),
        length( genes.voom ) ) )
    venn( list(edgeR = genes.edgeR, DESeq2 = genes.deseq, voom = genes.voom) )
}

deAnalysis.2reps <- function( p.threshold = 5e-2 )
{
    ###########
    ## edgeR ##
    # Design matrix
    design.mat <- model.matrix(~ 0 + dge.2reps$samples$group)
    colnames(design.mat) <- c("C57BL", "DBA")

    # Model fitting
    fit.edgeR <- glmFit(dge.2reps, design.mat)

    # Differential expression
    contrasts.edgeR <- makeContrasts(C57BL - DBA, levels=design.mat)
    lrt.edgeR <- glmLRT(fit.edgeR, contrast=contrasts.edgeR)

    # Access results tables
    edgeR_results_2reps <- lrt.edgeR$table
    sig.edgeR.2reps <- decideTestsDGE(lrt.edgeR, adjust.method="BH", p.value = p.threshold)
    genes.edgeR.2reps <- row.names(edgeR_results_2reps)[which(sig.edgeR.2reps != 0)]

    ############
    ## DESeq2 ##
    contrast.deseq2 <- as.list(resultsNames(dds)[2:3])
    # contrast.deseq2 <- list("strainC57BL.6J", "strainDBA.2J")
    deseq2_results_2reps <- results(dds.2rep, contrast=contrast.deseq2)
    deseq2_results_2reps$threshold <- as.logical(deseq2_results_2reps$padj < p.threshold)
    genes.deseq.2reps <- row.names(deseq2_results_2reps)[which(deseq2_results_2reps$threshold)]
    summary(deseq2_results_2reps, alpha = p.threshold)

    ################
    ## voom-limma ##
    # Create design matrix
    design <- model.matrix(~ pData(bottomly.2reps)$strain)

    # Usual limma pipeline
    fit.voom <- lmFit(v.2reps, design)
    fit.voom <- eBayes(fit.voom)

    voom_results_2reps <- topTable(fit.voom, coef=2,  adjust="BH", number = nrow(exprs(bottomly.2reps)))
    voom_results_2reps$threshold <- as.logical(voom_results_2reps$adj.P.Val < p.threshold)
    genes.voom.2reps <- row.names(voom_results_2reps)[which(voom_results_2reps$threshold)]


    ## Plot that stuff!
    ## Check out lengths
    textplot( sprintf( 'edgeR:%d deseq:%d voom:%d\nSDE genes comparison | 2 replicates',
        length( genes.edgeR.2reps ),
        length( genes.deseq.2reps ),
        length( genes.voom.2reps ) ) )
    venn(list(edgeR = genes.edgeR.2reps, DESeq2 = genes.deseq.2reps, voom = genes.voom.2reps) )
}
@mistrm82
Copy link
Owner

mistrm82 commented Oct 7, 2016

@aaiezza thanks so much for the script! glad to hear you enjoyed it! This lesson does need a few updates just haven't gotten around to it lately. Not sure when @ctb is planning another round of sign-ups for remote-local workshops but I would definitely do it again!

@NasserMahna1
Copy link

HI everybody,
Yes, it was a really good workshop. But, I got a problem in one of step:

resultsNames(dds)
[1] "Intercept" "strain_DBA.2J_vs_C57BL.6J"

contrast.deseq2 <- list("strain_C57BL.6J", "strain_DBA.2J")
deseq2_results <- results(dds, contrast=contrast.deseq2)
Error in checkContrast(contrast, resNames) :
all elements of the contrast as a list of length 2 should be elements of 'resultsNames(object)'

AS you can find, this part ""strain_DBA.2J_vs_C57BL.6J"" was different from the results in the workshop session and I think that is why I did not get DESeq2 results.
Would you please help me in this?
Thanks,
Nasser

@cicicamus
Copy link

HI everybody,
Yes, it was a really good workshop. But, I got a problem in one of step:

resultsNames(dds)
[1] "Intercept" "strain_DBA.2J_vs_C57BL.6J"

contrast.deseq2 <- list("strain_C57BL.6J", "strain_DBA.2J")
deseq2_results <- results(dds, contrast=contrast.deseq2)
Error in checkContrast(contrast, resNames) :
all elements of the contrast as a list of length 2 should be elements of 'resultsNames(object)'

AS you can find, this part ""strain_DBA.2J_vs_C57BL.6J"" was different from the results in the workshop session and I think that is why I did not get DESeq2 results.
Would you please help me in this?
Thanks,
Nasser

Yes that's the problem.
change the line to contrast.deseq2 <- list(c("strain_DBA.2J_vs_C57BL.6J") ) and it should work.

@NasserMahna1
Copy link

contrast.deseq2 <- list(c("strain_DBA.2J_vs_C57BL.6J") )

Hi there,

Thanks for your help. Actually it did not work like this: contrast.deseq2 <- list(c("strain_DBA.2J_vs_C57BL.6J") )
But I could get the results through this:
contrast.deseq2 <- list("strain_DBA.2J_vs_C57BL.6J")

The problem solved.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants