1) Use cutadapt to trim low-quality nucleotides (Phred quality below 20) and Illumina adaptor sequences from the 3' end of the reads
# Per-sample loop that submits a grid-engine job with qsub; the job log goes
# to qsubout/cutadapt.<sample>.qsubout and 50G of h_vmem is requested.
# NOTE(review): this block is incomplete as written - the qsub line stops
# right after "-N" (no job name and no command to run follow it) and the loop
# has no closing "done". Presumably the cutadapt invocation was lost; restore
# it before running.
for i in "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" "s_6_In_C3" "s_6_In_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20" "s_6_IP_C3" "s_6_IP_C8"
do
qsub -j y -o qsubout/cutadapt.$i.qsubout -b y -cwd -l h_vmem=50G -N
# Per-sample loop that submits a 10-slot ("-pe onehost 10") grid-engine job;
# the job log goes to qsubout/<sample>.rRNA.PhiX.out.
# NOTE(review): this block is incomplete as written - the qsub line stops
# right after "-N" (no job name and no command follow it) and the loop has no
# closing "done". Presumably this was the bowtie alignment against rRNA/PhiX
# whose SAM output the later Discard_fastq_PE.pl step reads - confirm and
# restore the missing command.
for i in "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" "s_6_In_C3" "s_6_In_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20" "s_6_IP_C3" "s_6_IP_C8"
do
qsub -j y -o qsubout/$i.rRNA.PhiX.out -b y -cwd -pe onehost 10 -l h_vmem=3G -N
3) Use custom Perl scripts to remove contaminant reads based on the bowtie alignment (the scripts can be downloaded from https://github.com/Qiongyi/m6A/)
# Remove contaminant read pairs from every sample, based on the rRNA/PhiX
# bowtie alignment.
# Arguments passed to Discard_fastq_PE.pl, in order: the alignment SAM file,
# the cutadapt-trimmed read1 and read2 FASTQ files (gzipped), the read1 and
# read2 FASTQ paths under filter/, and filter_stats.xls.
# NOTE(review): the last three arguments are presumably outputs (filtered
# reads plus a statistics table shared by all samples) - confirm against the
# script's usage message.
# The loop is written with explicit "; do" / newline separators: without them
# bash treats "do", the command and "done" as extra loop words.
for i in "s_6_In_C3" "s_6_In_C8" "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" \
  "s_6_IP_C3" "s_6_IP_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20"; do
  Discard_fastq_PE.pl \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/bowtie2rRNAPhiX/${i}.rRNAPhiX.sam" \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/${i}_read1.fastq.gz" \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/${i}_read2.fastq.gz" \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/filter/${i}_read1.fastq" \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/filter/${i}_read2.fastq" \
    filter_stats.xls
done
# Submit one TopHat alignment job per sample to the grid engine.
# qsub part: log to qsubout/<sample>.tophat.cutadapt.out, run the command as
# a binary (-b y) from the current directory (-cwd), request 8 slots
# (-pe onehost 8, matching tophat -p 8) and name the job
# cutadapt.<sample>.tophat.
# tophat part: aligns the filtered read1/read2 FASTQ files against the mm10
# bowtie2 index using the mm10 GTF annotation and a pre-built transcriptome
# index; results go to <sample>_tophat_filter (relative to the working dir).
# The loop is written with explicit "; do" / newline separators: without them
# bash treats "do", the command and "done" as extra loop words.
for i in "s_6_In_C3" "s_6_In_C8" "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" \
  "s_6_IP_C3" "s_6_IP_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20"; do
  qsub -j y -o "qsubout/${i}.tophat.cutadapt.out" -b y -cwd \
    -pe onehost 8 -l h_vmem=5G -N "cutadapt.${i}.tophat" \
    tophat \
    --mate-inner-dist 50 --mate-std-dev 50 \
    --microexon-search --min-anchor 10 \
    -p 8 \
    -o "${i}_tophat_filter" \
    -G /clusterdata/hiseq_apps/resources/freeze001/mm10/mm10.gtf \
    --transcriptome-index /illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/transcriptome_data/known \
    /clusterdata/hiseq_apps/resources/freeze001/mm10/bowtie2_index/mm10 \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/filter/${i}_read1.fastq" \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/fastq/cutadapt_new/filter/${i}_read2.fastq"
done
# For every sample: keep only properly paired alignments (-f2) with mapping
# quality >= 4 (-q 4) from TopHat's accepted_hits.bam, sort them, and index
# the sorted BAM.
# "samtools sort - <prefix>" is the legacy samtools syntax in which the last
# argument is an output prefix; it writes <prefix>.bam, which is the file
# indexed on the next line. NOTE(review): newer samtools releases expect
# "-o <file>" instead - confirm the installed version before changing it.
# The two samtools steps are separate commands; in the collapsed one-line
# form "samtools index ..." was swallowed as extra arguments of "sort".
for i in "s_6_In_C3" "s_6_In_C8" "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" \
  "s_6_IP_C3" "s_6_IP_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20"; do
  samtools view -h -bu -f2 -q 4 \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/accepted_hits.bam" \
    | samtools sort - \
      "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/Properly_accepted_hits.sorted"
  samtools index \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/Properly_accepted_hits.sorted.bam"
done
# Per-sample loop that submits an 8-slot grid-engine job; the job log goes to
# qsubout/MarkDuplicates.<sample>.
# NOTE(review): this block is incomplete as written - the qsub line stops
# right after "-N" (no job name and no command follow it) and the loop has no
# closing "done". Judging by the log name this presumably ran a
# MarkDuplicates step producing the Properly_accepted_hits.asd.bam files read
# by the next step - confirm and restore the missing command.
for i in "s_6_In_C3" "s_6_In_C8" "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" "s_6_IP_C3" "s_6_IP_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20"
do
qsub -j y -o qsubout/MarkDuplicates.$i -b y -cwd -pe onehost 8 -l h_vmem=5G -N
# For every sample: write a copy of Properly_accepted_hits.asd.bam without
# the reads flagged as duplicates (-F 1024 excludes SAM flag 0x400) to
# Properly_accepted_hits.asd.rmdup.bam, then index the result.
# NOTE(review): the .asd.bam input is presumably the duplicate-marked BAM
# from the preceding MarkDuplicates step - confirm.
# The two samtools steps are separate commands, and the loop is closed with
# "done"; the collapsed one-line form had neither the separators nor "done".
for i in "s_6_In_C3" "s_6_In_C8" "s_6_In_C13" "s_6_In_C15" "s_6_In_C18" "s_6_In_C20" \
  "s_6_IP_C3" "s_6_IP_C8" "s_6_IP_C13" "s_6_IP_C15" "s_6_IP_C18" "s_6_IP_C20"; do
  samtools view -b \
    -o "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/Properly_accepted_hits.asd.rmdup.bam" \
    -F 1024 \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/Properly_accepted_hits.asd.bam"
  samtools index \
    "/illumina/Data/131212_7001408_0063_BC39MNACXX/TBJW_RNAMETHYL/results/tophat2/${i}_tophat_filter/Properly_accepted_hits.asd.rmdup.bam"
done
# Loop over the six replicate IDs (C3 ... C20, without the In/IP prefix) that
# submits one grid-engine job per replicate; the job log goes to
# qsubout/<replicate>.winscore.out and 20G of h_vmem is requested.
# NOTE(review): this block is incomplete as written - the qsub line stops
# right after "-N" (no job name and no command follow it) and the loop has no
# closing "done". The window-score command that belongs here is not visible;
# restore it before running.
for i in "C3" "C8" "C13" "C15" "C18" "C20"
do
qsub -j y -o qsubout/$i.winscore.out -b y -cwd -l h_vmem=20G -N
# Run RNAMethy_GetUniqSummit.pl once per summit table (Naive, Context, FC).
# Each call gets <set>_SummitPosition.xls followed by
# <set>_UniqSummitPosition.xls (presumably input then output - confirm
# against the script's usage message).
# These are three separate invocations; fused onto one line the script would
# be called once with all eight remaining words as arguments.
RNAMethy_GetUniqSummit.pl Naive_SummitPosition.xls Naive_UniqSummitPosition.xls
RNAMethy_GetUniqSummit.pl Context_SummitPosition.xls Context_UniqSummitPosition.xls
RNAMethy_GetUniqSummit.pl FC_SummitPosition.xls FC_UniqSummitPosition.xls
# Called with the three per-set unique-summit tables and a fourth file name,
# Naive_Context_FC_SummitPosition.xls (presumably the combined output -
# confirm against the script's usage message).
RNAMethy_CombineUniqSummit.pl Naive_UniqSummitPosition.xls Context_UniqSummitPosition.xls FC_UniqSummitPosition.xls Naive_Context_FC_SummitPosition.xls
# Called with the number 150, the combined table, and two further file names
# (*.group.xls and *_UniqSummitPosition.xls).
# NOTE(review): 150 is presumably a distance/window size in bases used to
# group nearby summits, and the last two files presumably outputs - confirm.
RNAMethy_GetUniqSummit2.pl 150 Naive_Context_FC_SummitPosition.xls Naive_Context_FC_SummitPosition.group.xls Naive_Context_FC_UniqSummitPosition.xls
# Called with the unique combined summit table and Naive_Context_FC.ttest.xls
# (presumably the t-test result table - confirm).
RNAMethy_Summit_TTest.pl Naive_Context_FC_UniqSummitPosition.xls Naive_Context_FC.ttest.xls
# Called with the mm10 GTF annotation, the Naive unique-summit table, and
# Naive.final.GenomicLocation.xls (presumably the annotated output - confirm).
RNAMethy_AddGenomicLocation_Case.pl /clusterdata/hiseq_apps/resources/freeze001/mm10/mm10.gtf Naive_UniqSummitPosition.xls Naive.final.GenomicLocation.xls
# Called with the mm10 GTF annotation, the Naive summit table (note: the
# non-unique Naive_SummitPosition.xls, not the Uniq table used above), and
# two *_TranscriptSegments file names (presumably per-summit output and a
# summary statistics table - confirm).
RNAMethy_GenomicLocation.pl /clusterdata/hiseq_apps/resources/freeze001/mm10/mm10.gtf Naive_SummitPosition.xls Naive_SummitPosition_TranscriptSegments.xls Naive_SummitPosition_TranscriptSegments_stat.xls
# Loop over i = 4, 5, 6 that runs RNAMethy_MotifFinder1_Kmer.pl once per
# value (presumably i is the k-mer length, given the m6A_Kmers_{4,5,6}bp
# files used by the later clustering step - confirm).
# NOTE(review): this block is incomplete as written - the script is called
# without any arguments (so $i is never used) and the loop has no closing
# "done". Restore the missing arguments before running.
for i in {4..6}
do
RNAMethy_MotifFinder1_Kmer.pl
# Loop over i = 4, 5, 6 that filters a table with awk: the visible part of
# the condition keeps rows whose first field is "Kmer" (presumably the
# header line) or whose 4th field is at least 1.5 times the 5th field.
# NOTE(review): this block is incomplete as written - the awk program is cut
# off after "&&" inside its single-quoted string, no input/output files are
# given, and the loop has no closing "done". Presumably it produced the
# m6A_Kmers_<i>bp_1.5fold.xls files used by the clustering step - confirm and
# restore the rest of the command.
for i in {4..6}
do
awk '$1=="Kmer" || ($4>=$5*1.5 &&
# Called with the number 1, the three 1.5-fold k-mer tables (4, 5 and 6 bp),
# and Kmer_cluster.xls (presumably the clustering output - confirm).
# NOTE(review): the meaning of the leading "1" is not visible here; check the
# script's usage message.
RNAMethy_MotifFinder2_Cluster.pl 1 m6A_Kmers_4bp_1.5fold.xls m6A_Kmers_5bp_1.5fold.xls m6A_Kmers_6bp_1.5fold.xls Kmer_cluster.xls
4) Calculate the position-specific scoring matrix (PSSM) for each detected motif (the example shown is for the first seed)
# Called with the FASTA file of k-mer sequences for seed 1 and seed1.PSSM.xls
# (presumably the output matrix table - confirm). Repeat with the matching
# file names for the other seeds.
RNAMethy_MotifFinder3_PSSM.pl Kmer_sequences_seed1.fa seed1.PSSM.xls