Update file names and install cigar_process and run_bowtie as scripts

Vicky-Hunt-Lab · Feb 26, 2021 · 8e824fc · 8e824fc
1 parent 236c63a
commit 8e824fc
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 12 deletions.
diff --git a/bin/stepRNA b/bin/stepRNA
@@ -17,9 +17,9 @@ from stepRNA.output import make_csv, make_type_csv, write_to_bam, print_hist, re
 #Scripts to be used...
 import stepRNA.remove_exact as remove_exact
 import stepRNA.make_unique as make_unique
-import stepRNA.run_bowtie as run_bowtie
+import stepRNA.stepRNA-run_bowtie as run_bowtie
 import stepRNA.index_bowtie as index_bowtie
-import stepRNA.cigar_process as cigar_process
+import stepRNA.stepRNA-cigar_process as cigar_process
 
 #Modules that need to be installed
 try:
@@ -60,6 +60,7 @@ optional.add_argument('-m', '--min_score', default=-1, type=int, help='Minimum s
 flags.add_argument('-e', '--remove_exact', action='store_true', help='Remove exact read matches to the reference sequence')
 flags.add_argument('-u', '--make_unique', action='store_true', help='Make FASTA headers unique in reference and reads i.e. >Read_1 >Read_2')
 flags.add_argument('-j', '--write_json', action='store_true', help='Write count dictionaries to a JSON file')
+flags.add_argument('-V', '--version', action='version', version='stepRNA v1.0.0', help='Print version number then exit.')
 
 #parser._action_groups.append(optional)
 #parser._action_groups.append(flags)
@@ -109,13 +110,23 @@ sorted_bam = run_bowtie.main(ref_base, reads, prefix, min_score, logger)
 logger.write('Alignment completed')
 
 #Cigar process...
+fpath = os.path.join(outdir, prefix + '_AlignmentFiles')
+if os.path.isdir(fpath):
+    logger.write('Removing contents in {}'.format(fpath))
+    for f in os.listdir(fpath):
+        try:
+            os.remove(os.path.join(fpath, f))
+        except:
+            logger.log('Could not remove {}'.format(f))
+
+logger.write('Processing Cigar strings...')
 right_dic, left_dic, type_dic, read_len_dic, refs_read_dic = cigar_process.main(sorted_bam, prefix, args.write_json)
 logger.write('Cigar strings processed')
 
 # Count unique references
 right_unique_dic = defaultdict(lambda:0) 
 left_unique_dic = defaultdict(lambda:0)
-fpath = os.path.join(outdir, 'AlignmentFiles')
+fpath = os.path.join(outdir, prefix + '_AlignmentFiles')
 for f in os.listdir(fpath):
     if 'passed' not in f:
         key = int(f.split('_')[-2])
@@ -129,14 +140,14 @@ for f in os.listdir(fpath):
 
 #Put overhangs infomation into a csv and print to terminal...
 logger.write('\n## Overhang counts ##')
-make_csv([right_dic, left_dic], prefix + '_overhang.csv', ['OH','Left','Right'])
+make_csv([right_dic, left_dic], prefix + '_overhang.csv', ['Overhang','5prime','3prime'])
 logger.write('\n## Unique overhang counts ##')
-make_csv([right_unique_dic, left_unique_dic], prefix + '_unique_overhang.csv', ['OH','Left','Right'])
+make_csv([right_unique_dic, left_unique_dic], prefix + '_unique_overhang.csv', ['Overhang','3prime','5prime'])
 logger.write('\n## Overhang types ##')
-make_type_csv(type_dic, prefix + '_type.csv', ['OH_type', 'count'])
+make_type_csv(type_dic, prefix + '_overhang_type.csv', ['Classification', 'count'])
 logger.write('\n## Read lengths ##')
-make_type_csv(read_len_dic, prefix + '_read_len.csv', ['Read_length', 'count'], sort=True)
-make_type_csv(refs_read_dic, prefix + '_ref_hits.csv', ['Reference', 'count'], show=False)
+make_type_csv(read_len_dic, prefix + '_passenger_length.csv', ['sRNA_read', 'passenger_count'], sort=True)
+make_type_csv(refs_read_dic, prefix + '_passenger_number.csv', ['Passenger_length', 'number'], show=False)
 print()
 
 def make_hist(csv_in):

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
     long_description = fh.read()
 
 setuptools.setup(name='stepRNA',
-    version='0.1.10',
+    version='0.1.11',
     author='Ben Murcott',
     author_email='[email protected]',
     description='Align short RNA seqeuncing reads to determine the length of of overhang.',
@@ -25,6 +25,9 @@
         "Bio>=0.3.0",
         "numpy>=1.20.1"
         ],
-    scripts=["bin/stepRNA"]
+    scripts=["bin/stepRNA",
+    "stepRNA/stepRNA-run_bowtie.py",
+    "stepRNA/stepRNA-cigar_process.py"
+    ]
 )
 
diff --git a/stepRNA/cigar_process.py → stepRNA/stepRNA-cigar_process.py b/stepRNA/cigar_process.py → stepRNA/stepRNA-cigar_process.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 from collections import defaultdict
 import json
 import os
@@ -50,8 +52,9 @@ def add_to_MakeBam(dic, length, additional, record):
                         refs_read_dic[line.reference_name] += 1 # number of reads algining to reference
                     except Exception:
                         continue
-    directory = os.path.dirname(filepath)
-    outdir = os.path.join(directory, 'AlignmentFiles')
+    #directory = os.path.dirname(filepath)
+    #outdir = os.path.join(directory, 'AlignmentFiles')
+    outdir = filepath + '_AlignmentFiles'
     check_dir(outdir)
     for key in MakeBam_dic:
         outfile = os.path.join(outdir, '{}_{}.bam'.format(os.path.basename(filepath), key))

diff --git a/stepRNA/run_bowtie.py → stepRNA/stepRNA-run_bowtie.py b/stepRNA/run_bowtie.py → stepRNA/stepRNA-run_bowtie.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 from stepRNA.processing import sam_to_bam
 from stepRNA.general import mini_maxi, replace_ext, check_dir
 from subprocess import run, PIPE