Skip to content

Commit

Permalink
Edit library files to process based on column headers, and try to eliminate multiple paths matching due to overlapping sample names
Browse files Browse the repository at this point in the history
  • Loading branch information
chenv3 committed Dec 18, 2023
1 parent fccebd6 commit 2a7f53c
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions workflow/scripts/create_library_files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

import argparse, sys
import argparse, sys, os, re

class SmartFormatter(argparse.HelpFormatter):

Expand All @@ -27,24 +27,32 @@ def main(raw_args=None):
with open(args.file_name) as f:
headers = next(f).strip().split(',')
#print(headers)
try:
name_index = [i for i in range(len(headers)) if headers[i].casefold() == 'Name'.casefold()][0]
flowcell_index = [i for i in range(len(headers)) if headers[i].casefold() == 'Flowcell'.casefold()][0]
sample_index = [i for i in range(len(headers)) if headers[i].casefold() == 'Sample'.casefold()][0]
library_index = [i for i in range(len(headers)) if headers[i].casefold() == 'Type'.casefold()][0]
except:
print("File headers could not be parsed. Please check that they match the expected format of: Name,Flowcell,Sample,Type\n")
samples = dict()
for line in f:
line = line.strip().split(',')
if line[0] in samples:
samples[line[0]].append(line[1:])
if line[name_index] in samples:
samples[line[name_index]].append([line[flowcell_index], line[sample_index], line[library_index]])
else:
samples[line[0]] = [line[1:]]

samples[line[name_index]] = [[line[flowcell_index], line[sample_index], line[library_index]]]
for sample in samples:
text = []
for values in samples[sample]:
if args.fastqs != None:
runs = [path for path in fastqs if values[0].rstrip('/') in path]
if len(runs) != 1:
runs = [run for run in runs if values[1] in run]
# runs = [run for run in runs if values[1] in run]
runs = [run for run in runs for j in run.split(os.sep) if len(re.findall(values[1] + r'$', j)) > 0]
if len(runs) != 1:
sys.exit("Problems finding unique match for %s in %s" % (values[0], args.fastqs))
else:
print([runs[0], values[1], values[2]])
text.append(",".join([runs[0], values[1], values[2]]))
else:
text.append(",".join(values))
Expand Down

0 comments on commit 2a7f53c

Please sign in to comment.