Skip to content

Commit

Permalink
Completes support for FastQ files
Browse files Browse the repository at this point in the history
  • Loading branch information
Joaquín Tárraga Giménez committed Dec 10, 2014
1 parent ae6c5d0 commit 34d7a67
Show file tree
Hide file tree
Showing 9 changed files with 594 additions and 115 deletions.
121 changes: 121 additions & 0 deletions c/src/bioformats/fastq/fastq_edit.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "fastq_read.h"
#include "fastq_edit.h"

//------------------------------------------------------------------------

/**
 * Allocates a fastq_edit_options_t on the heap and copies the given values
 * into it.
 *
 * @param left_length        length of the left-end window examined by fastq_edit()
 * @param min_left_quality   lower bound for the mean quality of the left window
 * @param max_left_quality   upper bound for the mean quality of the left window
 * @param right_length       length of the right-end window examined by fastq_edit()
 * @param min_right_quality  lower bound for the mean quality of the right window
 * @param max_right_quality  upper bound for the mean quality of the right window
 * @param min_N_quality      stored as-is in the options
 * @param max_N_quality      stored as-is in the options
 * @param convert_quality    stored as-is in the options
 * @return the new options object, or NULL if the allocation fails; release it
 *         with fastq_edit_options_free()
 */
fastq_edit_options_t *fastq_edit_options_new(int left_length, int min_left_quality,
                                             int max_left_quality, int right_length,
                                             int min_right_quality, int max_right_quality,
                                             int min_N_quality, int max_N_quality,
                                             int convert_quality) {
  fastq_edit_options_t *b = malloc(sizeof *b);
  if (b == NULL) {
    // out of memory: report it to the caller instead of dereferencing NULL
    return NULL;
  }

  b->left_length = left_length;
  b->min_left_quality = min_left_quality;
  b->max_left_quality = max_left_quality;

  b->right_length = right_length;
  b->min_right_quality = min_right_quality;
  b->max_right_quality = max_right_quality;

  b->min_N_quality = min_N_quality;
  b->max_N_quality = max_N_quality;

  b->convert_quality = convert_quality;

  return b;
}

//------------------------------------------------------------------------

/**
 * Releases an options object created by fastq_edit_options_new().
 *
 * @param b options to free; NULL is accepted and ignored
 */
void fastq_edit_options_free(fastq_edit_options_t *b) {
  // free(NULL) is defined to do nothing, so no guard is required
  free(b);
}

//------------------------------------------------------------------------

/**
 * Mean of the len quality characters starting at quality[start], rounded to
 * the nearest integer. The characters are averaged as stored (no offset is
 * subtracted). The caller guarantees len > 0 and that the window lies inside
 * the quality string.
 */
static int fastq_edit_window_quality(const char *quality, int start, int len) {
  long acc = 0;
  for (int j = 0; j < len; j++) {
    acc += quality[start + j];
  }
  return (int) round((double) acc / len);
}

/**
 * Trims, in place, the ends of the reads of an array list according to the
 * mean quality of each end.
 *
 * For every read, the first options->left_length positions are removed when
 * their rounded mean quality is below options->min_left_quality or above
 * options->max_left_quality; the last options->right_length positions are
 * handled the same way with the right-hand bounds. A window length <= 0
 * disables that end. A window longer than the read cannot be evaluated, so
 * that end is left untouched. If both trims together cover the whole read,
 * the read becomes empty (length 0).
 *
 * The sequence and quality strings of a trimmed read are shifted/terminated in
 * place and read->length is updated.
 *
 * @param reads    array list of fastq_read_t pointers
 * @param options  trimming options
 * @return number of reads that were trimmed (0 if reads or options is NULL)
 */
int fastq_edit(array_list_t *reads, fastq_edit_options_t *options) {
  if (reads == NULL || options == NULL) {
    return 0;
  }

  size_t num_edited = 0;
  size_t num_items = array_list_size(reads);

  for (size_t i = 0; i < num_items; i++) {
    fastq_read_t *read = array_list_get(i, reads);
    char *sequence = read->sequence;
    char *quality = read->quality;
    int read_length = read->length;

    // [start_trim, end_trim] is the inclusive range of positions to keep
    int start_trim = 0;
    int end_trim = read_length - 1;

    // trimming left ?
    int len = options->left_length;
    if (len > 0 && len <= read_length) {
      int left_qual = fastq_edit_window_quality(quality, 0, len);
      if (left_qual < options->min_left_quality || left_qual > options->max_left_quality) {
        start_trim = len;
      }
    }

    // trimming right ?
    len = options->right_length;
    if (len > 0 && len <= read_length) {
      // the window is exactly the last len positions
      int right_qual = fastq_edit_window_quality(quality, read_length - len, len);
      if (right_qual < options->min_right_quality || right_qual > options->max_right_quality) {
        end_trim = read_length - len - 1;
      }
    }

    if (start_trim == 0 && end_trim == read_length - 1) {
      continue; // nothing to trim for this read
    }

    num_edited++;

    // both bounds are inclusive; the range is empty when the two trimmed
    // windows overlap or together cover the whole read
    int new_length = end_trim - start_trim + 1;
    if (new_length < 0) {
      new_length = 0;
    }

    if (start_trim != 0 && new_length > 0) {
      // source and destination overlap, hence memmove
      memmove(sequence, sequence + start_trim, (size_t) new_length);
      memmove(quality, quality + start_trim, (size_t) new_length);
    }

    read->length = new_length;
    sequence[new_length] = 0;
    quality[new_length] = 0;
  }

  return (int) num_edited;
}

//------------------------------------------------------------------------
//------------------------------------------------------------------------
65 changes: 65 additions & 0 deletions c/src/bioformats/fastq/fastq_edit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* fastq_edit.h
*
* Created on: May 23, 2013
* Author: jtarraga
*
*/

#ifndef FASTQ_EDIT_H
#define FASTQ_EDIT_H

#ifdef __cplusplus
extern "C" {
#endif

#include "containers/array_list.h"

#include "fastq_filter.h"

//------------------------------------------------------------------------

// Quality conversion directions. Presumably the values accepted by the
// convert_quality option (Phred+33 -> Phred+64 and back) - TODO confirm
// against the code that reads convert_quality.
#define PHRED_33_TO_64 1
#define PHRED_64_TO_33 2

//------------------------------------------------------------------------

/**
* Options controlling how fastq_edit() modifies reads.
*
* fastq_edit() averages the quality characters as stored in the read (it
* subtracts no offset), so the left/right quality bounds below are compared
* against that raw mean.
*/
typedef struct fastq_edit_options {
// left trim: number of leading positions whose mean quality is evaluated
// (fastq_edit() skips the left trim when this is <= 0)
int left_length;
// the left window is removed when its rounded mean quality is
// < min_left_quality or > max_left_quality
int min_left_quality;
int max_left_quality;

// right trim: number of trailing positions whose mean quality is evaluated
// (fastq_edit() skips the right trim when this is <= 0)
int right_length;
// the right window is kept only when its rounded mean quality lies within
// [min_right_quality, max_right_quality]
int min_right_quality;
int max_right_quality;

// quality bounds for converting nucleotides to N
// NOTE(review): not read by fastq_edit(); presumably nucleotides whose
// quality falls in [min_N_quality, max_N_quality] become N - confirm
int min_N_quality;
int max_N_quality;

// quality conversion to apply
// NOTE(review): not read by fastq_edit(); presumably PHRED_33_TO_64 or
// PHRED_64_TO_33 - confirm
int convert_quality;
} fastq_edit_options_t;


/**
* @brief Creates a new set of edit options
* @param left_length length of the left-end window
* @param min_left_quality lower mean-quality bound for the left window
* @param max_left_quality upper mean-quality bound for the left window
* @param right_length length of the right-end window
* @param min_right_quality lower mean-quality bound for the right window
* @param max_right_quality upper mean-quality bound for the right window
* @param min_N_quality value stored in the min_N_quality field
* @param max_N_quality value stored in the max_N_quality field
* @param convert_quality value stored in the convert_quality field
* @return pointer to a heap-allocated options structure holding the given values
*
* The returned structure must be released with fastq_edit_options_free()
*/
fastq_edit_options_t *fastq_edit_options_new(int left_length, int min_left_quality,
int max_left_quality, int right_length,
int min_right_quality, int max_right_quality,
int min_N_quality, int max_N_quality,
int convert_quality);


/**
* @brief Frees a set of edit options
* @param options pointer to the options to free (a NULL pointer is ignored)
*/
void fastq_edit_options_free(fastq_edit_options_t *options);

/**
* @brief Trims the ends of the reads according to their mean quality
* @param reads pointer to array list of fastq reads, modified in place
* @param options pointer to the edit options (window lengths and quality bounds)
* @return number of reads that were trimmed
*
* For each read, the left and/or right window is removed when its rounded
* mean quality lies outside the corresponding [min, max] bounds
*/
int fastq_edit(array_list_t *reads, fastq_edit_options_t *options);

#ifdef __cplusplus
}
#endif

#endif /* FASTQ_EDIT_H */
44 changes: 33 additions & 11 deletions c/src/bioformats/fastq/fastq_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -536,22 +536,44 @@ int fastq_fread_index_positions(fastq_read_t* buffer_reads, int *index_positions
return count;
}

int fastq_fwrite(fastq_read_t* buffer_reads, int num_writes, fastq_file_t *fq_file) {
int count = 0;
//--------------------------------------------------------------------

while (count < num_writes) {
fprintf(fq_file->fd, "%s\n", buffer_reads->id);
fprintf(fq_file->fd, "%s\n", buffer_reads->sequence);
fprintf(fq_file->fd, "+\n");
fprintf(fq_file->fd, "%s\n", buffer_reads->quality);
int fastq_fwrite(array_list_t *reads, fastq_file_t *fq_file) {
fastq_read_t *fq_read;
size_t num_items = array_list_size(reads);

buffer_reads++;
count++;
}
for (size_t i = 0; i < num_items; i++) {
fq_read = array_list_get(i, reads);

return count;
fprintf(fq_file->fd, "%s\n", fq_read->id);
fprintf(fq_file->fd, "%s\n", fq_read->sequence);
fprintf(fq_file->fd, "+\n");
fprintf(fq_file->fd, "%s\n", fq_read->quality);
}

return num_items;
}


//--------------------------------------------------------------------

/**
 * Writes the first num_writes reads of a contiguous buffer to a FastQ file.
 * Each read produces four lines: id, sequence, "+" and quality.
 *
 * @param buffer_reads  pointer to the first read of the buffer
 * @param num_writes    number of reads to write
 * @param fq_file       FastQ file handler whose fd stream receives the output
 * @return number of reads written; this is smaller than num_writes when a
 *         write fails (writing stops at the first failure), and 0 when
 *         buffer_reads, fq_file or its stream is NULL
 */
int fastq_fwrite_buffer(fastq_read_t* buffer_reads, int num_writes, fastq_file_t *fq_file) {
  if (buffer_reads == NULL || fq_file == NULL || fq_file->fd == NULL) {
    return 0;
  }

  int count = 0;
  while (count < num_writes) {
    // one call per record; a negative result signals an output error
    if (fprintf(fq_file->fd, "%s\n%s\n+\n%s\n",
                buffer_reads->id, buffer_reads->sequence, buffer_reads->quality) < 0) {
      break;
    }

    buffer_reads++;
    count++;
  }

  return count;
}

//-----------------------------------------------------

/**
 * Reports the read count stored in the file handler's num_reads field.
 *
 * @param fq_file  FastQ file handler
 * @return the value of fq_file->num_reads
 */
unsigned int fastq_fcount(fastq_file_t *fq_file) {
  unsigned int total_reads = fq_file->num_reads;

  return total_reads;
}
Expand Down
12 changes: 11 additions & 1 deletion c/src/bioformats/fastq/fastq_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ int fastq_fread_paired_batch_max_size2(fastq_batch_t *fq_batch, unsigned long ma
*/
int fastq_fread_index_positions(fastq_read_t* buffer_reads, int *index_positions, fastq_file_t *fq_file);

/**
* @brief Writes reads stored in an array list to file
* @param reads pointer to array list of fastq reads
* @param fq_file pointer to the fastq file handler
* @return number of written reads
*
* Writes reads stored in an array list to file using the given file handler
*/
int fastq_fwrite(array_list_t *reads, fastq_file_t *fq_file);

/**
* @brief Writes reads stored in a buffer to file
* @param buffer_reads pointer to fastq reads buffer
Expand All @@ -181,7 +191,7 @@ int fastq_fread_index_positions(fastq_read_t* buffer_reads, int *index_positions
*
* Writes reads stored in a buffer to file using the given file handler
*/
int fastq_fwrite(fastq_read_t* buffer_reads, int num_writes, fastq_file_t *fq_file);
int fastq_fwrite_buffer(fastq_read_t *buffer_reads, int num_writes, fastq_file_t *fq_file);

/**
* @brief Returns the number of reads of a fastq file
Expand Down
Loading

0 comments on commit 34d7a67

Please sign in to comment.