Skip to content

Commit

Permalink
Add nucleotide-count exercise
Browse files Browse the repository at this point in the history
  • Loading branch information
keiravillekode committed Oct 15, 2023
1 parent 218ee45 commit 625f2d6
Show file tree
Hide file tree
Showing 12 changed files with 3,582 additions and 0 deletions.
12 changes: 12 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@
"filtering"
]
},
{
"uuid": "2ef7f926-e6f1-4828-94ed-5aee5930987a",
"slug": "nucleotide-count",
"name": "Nucleotide Count",
"practices": [],
"prerequisites": [],
"difficulty": 3,
"topics": [
"arrays",
"strings"
]
},
{
"slug": "rotational-cipher",
"name": "Rotational Cipher",
Expand Down
23 changes: 23 additions & 0 deletions exercises/practice/nucleotide-count/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Instructions

Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
All known life depends on DNA!

> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.

DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
We call the order of these nucleotides in a bit of DNA a "DNA sequence".

We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Given a string representing a DNA sequence, count how many of each nucleotide is present.
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.

For example:

```text
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
"INVALID" -> error
```
2 changes: 2 additions & 0 deletions exercises/practice/nucleotide-count/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.o
tests
19 changes: 19 additions & 0 deletions exercises/practice/nucleotide-count/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"authors": [
"keiravillekode"
],
"files": {
"solution": [
"nucleotide_count.asm"
],
"test": [
"nucleotide_count_test.c"
],
"example": [
".meta/example.asm"
]
},
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
"source": "The Calculating DNA Nucleotides problem at Rosalind",
"source_url": "https://rosalind.info/problems/dna/"
}
60 changes: 60 additions & 0 deletions exercises/practice/nucleotide-count/.meta/example.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
section .text
global nucleotide_counts

; void nucleotide_counts(const char *strand, int *counts);
;
; Tallies the nucleotides in a NUL-terminated strand and writes four 32-bit
; totals to counts[], in the order adenine, cytosine, guanine, thymine.
; If the strand contains any byte other than 'A', 'C', 'G', 'T' before the
; terminating NUL, all four entries are set to -1 instead.
;
; Register use:
;   rdi - address of the next strand character
;   rsi - address of the counts array (four 4-byte ints)
;   cl  - current character
;   r8  - adenine count
;   r9  - cytosine count
;   r10 - guanine count
;   r11 - thymine count
nucleotide_counts:
    xor r8, r8                      ; Current adenine count is 0
    xor r9, r9                      ; Current cytosine count is 0
    xor r10, r10                    ; Current guanine count is 0
    xor r11, r11                    ; Current thymine count is 0
    jmp .read

.adenine:
    inc r8
    jmp .read

.cytosine:
    inc r9
    jmp .read

.guanine:
    inc r10
    jmp .read

.thymine:
    inc r11                         ; Falls through into .read

.read:
    mov cl, byte [rdi]              ; Load strand character
    inc rdi
    cmp cl, 'A'
    je .adenine
    cmp cl, 'C'
    je .cytosine
    cmp cl, 'G'
    je .guanine
    cmp cl, 'T'
    je .thymine
    cmp cl, 0
    je .report                      ; Check if we have reached end of string

    ; Any other byte is invalid: report -1 for every nucleotide.
    mov r8, -1
    mov r9, -1
    mov r10, -1
    mov r11, -1

.report:
    ; The slots are 4 bytes apart, so store only the low dword of each
    ; counter. A qword store at offset 12 would write bytes 12..19 and run
    ; 4 bytes past the end of the caller's 16-byte array.
    mov dword [rsi], r8d            ; Report adenine count
    mov dword [rsi + 4], r9d        ; Report cytosine count
    mov dword [rsi + 8], r10d       ; Report guanine count
    mov dword [rsi + 12], r11d      ; Report thymine count
    ret

%ifidn __OUTPUT_FORMAT__,elf64
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
25 changes: 25 additions & 0 deletions exercises/practice/nucleotide-count/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[3e5c30a8-87e2-4845-a815-a49671ade970]
description = "empty strand"

[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
description = "can count one nucleotide in single-character input"

[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
description = "strand with repeated nucleotide"

[40a45eac-c83f-4740-901a-20b22d15a39f]
description = "strand with multiple nucleotides"

[b4c47851-ee9e-4b0a-be70-a86e343bd851]
description = "strand with invalid nucleotides"
46 changes: 46 additions & 0 deletions exercises/practice/nucleotide-count/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Builds the assembly solution together with the C test driver and the
# vendored Unity sources into a `tests` executable, then runs it.

# Assembler used for the .asm sources.
AS = nasm

# Base flags; the ALL_* variables below combine these with per-platform options.
CFLAGS = -g -Wall -Wextra -pedantic -Werror
LDFLAGS =
ASFLAGS = -g -F dwarf -Werror

# Select object format and linker options from the host OS reported by uname.
ifeq ($(shell uname -s),Darwin)
# If sysctl reports arm64 hardware, compile the C code for an x86_64 target.
ifeq ($(shell sysctl -n hw.optional.arm64 2>/dev/null),1)
ALL_CFLAGS = -target x86_64-apple-darwin
endif
ALL_LDFLAGS = -Wl,-pie -Wl,-fatal_warnings
# Mach-O output; --prefix _ prepends an underscore to global symbol names.
ALL_ASFLAGS = -f macho64 --prefix _
else
ALL_LDFLAGS = -pie -Wl,--fatal-warnings
ALL_ASFLAGS = -f elf64
endif

ALL_CFLAGS += -std=c99 -fPIE -m64 $(CFLAGS)
ALL_LDFLAGS += $(LDFLAGS)
ALL_ASFLAGS += $(ASFLAGS)

# One object per .c and .asm file in this directory, plus Unity;
# example.o is filtered out of the link.
C_OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
AS_OBJS = $(patsubst %.asm,%.o,$(wildcard *.asm))
ALL_OBJS = $(filter-out example.o,$(C_OBJS) $(AS_OBJS) vendor/unity.o)

# Shared compile command for the C pattern rule and the Unity rule.
CC_CMD = $(CC) $(ALL_CFLAGS) -c -o $@ $<

# Default target: build the test binary and execute it.
all: tests
@./$<

# Link all objects into the test binary.
tests: $(ALL_OBJS)
@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -o $@ $(ALL_OBJS)

# Assemble .asm sources with nasm.
%.o: %.asm
@$(AS) $(ALL_ASFLAGS) -o $@ $<

# Compile .c sources.
%.o: %.c
@$(CC_CMD)

# Unity is rebuilt when any of its source or header files change.
vendor/unity.o: vendor/unity.c vendor/unity.h vendor/unity_internals.h
@$(CC_CMD)

# Remove objects and the test binary.
clean:
@rm -f *.o vendor/*.o tests

.PHONY: all clean
9 changes: 9 additions & 0 deletions exercises/practice/nucleotide-count/nucleotide_count.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
section .text
global nucleotide_counts
; void nucleotide_counts(const char *strand, int *counts);
;
; rdi - address of the NUL-terminated strand string
; rsi - address of the counts array: four 4-byte ints, in the order
;       adenine, cytosine, guanine, thymine
;
; The tests expect each entry to hold that nucleotide's count, or -1 in
; every entry when the strand contains a character other than A, C, G, T.
nucleotide_counts:
; Provide your implementation here
ret

%ifidn __OUTPUT_FORMAT__,elf64
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
83 changes: 83 additions & 0 deletions exercises/practice/nucleotide-count/nucleotide_count_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Version: 1.0.0

#include "vendor/unity.h"

// Position of each nucleotide's tally within the counts array that the
// tests pass to nucleotide_counts().
enum nucleotide {
    ADENINE = 0,
    CYTOSINE = 1,
    GUANINE = 2,
    THYMINE = 3
};

// Function under test, defined outside this file.
// strand: NUL-terminated DNA string.
// counts: receives four ints indexed by enum nucleotide. The tests expect
//         each nucleotide's count, or -1 in every slot for an invalid strand.
extern void nucleotide_counts(const char* strand, int* counts);

// Unity fixture hook; these tests need no per-test setup.
void setUp(void) {
}

// Unity fixture hook; these tests need no per-test cleanup.
void tearDown(void) {
}


// An empty strand yields a zero tally for every nucleotide.
void test_empty_strand(void) {
    const char strand[] = "";
    int tally[4];

    nucleotide_counts(strand, tally);

    TEST_ASSERT_EQUAL_INT(0, tally[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[THYMINE]);
}

// A one-character strand counts exactly that nucleotide once.
void test_can_count_one_nucleotide_in_single_character_input(void) {
    TEST_IGNORE();  // Marked ignored; delete this line to enable the test.
    const char strand[] = "G";
    int tally[4];

    nucleotide_counts(strand, tally);

    TEST_ASSERT_EQUAL_INT(0, tally[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(1, tally[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[THYMINE]);
}

// Seven copies of one nucleotide are all attributed to that nucleotide.
void test_strand_with_repeated_nucleotide(void) {
    TEST_IGNORE();  // Marked ignored; delete this line to enable the test.
    const char strand[] = "GGGGGGG";
    int tally[4];

    nucleotide_counts(strand, tally);

    TEST_ASSERT_EQUAL_INT(0, tally[ADENINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(7, tally[GUANINE]);
    TEST_ASSERT_EQUAL_INT(0, tally[THYMINE]);
}

// A mixed strand produces an independent tally for each nucleotide.
void test_strand_with_multiple_nucleotides(void) {
    TEST_IGNORE();  // Marked ignored; delete this line to enable the test.
    const char strand[] =
        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC";
    int tally[4];

    nucleotide_counts(strand, tally);

    TEST_ASSERT_EQUAL_INT(20, tally[ADENINE]);
    TEST_ASSERT_EQUAL_INT(12, tally[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(17, tally[GUANINE]);
    TEST_ASSERT_EQUAL_INT(21, tally[THYMINE]);
}

// A strand containing a non-nucleotide character reports -1 in every slot.
void test_strand_with_invalid_nucleotides(void) {
    TEST_IGNORE();  // Marked ignored; delete this line to enable the test.
    const char strand[] = "AGXXACT";
    int tally[4];

    nucleotide_counts(strand, tally);

    TEST_ASSERT_EQUAL_INT(-1, tally[ADENINE]);
    TEST_ASSERT_EQUAL_INT(-1, tally[CYTOSINE]);
    TEST_ASSERT_EQUAL_INT(-1, tally[GUANINE]);
    TEST_ASSERT_EQUAL_INT(-1, tally[THYMINE]);
}

// Runs every test case in order and returns the value produced by UNITY_END().
int main(void) {
    UNITY_BEGIN();

    RUN_TEST(test_empty_strand);
    RUN_TEST(test_can_count_one_nucleotide_in_single_character_input);
    RUN_TEST(test_strand_with_repeated_nucleotide);
    RUN_TEST(test_strand_with_multiple_nucleotides);
    RUN_TEST(test_strand_with_invalid_nucleotides);

    const int status = UNITY_END();
    return status;
}
Loading

0 comments on commit 625f2d6

Please sign in to comment.