Skip to content

Commit

Permalink
Merge pull request #22 from CarolineMorton/validate-codelist
Browse files Browse the repository at this point in the history
Validate codelist
  • Loading branch information
em-baggie authored Jan 7, 2025
2 parents 4ad2f66 + 115a2de commit 064f5a4
Show file tree
Hide file tree
Showing 7 changed files with 734 additions and 28 deletions.
2 changes: 2 additions & 0 deletions rust/codelist-validator-rs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

.codegpt
7 changes: 6 additions & 1 deletion rust/codelist-validator-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ description = "Validation library for medical code lists"

[dependencies]
codelist-rs = { path = "../codelist-rs" }
serde = { version = "1.0", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
thiserror = { version = "2.0.9" }
thiserror-ext = { version = "0.2.1" }
regex = "1.11.1"


25 changes: 25 additions & 0 deletions rust/codelist-validator-rs/src/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/// Enum to represent the different types of errors that can occur in the codelist-validator library
///
/// * `InvalidCodeLength` - An error that occurs when the code is not the correct length
/// * `ParseIntError` - An error that occurs when the code is not composed of numerical characters
/// * `InvalidCodeContents` - An error that occurs when the code does not match the expected format
#[derive(Debug, thiserror::Error, thiserror_ext::Construct, Clone)]
pub enum CodeListValidatorError {
#[error("Code {code} is an invalid length for type {codelist_type}. Reason: {reason}")]
InvalidCodeLength { code: String, reason: String, codelist_type: String },

#[error("Code {code} is not composed of all numerical characters for type {codelist_type}. Reason: {reason}")]
#[construct(skip)]
ParseIntError {
code: String,
reason: String,
codelist_type: String,
},

#[error("Code {code} contents is invalid for type {codelist_type}. Reason: {reason}")]
InvalidCodeContents { code: String, reason: String, codelist_type: String },

#[error("Some codes in the list are invalid. Details: {reasons:?}")]
InvalidCodelist { reasons: Vec<String> },
}
243 changes: 234 additions & 9 deletions rust/codelist-validator-rs/src/icd10_validator.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,247 @@
use crate::codelist::CodeList;
use codelist_rs::codelist::CodeList;
use regex::Regex;
use std::sync::LazyLock;
use crate::errors::CodeListValidatorError;

/// ICD10 code regex pattern
static REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^[A-Z]\d{2}(X|(\.\d{1,3})?|\d{1,4})?$").expect("Unable to create regex")
});

/// ICD10 validator trait
///
/// `validate_code`: validates a single ICD10 code
/// `validate_all_code`: validates all ICD10 codes in the codelist
pub trait ICD10Validator {
fn validate_code(&self, code: &str) -> bool; // for 1 code
fn validate_all_code(&self);
fn validate_code(&self, code: &str) -> Result<(), CodeListValidatorError>; // for 1 code
fn validate_all_code(&self) -> Result<(), CodeListValidatorError>;
}

impl ICD10Validator for CodeList {
fn validate_code(&self, code: &str) -> bool {
//TODO
true
/// Validate the form of a single ICD10 code
///
/// The rules are:
/// - The code must be 7 characters or less
/// - The first character must be a letter
/// - The second and third characters must be numbers
/// - The fourth character must be a dot, or a number or X
/// - If the fourth character is a dot, there must be at least 1 number after the dot
/// - If the fourth character is a X, there are no further characters
/// - The fifth to seventh characters must be numbers if present
///
/// # Arguments
///
/// * `code`: the code to validate
///
/// # Returns
///
/// * `Result<(), CodeListValidatorError>`: unit type if the code is valid, otherwise an error containing the code and the reason the code is invalid
fn validate_code(&self, code: &str) -> Result<(), CodeListValidatorError> {
if code.len() > 7 {
return Err(CodeListValidatorError::invalid_code_length(code, "Code is greater than 7 characters in length", self.codelist_type.to_string()));
}

let re = &REGEX;

if !re.is_match(code) {
return Err(CodeListValidatorError::invalid_code_contents(code, "Code does not match the expected format", self.codelist_type.to_string()));
}
Ok(())
}

/// Validate all ICD10 codes in the codelist
///
/// # Returns
///
/// * `Result<(), CodeListValidatorError>`: unit type if all codes are valid in the codelist, otherwise an error containing all invalid codes and the reason for being invalid
fn validate_all_code(&self) -> Result<(), CodeListValidatorError> {
let mut reasons = Vec::new();

for code_entry in self.entries.iter() {
let code = &code_entry.code;
if let Err(err) = self.validate_code(code) {
let error_reason = format!("{}", err);
reasons.push(error_reason);
}
}

if reasons.is_empty() {
Ok(())
} else {
Err(CodeListValidatorError::invalid_codelist(reasons))
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use codelist_rs::metadata::{ Metadata, MetadataSource };
use codelist_rs::codelist::CodeList;
use codelist_rs::types::CodeListType;
use codelist_rs::errors::CodeListError;

// Helper function to create test metadata
fn create_test_metadata() -> Metadata {
Metadata {
source: MetadataSource::ManuallyCreated,
authors: Some(vec!["Caroline Morton".to_string()]),
version: Some("2024-12-19".to_string()),
description: Some("A test codelist".to_string()),
}
}

// Helper function to create a test codelist with two entries, default options and test metadata
fn create_test_codelist() -> Result<CodeList, CodeListError> {
let codelist = CodeList::new(CodeListType::ICD10, create_test_metadata(), None);
Ok(codelist)
}

#[test]
fn test_validate_code_with_valid_code() -> Result<(), CodeListError> {;
let codelist = create_test_codelist()?;
let code = "A009";
assert!(codelist.validate_code(code).is_ok());
Ok(())
}

#[test]
fn test_validate_code_with_invalid_code_length_too_long() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A009000000";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A009000000 is an invalid length for type ICD10. Reason: Code is greater than 7 characters in length");
Ok(())
}

fn validate_all_code(&self) {
#[test]
fn test_validate_invalid_code_first_character_not_a_letter() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "1009";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code 1009 contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_second_character_not_a_number() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "AA09";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code AA09 contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_third_character_not_a_number() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A0A9";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A0A9 contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_fourth_character_not_a_dot_number_or_x() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A00A";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A00A contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_no_number_after_fourth_character_dot() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A00.A";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A00.A contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_characters_after_fourth_character_x() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A00X12";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A00X12 contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_invalid_code_fifth_to_seventh_characters_not_numbers() -> Result<(), CodeListError> {
let codelist = create_test_codelist()?;
let code = "A00.4AA";
let error = codelist.validate_code(code).unwrap_err().to_string();
assert_eq!(error, "Code A00.4AA contents is invalid for type ICD10. Reason: Code does not match the expected format");
Ok(())
}

#[test]
fn test_validate_codelist_with_valid_codes() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
codelist.add_entry("A54".to_string(), "Gonorrhoea".to_string())?;
codelist.add_entry("A37".to_string(), "Pertussis".to_string())?;
codelist.add_entry("A05".to_string(), "Measles".to_string())?;
codelist.add_entry("B74.0".to_string(), "Lymphatic filariasis".to_string())?;
codelist.add_entry("N40".to_string(), "Benign prostatic hypertrophy".to_string())?;
codelist.add_entry("M10".to_string(), "Gout".to_string())?;
codelist.add_entry("Q90".to_string(), "Down Syndrome".to_string())?;
codelist.add_entry("K02".to_string(), "Dental caries".to_string())?;
assert!(codelist.validate_all_code().is_ok());
Ok(())
}

#[test]
fn test_validate_codelist_with_all_invalid_codes() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
codelist.add_entry("A009000000".to_string(), "Gonorrhoea".to_string())?;
codelist.add_entry("1009".to_string(), "Pertussis".to_string())?;
codelist.add_entry("AA09".to_string(), "Measles".to_string())?;
codelist.add_entry("A0A9".to_string(), "Lymphatic filariasis".to_string())?;
codelist.add_entry("A00A".to_string(), "Benign prostatic hypertrophy".to_string())?;
codelist.add_entry("A00.A".to_string(), "Gout".to_string())?;
codelist.add_entry("A00X12".to_string(), "Down Syndrome".to_string())?;
codelist.add_entry("A00.4AA".to_string(), "Dental caries".to_string())?;
let error = codelist.validate_all_code().unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
assert!(error_string.contains("Code A009000000 is an invalid length for type ICD10. Reason: Code is greater than 7 characters in length"));
assert!(error_string.contains("Code 1009 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code AA09 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A0A9 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00A contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00.A contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00X12 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00.4AA contents is invalid for type ICD10. Reason: Code does not match the expected format"));

assert!(matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 8));
Ok(())
}

#[test]
fn test_validate_codelist_with_mixed_invalid_and_valid_codes() -> Result<(), CodeListError> {
let mut codelist = create_test_codelist()?;
codelist.add_entry("A54".to_string(), "Gonorrhoea".to_string())?;
codelist.add_entry("1009".to_string(), "Pertussis".to_string())?;
codelist.add_entry("A05".to_string(), "Measles".to_string())?;
codelist.add_entry("A0A9".to_string(), "Lymphatic filariasis".to_string())?;
codelist.add_entry("N40".to_string(), "Benign prostatic hypertrophy".to_string())?;
codelist.add_entry("A00.A".to_string(), "Gout".to_string())?;
codelist.add_entry("Q90".to_string(), "Down Syndrome".to_string())?;
codelist.add_entry("A00.4AA".to_string(), "Dental caries".to_string())?;
let error = codelist.validate_all_code().unwrap_err();
let error_string = error.to_string();

assert!(error_string.contains("Some codes in the list are invalid. Details:"));
assert!(error_string.contains("Code 1009 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A0A9 contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00.A contents is invalid for type ICD10. Reason: Code does not match the expected format"));
assert!(error_string.contains("Code A00.4AA contents is invalid for type ICD10. Reason: Code does not match the expected format"));

assert!(matches!(error, CodeListValidatorError::InvalidCodelist { reasons } if reasons.len() == 4));
Ok(())
}
}

//TODO: tests
// cargo test --test icd10_validator
3 changes: 3 additions & 0 deletions rust/codelist-validator-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
extern crate core;

pub mod opcs_validator;
pub mod snomed_validator;
pub mod icd10_validator;
pub mod errors;
Loading

0 comments on commit 064f5a4

Please sign in to comment.