From d4b3a2e625b2c4baa69cd4fea86b23366926cf8c Mon Sep 17 00:00:00 2001 From: Dario Date: Tue, 11 Jun 2024 14:03:21 +0200 Subject: [PATCH] llm-based documentation --- docker/adb_docker/Dockerfile | 6 +- docker/bigg_docker/Dockerfile | 7 +- docker/java_docker/Dockerfile | 13 +- src/main/java/edu/ucsd/sbrg/bigg/BiGGId.java | 311 +++++++++++------ .../edu/ucsd/sbrg/bigg/ModelPolisher.java | 209 +++++++----- .../sbrg/bigg/annotation/BiGGAnnotation.java | 193 ++++++----- .../bigg/annotation/CVTermAnnotation.java | 39 ++- .../annotation/CompartmentAnnotation.java | 20 +- .../annotation/GeneProductAnnotation.java | 103 ++++-- .../sbrg/bigg/annotation/ModelAnnotation.java | 50 ++- .../bigg/annotation/ReactionAnnotation.java | 100 ++++-- .../bigg/annotation/SpeciesAnnotation.java | 93 ++--- .../bigg/polishing/CompartmentPolishing.java | 43 ++- .../bigg/polishing/GeneProductPolishing.java | 24 ++ .../sbrg/bigg/polishing/ModelPolishing.java | 79 ++++- .../sbrg/bigg/polishing/PolishingUtils.java | 18 +- .../bigg/polishing/ReactionPolishing.java | 283 ++++++++++++---- .../sbrg/bigg/polishing/SBMLPolisher.java | 85 +++-- .../sbrg/bigg/polishing/SpeciesPolishing.java | 44 ++- .../sbrg/bigg/polishing/UnitPolishing.java | 206 +++++++++--- .../java/edu/ucsd/sbrg/db/AnnotateDB.java | 24 +- src/main/java/edu/ucsd/sbrg/db/BiGGDB.java | 318 ++++++++++++------ src/main/java/edu/ucsd/sbrg/db/DBConfig.java | 46 ++- .../edu/ucsd/sbrg/miriam/CompactEntry.java | 7 +- .../edu/ucsd/sbrg/miriam/CompactResource.java | 77 ++++- .../java/edu/ucsd/sbrg/miriam/Entries.java | 90 +++-- .../java/edu/ucsd/sbrg/miriam/Namespace.java | 30 +- .../java/edu/ucsd/sbrg/miriam/Registry.java | 227 +++++++++---- .../edu/ucsd/sbrg/miriam/RegistryParser.java | 32 +- .../ucsd/sbrg/miriam/RegistryProvider.java | 19 +- .../edu/ucsd/sbrg/miriam/models/Miriam.java | 33 +- .../edu/ucsd/sbrg/util/CombineArchive.java | 77 +++-- .../java/edu/ucsd/sbrg/util/GPRParser.java | 112 ++++-- .../java/edu/ucsd/sbrg/util/SBMLUtils.java 
| 49 ++- .../edu/ucsd/sbrg/util/UpdateListener.java | 59 +++- .../bigg/polishing/ModelPolishingTest.java | 5 + 36 files changed, 2260 insertions(+), 871 deletions(-) diff --git a/docker/adb_docker/Dockerfile b/docker/adb_docker/Dockerfile index e93c19ce..40e75e95 100644 --- a/docker/adb_docker/Dockerfile +++ b/docker/adb_docker/Dockerfile @@ -1,6 +1,9 @@ FROM postgres:11.4 -MAINTAINER ktrivedi@cs.iitr.ac.in +# Set the maintainer label to the email of the responsible party +LABEL maintainer="ktrivedi@cs.iitr.ac.in" + +# Add a new user 'adb' with no password and no additional info RUN adduser --disabled-password --gecos '' adb RUN apt-get update && \ @@ -13,3 +16,4 @@ RUN apt-get update && \ COPY ./scripts/restore_adb.sh /docker-entrypoint-initdb.d/restore_adb.sh EXPOSE 5432 + diff --git a/docker/bigg_docker/Dockerfile b/docker/bigg_docker/Dockerfile index ba6e02b0..181cbdc8 100644 --- a/docker/bigg_docker/Dockerfile +++ b/docker/bigg_docker/Dockerfile @@ -1,6 +1,10 @@ FROM postgres:9.6.2 -MAINTAINER zajac.thomas1992@gmail.com +# Set the maintainer label to the email of the responsible party +LABEL maintainer="zajac.thomas1992@gmail.com" + +# Update the package lists, install curl, create a directory for the database dump, +# download the database dump, and clean up the package lists directory RUN apt-get update && \ apt-get install curl -y && \ # Create directory '/bigg_database_dump/' and download bigg_database dump as 'database.dump' @@ -11,3 +15,4 @@ RUN apt-get update && \ COPY ./scripts/restore_biggdb.sh /docker-entrypoint-initdb.d/restore_biggdb.sh EXPOSE 5432 + diff --git a/docker/java_docker/Dockerfile b/docker/java_docker/Dockerfile index b24a79d8..4d416238 100644 --- a/docker/java_docker/Dockerfile +++ b/docker/java_docker/Dockerfile @@ -1,8 +1,17 @@ FROM openjdk:11-slim -MAINTAINER zajac.thomas1992@gmail.com +# Set the maintainer email for the image +LABEL maintainer="zajac.thomas1992@gmail.com" + +# Copy the ModelPolisher jar file into the root directory 
of the image COPY ModelPolisher-2.1.jar / -# from https://stackoverflow.com/a/23961144 + +# Create necessary directories for Java preferences and set permissions +# Reference: https://stackoverflow.com/a/23961144 RUN mkdir -p /.java/.systemPrefs && mkdir /.java/.userPrefs && chmod -R 777 /.java + +# Set the container to run the ModelPolisher jar file as the default executable ENTRYPOINT ["java", "-jar", "/ModelPolisher-2.1.jar"] + +# Set the default command to display the help message of ModelPolisher CMD ["--help"] diff --git a/src/main/java/edu/ucsd/sbrg/bigg/BiGGId.java b/src/main/java/edu/ucsd/sbrg/bigg/BiGGId.java index 8e37bf3b..a1654662 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/BiGGId.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/BiGGId.java @@ -10,9 +10,22 @@ import de.zbit.util.ResourceManager; + /** - * This class stores the information from BiGG identifiers and provides methods - * to access all components of the identifier. + * Represents a BiGG identifier used to uniquely identify various biological entities + * such as reactions, metabolites, and genes within the BiGG database. This class provides methods to parse, validate, + * and manipulate BiGG IDs according to the standards specified in the BiGG database. + * + * The BiGG ID typically consists of several parts: + * - A prefix indicating the type of entity (e.g., 'R' for reaction, 'M' for metabolite, 'G' for gene). + * - An abbreviation which is the main identifier part. + * - A compartment code that specifies the cellular location of the metabolite. + * - A tissue code that indicates the tissue specificity of the identifier, applicable in multicellular organisms. + * + * This class also includes methods to create BiGG IDs from strings, validate them against known patterns, and + * extract specific parts like the compartment code. It supports handling special cases and correcting common + * formatting issues in BiGG IDs. 
+ * * For a formal description of the structure of BiGG ids see the proposed * @@ -48,9 +61,7 @@ public class BiGGId { */ private String tissueCode; - /** - * - */ + enum IDPattern { ATPM("[Aa][Tt][Pp][Mm]"), @@ -61,79 +72,82 @@ enum IDPattern { UNIVERSAL("^(?[RMG])_(?[a-zA-Z0-9][a-zA-Z0-9_]+?)(?:_(?[a-z][a-z0-9]?))?" + "(?:_(?[A-Z][A-Z0-9]?))?$"); - /** - * - */ + private final Pattern pattern; - /** - * @param pattern - */ + IDPattern(String pattern) { this.pattern = Pattern.compile(pattern); } - /** - * @return - */ + Pattern get() { return pattern; } } - /** - * - */ + public BiGGId() { super(); } - /** - * @param id - */ + public BiGGId(String id) { this(); parseBiGGId(id); } - /** - * @param prefix - * @param abbreviation - * @param compartmentCode - * @param tissueCode - */ + public BiGGId(String prefix, String abbreviation, String compartmentCode, String tissueCode) { this(); String id = toBiGGId(prefix, abbreviation, compartmentCode, tissueCode); parseBiGGId(id); } - + /** + * Creates a BiGG ID for a metabolite with default correction behavior. + * + * @param id The raw metabolite ID string. + * @return An Optional containing the BiGGId if the ID is non-empty and valid, or an empty Optional otherwise. + */ public static Optional createMetaboliteId(String id) { return createMetaboliteId(id, true); } + /** + * Creates a BiGG ID for a metabolite based on the provided string identifier. + * This method handles the correction and standardization of the metabolite ID according to BiGG database standards. + * + * @param id The raw metabolite ID string. + * @param correct A boolean flag indicating whether to correct the ID to conform to BiGG standards. + * @return An Optional containing the BiGGId if the ID is non-empty and valid, or an empty Optional otherwise. 
+ */ public static Optional createMetaboliteId(String id, boolean correct) { + // Return empty if the input ID is empty if (id.isEmpty()) { return Optional.empty(); } + // Fix the compartment code in the ID id = fixCompartmentCode(id); + // Correct the ID to conform to BiGG standards if required if (correct) { id = makeBiGGConform(id); + // Remove leading underscore if present if (id.startsWith("_")) { - id = id.substring(1); + id = id.substring(1); } + // Standardize the prefix for metabolites from 'm_' to 'M_' if (id.startsWith("m_")) { - id = id.replaceAll("^m_", "M_"); + id = id.replaceAll("^m_", "M_"); } else if (!id.startsWith("M_")) { - id = "M_" + id; + id = "M_" + id; } } - // handle one letter abbreviation metabolites like 'h' which are not in accord with the specification, but still + // Special handling for one-letter abbreviation metabolites not conforming to the specification, but still // present in BiGG Matcher metaboliteSpecialCase = IDPattern.METABOLITE_SPECIAL.get().matcher(id); if (metaboliteSpecialCase.matches()) { @@ -148,11 +162,29 @@ public static Optional createMetaboliteId(String id, boolean correct) { } + /** + * Creates a BiGG ID for a gene using the default correction behavior. + * + * @param id The raw gene ID string. + * @return An Optional containing the BiGGId if the ID is non-empty and valid, or an empty Optional otherwise. + */ public static Optional createGeneId(String id) { return createGeneId(id, true); } - + /** + * Creates a BiGG ID for a gene, with an option to correct the ID to conform to BiGG standards. + * + * This method first checks if the provided ID is empty, returning an empty Optional if true. + * If the 'correct' parameter is true, the ID is processed to conform to BiGG standards: + * - Leading underscores are removed. + * - The prefix "g_" is replaced with "G_" to standardize the gene ID format. + * - If the ID does not start with "G_", the prefix "G_" is prepended. + * + * @param id The raw gene ID string. 
+ * @param correct A boolean flag indicating whether to correct the ID to conform to BiGG standards. + * @return An Optional containing the BiGGId if the ID is non-empty, or an empty Optional if the ID is empty. + */ public static Optional createGeneId(String id, boolean correct) { if (id.isEmpty()) { return Optional.empty(); @@ -160,39 +192,71 @@ public static Optional createGeneId(String id, boolean correct) { if (correct) { id = makeBiGGConform(id); if (id.startsWith("_")) { - id = id.substring(1); + id = id.substring(1); } if (id.startsWith("g_")) { - id = id.replaceAll("^g_", "G_"); + id = id.replaceAll("^g_", "G_"); } else if (!id.startsWith("G_")) { - id = "G_" + id; + id = "G_" + id; } } return Optional.of(new BiGGId(id)); } - + /** + * Creates a BiGG ID for a reaction based on the provided string identifier. + * This method handles the prefix stripping and checks if the reaction is a pseudo-reaction. + * Depending on these checks, it delegates to the overloaded createReactionId method with appropriate flags. + * + * @param id The raw reaction ID string. + * @return An Optional containing the BiGGId if the ID is non-empty, or an empty Optional if the ID is empty. + */ public static Optional createReactionId(String id) { String prefixStripped = ""; + // Strip the prefix if it starts with 'R_' or 'r_' if (id.startsWith("R_") || id.startsWith("r_")) { prefixStripped = id.substring(2); } + // Check if the original ID is a pseudo-reaction if (isPseudo(id)) { return createReactionId(id, true, true); - } else if (!prefixStripped.isEmpty() && isPseudo(prefixStripped)) { + } + // Check if the ID without the prefix is a pseudo-reaction + else if (!prefixStripped.isEmpty() && isPseudo(prefixStripped)) { return createReactionId(prefixStripped, true, true); - } else { + } + // Handle normal reaction ID + else { return createReactionId(id, true, false); } } + /** + * Checks if the given reaction ID corresponds to a pseudo-reaction.
+ * Pseudo-reactions are predefined patterns that do not correspond to actual biochemical reactions + * but are used for modeling purposes. This method checks if the reaction ID matches any of the + * predefined pseudo-reaction patterns such as ATP maintenance (ATPM), biomass, or generic pseudo-reactions. + * + * @param reactionId The reaction ID to be checked. + * @return true if the reaction ID matches any pseudo-reaction pattern, false otherwise. + */ private static boolean isPseudo(String reactionId) { - return IDPattern.ATPM.get().matcher(reactionId).matches() || IDPattern.BIOMASS.get().matcher(reactionId).matches() - || IDPattern.PSEUDO.get().matcher(reactionId).matches(); + return IDPattern.ATPM.get().matcher(reactionId).matches() || + IDPattern.BIOMASS.get().matcher(reactionId).matches() || + IDPattern.PSEUDO.get().matcher(reactionId).matches(); } + /** + * Creates a BiGG ID for a reaction based on the provided string identifier. + * The method can correct the ID to conform to BiGG standards and adjust the prefix based on whether it is a pseudo-reaction. + * + * @param id The raw reaction ID string. + * @param correct If true, the ID will be corrected to conform to BiGG standards. + * @param isPseudo If true, the ID is treated as a pseudo-reaction, affecting the prefix handling. + * @return An Optional containing the BiGGId if the ID is non-empty, or an empty Optional if the ID is empty. 
+ */ public static Optional createReactionId(String id, boolean correct, boolean isPseudo) { if (id.isEmpty()) { return Optional.empty(); @@ -200,47 +264,67 @@ public static Optional createReactionId(String id, boolean correct, bool if (correct) { id = makeBiGGConform(id); if (id.startsWith("_")) { - id = id.substring(1); + id = id.substring(1); } if (!isPseudo && id.startsWith("r_")) { - id = id.replaceAll("^r_", "R_"); + id = id.replaceAll("^r_", "R_"); } else if (!isPseudo && !id.startsWith("R_")) { - id = "R_" + id; + id = "R_" + id; } } return Optional.of(new BiGGId(id)); } + /** + * Transforms a given identifier into a format conforming to BiGG ID standards. + * This method applies several transformations to ensure the ID adheres to the required naming conventions: + * - Prefixes IDs starting with a digit with an underscore. + * - Replaces certain characters with specific strings or patterns to avoid conflicts in naming conventions. + * - Extracts and reformats compartment codes enclosed in parentheses or brackets. + * - Removes trailing parts of IDs that are marked as copies. + * - Ensures that only alphanumeric characters and underscores are retained, replacing all other characters with underscores. + * - Trims any trailing underscores from the final ID. + * + * @param id The original identifier that needs to be transformed. + * @return A string representing the transformed identifier conforming to BiGG standards. 
+ */ private static String makeBiGGConform(String id) { + // Prefix the ID with an underscore if it starts with a digit if (Character.isDigit(id.charAt(0))) { id = "_" + id; } + // Replace problematic characters with specific strings id = id.replaceAll("[-/]", "__").replaceAll("\\.", "__SBML_DOT__").replaceAll("\\(", "_LPAREN_") - .replaceAll("\\)", "_RPAREN_").replaceAll("\\[", "_LBRACKET_").replaceAll("]", "_RBRACKET_"); + .replaceAll("\\)", "_RPAREN_").replaceAll("\\[", "_LBRACKET_").replaceAll("]", "_RBRACKET_"); + // Extract and reformat compartment codes enclosed in parentheses Pattern parenCompartment = Pattern.compile("_LPAREN_(?.*?)_RPAREN_"); Matcher parenMatcher = parenCompartment.matcher(id); if (parenMatcher.find()) { id = id.replaceAll(parenCompartment.toString(), "_" + parenMatcher.group("paren")); } + // Extract and reformat compartment codes enclosed in brackets Pattern bracketCompartment = Pattern.compile("_LBRACKET_(?.*)_RBRACKET_"); Matcher bracketMatcher = bracketCompartment.matcher(id); if (bracketMatcher.find()) { id = id.replaceAll(bracketCompartment.toString(), "_" + bracketMatcher.group("bracket")); } + // Remove the '_copy' suffix and any trailing digits if (id.matches(".*_copy\\d*")) { id = id.substring(0, id.lastIndexOf('_')); } + // Retain only alphanumeric characters and underscores, replacing all other characters Pattern alphaNum = Pattern.compile("[a-zA-Z0-9_]"); StringBuilder builder = new StringBuilder(id.length()); for (char ch : id.toCharArray()) { if (alphaNum.matcher(String.valueOf(ch)).matches()) { - builder.append(ch); + builder.append(ch); } else { - builder.append("_"); + builder.append("_"); } } id = builder.toString(); + // Remove any trailing underscores if (id.endsWith("_")) { id = id.substring(0, id.length() - 1); } @@ -257,18 +341,26 @@ public static boolean isValid(String queryId) { /** - * @param id - * @return + * Corrects the format of compartment codes in the given identifier string. 
+ * This method is specifically designed to handle cases where the compartment code is incorrectly + * formatted with square brackets at the end of the ID (e.g., [cc]) instead of the expected underscore format (e.g., _cc). + * + * @param id The identifier string potentially containing incorrectly formatted compartment codes. + * @return The identifier string with corrected compartment code format. */ private static String fixCompartmentCode(String id) { - // Workaround for models with wrong compartment code format [cc] instead of _cc + // Define a pattern to identify and extract compartment codes enclosed in square brackets Pattern rescueCompartment = Pattern.compile(".*\\[(?[a-z][a-z0-9]?)\\]"); Matcher rescueMatcher = rescueCompartment.matcher(id); + // Check if the pattern matches and process accordingly if (rescueMatcher.matches()) { + // Extract the compartment code from the matcher String compartmentCode = rescueMatcher.group("code"); + // Replace the bracketed compartment code with the underscore format id = id.replaceAll("\\[[a-z][a-z0-9]?\\]", "_" + compartmentCode + "_"); + // Remove trailing underscore if present if (id.endsWith("_")) { - id = id.substring(0, id.length() - 1); + id = id.substring(0, id.length() - 1); } } return id; @@ -276,26 +368,33 @@ private static String fixCompartmentCode(String id) { /** - * @param id - * the identifier to be parsed into a bigg_id. + * Parses the given identifier into a structured BiGG ID. This method first checks if an ID + * corresponding to a reaction might in fact identify a pseudo-reaction. If it is a pseudo-reaction, + * it is processed accordingly. Otherwise, it checks against the UNIVERSAL pattern to handle + * normal BiGG IDs. If neither condition is met, the ID is handled as a special case. + * + * @param id the identifier to be parsed into a BiGG ID.
*/ private void parseBiGGId(String id) { Matcher matcher = IDPattern.UNIVERSAL.get().matcher(id); - // Handle PseudoReaction with wrongfully added prefix correctly - boolean isPseudoReaction = false; - if (id.startsWith("R_")) { - isPseudoReaction = isPseudo(id); - } + // Determine if the ID is a pseudo-reaction, which are special cases like ATP maintenance or biomass reactions + boolean isPseudoReaction = id.startsWith("R_") && isPseudo(id); if (!isPseudoReaction && matcher.matches()) { + // If it matches the universal pattern and is not a pseudo-reaction, handle it as a normal BiGG ID handleNormalId(matcher); } else { + // If it does not match or is a pseudo-reaction, handle it according to its special characteristics handleSpecialCases(id); } } /** - * @param matcher + * Processes a Matcher object that has matched a BiGG ID against the UNIVERSAL pattern. + * This method extracts the components of the BiGG ID from the Matcher and sets the corresponding fields + * in the BiGGId object. + * + * @param matcher The Matcher object containing the groups corresponding to the components of the BiGG ID. */ private void handleNormalId(Matcher matcher) { String prefix = matcher.group("prefix"); @@ -310,7 +409,12 @@ private void handleNormalId(Matcher matcher) { /** - * @param id + * Handles special cases for BiGG ID parsing where standard parsing fails. + * This method checks the given ID against several predefined patterns to determine + * if the ID corresponds to pseudoreactions, biomass, ATP maintenance (ATPM), or compartment identifiers. + * Depending on the match, it reformats the ID or logs a warning if no known pattern is matched. + * + * @param id The BiGG ID string to be evaluated and handled for special cases. 
*/ private void handleSpecialCases(String id) { Matcher pseudoreactionMatcher = IDPattern.PSEUDO.get().matcher(id); @@ -336,6 +440,17 @@ private void handleSpecialCases(String id) { } + /** + * Extracts the compartment code from a given identifier if it matches the expected pattern. + * The expected pattern allows an optional prefix "C_" followed by one lowercase letter, + * optionally followed by one further lowercase letter or digit. If the identifier does not match this pattern, an empty + * {@link Optional} is returned. If the identifier starts with "C_", this prefix is removed + * before returning the compartment code. + * + * @param id The identifier from which to extract the compartment code. + * @return An {@link Optional} containing the compartment code if the identifier matches the pattern, + * otherwise an empty {@link Optional}. + */ public static Optional extractCompartmentCode(String id) { if (!Pattern.compile("(C_)?[a-z][a-z0-9]?").matcher(id).matches()) { return Optional.empty(); @@ -365,21 +480,21 @@ public boolean equals(Object obj) { BiGGId other = (BiGGId) obj; if (abbreviation == null) { if (other.abbreviation != null) { - return false; + return false; } } else if (!abbreviation.equals(other.abbreviation)) { return false; } if (compartmentCode == null) { if (other.compartmentCode != null) { - return false; + return false; } } else if (!compartmentCode.equals(other.compartmentCode)) { return false; } if (prefix == null) { if (other.prefix != null) { - return false; + return false; } } else if (!prefix.equals(other.prefix)) { return false; @@ -391,9 +506,7 @@ public boolean equals(Object obj) { } - /** - * @return the abbreviation - */ + public String getAbbreviation() { return isSetAbbreviation() ? abbreviation : ""; } @@ -420,9 +533,7 @@ public void setAbbreviation(String abbreviation) { } - /** - * @return the compartmentCode - */ + public String getCompartmentCode() { return isSetCompartmentCode() ?
compartmentCode : ""; } @@ -442,9 +553,7 @@ public void setCompartmentCode(String compartmentCode) { } - /** - * @return the prefix - */ + public String getPrefix() { return isSetPrefix() ? prefix : ""; } @@ -470,9 +579,7 @@ public void setPrefix(String prefix) { } - /** - * @return the tissueCode - */ + public String getTissueCode() { return isSetTissueCode() ? tissueCode : ""; } @@ -508,63 +615,62 @@ public int hashCode() { } - /** - * @return - */ + public boolean isSetAbbreviation() { return abbreviation != null; } - /** - * @return - */ + public boolean isSetCompartmentCode() { return compartmentCode != null; } - /** - * @return - */ + public boolean isSetPrefix() { return prefix != null; } - /** - * @return - */ + public boolean isSetTissueCode() { return tissueCode != null; } /** - * Generates an actual BiGG id for this object. + * Generates a BiGG ID for this object based on its properties. The BiGG ID is constructed by concatenating + * the available properties (prefix, abbreviation, compartment code, and tissue code) in that order, each separated + * by an underscore. Each property is included only if it is set (i.e., not null). * - * @return + * @return A string representing the BiGG ID, constructed by concatenating the set properties with underscores. + * If none of the properties are set, returns an empty string. 
*/ public String toBiGGId() { StringBuilder sb = new StringBuilder(); + // Append prefix if set if (isSetPrefix()) { sb.append(getPrefix()); } + // Append abbreviation if set, prefixed by an underscore if not the first element if (isSetAbbreviation()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(getAbbreviation()); } + // Append compartment code if set, prefixed by an underscore if not the first element if (isSetCompartmentCode()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(getCompartmentCode()); } + // Append tissue code if set, prefixed by an underscore if not the first element if (isSetTissueCode()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(getTissueCode()); } @@ -573,30 +679,35 @@ public String toBiGGId() { /** - * Generates an actual BiGG id for this object. + * Constructs a BiGG ID using the provided components. Each component is separated by an underscore. + * If a component is null or empty, it is omitted from the final ID. * - * @return + * @param prefix The first part of the BiGG ID, typically representing the type of entity (e.g., 'R', 'M', 'G'). + * @param abbreviation The main identifier, usually an alphanumeric string that uniquely describes the entity. + * @param compartmentCode A code indicating the compartmentalization, relevant for compartmentalized entities. + * @param tissueCode A code representing the tissue specificity, applicable to certain biological models. + * @return A string representing the constructed BiGG ID, formed by concatenating the provided components with underscores. 
*/ public String toBiGGId(String prefix, String abbreviation, String compartmentCode, String tissueCode) { StringBuilder sb = new StringBuilder(); - if (prefix != null && !prefix.equals("")) { + if (prefix != null && !prefix.isEmpty()) { sb.append(prefix); } - if (abbreviation != null && !abbreviation.equals("")) { + if (abbreviation != null && !abbreviation.isEmpty()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(abbreviation); } - if (compartmentCode != null && !compartmentCode.equals("")) { + if (compartmentCode != null && !compartmentCode.isEmpty()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(compartmentCode); } - if (tissueCode != null && !tissueCode.equals("")) { + if (tissueCode != null && !tissueCode.isEmpty()) { if (sb.length() > 0) { - sb.append('_'); + sb.append('_'); } sb.append(tissueCode); } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/ModelPolisher.java b/src/main/java/edu/ucsd/sbrg/bigg/ModelPolisher.java index b8f633a0..43014f49 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/ModelPolisher.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/ModelPolisher.java @@ -69,6 +69,24 @@ import edu.ucsd.sbrg.util.UpdateListener; /** + * The ModelPolisher class is the entry point of this application. + * It extends the Launcher class and provides functionality to polish SBML models. + * It handles command-line arguments to configure the polishing process, manages file input and output, + * and integrates various utilities for processing SBML, JSON, and MatLab files. The class supports + * operations such as reading, validating, and writing SBML documents, converting JSON and MatLab files + * to SBML, and annotating models with data from BiGG. + * + * The main functionalities include: + * - Command-line argument parsing and processing. + * - Batch processing of files and directories for model polishing. + * - File type detection and appropriate handling of SBML, JSON, and MatLab files. 
+ * - HTML tag correction in SBML files. + * - SBML document validation and conversion. + * - Annotation of models using external databases. + * - Output management including file writing, COMBINE archive creation, and compression. + * + * This class also handles error logging and provides detailed logging of the processing steps. + * * @author Andreas Dräger */ public class ModelPolisher extends Launcher { @@ -180,38 +198,35 @@ private void initParameters(SBProperties args) { /** - * Processes given input and output parameters and recurses, if input denotes a directory. - * Creates output directory, if not present + * Processes the specified input and output paths. If the input is a directory, it recursively processes each file within. + * It ensures that the output directory exists before processing starts. * - * @param input: - * Path to input file/directory to process, corresponds to {@link Parameters#input()} at top level - * @param output: - * Path to output file/directory, corresponds to {@link Parameters#output()} - * @throws IOException - * if input file is not found, or no file is present in the input directory - * @throws XMLStreamException - * propagated from {@link ModelPolisher#processFile(File, File)} + * @param input Path to the input file or directory to be processed. This should correspond to {@link Parameters#input()}. + * @param output Path to the output file or directory where processed files should be saved. This should correspond to {@link Parameters#output()}. + * @throws IOException if the input file or directory does not exist; note that an empty input directory raises an unchecked IllegalArgumentException rather than an IOException. + * @throws XMLStreamException if an error occurs during file processing, propagated from {@link ModelPolisher#processFile(File, File)}.
*/ private void batchProcess(File input, File output) throws IOException, XMLStreamException { + // Check if the input exists, throw an exception if it does not if (!input.exists()) { throw new IOException(format(MESSAGES.getString("READ_FILE_ERROR"), input.toString())); } - // Create output directory if output is a directory or create output file's directory if output is a file + // Ensure the output directory or file's parent directory exists checkCreateOutDir(output); - // Move down into the directory + // If the input is a directory, process each file within it if (input.isDirectory()) { + // If the output is not a directory but the input is, log an error and return if (!output.isDirectory()) { - // input == dir && output != dir -> should only happen if already inside a directory and trying to recurse, - // which is not supported - // Couldn't this be done in IOOptions using dependencies? - logger.info( - format(MESSAGES.getString("WRITE_DIR_TO_FILE_ERROR"), input.getAbsolutePath(), output.getAbsolutePath())); + logger.info(format(MESSAGES.getString("WRITE_DIR_TO_FILE_ERROR"), input.getAbsolutePath(), output.getAbsolutePath())); return; } + // List all files in the input directory File[] files = input.listFiles(); + // If no files are found, throw an exception if (files == null || files.length < 1) { throw new IllegalArgumentException(MESSAGES.getString("NO_FILES_ERROR")); } + // Recursively process each file in the directory for (File file : files) { File target = getOutputFileName(file, output); batchProcess(file, target); @@ -296,45 +311,46 @@ private boolean isDirectory(File file) { /** - * Preprocessing based on file type, i.e. setting correct output file extension and applying workaround for SBML files - * with top level namespace declarations + * Processes the input file by determining its type and applying necessary preprocessing steps. 
+ * If the file type is unknown, it attempts to update SBML files with top-level namespace declarations, + * which might be present due to specific tools like CarveMe. If the file remains unknown after attempting + * to update, it logs a warning and returns without further processing. + * If the output path is a directory, it adjusts the output file name based on the input file's type and name. + * Finally, it calls the method to read and polish the file. * - * @param input: - * input file - * @param output: - * output file or directory - * @throws XMLStreamException - * propagated from {@link #readAndPolish(File, File)} - * @throws IOException - * propagated from {@link #readAndPolish(File, File)} + * @param input The input file to be processed. + * @param output The output file or directory where the processed file should be saved. + * @throws XMLStreamException If an XML processing error occurs. + * @throws IOException If an I/O error occurs. */ private void processFile(File input, File output) throws XMLStreamException, IOException { - // get fileType array and check if any value is true + // Determine the file type of the input file fileType = getFileType(input); + // Handle unknown file types by checking and updating HTML tags if (fileType.equals(FileType.UNKNOWN)) { - // do this for now to update SBML files with top level namespace declarations (Possibly from CarveMe) - // should skip invocation of most of the code later on as tags are already replaced checkHTMLTags(input); - fileType = getFileType(input); - // did not fix the issue, abort + fileType = getFileType(input); // Re-check file type after updating tags + // Abort processing if file type is still unknown if (fileType.equals(FileType.UNKNOWN)) { logger.warning(format(MESSAGES.getString("INPUT_UNKNOWN"), input.getPath())); return; } } + // Adjust output file name if the output is a directory if (output.isDirectory()) { output = getOutputFileName(input, output); } + // Read and polish the file 
readAndPolish(input, output); } /** - * Get file type from input file + * Determines the type of the input file based on its extension or content. + * This method checks if the file is an SBML, MatLab, or JSON file by utilizing the {@link SBFileFilter} class. * - * @param input - * File used in {@link #batchProcess(File, File)} - * @return FileType of given file, only SBML, MatLab and JSON files are supported + * @param input The file whose type needs to be determined. + * @return FileType The type of the file, which can be SBML_FILE, MAT_FILE, JSON_FILE, or UNKNOWN if the type cannot be determined. */ private FileType getFileType(File input) { if (SBFileFilter.isSBMLFile(input)) { @@ -350,22 +366,25 @@ private FileType getFileType(File input) { /** - * Read input file and dispatch polishing tasks for different possible file types + * This method reads an input file, determines its type (SBML, MAT, or JSON), and applies the appropriate + * parsing and polishing processes. The result is written to the specified output file in SBML format. + * + * The method logs the start of the reading process, determines the file type, and uses the corresponding + * parser to convert the file into an SBMLDocument. If the file is an SBML file, it first checks and corrects + * HTML tags. After parsing, if the document is null (indicating a parsing failure), it logs an error and exits. + * Otherwise, it proceeds to polish the document and logs the time taken for the entire process upon completion. * - * @param input: - * Input file in either SBML, MAT or JSON format - * @param output: - * Output file in SBML format - * @throws XMLStreamException - * propagated from {@link #polish(SBMLDocument, File)} - * @throws IOException - * propagated from {@link #polish(SBMLDocument, File)} + * @param input The input file which can be in SBML, MAT, or JSON format. + * @param output The output file where the polished SBML will be saved. 
+ * @throws XMLStreamException If an error occurs during XML parsing or writing. + * @throws IOException If an I/O error occurs during file reading or writing. */ private void readAndPolish(File input, File output) throws XMLStreamException, IOException { - long time = System.currentTimeMillis(); + long startTime = System.currentTimeMillis(); logger.info(format(MESSAGES.getString("READ_FILE_INFO"), input.getAbsolutePath())); SBMLDocument doc; - // reading or parsing input + + // Determine the file type and parse accordingly if (fileType.equals(FileType.MAT_FILE)) { doc = COBRAParser.read(input); } else if (fileType.equals(FileType.JSON_FILE)) { @@ -374,24 +393,32 @@ private void readAndPolish(File input, File output) throws XMLStreamException, I checkHTMLTags(input); doc = SBMLReader.read(input, new UpdateListener()); } + + // Check if the document was successfully parsed if (doc == null) { logger.severe(format(MESSAGES.getString("ALL_DOCS_PARSE_ERROR"), input.toString())); return; } + + // Polish the document and write to output polish(doc, output); - // Clear map for next model + + // Clear temporary data structures used during parsing SBMLUtils.clearGPRMap(); GPRParser.clearAssociationMap(); - time = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - time); - logger.info(String.format(MESSAGES.getString("FINISHED_TIME"), (time / 60), (time % 60))); + + // Log the time taken to process the file + long timeTaken = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - startTime); + logger.info(String.format(MESSAGES.getString("FINISHED_TIME"), (timeTaken / 60), (timeTaken % 60))); } /** - * Replaces wrong html tags in a SBML model with body tags + * Replaces incorrect HTML tags in an SBML file with correct body tags and creates a backup of the original file. + * This method reads the input SBML file, checks for incorrect HTML tags, and replaces them with the correct tags. + * It also creates a backup of the original file before making any changes. 
* - * @param input: - * SBML file + * @param input The SBML file to be checked and corrected. */ private void checkHTMLTags(File input) { // Replace tags and replace file for processing @@ -403,6 +430,7 @@ private void checkHTMLTags(File input) { sb.append(line).append("\n"); } String doc = sb.toString(); + // Check if the document contains incorrect HTML tags if (!doc.contains("", ""); - // Preserve a copy of the original. + // Create a backup of the original file before modifying it try { Path output = Paths.get(input.getAbsolutePath() + ".bak"); Files.copy(input.toPath(), output, StandardCopyOption.REPLACE_EXISTING); @@ -426,10 +453,11 @@ private void checkHTMLTags(File input) { logger.info(MESSAGES.getString("SKIP_TAG_REPLACEMENT")); return; } - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(input))); - writer.write(doc); - logger.info(format(MESSAGES.getString("WROTE_CORRECT_HTML"), input.toPath())); - writer.close(); + // Write the corrected document back to the original file + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(input)))) { + writer.write(doc); + logger.info(format(MESSAGES.getString("WROTE_CORRECT_HTML"), input.toPath())); + } } catch (FileNotFoundException exc) { logger.severe(format(MESSAGES.getString("READ_FILE_ERROR"), input.toPath())); } catch (IOException e) { @@ -439,28 +467,32 @@ private void checkHTMLTags(File input) { /** - * Dispatch central tasks like polishing, annotation, combine archive creation, compression and validation + * This method orchestrates the polishing process of an SBML document, including annotation, JSON conversion, file writing, + * COMBINE archive creation, and compression. It ensures the model exists within the document before proceeding with further tasks. * - * @param doc - * @param output - * @throws IOException - * @throws XMLStreamException + * @param doc The SBMLDocument to be polished. 
+ * @param output The file where the polished SBML document will be written. + * @throws IOException If an I/O error occurs during file writing or archive creation. + * @throws XMLStreamException If an error occurs during XML processing. */ private void polish(SBMLDocument doc, File output) throws IOException, XMLStreamException { if (doc.getModel() == null) { logger.severe(MESSAGES.getString("MODEL_MISSING")); return; } + // Retrieve global parameters for the polishing process Parameters parameters = Parameters.get(); + // Ensure the document is at the correct SBML level and version doc = checkLevelAndVersion(doc); - // Polishing + // Perform the polishing operations on the document SBMLPolisher polisher = new SBMLPolisher(); doc = polisher.polish(doc); - // Annotation + // Annotate the document if the parameters specify if (parameters.annotateWithBiGG()) { BiGGAnnotation annotation = new BiGGAnnotation(); doc = annotation.annotate(doc); } + // Convert and write the document to JSON if specified if (parameters.writeJSON()) { String out = output.getAbsolutePath().replaceAll("\\.xml", ".json"); try (BufferedWriter writer = new BufferedWriter(new FileWriter(out))) { @@ -470,13 +502,12 @@ private void polish(SBMLDocument doc, File output) throws IOException, XMLStream // writing polished model logger.info(format(MESSAGES.getString("WRITE_FILE_INFO"), output.getAbsolutePath())); TidySBMLWriter.write(doc, output, getClass().getSimpleName(), getVersionNumber(), ' ', (short) 2); - // produce COMBINE archive and delete output model and glossary - // TODO: do compression and combine output work together? 
+ // Handle COMBINE archive creation if specified if (parameters.outputCOMBINE()) { - // producing & writing glossary CombineArchive combineArchive = new CombineArchive(doc, output); combineArchive.write(); } + // Handle file compression based on the specified method if (parameters.compression() != Compression.NONE) { String fileExtension = parameters.compression().getFileExtension(); String archive = output.getAbsolutePath() + "." + fileExtension; @@ -491,9 +522,11 @@ private void polish(SBMLDocument doc, File output) throws IOException, XMLStream default: break; } + // Delete the original output file if compression is successful if (!output.delete()) { logger.warning(format(MESSAGES.getString("REMOVE_ZIP_INPUT_FAIL"), output.getAbsolutePath())); } + // Perform SBML validation if specified if (parameters.SBMLValidation()) { // use offline validation validate(archive, false); @@ -503,23 +536,33 @@ private void polish(SBMLDocument doc, File output) throws IOException, XMLStream /** - * Make sure SBML Level and Version are 3.1, so that needed plugins work + * Ensures that the SBML document is set to Level 3 and Version 1, which are required for compatibility with necessary plugins. + * If the document is not already at this level and version, it updates the document to meet these specifications. + * After ensuring the document is at the correct level and version, it converts the document using the CobraToFbcV2Converter. * - * @param doc: - * SBMLDocument + * @param doc The SBMLDocument to be checked and potentially converted. + * @return The SBMLDocument after potentially updating its level and version and converting it. 
*/ private SBMLDocument checkLevelAndVersion(SBMLDocument doc) { if (!doc.isSetLevelAndVersion() || (doc.getLevelAndVersion().compareTo(ValuePair.of(3, 1)) < 0)) { logger.info(MESSAGES.getString("TRY_CONV_LVL3_V1")); SBMLtools.setLevelAndVersion(doc, 3, 1); } + // Initialize the converter for Cobra to FBC version 2 CobraToFbcV2Converter converter = new CobraToFbcV2Converter(); + // Convert the document and return the converted document return converter.convert(doc); } /** - * @param filename + * Validates an SBML file either online or offline based on the provided parameters. + * Online validation refers to checking the file against a remote service or database, using specific parameters for the validation process. + * Offline validation involves reading the file locally, handling different compression formats if necessary, and validating the SBML document against local constraints. + * Errors encountered during the validation process are logged for further analysis. + * + * @param filename The path to the SBML file to be validated. + * @param online A boolean flag indicating whether to perform online (true) or offline (false) validation. */ private void validate(String filename, boolean online) { if (online) { @@ -529,7 +572,7 @@ private void validate(String filename, boolean online) { Map parameters = new HashMap<>(); parameters.put("output", output); parameters.put("offcheck", offcheck); - logger.info("Validating " + filename + "\n"); + logger.info("Validating " + filename + "\n"); SBMLErrorLog sbmlErrorLog = SBMLValidator.checkConsistency(filename, parameters); handleErrorLog(sbmlErrorLog, filename); } else { @@ -726,7 +769,10 @@ public short getYearWhenProjectWasStarted() { /* - * (non-Javadoc) + * This method is inherited from the base class and is not utilized in this CLI application. + * The ModelPolisher application does not implement a graphical user interface. + * + * @return Always returns false as no GUI is created. 
* @see de.zbit.Launcher#addCopyrightToSplashScreen() */ @Override @@ -736,7 +782,10 @@ protected boolean addCopyrightToSplashScreen() { /* - * (non-Javadoc) + * This method is inherited from the base class and is not utilized in this CLI application. + * The ModelPolisher application does not implement a graphical user interface. + * + * @return Always returns false as no GUI is created. * @see de.zbit.Launcher#addVersionNumberToSplashScreen() */ @Override @@ -745,8 +794,12 @@ protected boolean addVersionNumberToSplashScreen() { } - /* - * (non-Javadoc) + /** + * This method is inherited from the base class and is not utilized in this CLI application. + * The ModelPolisher application does not implement a graphical user interface. + * + * @param appConf The application configuration settings, not used in this context. + * @return Always returns null as no GUI is created. * @see de.zbit.Launcher#initGUI(de.zbit.AppConf) */ @Override diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/BiGGAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/BiGGAnnotation.java index e1e9a8c8..29fd5ee3 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/BiGGAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/BiGGAnnotation.java @@ -27,6 +27,7 @@ import edu.ucsd.sbrg.miriam.Registry; import org.sbml.jsbml.CVTerm; import org.sbml.jsbml.CVTerm.Qualifier; +import org.sbml.jsbml.Compartment; import org.sbml.jsbml.Model; import org.sbml.jsbml.Reaction; import org.sbml.jsbml.SBMLDocument; @@ -45,7 +46,11 @@ import edu.ucsd.sbrg.db.BiGGDBContract; import edu.ucsd.sbrg.db.QueryOnce; + /** + * This class is responsible for annotating SBML models using data from the BiGG database. + * It handles the addition of annotations related to compartments, species, reactions, and gene products. 
+ * * @author Thomas Zajac * This code runs only, if ANNOTATE_WITH_BIGG is true */ @@ -70,14 +75,15 @@ public class BiGGAnnotation { public BiGGAnnotation() { } - + /** - * Adds annotations from BiGG Knowledgebase for the model contained in the {@link SBMLDocument} - * - * @param doc: - * {@link SBMLDocument} to be annotated with data from BiGG Knowledgebase - * @return Annotated SBMLDocument + * Annotates an SBMLDocument using data from the BiGG Knowledgebase. This method processes various components of the + * SBML model such as compartments, species, reactions, and gene products by adding relevant annotations from BiGG. + * It also handles the addition of publications and notes related to the model. + * + * @param doc The SBMLDocument that contains the model to be annotated. + * @return The annotated SBMLDocument. */ public SBMLDocument annotate(SBMLDocument doc) { if (!doc.isSetModel()) { @@ -85,30 +91,37 @@ public SBMLDocument annotate(SBMLDocument doc) { return doc; } Model model = doc.getModel(); - // add fake count so it never reaches 100 before gene products are processed, as new gene products are added + // Initialize the count for progress tracking, adding a buffer to ensure progress does not reach 100% prematurely, as new gene products are added // dynamically int count = model.getCompartmentCount() + model.getSpeciesCount() + model.getReactionCount() + 50; + // Check for the FBC plugin and adjust the count based on the number of gene products if (model.isSetPlugin(FBCConstants.shortLabel)) { - FBCModelPlugin fbcModelPlug = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); - initialGeneProducts = fbcModelPlug.getGeneProductCount(); + FBCModelPlugin fbcModelPlugin = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); + initialGeneProducts = fbcModelPlugin.getGeneProductCount(); count += initialGeneProducts; } + // Set up the progress bar for tracking annotation progress progress = new ProgressBar(count); + // Process replacements for 
placeholders in the model notes Map replacements = processReplacements(model); + // Annotate the model with general information ModelAnnotation modelAnnotation = new ModelAnnotation(model); modelAnnotation.annotate(); + // Annotate various components of the model annotatePublications(model); annotateListOfCompartments(model); annotateListOfSpecies(model); annotateListOfReactions(model); annotateListOfGeneProducts(model); + // Append notes to the document, handling potential I/O and XML exceptions try { appendNotes(doc, replacements); } catch (IOException | XMLStreamException exc) { logger.warning(MESSAGES.getString("FAILED_WRITE_NOTES")); } - // Recursively sort and group all annotations in the SBMLDocument. + // Merge all MIRIAM annotations to ensure they are correctly grouped and sorted mergeMIRIAMannotations(doc); + // Finalize the progress once all tasks are completed if (progress != null) { progress.finished(); } @@ -117,19 +130,26 @@ public SBMLDocument annotate(SBMLDocument doc) { /** - * Replace placeholders in {@link Parameters#documentTitlePattern()} and ModelNotes + * Processes and replaces placeholders in the document title pattern and model notes with actual values from the model. + * This method retrieves the model ID and organism information from the BiGG database, and uses these along with + * other parameters to populate a map of replacements. These replacements are used later to substitute placeholders + * in the SBMLDocument notes. * - * @param model: - * {@link Model} contained within {@link SBMLDocument} passed to {@link #annotate(SBMLDocument)} + * @param model The {@link Model} contained within the {@link SBMLDocument} passed to {@link #annotate(SBMLDocument)}. + * @return A map of placeholder strings and their corresponding replacement values. 
*/ private Map processReplacements(Model model) { + // Retrieve the model ID String id = model.getId(); - // Empty organism name should be ok, if it is not a BiGG model + // Attempt to retrieve the organism name associated with the model ID; use an empty string if not available String organism = BiGGDB.getOrganism(id).orElse(""); + // Access the current parameters instance Parameters parameters = Parameters.get(); + // Retrieve and process the document title pattern by replacing placeholders String name = parameters.documentTitlePattern(); name = name.replace("[biggId]", id); name = name.replace("[organism]", organism); + // Initialize a map to hold the replacement values Map replacements = new HashMap<>(); replacements.put("${organism}", organism); replacements.put("${title}", name); @@ -137,6 +157,7 @@ private Map processReplacements(Model model) { replacements.put("${year}", Integer.toString(Calendar.getInstance().get(Calendar.YEAR))); replacements.put("${bigg.timestamp}", BiGGDB.getBiGGVersion().map(date -> format("{0,date}", date)).orElse("")); replacements.put("${species_table}", ""); + // Set the model name to the organism name if it is not already set if (!model.isSetName()) { model.setName(organism); } @@ -145,19 +166,20 @@ private Map processReplacements(Model model) { /** - * Replaces generic placeholders in notes files and appends both note types + * This method appends notes to the SBMLDocument and its model by replacing placeholders in the notes files. + * It handles both model-specific notes and document-wide notes. * - * @param doc: - * {@link SBMLDocument} to add notes to - * @throws IOException: - * propagated from {@link SBMLDocument#appendNotes(String)} or {@link Model#appendNotes(String)} - * @throws XMLStreamException: - * propagated from {@link SBMLDocument#appendNotes(String)} or {@link Model#appendNotes(String)} + * @param doc The SBMLDocument to which the notes will be appended. 
+ * @param replacements A map containing the placeholder text and their replacements. + * @throws IOException If there is an error reading the notes files or writing to the document. + * @throws XMLStreamException If there is an error processing the XML content of the notes. */ private void appendNotes(SBMLDocument doc, Map replacements) throws IOException, XMLStreamException { Parameters parameters = Parameters.get(); String modelNotesFile = "ModelNotes.html"; String documentNotesFile = "SBMLDocumentNotes.html"; + + // Determine the files to use for model and document notes based on user settings if (parameters.noModelNotes()) { modelNotesFile = null; documentNotesFile = null; @@ -171,9 +193,13 @@ private void appendNotes(SBMLDocument doc, Map replacements) thr documentNotesFile = documentNotes != null ? documentNotes.getAbsolutePath() : null; } } + + // Append document notes if the title placeholder is present and the notes file is specified if (replacements.containsKey("${title}") && (documentNotesFile != null)) { doc.appendNotes(parseNotes(documentNotesFile, replacements)); } + + // Append model notes if the notes file is specified if (modelNotesFile != null) { doc.getModel().appendNotes(parseNotes(modelNotesFile, replacements)); } @@ -211,13 +237,13 @@ private void mergeMIRIAMannotations(SBase sbase) { /** - * @param sbase: - * Current {@link SBase} to merge annotations for - * @param miriam: - * Current annotations for the given {@link SBase} - * @return Returns {@code true}, if there are different {@link CVTerm} instances with the same qualifier that need to - * be - * merged + * Evaluates and merges CVTerm annotations for a given SBase element. This method checks each CVTerm associated with + * the SBase and determines if there are multiple CVTerms with the same Qualifier that need merging. It also corrects + * invalid qualifiers based on the type of SBase (Model or other biological elements). 
+ * + * @param sbase The SBase element whose annotations are to be evaluated and potentially merged. + * @param miriam A sorted map that groups CVTerm resources by their qualifiers. + * @return true if there are CVTerms with the same qualifier that need to be merged, false otherwise. */ private boolean hashMIRIAMuris(SBase sbase, SortedMap> miriam) { boolean doMerge = false; @@ -247,11 +273,13 @@ private boolean hashMIRIAMuris(SBase sbase, SortedMap} if an id could be retrieved, else {@link Optional#empty()} + * @param resources A list of URIs containing annotations for the biological entity. + * @param type The type of the biological entity, which can be one of the following: + * {@link BiGGDBContract.Constants#TYPE_SPECIES}, {@link BiGGDBContract.Constants#TYPE_REACTION}, or + * {@link BiGGDBContract.Constants#TYPE_GENE_PRODUCT}. + * @return An {@link Optional} containing the BiGG ID if it could be successfully retrieved, otherwise {@link Optional#empty()}. */ public static Optional getBiGGIdFromResources(List resources, String type) { for (String resource : resources) { - Optional id = Registry.checkResourceUrl(resource).map(Registry::getPartsFromIdentifiersURI) + Optional id = Registry.checkResourceUrl(resource) + .map(Registry::getPartsFromIdentifiersURI) .flatMap(parts -> getBiggIdFromParts(parts, type)); if (id.isPresent()) { return id; @@ -326,15 +356,13 @@ public static Optional getBiGGIdFromResources(List resources, St /** - * Tries to get id from BiGG Knowledgebase based on annotation prefix and id for specific species, reaction or gene - * product + * Attempts to retrieve a BiGG identifier from the BiGG Knowledgebase using a given prefix and identifier. This method + * is used for specific biological entities such as species, reactions, or gene products. 
* - * @param parts: - * Parts retrieved from the identifiers.org URI - prefix and id - * @param type: - * Either {@link BiGGDBContract.Constants#TYPE_SPECIES}, {@link BiGGDBContract.Constants#TYPE_REACTION} or - * {@link BiGGDBContract.Constants#TYPE_GENE_PRODUCT} - * @return {@link Optional} containing the id, if one could be retrieved, else {@link Optional#empty()} + * @param parts A list containing two elements: the prefix and the identifier, both extracted from an identifiers.org URI. + * @param type The type of biological entity for which the ID is being retrieved. Valid types are defined in + * {@link BiGGDBContract.Constants} and include TYPE_SPECIES, TYPE_REACTION, and TYPE_GENE_PRODUCT. + * @return An {@link Optional} containing the BiGG ID if found, otherwise {@link Optional#empty()}. */ private static Optional getBiggIdFromParts(List parts, String type) { String prefix = parts.get(0); @@ -350,37 +378,44 @@ private static Optional getBiggIdFromParts(List parts, String ty /** - * Delegates annotation of reactions + * Delegates the annotation process for each reaction in the given SBML model. + * This method iterates over all reactions in the model, updates the progress display, + * and invokes the annotation for each reaction. * - * @param model: - * {@link Model} contained within {@link SBMLDocument} passed to {@link #annotate(SBMLDocument)} + * @param model The SBML model containing reactions to be annotated. It is part of the {@link SBMLDocument} passed to {@link #annotate(SBMLDocument)}. 
*/ private void annotateListOfReactions(Model model) { - for (int i = 0; i < model.getReactionCount(); i++) { + for (Reaction reaction : model.getListOfReactions()) { progress.DisplayBar("Annotating Reactions (4/5) "); - ReactionAnnotation reactionAnnotation = new ReactionAnnotation(model.getReaction(i)); + ReactionAnnotation reactionAnnotation = new ReactionAnnotation(reaction); reactionAnnotation.annotate(); } } /** - * Delegates annotation of gene products + * This method handles the annotation of gene products in a given SBML model. It checks if the model has the FBC plugin + * set and then proceeds to annotate each gene product found within the model. The progress bar is updated to reflect + * the number of gene products being annotated. * - * @param model: - * {@link Model} contained within {@link SBMLDocument} passed to {@link #annotate(SBMLDocument)} + * @param model The SBML model containing gene products to be annotated. It must be an instance of {@link Model} + * contained within an {@link SBMLDocument} that is passed to {@link #annotate(SBMLDocument)}. */ private void annotateListOfGeneProducts(Model model) { + // Check if the FBC plugin is set in the model if (model.isSetPlugin(FBCConstants.shortLabel)) { FBCModelPlugin fbcModelPlugin = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); - // update progress bar for added geneProducts, i.e. 
change dummy count to correct one + + // Calculate the change in the number of gene products to update the progress bar accordingly int changed = fbcModelPlugin.getNumGeneProducts() - initialGeneProducts; if (changed > 0) { long current = progress.getCallNumber(); - // substract fake count + // Adjust the total number of calls for the progress bar by subtracting the placeholder count progress.setNumberOfTotalCalls(progress.getNumberOfTotalCalls() + changed - 50); progress.setCallNr(current); } + + // Iterate over each gene product and annotate it for (GeneProduct geneProduct : fbcModelPlugin.getListOfGeneProducts()) { progress.DisplayBar("Annotating Gene Products (5/5) "); GeneProductAnnotation geneProductAnnotation = new GeneProductAnnotation(geneProduct); @@ -391,13 +426,17 @@ private void annotateListOfGeneProducts(Model model) { /** - * @param location: - * relative path to the resource from this class. - * @param replacements: - * map of actual values for placeholder tokens in the notes - * @return Constants.URL_PREFIX + " like '%%identifiers.org%%'" - * @throws IOException: - * propagated from {@link FileInputStream()} + * Parses the notes from a specified location and replaces placeholder tokens with actual values. + * This method first attempts to read the resource from the classpath. If the resource is not found, + * it falls back to reading from the filesystem. It processes the content line by line, starting to + * append lines to the result after encountering a `<body>` tag and stopping after a `</body>` tag. + * Any placeholders in the format `${placeholder}` found within the body are replaced with corresponding + * values provided in the `replacements` map. + * + * @param location The relative path to the resource from this class. + * @param replacements A map of placeholder tokens to their actual values to be replaced in the notes. + * @return A string containing the processed notes with placeholders replaced by actual values.
+ * @throws IOException If an I/O error occurs while reading the file. */ private String parseNotes(String location, Map replacements) throws IOException { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/CVTermAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/CVTermAnnotation.java index 5b92a69e..41381a39 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/CVTermAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/CVTermAnnotation.java @@ -26,6 +26,12 @@ import static edu.ucsd.sbrg.db.AnnotateDBContract.Constants.BIGG_REACTION; import static java.text.MessageFormat.format; +/** + * Abstract class providing a framework for annotating SBML elements with Controlled Vocabulary (CV) Terms. + * This class defines the basic structure and operations for adding annotations to SBML elements based on BiGG IDs. + * It includes methods to check the validity of BiGG IDs, add annotations to SBML elements, and specifically handle + * annotations for Species and Reactions using data from BiGG and other databases. + */ public abstract class CVTermAnnotation { /** @@ -33,23 +39,38 @@ public abstract class CVTermAnnotation { */ private static final transient ResourceBundle MESSAGES = ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); + /** + * Abstract method to annotate an SBML element. Implementations should define specific annotation logic. + */ abstract void annotate(); - + /** + * Abstract method to check the validity of a BiGG ID. Implementations should return an Optional containing + * the BiGG ID if it is valid, or an empty Optional if not. + * + * @return Optional containing the valid BiGG ID or empty if the ID is invalid. + */ abstract Optional checkId(); - + /** + * Abstract method to add annotations to an SBML element using a BiGG ID. Implementations should define how + * annotations are added based on the specific type of SBML element and the source of the annotations. 
+ * + * @param biggId The BiGGId used for fetching annotations. + */ abstract void addAnnotations(BiGGId biggId); - /** - * Common annotation method for species and reactions, as they shared much of their code + * Adds annotations to an SBML node (either a Species or a Reaction) using a given BiGGId. + * This method first checks if the node is an instance of Species or Reaction and throws an IllegalArgumentException if not. + * It then removes any existing CVTerm with the qualifier BQB_IS, prepares a new CVTerm if none existed, + * and collects annotations from various sources (BiGG database, AnnotateDB) based on whether the node is a Species or Reaction. + * These annotations are filtered to remove any that already exist on the node, sorted, and then added to the node. + * Finally, it ensures that the node has a metaId set if it has any CVTerms. * - * @param node: - * {@link Reaction} or {@link Species} to get annotations for - * @throws IllegalArgumentException - * if passed {@link SBase} is not a {@link Species} or {@link Reaction}, as the method is only applicable for - * both those cases + * @param node The SBML node to annotate, which should be either a Species or a Reaction. + * @param biggId The BiGGId used for fetching annotations. + * @throws IllegalArgumentException If the node is neither a Species nor a Reaction. 
*/ void addAnnotations(SBase node, BiGGId biggId) throws IllegalArgumentException { if (!(node instanceof Species) && !(node instanceof Reaction)) { diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/CompartmentAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/CompartmentAnnotation.java index 89dc976a..e5695015 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/CompartmentAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/CompartmentAnnotation.java @@ -9,21 +9,31 @@ import edu.ucsd.sbrg.db.BiGGDB; import edu.ucsd.sbrg.db.QueryOnce; +/** + * This class is responsible for annotating a specific compartment within an SBML model using data from the BiGG database. + * It allows for the addition of both BiGG and SBO annotations to a compartment, and can also set the compartment's name + * based on information retrieved from the BiGG database. + */ public class CompartmentAnnotation { /** - * Instance of compartment to annotate + * The compartment instance that will be annotated. */ private final Compartment compartment; + /** + * Constructs a new CompartmentAnnotation object for a given compartment. + * + * @param compartment The compartment to be annotated. + */ public CompartmentAnnotation(Compartment compartment) { this.compartment = compartment; } - + /** - * Adds bigg and SBO annotation for the given compartment and sets its name from BiGG, if no name is set or if it is - * the default compartment. - * Only works for compartment codes contained in BiGG Knowledgebase + * Annotates the compartment with BiGG and SBO terms. If the compartment's name is not set or is set to "default", + * it updates the name based on the BiGG database. This method only processes compartments that are recognized + * within the BiGG Knowledgebase. 
*/ public void annotate() { BiGGId biggId = new BiGGId(compartment.getId()); diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/GeneProductAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/GeneProductAnnotation.java index 414fd8dd..4203210f 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/GeneProductAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/GeneProductAnnotation.java @@ -19,6 +19,11 @@ import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_GENE_PRODUCT; import static java.text.MessageFormat.format; +/** + * Provides functionality to annotate gene products in an SBML model using data from the BiGG database. + * This class extends {@link CVTermAnnotation} and specifically handles the annotation of {@link GeneProduct} instances. + * It includes methods to validate gene product IDs, retrieve and set labels, and add annotations based on BiGG IDs. + */ public class GeneProductAnnotation extends CVTermAnnotation { /** @@ -34,23 +39,30 @@ public class GeneProductAnnotation extends CVTermAnnotation { */ private final GeneProduct geneProduct; + /** + * Constructs a new {@link GeneProductAnnotation} instance for a given {@link GeneProduct}. + * + * @param geneProduct The {@link GeneProduct} to be annotated. + */ public GeneProductAnnotation(GeneProduct geneProduct) { this.geneProduct = geneProduct; } /** - * Adds annotation for a gene product + * Annotates a gene product by adding relevant metadata and references. + * This method first checks the gene product's ID for validity and retrieves a corresponding BiGGId if available. + * It then attempts to get a label for the gene product. If no label is found, the method returns early. + * If a label is present, it updates the gene product reference in the association, adds annotations using the BiGGId, + * and sets the gene product's metaId if it has any CV terms. Finally, it sets the gene product's label name. 
*/ @Override public void annotate() { Optional biggId = checkId(); - // TODO: don't pass optional around, handle this differently Optional label = getLabel(biggId); if (label.isEmpty()) { return; } - // fix geneProductReference in Association not updated SBMLUtils.updateGeneProductReference(geneProduct); biggId.ifPresent(id -> { addAnnotations(id); @@ -63,32 +75,39 @@ public void annotate() { /** - * Checks if {@link GeneProduct#getId()} returns a correct {@link BiGGId} and tries to retrieve a corresponding - * {@link BiGGId} based on annotations present. + * Validates the ID of a {@link GeneProduct} against the expected BiGG ID format and attempts to retrieve a + * corresponding {@link BiGGId} from existing annotations if the initial ID does not conform to the BiGG format. + * The method first checks if the gene product's ID matches the BiGG ID pattern. If it does not match, it then + * tries to find a valid BiGG ID from the gene product's annotations. If a valid BiGG ID is found among the annotations, + * it updates the ID; otherwise, it retains the original ID. * - * @return String representation of {@link BiGGId} + * @return An {@link Optional} containing the validated or retrieved BiGG ID, or an empty Optional if no valid ID is found. 
*/ @Override public Optional checkId() { String id = geneProduct.getId(); boolean isBiGGid = id.matches("^(G_)?([a-zA-Z][a-zA-Z0-9_]+)(?:_([a-z][a-z0-9]?))?(?:_([A-Z][A-Z0-9]?))?$"); if (!isBiGGid) { - // Flatten all resources for all CVTerms into a list + // Collect all resources from CVTerms that qualify as BQB_IS into a list List resources = geneProduct.getAnnotation().getListOfCVTerms().stream() .filter(cvTerm -> cvTerm.getQualifier() == Qualifier.BQB_IS) - .flatMap(term -> term.getResources().stream()).collect(Collectors.toList()); - if (!resources.isEmpty()) { - // update id if we found something - id = getBiGGIdFromResources(resources, TYPE_GENE_PRODUCT).orElse(id); - } + .flatMap(term -> term.getResources().stream()) + .collect(Collectors.toList()); + // Attempt to update the ID with a valid BiGG ID from the resources, if available + id = getBiGGIdFromResources(resources, TYPE_GENE_PRODUCT).orElse(id); } + // Create and return a BiGGId object based on the validated or updated ID return BiGGId.createGeneId(id); } /** - * @param biggId - * @return + * Retrieves the label for a gene product based on the provided BiGGId. If the gene product has a label set and it is not "None", + * that label is returned. If no label is set but the gene product has an ID, the BiGGId is converted to a string and returned. + * If neither condition is met, an empty Optional is returned. + * + * @param biggId An Optional containing the BiGGId of the gene product, which may be used to generate a label if the gene product's own label is not set. + * @return An Optional containing the label of the gene product, or an empty Optional if no appropriate label is found. 
*/ public Optional getLabel(Optional biggId) { if (geneProduct.isSetLabel() && !geneProduct.getLabel().equalsIgnoreCase("None")) { @@ -100,54 +119,66 @@ public Optional getLabel(Optional biggId) { } } - + /** - * Set gene product label and, if possible, update or set the gene product name to the one obtained from BiGG by use - * of the label, which was either already set or corresponds to a {@link BiGGId} created from - * {@link GeneProduct#getId()} + * Updates the label of a gene product and sets its name based on the retrieved gene name from the BiGG database. + * If the current label is set to "None", it updates the label to the provided one. It then attempts to fetch + * the gene name corresponding to this label from the BiGG database. If a gene name is found, it checks if the + * current gene product name is different from the fetched name. If they differ, it logs a warning and updates + * the gene product name. If no gene name is found, it logs this as a fine-level message. * - * @param label + * @param label The label to set or use for fetching the gene name. This label should correspond to a {@link BiGGId} + * or be derived from {@link GeneProduct#getId()}. 
*/ public void setGPLabelName(String label) { - // we successfully found information by using the id, so this needs to be the label + // Check if the current label is "None" and update it if so if (geneProduct.getLabel().equalsIgnoreCase("None")) { geneProduct.setLabel(label); } + // Attempt to fetch the gene name from the BiGG database using the label BiGGDB.getGeneName(label).ifPresent(geneName -> { + // Log if no gene name is associated with the label if (geneName.isEmpty()) { logger.fine(format(MESSAGES.getString("NO_GENE_FOR_LABEL"), geneProduct.getName())); - } else if (geneProduct.isSetName() && !geneProduct.getName().equals(geneName)) { - logger.warning(format(MESSAGES.getString("UPDATE_GP_NAME"), geneProduct.getName(), geneName)); + } else { + // Log a warning if the gene product name is set and differs from the fetched gene name + if (geneProduct.isSetName() && !geneProduct.getName().equals(geneName)) { + logger.warning(format(MESSAGES.getString("UPDATE_GP_NAME"), geneProduct.getName(), geneName)); + } + // Update the gene product name with the fetched gene name + geneProduct.setName(geneName); } - geneProduct.setName(geneName); }); } - + /** - * Add annotations for gene product based on {@link BiGGId} + * Adds annotations to a gene product based on a given {@link BiGGId}. This method differentiates between + * annotations that specify what the gene product 'is' and what it 'is encoded by'. Resources are fetched + * from the BiGG database using the abbreviation from the provided BiGGId. Each resource URL is checked and + * parsed to determine the appropriate category ('is' or 'is encoded by') based on predefined prefixes. * - * @param biggId: - * {@link BiGGId} from species id + * @param biggId The {@link BiGGId} associated with the gene product, typically derived from the gene product ID. 
*/ @Override public void addAnnotations(BiGGId biggId) { CVTerm termIs = new CVTerm(Qualifier.BQB_IS); CVTerm termEncodedBy = new CVTerm(Qualifier.BQB_IS_ENCODED_BY); - // label is stored without "G_" prefix in BiGG + // Retrieve gene IDs from BiGG database and categorize them based on their prefix BiGGDB.getGeneIds(biggId.getAbbreviation()).forEach( resource -> Registry.checkResourceUrl(resource).map(Registry::getPartsFromIdentifiersURI) - .filter(parts -> parts.size() > 0).map(parts -> parts.get(0)).ifPresent(prefix -> { + .filter(parts -> !parts.isEmpty()).map(parts -> parts.get(0)).ifPresent(prefix -> { switch (prefix) { - case "interpro": - case "pdb": - case "uniprot": - termIs.addResource(resource); - break; - default: - termEncodedBy.addResource(resource); + case "interpro": + case "pdb": + case "uniprot": + termIs.addResource(resource); + break; + default: + termEncodedBy.addResource(resource); } })); + // Add the CVTerm to the gene product if resources are present if (termIs.getResourceCount() > 0) { geneProduct.addCVTerm(termIs); } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ModelAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ModelAnnotation.java index 58bd4448..553ecced 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ModelAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ModelAnnotation.java @@ -14,6 +14,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +/** + * This class is responsible for annotating an SBML {@link Model} with relevant metadata and references. + * It handles the annotation of the model itself and delegates the annotation of contained elements such as + * {@link Compartment}, {@link Species}, {@link Reaction}, and {@link GeneProduct}. + * The annotations can include taxonomy information, database references, and meta identifiers. 
+ */ public class ModelAnnotation { private final Model model; @@ -24,46 +30,72 @@ public ModelAnnotation(Model model) { /** - * Process annotations pertaining to the actual {@link Model} and delegate annotation for all {@link Compartment}, - * {@link Species}, {@link Reaction} and {@link GeneProduct} instances in the model + * Annotates the {@link Model} with relevant metadata and delegates the annotation of contained elements such as + * {@link Compartment}, {@link Species}, {@link Reaction}, and {@link GeneProduct}. + * + * Steps: + * 1. Retrieves the model's ID and uses it to fetch and add a taxonomy annotation if available. + * 2. Checks if the model exists in the database and adds specific BiGG database annotations. + * 3. Sets the model's MetaId to its ID if MetaId is not already set and the model has at least one CVTerm. */ public void annotate() { + // Retrieve the model's identifier String id = model.getId(); + // Attempt to fetch and add a taxonomy annotation using the model's ID BiGGDB.getTaxonId(id).ifPresent( taxonId -> model.addCVTerm(new CVTerm(CVTerm.Qualifier.BQB_HAS_TAXON, Registry.createURI("taxonomy", taxonId)))); + // Check if the model is recognized in the database and add specific annotations if (QueryOnce.isModel(id)) { addBiGGModelAnnotations(); } + // Set the model's MetaId to its ID if MetaId is not set and there are existing CVTerms if (!model.isSetMetaId() && (model.getCVTermCount() > 0)) { model.setMetaId(model.getId()); } } - /** - * Add annotation of genomic sequence/assembly to models contained in BiGG. - * Only MIRIAM annotations are added, if {@link Parameters#includeAnyURI()} returns {@code false} + * Adds annotations related to the genomic sequence or assembly for models stored in the BiGG database. + * This method first adds a MIRIAM annotation indicating the model's identity within BiGG. + * It then attempts to annotate the model with its genomic accession number, which could be a RefSeq or a genome assembly accession. 
+ * If the accession matches a RefSeq pattern, it is annotated directly using a RefSeq URI. + * Otherwise, and only if non-MIRIAM URIs are allowed (controlled by {@link Parameters#includeAnyURI()}), a genome assembly accession + * is annotated using a direct link to the NCBI assembly resource, and any other accession using a direct link to the NCBI nucleotide resource. + * Annotations are only added if they are successfully created and contain at least one resource. */ private void addBiGGModelAnnotations() { + // Add a basic MIRIAM annotation indicating the model's identity within BiGG model.addCVTerm(new CVTerm(CVTerm.Qualifier.BQM_IS, Registry.createURI("bigg.model", model.getId()))); + + // Retrieve the genomic accession number for the model String accession = BiGGDB.getGenomeAccesion(model.getId()); - Matcher refseqMatcher = - Pattern.compile("^(((AC|AP|NC|NG|NM|NP|NR|NT|NW|XM|XP|XR|YP|ZP)_\\d+)|(NZ_[A-Z]{2,4}\\d+))(\\.\\d+)?$") - .matcher(accession); + + // Prepare a pattern matcher for RefSeq accession numbers + Matcher refseqMatcher = Pattern.compile("^(((AC|AP|NC|NG|NM|NP|NR|NT|NW|XM|XP|XR|YP|ZP)_\\d+)|(NZ_[A-Z]{2,4}\\d+))(\\.\\d+)?$") + .matcher(accession); + // Create a CVTerm for versioning annotation CVTerm term = new CVTerm(CVTerm.Qualifier.BQB_IS_VERSION_OF); + + // Check if the accession matches the RefSeq pattern if (refseqMatcher.matches()) { + // Add a RefSeq resource to the CVTerm term.addResource(Registry.createShortURI("refseq:" + accession)); } else { + // Check if non-MIRIAM URIs are allowed if (Parameters.get().includeAnyURI()) { + // Prepare a pattern matcher for genome assembly accession numbers Matcher genomeAssemblyMatcher = Pattern.compile("^GC[AF]_[0-9]{9}\\.[0-9]+$").matcher(accession); if (genomeAssemblyMatcher.matches()) { - // resolution issues with https://identifiers.org/insdc.gca, resolve non MIRIAM way (see Issue #96) + // Add a genome assembly resource to the CVTerm, resolving non-MIRIAM way due to known issues 
term.addResource("https://www.ncbi.nlm.nih.gov/assembly/" + accession); } else { + // Add a nucleotide resource to the CVTerm for other cases term.addResource("https://www.ncbi.nlm.nih.gov/nuccore/" + accession); } } } + + // Add the CVTerm to the model if it contains any resources if (term.getResourceCount() > 0) { model.addCVTerm(term); } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java index 32a1a363..0a0af388 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/ReactionAnnotation.java @@ -21,6 +21,13 @@ import java.util.logging.Logger; import java.util.stream.Collectors; +/** + * This class provides functionality to annotate a reaction in an SBML model using BiGG database identifiers. + * It extends the {@link CVTermAnnotation} class, allowing it to manage controlled vocabulary (CV) terms + * associated with the reaction. The class handles various aspects of reaction annotation including setting + * the reaction's name, SBO term, and additional annotations. It also processes gene reaction rules and + * subsystem information associated with the reaction. + */ public class ReactionAnnotation extends CVTermAnnotation { /** @@ -46,37 +53,41 @@ public ReactionAnnotation(Reaction reaction) { /** - * Adds annotations to a reaction, parses associated gene reaction rules and creates missing gene products and - * converts subsystem information into corresponding groups + * Annotates a reaction by setting its name, SBO term, and additional annotations. It also processes gene reaction rules + * and subsystem information associated with the reaction. This method retrieves a BiGG ID for the reaction, either from + * the reaction's ID directly or through associated annotations. If a valid BiGG ID is found, it proceeds with the + * annotation and parsing processes. 
*/ @Override public void annotate() { - // This biggId corresponds to BiGGId calculated from getSpeciesBiGGIdFromUriList method, if not present as - // reaction.id + // Attempt to retrieve a BiGG ID for the reaction, either directly from the reaction ID or through associated annotations checkId().ifPresent(biggId -> { - setName(biggId); - setSBOTerm(biggId); - addAnnotations(biggId); - parseGeneReactionRules(biggId); - parseSubsystems(biggId); + setName(biggId); // Set the reaction's name based on the BiGG ID + setSBOTerm(biggId); // Assign the appropriate SBO term based on the BiGG ID + addAnnotations(biggId); // Add additional annotations related to the BiGG ID + parseGeneReactionRules(biggId); // Parse and process gene reaction rules associated with the BiGG ID + parseSubsystems(biggId); // Convert subsystem information into corresponding groups based on the BiGG ID }); } /** - * Checks if {@link Species#getId()} returns a correct {@link BiGGId} and tries to retrieve a corresponding - * {@link BiGGId} based on annotations present. + * This method checks if the ID of the reaction is a valid BiGG ID and attempts to retrieve a corresponding + * BiGG ID based on existing annotations. It first checks if the reaction ID matches the expected BiGG ID format + * and verifies its existence in the database. If the ID does not match or is not found, it then attempts to + * extract a BiGG ID from the reaction's annotations. This involves parsing the CVTerms associated with the reaction, + * extracting URLs, validating them, and then querying the BiGG database for corresponding reaction IDs that match + * the reaction's compartment. 
* - * @return If creation was successful, internal ModelPolisher internal BiGGId representation wrapped in an Optional is - * returned, else Optional.empty() is returned + * @return An {@link Optional} containing the BiGG ID if found or created successfully, otherwise {@link Optional#empty()} */ @Override public Optional checkId() { String id = reaction.getId(); - // extracting BiGGId if not present for species + // Check if the reaction ID matches the expected BiGG ID format and exists in the database boolean isBiGGid = id.matches("^(R_)?([a-zA-Z][a-zA-Z0-9_]+)(?:_([a-z][a-z0-9]?))?(?:_([A-Z][A-Z0-9]?))?$") && QueryOnce.isReaction(id); if (!isBiGGid) { - // Flatten all resources for all CVTerms into a list + // Extract BiGG IDs from annotations if the direct ID check fails Set ids = reaction.getAnnotation().getListOfCVTerms() .stream() .filter(cvTerm -> cvTerm.getQualifier() == Qualifier.BQB_IS) @@ -93,6 +104,7 @@ public Optional checkId() { .filter(this::matchingCompartments) .map(fr -> fr.reactionId) .collect(Collectors.toSet()); + // Select the first valid ID from the set, if available id = ids.stream() .findFirst() .orElse(id); @@ -100,6 +112,18 @@ public Optional checkId() { return BiGGId.createReactionId(id); } + /** + * Determines if the reaction's compartment matches the compartment information of a foreign reaction from the BiGG database. + * + * This method checks various conditions to ensure that the compartments are correctly matched: + * 1. If the reaction does not have a compartment set and the foreign reaction also lacks compartment details, it returns true. + * 2. If the reaction does not have a compartment set but the foreign reaction does, it returns false. + * 3. If the reaction has a compartment set, it checks if the compartment ID matches the foreign reaction's compartment ID. + * 4. If the reaction has a named compartment instance set, it checks if the name matches the foreign reaction's compartment name. 
+ * + * @param foreignReaction The foreign reaction object containing compartment details to compare against the reaction. + * @return true if the compartments match according to the conditions above, false otherwise. + */ private boolean matchingCompartments(BiGGDB.ForeignReaction foreignReaction) { if (!reaction.isSetCompartment() && null == foreignReaction.compartmentId @@ -120,9 +144,11 @@ private boolean matchingCompartments(BiGGDB.ForeignReaction foreignReaction) { return false; } - /** - * @param biggId + * Sets the name of the reaction based on the provided BiGGId. It retrieves the reaction name using the abbreviation + * from the BiGGId, polishes the name, and updates the reaction's name if the new name is different from the current name. + * + * @param biggId The BiGGId object containing the abbreviation used to fetch and potentially update the reaction's name. */ public void setName(BiGGId biggId) { String abbreviation = biggId.getAbbreviation(); @@ -132,9 +158,14 @@ public void setName(BiGGId biggId) { .ifPresent(reaction::setName); } - /** - * @param biggId + * Sets the SBO term for a reaction based on the given BiGGId. + * If the reaction does not already have an SBO term set, it determines the appropriate SBO term + * based on whether the reaction is a pseudoreaction or a generic process. Pseudoreactions are assigned + * an SBO term of 631, while generic processes are assigned an SBO term of 375 unless the configuration + * specifies to omit generic terms. + * + * @param biggId The BiGGId object containing the abbreviation used to check if the reaction is a pseudoreaction. */ public void setSBOTerm(BiGGId biggId) { String abbreviation = biggId.getAbbreviation(); @@ -150,36 +181,45 @@ public void setSBOTerm(BiGGId biggId) { /** - * Add annotations for reaction based on {@link BiGGId}, update http to https for MIRIAM URIs and merge duplicates + * This method delegates the task of adding annotations to a reaction based on the provided {@link BiGGId}. 
+ * It specifically focuses on updating MIRIAM URIs from http to https, merging duplicate annotations, and + * ensuring that the reaction is annotated with the correct identifiers from the BiGG database. * - * @param biggId: - * {@link BiGGId} from reaction id + * @param biggId The {@link BiGGId} associated with the reaction, used to fetch and apply annotations. */ @Override public void addAnnotations(BiGGId biggId) { addAnnotations(reaction, biggId); } - /** - * @param biggId + * Parses gene reaction rules for a given reaction based on the BiGG database identifier. + * This method retrieves gene reaction rules associated with the reaction's abbreviation + * from the BiGG database and applies gene-protein-reaction (GPR) parsing to the reaction. + * It considers whether generic terms should be omitted based on the current parameters. + * + * @param biggId The BiGG database identifier for the reaction, used to fetch and parse gene reaction rules. */ public void parseGeneReactionRules(BiGGId biggId) { String abbreviation = biggId.getAbbreviation(); Parameters parameters = Parameters.get(); List geneReactionRules = BiGGDB.getGeneReactionRule(abbreviation, reaction.getModel().getId()); - for (String geneRactionRule : geneReactionRules) { - GPRParser.parseGPR(reaction, geneRactionRule, parameters.omitGenericTerms()); + for (String geneReactionRule : geneReactionRules) { + GPRParser.parseGPR(reaction, geneReactionRule, parameters.omitGenericTerms()); } } /** - * Retrieve subsystem information from BiGG Knowledgebase and convert subsystem information to corresponding group - * using {@link GroupsModelPlugin} and link reaction to corresponding group + * Retrieves subsystem information from the BiGG Knowledgebase and converts it into corresponding groups using the + * {@link GroupsModelPlugin}. It then links the reaction to the appropriate group based on the subsystem information. 
+ * If the model is not from BiGG, it logs a warning and uses a different method to fetch subsystems. + * It also ensures that subsystems are unique by converting them to lowercase and removing duplicates. + * If multiple subsystems are found for a non-BiGG model, the method returns early to avoid ambiguity. + * Each subsystem is then either fetched from a cache or a new group is created and added to the model. + * Finally, the reaction is linked to the group. * - * @param biggId: - * {@link BiGGId} from reaction id + * @param biggId the {@link BiGGId} associated with the reaction, used to fetch subsystem information */ private void parseSubsystems(BiGGId biggId) { Model model = reaction.getModel(); diff --git a/src/main/java/edu/ucsd/sbrg/bigg/annotation/SpeciesAnnotation.java b/src/main/java/edu/ucsd/sbrg/bigg/annotation/SpeciesAnnotation.java index 6492b0d7..243f5a1f 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/annotation/SpeciesAnnotation.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/annotation/SpeciesAnnotation.java @@ -24,6 +24,13 @@ import static edu.ucsd.sbrg.db.BiGGDBContract.Constants.TYPE_SPECIES; import static java.text.MessageFormat.format; +/** + * This class provides functionality to annotate a species in an SBML model using BiGG database identifiers. + * It extends the {@link CVTermAnnotation} class, allowing it to manage controlled vocabulary (CV) terms + * associated with the species. The class handles various aspects of species annotation including setting + * the species' name, SBO term, and additional annotations. It also sets the chemical formula and charge + * for the species using FBC (Flux Balance Constraints) extensions. + */ public class SpeciesAnnotation extends CVTermAnnotation { /** @@ -45,57 +52,62 @@ public SpeciesAnnotation(Species species) { /** - * Annotates given species with different information from BiGG Knowledgebase. - * Sets a name, if possible, if none is set the name corresponds to the species BiGGId. 
- * Adds chemical formula for the species. - **/ + * This method annotates a species with various details fetched from the BiGG Knowledgebase. It performs the following: + * 1. Sets the species name based on the BiGGId. If the species does not have a name, it uses the BiGGId as the name. + * 2. Assigns an SBO (Systems Biology Ontology) term to the species based on the BiGGId. + * 3. Adds additional annotations to the species, such as database cross-references. + * 4. Sets the chemical formula and charge for the species using FBC (Flux Balance Constraints) extensions. + * + * The BiGGId used for these operations is either derived from the species' URI list or directly from its ID if available. + */ @Override public void annotate() { - // This biggId corresponds to BiGGId calculated from getSpeciesBiGGIdFromUriList method, if not present as - // species.id + // Retrieve the BiGGId for the species, either from its URI list or its direct ID checkId().ifPresent(biggId -> { - setName(biggId); - setSBOTerm(biggId); - addAnnotations(biggId); - FBCSetFormulaCharge(biggId); + setName(biggId); // Set the species name based on the BiGGId + setSBOTerm(biggId); // Assign the appropriate SBO term + addAnnotations(biggId); // Add database cross-references and other annotations + FBCSetFormulaCharge(biggId); // Set the chemical formula and charge }); } /** - * Checks if {@link Species#getId()} returns a correct {@link BiGGId} and tries to retrieve a corresponding - * {@link BiGGId} based on annotations present. + * Validates the species ID and attempts to retrieve a corresponding BiGGId based on existing annotations. + * This method first tries to create a BiGGId from the species ID. If the species ID does not correspond to a known + * BiGGId in the database, it then searches through the species' annotations to find a valid BiGGId. 
* - * @return If creation was successful, internal ModelPolisher internal BiGGId representation wrapped in an Optional is - * returned, else Optional.empty() is returned + * @return An {@link Optional} containing the BiGGId if a valid one is found or created, otherwise {@link Optional#empty()} */ @Override public Optional checkId() { + // Attempt to create a BiGGId from the species ID Optional metaboliteId = BiGGId.createMetaboliteId(species.getId()); + // Check if the created BiGGId is valid, if not, try to find a BiGGId from annotations Optional id = metaboliteId.flatMap(biggId -> { - // extracting BiGGId if not present for species boolean isBiGGid = QueryOnce.isMetabolite(biggId.getAbbreviation()); List resources = new ArrayList<>(); if (!isBiGGid) { - // Flatten all resources for all CVTerms into a list + // Collect all resources from CVTerms that qualify as BQB_IS resources = species.getAnnotation().getListOfCVTerms().stream() .filter(cvTerm -> cvTerm.getQualifier() == Qualifier.BQB_IS) - .flatMap(term -> term.getResources().stream()).collect(Collectors.toList()); + .flatMap(term -> term.getResources().stream()) + .collect(Collectors.toList()); } - // update id if we found something + // Attempt to retrieve a BiGGId from the collected resources return getBiGGIdFromResources(resources, TYPE_SPECIES); }); - // Create BiGGId from retrieved id or return BiGGId constructed for original id + // Return the found BiGGId or the originally created one if no new ID was found return id.map(BiGGId::createMetaboliteId).orElse(metaboliteId); } /** - * Set species name from BiGG Knowledgebase if name is not yet set or corresponds to the species id. - * Depends on the presence of the BiGGId in BiGG + * Updates the name of the species based on data retrieved from the BiGG Knowledgebase. The species name is set only if it + * has not been previously set or if the current name follows a default format that combines the BiGGId abbreviation and + * compartment code. 
This method relies on the availability of a valid {@link BiGGId} for the species. * - * @param biggId: - * {@link BiGGId} constructed from the species id + * @param biggId The {@link BiGGId} associated with the species, used to fetch the component name from the BiGG database. */ public void setName(BiGGId biggId) { if (!species.isSetName() @@ -106,42 +118,42 @@ public void setName(BiGGId biggId) { /** - * Set SBO terms for species, depending on its component type, i.e. metabolite, protein or a generic material entity. - * Annotation for the last case is only written, if {@link Parameters#omitGenericTerms()} returns {@code false}. - * If no component type can be retrieved from BiGG, annotation with material entity can still be performed + * Assigns the SBO term to a species based on its component type as determined from the BiGG database. + * The component type can be a metabolite, protein, or a generic material entity. If the component type is not explicitly + * identified in the BiGG database, the species is annotated as a generic material entity unless the configuration + * explicitly omits such generic terms (controlled by {@link Parameters#omitGenericTerms()}). * - * @param biggId: - * {@link BiGGId} constructed from the species id + * @param biggId The {@link BiGGId} associated with the species, used to determine the component type from the BiGG database. */ private void setSBOTerm(BiGGId biggId) { Parameters parameters = Parameters.get(); BiGGDB.getComponentType(biggId).ifPresentOrElse(type -> { switch (type) { case "metabolite": - species.setSBOTerm(SBO.getSimpleMolecule()); + species.setSBOTerm(SBO.getSimpleMolecule()); // Assign SBO term for simple molecules (metabolites). break; case "protein": - species.setSBOTerm(SBO.getProtein()); + species.setSBOTerm(SBO.getProtein()); // Assign SBO term for proteins. 
break; default: if (!parameters.omitGenericTerms()) { - species.setSBOTerm(SBO.getMaterialEntity()); + species.setSBOTerm(SBO.getMaterialEntity()); // Assign SBO term for generic material entities. } break; } }, () -> { if (!parameters.omitGenericTerms()) { - species.setSBOTerm(SBO.getMaterialEntity()); + species.setSBOTerm(SBO.getMaterialEntity()); // Default SBO term assignment when no specific type is found. } }); } /** - * Add annotations for species based on {@link BiGGId}, update http to https for MIRIAM URIs and merge duplicates + * This method delegates the task of adding annotations to the species based on the provided {@link BiGGId}. + * It ensures that annotations are added to the species, updates HTTP URIs to HTTPS in MIRIAM URIs, and merges any duplicate annotations. * - * @param biggId: - * {@link BiGGId} from species id + * @param biggId the {@link BiGGId} associated with the species ID, used for fetching and adding annotations. */ @Override public void addAnnotations(BiGGId biggId) { @@ -150,10 +162,15 @@ public void addAnnotations(BiGGId biggId) { /** - * Tries to set chemical formula and charge for the given species + * Sets the chemical formula and charge for a species based on the provided BiGGId. + * This method first checks if the species belongs to a BiGG model and retrieves the compartment code. + * It then attempts to set the chemical formula if it has not been set already. The formula is fetched + * from the BiGG database either based on the model ID or the compartment code if the model ID fetch fails. + * If the formula is successfully retrieved, it is set using the FBCSpeciesPlugin. + * Similarly, the charge is fetched and set if the species does not already have a charge set. + * If a charge is fetched and it contradicts an existing charge, a warning is logged and the existing charge is unset. 
* - * @param biggId: - * {@link BiGGId} from species id + * @param biggId {@link BiGGId} from species id */ @SuppressWarnings("deprecation") private void FBCSetFormulaCharge(BiGGId biggId) { diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/CompartmentPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/CompartmentPolishing.java index 95db507b..cdf9c3d7 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/CompartmentPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/CompartmentPolishing.java @@ -2,13 +2,10 @@ import static java.text.MessageFormat.format; -import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import edu.ucsd.sbrg.miriam.Registry; -import org.sbml.jsbml.Annotation; -import org.sbml.jsbml.CVTerm; import org.sbml.jsbml.Compartment; import org.sbml.jsbml.Model; import org.sbml.jsbml.Unit; @@ -16,6 +13,11 @@ import de.zbit.util.ResourceManager; import edu.ucsd.sbrg.bigg.BiGGId; +/** + * This class is responsible for polishing the properties of a compartment in an SBML model to ensure + * compliance with standards and completeness. It handles the annotation processing, ID and name setting, + * and ensures that necessary attributes like units are appropriately set (spatial dimensions are still a TODO). + */ public class CompartmentPolishing { private final static transient Logger logger = Logger.getLogger(CompartmentPolishing.class.getName()); @@ -26,35 +28,52 @@ public CompartmentPolishing(Compartment compartment) { this.compartment = compartment; } + /** + * Polishes the properties of a compartment to ensure compliance with standards and completeness. + * This method processes annotations, sets default values for missing identifiers, names, and meta identifiers, + * and ensures that the compartment has appropriate units and other necessary attributes set.
+ */ public void polish() { + // Process any external resources linked via annotations in the compartment Registry.processResources(compartment.getAnnotation()); + + // Set a default ID if not already set, otherwise clean up the ID according to BiGG specifications if (!compartment.isSetId()) { - compartment.setId("d"); // default + compartment.setId("d"); // default ID if none is set } else { - // remove C_ prefix of compartment code, not in BiGGId specification + // Attempt to remove the 'C_' prefix from the compartment ID, log a warning if the format is incorrect BiGGId.extractCompartmentCode(compartment.getId()).ifPresentOrElse(compartment::setId, () -> logger.warning(format(MESSAGES.getString("COMPARTMENT_CODE_WRONG_FORMAT"), compartment.getId()))); } - compartment.setSBOTerm(410); // implicit compartment + + // Set the SBOTerm to indicate an implicit compartment + compartment.setSBOTerm(410); + + // Set a default name if not already set if (!compartment.isSetName()) { compartment.setName("default"); } + + // Set the metaId to the compartment's ID if it has CV terms but no metaId set if (!compartment.isSetMetaId() && (compartment.getCVTermCount() > 0)) { compartment.setMetaId(compartment.getId()); } + + // Ensure the compartment's 'constant' property is set to true if not already specified if (!compartment.isSetConstant()) { compartment.setConstant(true); } + + // TODO: Implement logic to set spatial dimensions based on BiGG ID, considering special cases like surfaces if (!compartment.isSetSpatialDimensions()) { - // TODO: check with biGG id, not for surfaces etc. 
- // c.setSpatialDimensions(3d); + // Placeholder for future implementation + // compartment.setSpatialDimensions(3d); } + + // Set the units of the compartment to dimensionless if no specific units are set in the model if (!compartment.isSetUnits()) { Model model = compartment.getModel(); - if ((model == null) - || !(model.isSetLengthUnits() - || model.isSetAreaUnits() - || model.isSetVolumeUnits())) { + if ((model == null) || !(model.isSetLengthUnits() || model.isSetAreaUnits() || model.isSetVolumeUnits())) { compartment.setUnits(Unit.Kind.DIMENSIONLESS); } } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/GeneProductPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/GeneProductPolishing.java index 1990e417..37fda687 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/GeneProductPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/GeneProductPolishing.java @@ -5,6 +5,9 @@ import edu.ucsd.sbrg.bigg.BiGGId; +/** + * This class is responsible for polishing GeneProduct instances by processing their annotations and adjusting their identifiers and names. + */ public class GeneProductPolishing { private final GeneProduct geneProduct; @@ -15,27 +18,48 @@ public GeneProductPolishing(GeneProduct geneProduct) { /** + * Polishes the GeneProduct by processing its annotations, setting its ID and name based on certain conditions. + * The method first processes the annotations of the GeneProduct. It then determines a suitable label for the + * GeneProduct based on its existing label or ID. If no suitable label is found, the method returns early. + * If a new BiGG ID is generated and differs from the current ID, it updates the GeneProduct's ID and potentially + * its metaId if CV terms are present. Finally, if the GeneProduct does not have a name or its name is "None", + * it sets the GeneProduct's name to the determined label. 
*/ public void polish() { + // Process the annotations associated with the gene product Registry.processResources(geneProduct.getAnnotation()); + + // Initialize label variable String label = null; + + // Determine the label from the gene product's label or ID if (geneProduct.isSetLabel() && !geneProduct.getLabel().equalsIgnoreCase("None")) { label = geneProduct.getLabel(); } else if (geneProduct.isSetId()) { label = geneProduct.getId(); } + + // If no label is determined, exit the method if (label == null) { return; } + + // Create a new BiGG ID for the gene product, if possible BiGGId.createGeneId(geneProduct.getId()).ifPresent(biggId -> { String id = biggId.toBiGGId(); + + // Update the gene product's ID if the new ID is different if (!id.equals(geneProduct.getId())) { geneProduct.setId(id); } + + // Set the metaId if there are CV terms associated with the gene product if (geneProduct.getCVTermCount() > 0) { geneProduct.setMetaId(id); } }); + + // Set the gene product's name if it is not set or is "None" if (!geneProduct.isSetName() || geneProduct.getName().equalsIgnoreCase("None")) { geneProduct.setName(label); } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishing.java index d55fba12..2641fd7c 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishing.java @@ -25,6 +25,15 @@ import static java.text.MessageFormat.format; +/** + * This class provides methods to polish an SBML model to conform to specific standards and conventions. + * It handles tasks such as processing annotations, setting meta identifiers, and polishing various model components + * like initial assignments, objectives, gene products, and parameters. The class operates on an SBML {@link Model} + * object and modifies it to enhance its structure and metadata based on the provided configurations. 
+ * + * Progress of the polishing process can be visually tracked using an {@link AbstractProgressBar} which is updated + * throughout the various stages of the polishing process. + */ public class ModelPolishing { private static final transient Logger logger = Logger.getLogger(ModelPolishing.class.getName()); @@ -43,44 +52,76 @@ public ModelPolishing(Model model, boolean strict, AbstractProgressBar progress) this.progress = progress; } + /** + * Polishes the SBML model by processing annotations, setting meta identifiers, and polishing various components. + * This method processes the model's annotations, sets the model's meta identifier if not already set and CV terms are present. + * It polishes initial assignments only in strict mode, FBC objectives and gene products only when those lists are set, and the list of parameters always. + */ public void polish() { + // Process the annotations in the model Registry.processResources(model.getAnnotation()); + // Set the metaId of the model if it is not set and there are CV terms if (!model.isSetMetaId() && (model.getCVTermCount() > 0)) { model.setMetaId(model.getId()); } + // Polish the list of initial assignments if strict mode is enabled and the list is set if (strict && model.isSetListOfInitialAssignments()) { polishListOfInitialAssignments(); } + // Check if the FBC plugin is set and proceed with polishing specific FBC components if (model.isSetPlugin(FBCConstants.shortLabel)) { FBCModelPlugin modelPlug = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); + // Polish the list of objectives if set if (modelPlug.isSetListOfObjectives()) { polishListOfObjectives(modelPlug); } + // Polish the list of gene products if set if (modelPlug.isSetListOfGeneProducts()) { polishListOfGeneProducts(modelPlug); } + // Apply strictness setting to the FBC model plugin modelPlug.setStrict(strict); } + // Polish the list of parameters in the model polishListOfParameters(model); } + /** + * Polishes the list of initial assignments in the model.
+ * This method iterates through each initial assignment and performs checks on the associated variable. + * If the variable is a parameter with a specific SBO term, or if it's a species reference, + * the strict mode is disabled and appropriate warnings are logged. + */ public void polishListOfInitialAssignments() { for (InitialAssignment ia : model.getListOfInitialAssignments()) { + // Update progress display progress.DisplayBar("Polishing Initial Assignments (6/9) "); + // Retrieve the variable associated with the initial assignment Variable variable = ia.getVariableInstance(); if (variable != null) { + // Check if the variable is a parameter with a specific Systems Biology Ontology (SBO) term if (variable instanceof Parameter) { if (variable.isSetSBOTerm() && SBO.isChildOf(variable.getSBOTerm(), 625)) { + // Disable strict mode and log a warning if the SBO term indicates a flux boundary condition strict = false; logger.warning(format(MESSAGES.getString("FLUX_BOUND_STRICT_CHANGE"), variable.getId())); } } else if (variable instanceof SpeciesReference) { + // Disable strict mode if the variable is a species reference strict = false; } } } } - + /** + * Polishes the list of objectives in the given FBC model plugin. + * This method checks for the presence of objectives and processes each one. + * If no objectives are present, a warning is logged. + * Each objective is checked for the presence of flux objectives, and if absent, attempts to fix them. + * Objectives without any flux objectives are removed from the model. + * + * @param modelPlug The FBCModelPlugin containing the list of objectives to be polished. + */ public void polishListOfObjectives(FBCModelPlugin modelPlug) { if (modelPlug.getObjectiveCount() == 0) { // Note: the strict attribute does not require the presence of any Objectives in the model. 
@@ -97,8 +138,7 @@ public void polishListOfObjectives(FBCModelPlugin modelPlug) { polishListOfFluxObjectives(objective); } } - // removed unused objectives, i.e. those without flux objectives - // modelPlug.getListOfObjectives().remove + // Identify and remove unused objectives, i.e., those without flux objectives Collection removals = modelPlug.getListOfObjectives() .stream() .filter(Predicate.not(Objective::isSetListOfFluxObjectives) @@ -108,6 +148,15 @@ public void polishListOfObjectives(FBCModelPlugin modelPlug) { } } + /** + * Polishes the list of flux objectives within a given objective. + * This method checks for the presence and validity of flux objectives and logs warnings if: + * - No flux objectives are present. + * - There are more than one flux objectives. + * - Flux objectives have invalid coefficients. + * + * @param objective The objective whose flux objectives are to be polished. + */ public void polishListOfFluxObjectives(Objective objective) { if (objective.getFluxObjectiveCount() == 0) { // Note: the strict attribute does not require the presence of any flux objectives. @@ -125,6 +174,13 @@ public void polishListOfFluxObjectives(Objective objective) { } } + /** + * Polishes the list of gene products in the given FBC model plugin. + * This method iterates through each gene product, displays the progress, + * and applies the polishing process to each gene product. + * + * @param fbcModelPlug The FBCModelPlugin containing the list of gene products to be polished. + */ public void polishListOfGeneProducts(FBCModelPlugin fbcModelPlug) { for (GeneProduct geneProduct : fbcModelPlug.getListOfGeneProducts()) { progress.DisplayBar("Polishing Gene Products (8/9) "); @@ -132,14 +188,27 @@ public void polishListOfGeneProducts(FBCModelPlugin fbcModelPlug) { } } + /** + * Iterates over all parameters in the model and polishes each one. + * Displays progress for each parameter polished. + * + * @param model The model containing the parameters to be polished. 
+ */ public void polishListOfParameters(Model model) { - for (int i = 0; i < model.getParameterCount(); i++) { + for (Parameter parameter : model.getListOfParameters()) { progress.DisplayBar("Polishing Parameters (9/9) "); - Parameter parameter = model.getParameter(i); polish(parameter); } } + /** + * Polishes the name of a parameter if it is not already set. + * This method checks if the parameter has an ID but no name. + * If the condition is true, it sets the parameter's name to a polished version of its ID. + * The polishing is done using the {@link PolishingUtils#polishName(String)} method. + * + * @param p The parameter to be polished. + */ private void polish(Parameter p) { if (p.isSetId() && !p.isSetName()) { p.setName(PolishingUtils.polishName(p.getId())); diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/PolishingUtils.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/PolishingUtils.java index 48785d9c..0ae6c3f9 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/PolishingUtils.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/PolishingUtils.java @@ -13,25 +13,35 @@ public class PolishingUtils { private static final transient ResourceBundle MESSAGES = ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); + /** + * Processes and polishes a given identifier name by applying a series of string transformations + * to make it more readable or compliant with certain standards. + * + * @param name The original identifier name to be polished. + * @return The polished version of the identifier name. 
+ */ public static String polishName(String name) { String newName = name; + // Remove leading "?_" if present if (name.startsWith("?_")) { newName = name.substring(2); } + // If the whole name has the form "__...__", replace it with the literal text "(.*)" if (newName.matches("__.*__")) { newName = newName.replaceAll("__.*__", "(.*)"); - } else if (newName.contains("__")) { + } else if (newName.contains("__")) { // Replace standalone double underscores with a hyphen newName = newName.replace("__", "-"); } + // Replace the last underscore with " - "; the digit parts of the pattern are optional, so this applies to effectively any name that still contains an underscore if (newName.matches(".*_C?\\d*.*\\d*")) { - newName = - newName.substring(0, newName.lastIndexOf('_')) + " - " + newName.substring(newName.lastIndexOf('_') + 1); + newName = newName.substring(0, newName.lastIndexOf('_')) + " - " + newName.substring(newName.lastIndexOf('_') + 1); } + // Replace all remaining underscores with spaces newName = newName.replace("_", " "); + // Log the change if the name was altered if (!newName.equals(name)) { logger.fine(format(MESSAGES.getString("CHANGED_NAME"), name, newName)); } return newName; } - } diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/ReactionPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/ReactionPolishing.java index 79ac0553..ac8adccb 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/ReactionPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/ReactionPolishing.java @@ -20,6 +20,18 @@ import static java.text.MessageFormat.format; +/** + * This class provides methods to polish and validate SBML reactions according to specific rules and patterns. + * It includes functionality to: + * - Check and set SBO terms based on reaction ID patterns. + * - Polish species references and compartments. + * - Validate and set flux bounds and objectives. + * - Convert gene associations from reaction notes to FBCv2 format. + * - Check mass and atom balance of reactions.
+ * + * The class operates on an SBML {@link Reaction} object and modifies it to conform to standards and conventions + * used in systems biology models, particularly those related to flux balance constraints. + */ public class ReactionPolishing { private final static transient Logger logger = Logger.getLogger(ReactionPolishing.class.getName()); @@ -28,42 +40,97 @@ public class ReactionPolishing { de.zbit.util.ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); private final Reaction reaction; + /** + * Defines an enumeration for regex patterns that are used to categorize reactions based on their ID strings. + * Each enum constant represents a specific type of reaction and is associated with a regex pattern that matches + * reaction IDs corresponding to that type. + */ public enum Patterns { + /** + * Pattern for ATP maintenance reactions, which are typically denoted by IDs ending in 'ATPM' in any case. + */ ATP_MAINTENANCE(".*[Aa][Tt][Pp][Mm]"), + + /** + * Case-insensitive pattern for biomass reactions, matching IDs that include the word 'biomass' in any case. + */ BIOMASS_CASE_INSENSITIVE(".*[Bb][Ii][Oo][Mm][Aa][Ss][Ss].*"), + + /** + * Case-sensitive pattern for biomass reactions, matching IDs that specifically contain 'BIOMASS'. + */ BIOMASS_CASE_SENSITIVE(".*BIOMASS.*"), + + /** + * Pattern for default flux bounds, matching IDs that start with 'default_' in any case, optionally after a prefix ending in '_'. + */ DEFAULT_FLUX_BOUND("(.*_)?[Dd][Ee][Ff][Aa][Uu][Ll][Tt]_.*"), + + /** + * Pattern for demand reactions, identified by IDs starting with 'DM_' in any case, optionally after a prefix ending in '_'. + */ DEMAND_REACTION("(.*_)?[Dd][Mm]_.*"), + + /** + * Pattern for exchange reactions, identified by IDs starting with 'EX_' in any case, optionally after a prefix ending in '_'. + */ EXCHANGE_REACTION("(.*_)?[Ee][Xx]_.*"), + + /** + * Pattern for sink reactions, which are reactions that remove metabolites from the system, identified by IDs starting with 'SK_' or 'SINK_'.
+ */ SINK_REACTION("(.*_)?[Ss]([Ii][Nn])?[Kk]_.*"); + /** + * The compiled regex pattern used for matching reaction IDs. + */ private final Pattern pattern; + /** + * Constructs a new enum constant with the specified regex pattern. + * + * @param regex The regex pattern to compile. + */ Patterns(String regex) { pattern = Pattern.compile(regex); } - + /** + * Retrieves the compiled Pattern object for this enum constant. + * + * @return The compiled Pattern object. + */ public Pattern getPattern() { return pattern; } } + /** + * Constructs a new {@code ReactionPolishing} instance for the specified reaction. + * + * @param reaction The reaction to be polished. + */ public ReactionPolishing(Reaction reaction) { this.reaction = reaction; } - /** - * @return {@code true} if the given reaction qualifies for strict FBC. + * Polishes the reaction by applying various checks and modifications to ensure it conforms to + * the expected standards and conventions. This includes setting SBO terms, checking compartments, + * and ensuring proper setup of reactants and products. + * + * @return {@code true} if the reaction qualifies for strict FBC after polishing, {@code false} otherwise. 
*/ @SuppressWarnings("deprecated") public boolean polish() { + // Process any external resources linked via annotations in the reaction Registry.processResources(reaction.getAnnotation()); + + // Retrieve and check the reaction ID String id = reaction.getId(); if (id.isEmpty()) { - // remove species with missing id, produces invalid SBML + // Log severe error and remove reaction if ID is missing if (reaction.isSetName()) { logger.severe(format(MESSAGES.getString("REACTION_MISSING_ID"), reaction.getName())); } else { @@ -72,13 +139,16 @@ public boolean polish() { reaction.getModel().removeReaction(reaction); return false; } + + // Set the SBO term based on the reaction ID pattern BiGGId.createReactionId(id).ifPresent(this::setSBOTermFromPattern); + + // Check and set the compartment of the reaction based on its reactants and products String compartmentId = reaction.isSetCompartment() ? reaction.getCompartment() : null; boolean conflict = false; if (reaction.isSetListOfReactants()) { Optional cIdFromReactants = polish(reaction.getListOfReactants(), SBO.getReactant()); conflict = cIdFromReactants.isEmpty(); - // only set compartment code if all sources agree if (!conflict && (compartmentId == null || compartmentId.equals(cIdFromReactants.get()))) { reaction.setCompartment(cIdFromReactants.get()); } @@ -86,21 +156,26 @@ public boolean polish() { if (reaction.isSetListOfProducts()) { Optional cIdFromProducts = polish(reaction.getListOfProducts(), SBO.getProduct()); conflict |= cIdFromProducts.isEmpty(); - // only set compartment code if all sources agree, else unset if (!conflict && (compartmentId == null || compartmentId.equals(cIdFromProducts.get()))) { reaction.setCompartment(cIdFromProducts.get()); } else { reaction.unsetCompartment(); } } + + // Set meta ID if not set and CV terms are present if (!reaction.isSetMetaId() && (reaction.getCVTermCount() > 0)) { reaction.setMetaId(reaction.getId()); } + + // Remove '_copy' suffix from reaction name if present String 
rName = reaction.getName(); if (rName.matches(".*_copy\\d*")) { rName = rName.substring(0, rName.lastIndexOf('_')); reaction.setName(rName); } + + // Ensure reaction properties are set according to SBML Level and Version if ((!reaction.isSetLevelAndVersion() || reaction.getLevelAndVersion().compareTo(ValuePair.of(3, 1)) <= 0) && !reaction.isSetFast()) { @@ -109,58 +184,63 @@ public boolean polish() { if (!reaction.isSetReversible()) { reaction.setReversible(false); } - // This is a check if we are producing invalid SBML. + + // Check for reactions without reactants or products and log severe error if found if ((reaction.getReactantCount() == 0) && (reaction.getProductCount() == 0)) { ResourceBundle bundle = ResourceManager.getBundle("org.sbml.jsbml.resources.cfg.Messages"); logger.severe(format(bundle.getString("SBMLCoreParser.reactionWithoutParticipantsError"), reaction.getId())); } else { checkBalance(); } - // bounds cannot be fetched, if no model exists, thus for such cases the default should be false + + // Initialize strict mode flag boolean strict = false; - // only run when model is present, as this code either depends on the model - // or creates children objects on the model if (reaction.getModel() != null) { + // Convert gene associations to FBCv2 format and set flux objectives from local parameters GPRParser.convertAssociationsToFBCV2(reaction, Parameters.get().omitGenericTerms()); fluxObjectiveFromLocalParameter(); associationFromNotes(); strict = checkBounds(); } + + // Check validity of reactants and products and update strict mode flag strict = checkReactantsProducts(strict); return strict; } + /** + * Sets the Systems Biology Ontology (SBO) term for a reaction based on the abbreviation of its BiGG ID. + * The method matches the abbreviation against predefined patterns to determine the appropriate SBO term. + * + * @param id The BiGGId object containing the abbreviation to be checked. 
+ */ private void setSBOTermFromPattern(BiGGId id) { String abbrev = id.getAbbreviation(); if (Patterns.BIOMASS_CASE_INSENSITIVE.getPattern().matcher(abbrev).matches()) { - reaction.setSBOTerm(629); // biomass production + reaction.setSBOTerm(629); // Set SBO term for biomass production } else if (Patterns.DEMAND_REACTION.getPattern().matcher(abbrev).matches()) { - reaction.setSBOTerm(628); // demand reaction + reaction.setSBOTerm(628); // Set SBO term for demand reaction } else if (Patterns.EXCHANGE_REACTION.getPattern().matcher(abbrev).matches()) { - reaction.setSBOTerm(627); // exchange reaction + reaction.setSBOTerm(627); // Set SBO term for exchange reaction } else if (Patterns.ATP_MAINTENANCE.getPattern().matcher(abbrev).matches()) { - reaction.setSBOTerm(630); // ATP maintenance + reaction.setSBOTerm(630); // Set SBO term for ATP maintenance } else if (Patterns.SINK_REACTION.getPattern().matcher(abbrev).matches()) { - reaction.setSBOTerm(632); + reaction.setSBOTerm(632); // Set SBO term for sink reaction } } - /** - * Polishes {@link SpeciesReference}s, i.e. reactants or products and tries to retrieve - * a compartment code for the reaction, - * if it can be resolved unambiguously from the references + * This method polishes a list of {@link SpeciesReference} objects, which represent either reactants or products in a reaction. + * It sets default SBO terms and constant values for each species reference, and attempts to determine a common compartment + * for the reaction based on these species references. If all species references are associated with the same compartment, + * this compartment code is returned. Otherwise, it returns an empty {@link Optional}. 
* - * @param speciesReferences: - * List of reactants or products - * @param defaultSBOterm: - * reactant or product SBO term - * @return {@link Optional#empty()} if compartment was not set for one of the species - * or could not be resolved unambiguously, - * else {@link Optional#of}, where the wrapped string is the compartment code + * @param speciesReferences A {@link ListOf} containing reactants or products of a reaction. + * @param defaultSBOterm The default Systems Biology Ontology (SBO) term to assign to species references if not already set. + * @return An {@link Optional} containing the compartment code if it can be unambiguously determined; otherwise, {@link Optional#empty()}. */ private Optional polish(ListOf speciesReferences, int defaultSBOterm) { - // set defaults + // Assign default SBO terms and constant values to species references for (SpeciesReference sr : speciesReferences) { if (!sr.isSetSBOTerm() && !Parameters.get().omitGenericTerms()) { sr.setSBOTerm(defaultSBOterm); @@ -169,7 +249,7 @@ private Optional polish(ListOf speciesReferences, int sr.setConstant(false); } } - // determine common compartment + // Attempt to identify a common compartment for all species references Model model = speciesReferences.getModel(); if (null != model) { var modelSpecies = speciesReferences.stream() @@ -185,89 +265,121 @@ private Optional polish(ListOf speciesReferences, int return Optional.empty(); } - + /** + * Checks the balance of the reaction based on its SBO term and reactant/product counts. + * It sets the SBO term for demand reactions if not already set and checks the atom balance + * for reactions not identified as biomass production, demand, exchange, or ATP maintenance. 
+ */ private void checkBalance() { - // TODO: change messages + // Check if the reaction SBO term is not set if (!reaction.isSetSBOTerm()) { - // The reaction has not been recognized as demand or exchange reaction + // Check if there are no reactants if (reaction.getReactantCount() == 0) { - // fixme: Messages are wrong + // Handle reversible reactions differently if (reaction.isReversible()) { - // TODO: sink reaction + // Placeholder for handling sink reactions + // TODO: Implement sink reaction handling } else if (reaction.getSBOTerm() != 628) { - // logger.info(format(mpMessageBundle.getString("REACTION_DM_NOT_IN_ID"), r.getId())); - reaction.setSBOTerm(628); // demand reaction + // Log and set SBO term for demand reaction if not already set + // logger.info(format(mpMessageBundle.getString("REACTION_DM_NOT_IN_ID"), reaction.getId())); + reaction.setSBOTerm(628); // Set as demand reaction } } else if (reaction.getProductCount() == 0) { + // Handle reversible reactions differently if (reaction.isReversible()) { - // TODO: source reaction + // Placeholder for handling source reactions + // TODO: Implement source reaction handling } else { - // logger.warning(format(mpMessageBundle.getString("REACTION_DM_NOT_IN_ID"), r.getId())); - reaction.setSBOTerm(628); // demand reaction + // Log and set SBO term for demand reaction if not already set + // logger.warning(format(mpMessageBundle.getString("REACTION_DM_NOT_IN_ID"), reaction.getId())); + reaction.setSBOTerm(628); // Set as demand reaction } } } + // Check mass balance if enabled in parameters and reaction is not a special type if (Parameters.get().checkMassBalance() && ((reaction.getSBOTerm() < 627) || (630 < reaction.getSBOTerm()))) { - // check atom balance only if the reaction is not identified as biomass - // production, demand, exchange or ATP maintenance. 
+ // Perform atom balance check AtomCheckResult defects = AtomBalanceCheck.checkAtomBalance(reaction, 1); if ((defects != null) && (defects.hasDefects())) { + // Log warning if atom defects are found logger.warning(format(MESSAGES.getString("ATOMS_MISSING"), reaction.getId(), defects.getDefects().toString())); } else if (defects == null) { + // Log failure to check atom balance logger.fine(format(MESSAGES.getString("CHECK_ATOM_BALANCE_FAILED"), reaction.getId())); } else { + // Log successful atom balance check logger.fine(format(MESSAGES.getString("ATOMS_OK"), reaction.getId())); } } } - /** - * Set flux objective and its coefficient from reaction kinetic law, - * if no flux objective exists for the reaction + * This method sets the flux objective and its coefficient for a reaction based on the kinetic law parameters. + * If the model's FBC plugin has no objective at index 0, an objective "obj" of type MAXIMIZE is created and made active. + * It then checks if a flux objective already exists for the reaction. If not, it attempts to retrieve the + * "OBJECTIVE_COEFFICIENT" from the reaction's kinetic law and, if it is present and non-zero, uses it to create + * a new flux objective for the reaction with the retrieved coefficient value.
*/ private void fluxObjectiveFromLocalParameter() { + // Retrieve the FBC model plugin from the reaction's model FBCModelPlugin modelPlugin = (FBCModelPlugin) reaction.getModel().getPlugin(FBCConstants.shortLabel); + // Attempt to get the first objective, or create one if none exist Objective obj = modelPlugin.getObjective(0); if (obj == null) { obj = modelPlugin.createObjective("obj"); obj.setType(Objective.Type.MAXIMIZE); modelPlugin.getListOfObjectives().setActiveObjective(obj.getId()); } + // Check if a flux objective associated with the reaction already exists boolean foExists = obj.getListOfFluxObjectives().stream() .anyMatch(fo -> fo.getReactionInstance().equals(reaction)); if (foExists) { return; } + // Retrieve the kinetic law of the reaction, if it exists KineticLaw kl = reaction.getKineticLaw(); if (kl != null) { + // Attempt to get the objective coefficient from the kinetic law LocalParameter coefficient = kl.getLocalParameter("OBJECTIVE_COEFFICIENT"); if (coefficient != null && coefficient.getValue() != 0d) { + // Create a new flux objective with the coefficient and associate it with the reaction FluxObjective fo = obj.createFluxObjective("fo_" + reaction.getId()); fo.setCoefficient(coefficient.getValue()); fo.setReaction(reaction); } } } - - + /** - * Convert GENE_ASSOCIATION in reaction notes to FBCv2 {#GeneProductAssociation} + * This method extracts gene associations from the notes of a reaction and converts them into + * the FBCv2 GeneProductAssociation format. It specifically looks for notes tagged with "GENE_ASSOCIATION:" + * and processes them to set the gene product association for the reaction if it has not been set already. */ private void associationFromNotes() { + // Obtain the FBC plugin for the reaction to handle FBC-specific features. FBCReactionPlugin reactionPlugin = (FBCReactionPlugin) reaction.getPlugin(FBCConstants.shortLabel); + + // Check if the gene product association is not already set and if the reaction has notes. 
if (!reactionPlugin.isSetGeneProductAssociation() && reaction.isSetNotes()) { + // Retrieve the 'body' element from the reaction notes. XMLNode body = reaction.getNotes().getChildElement("body", null); + + // Process each paragraph within the body that contains exactly one child node. if (body != null) { for (XMLNode p : body.getChildElements("p", null)) { if (p.getChildCount() == 1) { String associationCandidate = p.getChildAt(0).getCharacters(); + + // Check if the text starts with the expected gene association tag. if (associationCandidate.startsWith("GENE_ASSOCIATION: ")) { String[] splits = associationCandidate.split("GENE_ASSOCIATION: "); + + // Ensure the string was split into exactly two parts and the second part is not empty. if (splits.length == 2) { String association = splits[1]; if (!association.isEmpty()) { + // Parse the gene product association and apply it to the reaction. GPRParser.parseGPR(reaction, association, Parameters.get().omitGenericTerms()); } } @@ -280,10 +392,10 @@ private void associationFromNotes() { /** - * Check if existing FBC flux bounds fulfill the strict requirement. - * Bounds with no instance present are tried to be inferred from the reaction {#KineticLaw} + * Checks if the existing FBC flux bounds are strictly defined and attempts to infer missing bounds from the reaction's kinetic law. + * If bounds are not set, it creates and assigns new global parameters as flux bounds according to the FBC specification. * - * @return + * @return true if both lower and upper flux bounds exist and are strictly defined, false otherwise. */ private boolean checkBounds() { FBCReactionPlugin rPlug = (FBCReactionPlugin) reaction.getPlugin(FBCConstants.shortLabel); @@ -336,7 +448,12 @@ private boolean checkBounds() { /** - * @return {@code true} if this method successfully updated the bound parameter. + * Polishes the SBO term of a flux bound parameter based on its ID. 
+ * If the parameter's ID matches the default flux bound pattern, it sets the SBO term to 626. + * Otherwise, it sets the SBO term to 625. + * + * @param bound The parameter representing a flux bound. + * @return {@code true} if the parameter is not null and was successfully updated; {@code false} if the parameter is null. */ public boolean polishFluxBound(Parameter bound) { if (bound == null) { @@ -350,10 +467,13 @@ public boolean polishFluxBound(Parameter bound) { return true; } - /** - * @param parameterName: - * LOWER_BOUND or UPPER_BOUND + * Retrieves a local parameter from a reaction's kinetic law based on the specified parameter name. + * This method specifically looks for parameters that define either the lower or upper flux bounds. + * + * @param r The reaction from which the kinetic law and the parameter are to be retrieved. + * @param parameterName The name of the parameter to retrieve, expected to be either "LOWER_BOUND" or "UPPER_BOUND". + * @return The local parameter if found, or {@code null} if the kinetic law is not defined or the parameter does not exist. */ private LocalParameter getBoundFromLocal(Reaction r, String parameterName) { KineticLaw kl = r.getKineticLaw(); @@ -365,12 +485,15 @@ private LocalParameter getBoundFromLocal(Reaction r, String parameterName) { /** - * @param bound: - * lower or upper bound instance - * @param boundValue: - * value of {#LocalParameter} bound obtained - * from {{@link #getBoundFromLocal(Reaction, String)}} - * @return + * Retrieves a modified {@link Parameter} instance based on the specified bound value. + * This method adjusts the ID of the {@link Parameter} based on predefined threshold values. + * If the bound value matches a specific threshold, the ID is set to a corresponding default value. + * Otherwise, the ID is customized using the reaction's ID combined with the original bound's ID. + * + * @param r The {@link Reaction} instance from which the model and parameter are derived. 
+ * @param bound The {@link Parameter} instance representing either a lower or upper bound. + * @param boundValue The numeric value of the bound, which determines how the {@link Parameter}'s ID is set. + * @return The {@link Parameter} with its ID modified based on the bound value. */ private Parameter getParameterVariant(Reaction r, Parameter bound, double boundValue) { if (boundValue == -1000d) { @@ -385,18 +508,18 @@ private Parameter getParameterVariant(Reaction r, Parameter bound, double boundV return r.getModel().getParameter(bound.getId()); } - /** - * Checks if a given bound parameter satisfies the required properties of a - * strict flux bound parameter: - *
 <li>not null - *
 <li>constant - *
 <li>defined value - * other than {@link Double#NaN} + * Evaluates whether a {@link Parameter} instance meets the criteria to be considered a valid strict flux bound. + * A strict flux bound parameter must: + *
 <ul>
 + * <li>Not be null</li>
 + * <li>Be constant</li>
 + * <li>Have a defined value that is not {@link Double#NaN}</li>
 + * </ul>
    + * This method is used to ensure parameters can be reliably used in strict FBC (Flux Balance Constraints) models. * - * @param bound - * @return {@code true} if the given parameter can be used as a flux bound in - * strict FBC models, {@code false} otherwise. + * @param bound The {@link Parameter} to check. + * @return {@code true} if the parameter qualifies as a strict flux bound, {@code false} otherwise. */ public boolean checkBound(Parameter bound) { return (bound != null) && bound.isConstant() @@ -405,6 +528,14 @@ public boolean checkBound(Parameter bound) { } + /** + * Checks the validity of reactants and products in a reaction based on specified criteria. + * This method evaluates whether all reactants and products meet the criteria defined in {@link #checkSpeciesReferences(ListOf)}. + * If any reactant or product does not meet the criteria, a warning is logged. + * + * @param strict A boolean flag indicating whether the check should be strictly enforced. + * @return {@code true} if all reactants and products meet the criteria when strict is {@code true}, {@code false} otherwise. + */ private boolean checkReactantsProducts(boolean strict) { if (strict && reaction.isSetListOfReactants()) { strict = checkSpeciesReferences(reaction.getListOfReactants()); @@ -420,8 +551,18 @@ private boolean checkReactantsProducts(boolean strict) { } return strict; } - - + + /** + * Checks if all species references in a list meet certain criteria. + * Each species reference must: + * - Be constant. + * - Have a set stoichiometry. + * - Have a value that is not NaN (Not a Number). + * - Have a value that is finite. + * + * @param listOfSpeciesReference The list of {@link SpeciesReference} objects to check. + * @return {@code true} if all species references in the list meet the criteria, {@code false} otherwise. 
+ */ public boolean checkSpeciesReferences(ListOf listOfSpeciesReference) { boolean strict = true; for (SpeciesReference sr : listOfSpeciesReference) { diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/SBMLPolisher.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/SBMLPolisher.java index 18598aca..6e3531ad 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/SBMLPolisher.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/SBMLPolisher.java @@ -30,6 +30,12 @@ import static java.text.MessageFormat.format; /** + * This class provides functionality to polish an SBML (Systems Biology Markup Language) document. + * Polishing involves enhancing the document with additional annotations, setting appropriate SBO (Systems Biology Ontology) terms, + * and ensuring the document adheres to certain standards and conventions useful for computational models in systems biology. + * The class supports operations such as checking the document's structure, polishing individual model components, + * and processing external resources linked within the document. + * * @author Andreas Dräger */ public class SBMLPolisher { @@ -38,12 +44,14 @@ public class SBMLPolisher { * A {@link Logger} for this class. */ private static final transient Logger logger = Logger.getLogger(SBMLPolisher.class.getName()); + /** * Bundle for ModelPolisher logger messages */ private static final transient ResourceBundle MESSAGES = ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); + /** - * + * Progress bar to visually indicate the progress of the polishing process. */ protected AbstractProgressBar progress; @@ -55,77 +63,110 @@ public SBMLPolisher() { /** - * Entry point from #ModelPolisher class + * This method serves as the entry point from the ModelPolisher class to polish an SBML document. + * It ensures the document contains a model, performs a sanity check, polishes the model, sets the SBO term, + * marks the progress as finished if applicable, and processes any linked resources. 
* - * @param doc: - * SBMLDocument containing the model to polish - * @return SBMLDocument containing polished model + * @param doc The SBMLDocument containing the model to be polished. + * @return The polished SBMLDocument. */ public SBMLDocument polish(SBMLDocument doc) { + // Check if the document has a model set, log severe error if not. if (!doc.isSetModel()) { logger.severe(MESSAGES.getString("NO_MODEL_FOUND")); return doc; } + // Retrieve the model from the document. Model model = doc.getModel(); + + // Polish the model. polish(model); - doc.setSBOTerm(624); // flux balance framework + // Set the SBO term for the document to indicate a flux balance framework. + doc.setSBOTerm(624); + // If a progress bar is set, mark the progress as finished. if (progress != null) { progress.finished(); } + // Process any external resources linked in the document's annotations. Registry.processResources(doc.getAnnotation()); return doc; } /** - * Main method delegating all polishing tasks + * This method orchestrates the polishing of an SBML model by delegating tasks to specific polishing methods + * for different components of the model. It initializes a progress bar to track and display the progress of + * the polishing process. * - * @param model: - * SBML Model to polish + * @param model The SBML Model to be polished. */ public void polish(Model model) { + // Log the start of processing the model. logger.info(format(MESSAGES.getString("PROCESSING_MODEL"), model.getId())); - // initialize ProgressBar - int count = 1 // for model properties - + model.getUnitDefinitionCount() + model.getCompartmentCount() + model.getParameterCount() - + model.getReactionCount() + model.getSpeciesCount() + model.getInitialAssignmentCount(); + + // Calculate the total number of tasks to initialize the progress bar. 
+ int count = 1 // Account for model properties + + model.getUnitDefinitionCount() + + model.getCompartmentCount() + + model.getParameterCount() + + model.getReactionCount() + + model.getSpeciesCount() + + model.getInitialAssignmentCount(); + + // Include tasks from FBC plugin if present. if (model.isSetPlugin(FBCConstants.shortLabel)) { FBCModelPlugin fbcModelPlug = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); count += fbcModelPlug.getObjectiveCount() + fbcModelPlug.getGeneProductCount(); } + + // Initialize the progress bar with the total count of tasks. progress = new ProgressBar(count); progress.DisplayBar("Polishing Model (1/9) "); + + // Delegate polishing tasks to specific methods. new UnitPolishing(model, progress).polishListOfUnitDefinitions(); polishListOfCompartments(model); polishListOfSpecies(model); boolean strict = polishListOfReactions(model); + + // Perform final polishing adjustments based on the strictness of the reactions. ModelPolishing modelPolishing = new ModelPolishing(model, strict, progress); modelPolishing.polish(); } /** - * @param model + * Polishes all compartments in the given SBML model. This method iterates through each compartment + * in the model, updates the progress display, and applies polishing operations defined in the + * CompartmentPolishing class. + * + * @param model The SBML Model containing the compartments to be polished. */ public void polishListOfCompartments(Model model) { - for (int i = 0; i < model.getCompartmentCount(); i++) { + for (Compartment compartment : model.getListOfCompartments()) { progress.DisplayBar("Polishing Compartments (3/9) "); - CompartmentPolishing compartmentPolishing = new CompartmentPolishing(model.getCompartment(i)); + CompartmentPolishing compartmentPolishing = new CompartmentPolishing(compartment); compartmentPolishing.polish(); } } /** - * @param model + * Polishes the list of species in the given SBML model. 
This method iterates through each species, + * applies polishing operations, and collects species that need to be removed based on the polishing results. + * Removal is based on criteria defined in the SpeciesPolishing class. + * + * @param model The SBML Model containing the species to be polished. */ public void polishListOfSpecies(Model model) { List speciesToRemove = new ArrayList<>(); for (Species species : model.getListOfSpecies()) { - progress.DisplayBar("Polishing Species (4/9) "); // "Processing species " + species.getId()); + progress.DisplayBar("Polishing Species (4/9) "); // Update progress display for each species SpeciesPolishing speciesPolishing = new SpeciesPolishing(species); + // Polish each species and collect those that need to be removed speciesPolishing.polish().ifPresent(speciesToRemove::add); } + // Remove the collected species from the model for (Species species : speciesToRemove) { model.removeSpecies(species); } @@ -133,8 +174,12 @@ public void polishListOfSpecies(Model model) { /** - * @param model - * @return + * Polishes all reactions in the given SBML model. This method iterates through each reaction, + * updates the progress display, and applies polishing operations defined in the ReactionPolishing class. + * It also aggregates a strictness flag that indicates if all reactions conform to strict FBC (Flux Balance Constraints) standards. + * + * @param model The SBML Model containing the reactions to be polished. + * @return true if all reactions are strictly defined according to FBC standards, false otherwise. 
*/ public boolean polishListOfReactions(Model model) { boolean strict = true; diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/SpeciesPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/SpeciesPolishing.java index 585e0b3a..5e9e1eb0 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/SpeciesPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/SpeciesPolishing.java @@ -14,6 +14,11 @@ import static java.text.MessageFormat.format; +/** + * This class is responsible for polishing {@link Species} objects in an SBML model to ensure they conform to + * specific standards and completeness. It handles the annotation processing, ID validation, boundary condition settings, + * and default attribute assignments for species within the model. + */ public class SpeciesPolishing { private static final transient Logger logger = Logger.getLogger(SpeciesPolishing.class.getName()); @@ -26,16 +31,21 @@ public SpeciesPolishing(Species species) { this.species = species; } - /** - * @return {@link Optional} of a {@link Species} that should be removed - * from the model due to missing id + * Polishes the properties of a {@link Species} to ensure compliance with standards and completeness. + * This method processes annotations, checks for missing IDs, sets boundary conditions, and ensures + * that mandatory attributes are set to default values. + * + * @return {@link Optional} of a {@link Species} that should be removed from the model due to missing ID. + * If the ID is present, returns an empty {@link Optional}. 
*/ public Optional polish() { + // Process any external resources linked via annotations in the species Registry.processResources(species.getAnnotation()); String id = species.getId(); + + // Check if the species ID is missing and log an error if so if (id.isEmpty()) { - // remove species with missing id, produces invalid SBML if (species.isSetName()) { logger.severe(format(MESSAGES.getString("SPECIES_MISSING_ID"), species.getName())); } else { @@ -44,6 +54,7 @@ public Optional polish() { return Optional.of(species); } + // Warn if the species ID indicates a boundary species but the boundary condition is not set if (species.getId().endsWith("_boundary")) { logger.warning(format(MESSAGES.getString("SPECIES_ID_INVALID"), id)); if (!species.isSetBoundaryCondition() || !species.isBoundaryCondition()) { @@ -54,7 +65,7 @@ public Optional polish() { species.setBoundaryCondition(false); } - // Set mandatory attributes to default values + // Set default values for mandatory attributes if they are not already set if (!species.isSetHasOnlySubstanceUnits()) { species.setHasOnlySubstanceUnits(true); } @@ -64,6 +75,8 @@ public Optional polish() { if ((species.getCVTermCount() > 0) && !species.isSetMetaId()) { species.setMetaId(species.getId()); } + + // Check and potentially update the compartment reference based on BiGG ID BiGGId.createMetaboliteId(id).ifPresent(biggId -> { if (biggId.isSetCompartmentCode() && species.isSetCompartment() && !biggId.getCompartmentCode().equals(species.getCompartment())) { @@ -72,37 +85,54 @@ public Optional polish() { species.setCompartment(biggId.getCompartmentCode()); } }); + + // Check the compartment of the species checkCompartment(species); return Optional.empty(); } - + /** + * Checks and sets the compartment for a given species. If the species does not have a compartment set, + * it attempts to set it using the BiGG ID compartment code. If the compartment is still not set or found, + * it logs a warning and creates a new compartment. 
+ * + * @param species The species whose compartment needs to be checked or set. + */ public void checkCompartment(Species species) { + // Check if the compartment is already set for the species if (!species.isSetCompartment()) { + // Attempt to get the BiGG ID for the species Optional biggId = BiGGId.createMetaboliteId(species.getId()); boolean setCompartment = false; + // If BiGG ID is present, check for compartment code and set it if (biggId.isPresent()) { if (biggId.get().isSetCompartmentCode()) { species.setCompartment(biggId.get().getCompartmentCode()); setCompartment = true; } } + // If compartment is not set, exit the method if (!setCompartment) { return; } } + // Get the compartment ID from the species String cId = species.getCompartment(); + // Get the model associated with the species Model model = species.getModel(); + // If model is not available, exit the method if (model == null) { return; } + // Find the compartment in the model using the compartment ID SBase candidate = model.findUniqueNamedSBase(cId); + // If the found SBase is a Compartment, polish it if (candidate instanceof Compartment) { - // compartment can't be null here, instanceof would evaluate to false CompartmentPolishing compartmentPolishing = new CompartmentPolishing((Compartment) candidate); compartmentPolishing.polish(); } else if (candidate == null) { + // If no compartment is found, log a warning and create a new compartment logger.warning(format(MESSAGES.getString("CREATE_MISSING_COMP"), cId, species.getId(), species.getElementName())); CompartmentPolishing compartmentPolishing = new CompartmentPolishing(model.createCompartment(cId)); compartmentPolishing.polish(); diff --git a/src/main/java/edu/ucsd/sbrg/bigg/polishing/UnitPolishing.java b/src/main/java/edu/ucsd/sbrg/bigg/polishing/UnitPolishing.java index 985321d3..d7666753 100644 --- a/src/main/java/edu/ucsd/sbrg/bigg/polishing/UnitPolishing.java +++ b/src/main/java/edu/ucsd/sbrg/bigg/polishing/UnitPolishing.java @@ -10,6 
+10,16 @@ import java.util.ResourceBundle; import java.util.logging.Logger; +/** + * This class is responsible for ensuring that all necessary {@link UnitDefinition}s and {@link Unit}s are correctly + * defined and present in the SBML model. It handles the creation and verification of units used in the model, + * particularly focusing on the units related to growth, substance, time, and volume. + * + * Additionally, this class is responsible for accurately assigning these defined units to specific components + * such as reactions, species, and parameters within the model as needed. + * This ensures that all these components adhere uniformly to the correct unit specifications, + * maintaining consistency and accuracy throughout the model's unit definitions. + */ public class UnitPolishing { private static final transient Logger logger = Logger.getLogger(UnitPolishing.class.getName()); @@ -38,26 +48,38 @@ public UnitPolishing(Model model, AbstractProgressBar progress) { } /** - * Check that all basic {@link UnitDefinition}s and {@link Unit}s exist and creates them, if not + * Ensures that all necessary {@link UnitDefinition}s and {@link Unit}s are present in the model. + * If any are missing, they are created and added to the model. This method also sets the model's + * extent and substance units if they are not already set. 
*/ public void polishListOfUnitDefinitions() { - progress.DisplayBar("Polishing Unit Definitions (2/9) "); // "Processing unit definitions"); + // Update progress bar to indicate the current process stage + progress.DisplayBar("Polishing Unit Definitions (2/9) "); + + // Retrieve the total number of unit definitions in the model int udCount = model.getUnitDefinitionCount(); + // Fetch all unit definitions from the model var unitDefinitions = model.getListOfUnitDefinitions(); + // Create or retrieve a growth unit definition var growth = createGrowthUnitDefinition(); + // Assign the growth unit definition to the model setModelUnits(growth, unitDefinitions); + // Get the instance of substance units from the model UnitDefinition substanceUnits = model.getSubstanceUnitsInstance(); + // Set the extent units of the model if not already set if (!model.isSetExtentUnits()) model.setExtentUnits(substanceUnits.getId()); + // Set the substance units of the model if not already set if (!model.isSetSubstanceUnits()) model.setSubstanceUnits(substanceUnits.getId()); + // Continue updating the progress bar until all unit definitions are processed while (progress.getCallNumber() < udCount) { - progress.DisplayBar("Polishing Unit Definitions (2/9) "); + progress.DisplayBar("Polishing Unit Definitions (2/9) "); } } @@ -82,15 +104,29 @@ private UnitDefinition defaultGrowthUnitDefinition() { return growth; } + /** + * Creates a growth unit definition for the model. If an equivalent growth unit already exists, + * it modifies the ID of the existing unit to indicate it was preexisting. This method ensures + * that the growth unit is properly annotated and added to the model. + * + * @return The newly created or modified growth unit definition. + */ private UnitDefinition createGrowthUnitDefinition() { + // Create a default growth unit definition. var growth = defaultGrowthUnitDefinition(); + // Check if an equivalent growth unit already exists in the model. 
var otherGrowth = findGrowthUnit(model.getListOfUnitDefinitions(), growth).orElse(growth); + // Set the meta ID of the growth unit if it is not already set. if (!growth.isSetMetaId()) growth.setMetaId(growth.getId()); + // Annotate the growth unit definition with relevant metadata. annotateGrowthUnitDefinition(growth); + // If the found growth unit is the same as the newly created one and it already exists in the model, + // change its ID to indicate that it was preexisting. if (otherGrowth.equals(growth) && null != model.getUnitDefinition(GROWTH_UNIT_ID)) { model.getUnitDefinition(GROWTH_UNIT_ID).setId(GROWTH_UNIT_ID + "__preexisting"); } + // Add the growth unit definition to the model. model.addUnitDefinition(growth); return growth; } @@ -100,91 +136,148 @@ private CVTerm genericUnitAnnotation(Unit u) { u.getKind().getUnitOntologyIdentifier()); } + /** + * Annotates a growth unit definition with controlled vocabulary (CV) terms. + * This method adds specific annotations based on the unit kind and its properties. + * + * @param growth The UnitDefinition instance representing the growth unit to be annotated. + */ private void annotateGrowthUnitDefinition(UnitDefinition growth) { + // Annotate the growth unit with a general descriptor from PubMed. growth.addCVTerm(CV_TERM_DESCRIBED_BY_PUBMED_GROWTH_UNIT); - getUnitByKind(growth, Unit.Kind.MOLE).ifPresent( - u -> { - switch (u.getScale()) { - case -3: u.addCVTerm(CV_TERM_IS_UO_MMOL); break; - default: - u.addCVTerm(this.genericUnitAnnotation(u)); - } - } - ); + + // Annotate the 'mole' unit based on its scale. + getUnitByKind(growth, Unit.Kind.MOLE).ifPresent(u -> { + switch (u.getScale()) { + case -3: + // If the scale is -3, it's millimoles. + u.addCVTerm(CV_TERM_IS_UO_MMOL); + break; + default: + // For other scales, use a generic annotation. + u.addCVTerm(this.genericUnitAnnotation(u)); + } + }); + + // Annotate the 'gram' unit generically. 
getUnitByKind(growth, Unit.Kind.GRAM).ifPresent(this::genericUnitAnnotation); - getUnitByKind(growth, Unit.Kind.SECOND).ifPresent( - u -> { - switch (Double.valueOf(u.getMultiplier()).intValue()) { - case 1: u.addCVTerm(CV_TERM_IS_UO_SECOND); break; - case 3600: u.addCVTerm(CV_TERM_IS_UO_HOUR); break; - default: - u.addCVTerm(this.genericUnitAnnotation(u)); - } - }); + + // Annotate the 'second' unit based on its multiplier. + getUnitByKind(growth, Unit.Kind.SECOND).ifPresent(u -> { + switch (Double.valueOf(u.getMultiplier()).intValue()) { + case 1: + // If the multiplier is 1, it's seconds. + u.addCVTerm(CV_TERM_IS_UO_SECOND); + break; + case 3600: + // If the multiplier is 3600, it's hours. + u.addCVTerm(CV_TERM_IS_UO_HOUR); + break; + default: + // For other multipliers, use a generic annotation. + u.addCVTerm(this.genericUnitAnnotation(u)); + } + }); } /** - * Sets substance, volume and time units for model from the models unit definitions, - * or the growth unit, - * if not set. + * This method sets the substance, volume, and time units for the model based on the provided unit definitions + * or based on the growth unit if the specific units are not predefined in the model. + * + * @param growth The UnitDefinition instance representing the growth unit. + * @param unitDefinitions A ListOf containing predefined unit definitions for substance, time, and volume. 
*/ private void setModelUnits(UnitDefinition growth, ListOf unitDefinitions) { + // Handle setting of substance units var substanceUnits = model.getSubstanceUnitsInstance(); - if (substanceUnits == null) - if (unitDefinitions.get(UnitDefinition.SUBSTANCE) != null) + if (substanceUnits == null) { + if (unitDefinitions.get(UnitDefinition.SUBSTANCE) != null) { model.setSubstanceUnits(UnitDefinition.SUBSTANCE); - else + } else { model.setSubstanceUnits(createSubstanceUnit(growth)); + } + } model.getSubstanceUnitsInstance().addCVTerm(CV_TERM_IS_SUBSTANCE_UNIT); + + // Handle setting of time units var timeUnits = model.getTimeUnitsInstance(); - if (timeUnits == null) - if (unitDefinitions.get(UnitDefinition.TIME) != null) + if (timeUnits == null) { + if (unitDefinitions.get(UnitDefinition.TIME) != null) { model.setTimeUnits(UnitDefinition.TIME); - else + } else { model.setTimeUnits(createTimeUnit(growth)); + } + } model.getTimeUnitsInstance().addCVTerm(CV_TERM_IS_TIME_UNIT); + + // Handle setting of volume units var volumeUnits = model.getVolumeUnitsInstance(); - if (volumeUnits == null && unitDefinitions.get(UnitDefinition.VOLUME) != null) + if (volumeUnits == null && unitDefinitions.get(UnitDefinition.VOLUME) != null) { model.setVolumeUnits(UnitDefinition.VOLUME); - if (null != volumeUnits) + } + if (volumeUnits != null) { model.getVolumeUnitsInstance().addCVTerm(CV_TERM_IS_VOLUME_UNIT); + } } + /** + * Creates a new UnitDefinition for substance units based on the growth UnitDefinition. + * If specific units for GRAM or MOLE are present in the growth definition, those are cloned. + * Otherwise, default units are created and added to the new UnitDefinition. + * + * @param growth The UnitDefinition instance representing the growth unit. + * @return The newly created UnitDefinition with appropriate substance units. 
+ */ private UnitDefinition createSubstanceUnit(UnitDefinition growth) { - final var substanceUnits = model.createUnitDefinition(UnitDefinition.SUBSTANCE); - getUnitByKind(growth, Unit.Kind.GRAM).ifPresentOrElse( - unit -> substanceUnits.addUnit(safeClone(unit)), - () -> { - var u = substanceUnits.createUnit(Unit.Kind.GRAM); - u.setMultiplier(1); - u.setExponent(-1d); - u.setScale(0); - u.addCVTerm(CV_TERM_IS_UO_GRAM); - }); - getUnitByKind(growth, Unit.Kind.MOLE).ifPresentOrElse( - unit -> substanceUnits.addUnit(safeClone(unit)), - () -> { - var u = substanceUnits.createUnit(Unit.Kind.MOLE); - u.setMultiplier(1); - u.setExponent(1d); - u.setScale(-3); - u.addCVTerm(CV_TERM_IS_UO_MMOL); - u.addCVTerm(CV_TERM_IS_VERSION_OF_UO_MOLE); - }); - return substanceUnits; + final var substanceUnits = model.createUnitDefinition(UnitDefinition.SUBSTANCE); + + // Handle GRAM units: clone if present, otherwise create default GRAM unit + getUnitByKind(growth, Unit.Kind.GRAM).ifPresentOrElse( + unit -> substanceUnits.addUnit(safeClone(unit)), + () -> { + var u = substanceUnits.createUnit(Unit.Kind.GRAM); + u.setMultiplier(1); + u.setExponent(-1d); + u.setScale(0); + u.addCVTerm(CV_TERM_IS_UO_GRAM); + }); + + // Handle MOLE units: clone if present, otherwise create default MOLE unit + getUnitByKind(growth, Unit.Kind.MOLE).ifPresentOrElse( + unit -> substanceUnits.addUnit(safeClone(unit)), + () -> { + var u = substanceUnits.createUnit(Unit.Kind.MOLE); + u.setMultiplier(1); + u.setExponent(1d); + u.setScale(-3); + u.addCVTerm(CV_TERM_IS_UO_MMOL); + u.addCVTerm(CV_TERM_IS_VERSION_OF_UO_MOLE); + }); + + return substanceUnits; } + /** + * Creates a new UnitDefinition for time units based on the growth UnitDefinition. + * If a unit of kind SECOND is present in the growth definition, it is cloned and adjusted if necessary. + * If no such unit is present, a default unit representing an hour is created. + * + * @param growth The UnitDefinition instance representing the growth unit. 
+ * @return The newly created UnitDefinition with appropriate time units. + */ private UnitDefinition createTimeUnit(UnitDefinition growth) { final var timeUnitDefinition = model.createUnitDefinition(UnitDefinition.TIME); getUnitByKind(growth, Unit.Kind.SECOND).ifPresentOrElse( unit -> { var timeUnit = safeClone(unit); + // Ensure the exponent is positive if(timeUnit.getExponent() < 0) timeUnit.setExponent(timeUnit.getExponent() * -1); timeUnitDefinition.addUnit(timeUnit); }, () -> { var timeUnit = timeUnitDefinition.createUnit(Unit.Kind.SECOND); + // Set properties for the default hour unit timeUnit.setMultiplier(3600); timeUnit.setScale(0); timeUnit.setExponent(1d); @@ -195,6 +288,13 @@ private UnitDefinition createTimeUnit(UnitDefinition growth) { return timeUnitDefinition; } + /** + * Retrieves the first unit of a specified kind from a given UnitDefinition. + * + * @param ud the UnitDefinition from which to retrieve the unit + * @param kind the kind of unit to retrieve + * @return an Optional containing the unit if found, or an empty Optional if no such unit exists + */ private Optional getUnitByKind(UnitDefinition ud, Unit.Kind kind) { return ud.getListOfUnits().stream() .filter(unit -> unit.getKind().equals(kind)) diff --git a/src/main/java/edu/ucsd/sbrg/db/AnnotateDB.java b/src/main/java/edu/ucsd/sbrg/db/AnnotateDB.java index ca29dfc2..03dfae8c 100644 --- a/src/main/java/edu/ucsd/sbrg/db/AnnotateDB.java +++ b/src/main/java/edu/ucsd/sbrg/db/AnnotateDB.java @@ -52,40 +52,45 @@ public static void init(String host, String port, String user, String passwd, St } - /** - * - */ + public static void close() { connector.close(); } - /** - * @return - */ + public static boolean inUse() { return connector != null; } /** - * @param type - * @param biggId - * @return + * Retrieves a set of annotated URLs based on the type and BiGG ID provided. + * This method queries the database to find matching annotations and constructs URLs using the retrieved data. 
+ * + * @param type The type of the BiGG ID, which can be either a metabolite or a reaction. + * @param biggId The BiGG ID for which annotations are to be retrieved. The ID may be modified if it starts + * with specific prefixes or ends with an underscore. + * @return A sorted set of URLs that are annotations for the given BiGG ID. If the type is neither metabolite + * nor reaction, or if an SQL exception occurs, an empty set is returned. */ public static Set getAnnotations(String type, String biggId) { TreeSet annotations = new TreeSet<>(); + // Check if the type is valid for querying annotations if (!type.equals(BIGG_METABOLITE) && !type.equals(BIGG_REACTION)) { return annotations; } + // Adjust the BiGG ID if it starts with a known prefix if (type.equals(BIGG_METABOLITE) && biggId.startsWith(METABOLITE_PREFIX)) { biggId = biggId.substring(2); } else if (type.equals(BIGG_REACTION) && biggId.startsWith(REACTION_PREFIX)) { biggId = biggId.substring(2); } + // Truncate the BiGG ID when it ends with an underscore (NOTE(review): length() - 2 drops the last two characters, not only the underscore - confirm this is intended) if (biggId.endsWith("_")) { biggId = biggId.substring(0, biggId.length() - 2); } + // SQL query to fetch annotations String query = "SELECT m." + TARGET_TERM + ", ac." + URLPATTERN + " FROM " + MAPPING_VIEW + " m, " + ADB_COLLECTION + " ac WHERE m." + SOURCE_NAMESPACE + " = ? AND m." + SOURCE_TERM + " = ? AND ac." + NAMESPACE + " = m."
+ TARGET_NAMESPACE; @@ -95,6 +100,7 @@ public static Set getAnnotations(String type, String biggId) { pStatement.setString(1, type); pStatement.setString(2, biggId); ResultSet resultSet = pStatement.executeQuery(); + // Process each result and construct the URL while (resultSet.next()) { String uri = resultSet.getString(URLPATTERN); String id = resultSet.getString(TARGET_TERM); diff --git a/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java b/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java index 464e8185..b99fc6af 100644 --- a/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java +++ b/src/main/java/edu/ucsd/sbrg/db/BiGGDB.java @@ -69,9 +69,7 @@ public static void init(String host, String port, String user, String passwd, St } - /** - * - */ + public static void close() { connector.close(); } @@ -86,7 +84,13 @@ public static boolean inUse() { /** - * @return + * Retrieves the version date of the BiGG database. + * + * This method queries the database to fetch the date and time of the last update + * from the DATABASE_VERSION table. It returns an {@link Optional} containing the + * date if the query is successful and the date exists, otherwise it returns an empty {@link Optional}. + * + * @return {@link Optional} The date of the last database update, or an empty {@link Optional} if not available. */ public static Optional getBiGGVersion() { Optional date = Optional.empty(); @@ -107,9 +111,13 @@ public static Optional getBiGGVersion() { /** - * @param modelBiGGid - * @param reactionBiGGid - * @return + * Retrieves a list of distinct subsystems associated with a specific model and reaction BiGG IDs. + * This method executes a SQL query to fetch subsystems from the database where both the model and reaction + * match the provided BiGG IDs. Only subsystems with a non-zero length are considered. + * + * @param modelBiGGid The BiGG ID of the model. + * @param reactionBiGGid The BiGG ID of the reaction. + * @return A List of subsystem names as strings. 
Returns an empty list if no subsystems are found or if an error occurs. */ public static List getSubsystems(String modelBiGGid, String reactionBiGGid) { String query = "SELECT DISTINCT mr." + SUBSYSTEM + " FROM " + REACTION + " r, " + MODEL + " m, " + MODEL_REACTION @@ -135,8 +143,12 @@ public static List getSubsystems(String modelBiGGid, String reactionBiGG /** - * @param reactionBiGGid - * @return + * Retrieves a list of distinct subsystems associated with a specific reaction BiGG ID. + * This method executes a SQL query to fetch subsystems from the database where the reaction + * matches the provided BiGG ID. Only subsystems with a non-zero length are considered. + * + * @param reactionBiGGid The BiGG ID of the reaction for which subsystems are to be retrieved. + * @return A List of subsystem names as strings. Returns an empty list if no subsystems are found or if an error occurs. */ public static List getSubsystemsForReaction(String reactionBiGGid) { String query = "SELECT DISTINCT mr." + SUBSYSTEM + " FROM " + REACTION + " r, " + MODEL_REACTION + " mr WHERE r." @@ -160,11 +172,15 @@ public static List getSubsystemsForReaction(String reactionBiGGid) { /** - * Get chemical formula for unknown model id + * Retrieves the unique chemical formula for a given component within a specific compartment. + * This method queries the database to find the distinct chemical formula associated with the specified + * component and compartment IDs. It ensures that the formula is unique for the given parameters. + * If multiple distinct formulas are found, it logs a warning indicating ambiguity. * - * @param componentId - * @param compartmentId - * @return + * @param componentId The BiGG ID of the component for which the chemical formula is to be retrieved. + * @param compartmentId The BiGG ID of the compartment where the component is located. 
+ * @return An {@link Optional} containing the chemical formula if exactly one unique formula is found, + * otherwise an empty {@link Optional} if none or multiple formulas are found. */ public static Optional getChemicalFormulaByCompartment(String componentId, String compartmentId) { String query = "SELECT DISTINCT mcc." + FORMULA + " FROM " + MCC + " mcc, " + COMPARTMENTALIZED_COMPONENT + " cc, " @@ -184,10 +200,14 @@ public static Optional getChemicalFormulaByCompartment(String componentI /** - * @param query - * @param componentId - * @param compartmentOrModelId - * @return + * Executes a database query to retrieve distinct chemical formulas based on the provided SQL query. + * This method is designed to handle queries that fetch chemical formulas for a specific component + * within either a compartment or a model, depending on the IDs provided. + * + * @param query The SQL query string that retrieves distinct chemical formulas. + * @param componentId The BiGG ID of the component for which the formula is being retrieved. + * @param compartmentOrModelId The BiGG ID of either the compartment or the model associated with the component. + * @return A set of unique chemical formulas as strings. If no valid formulas are found, returns an empty set. */ private static Set runFormulaQuery(String query, String componentId, String compartmentOrModelId) { Set results = new HashSet<>(); @@ -210,11 +230,14 @@ private static Set runFormulaQuery(String query, String componentId, Str /** - * Get chemical formula for models that are present in BiGG + * Retrieves the chemical formula for a given component within a specific model in the BiGG database. + * This method executes a SQL query to find distinct chemical formulas associated with the component and model IDs provided. + * If exactly one unique formula is found, it is returned. If none or multiple formulas are found, an empty Optional is returned. 
+ * In case of multiple formulas, a log entry is made indicating the ambiguity. * - * @param componentId - * @param modelId - * @return + * @param componentId The BiGG ID of the component for which the formula is being retrieved. + * @param modelId The BiGG ID of the model in which the component is present. + * @return An {@link Optional} containing the chemical formula if exactly one is found, otherwise empty. */ public static Optional getChemicalFormula(String componentId, String modelId) { String query = "SELECT DISTINCT mcc." + FORMULA + "\n FROM " + COMPONENT + " c,\n" + COMPARTMENTALIZED_COMPONENT @@ -234,8 +257,12 @@ public static Optional getChemicalFormula(String componentId, String mod /** - * @param biggId - * @return + * Retrieves the name of the compartment associated with the given BiGG ID from the database. + * This method constructs a SQL query to select the compartment name where the BiGG ID matches + * and the name is not an empty string. + * + * @param biggId The BiGGId object containing the abbreviation of the compartment. + * @return An {@link Optional} containing the name of the compartment if found, otherwise empty. */ public static Optional getCompartmentName(BiGGId biggId) { String query = "SELECT " + NAME + " FROM " + COMPARTMENT + " WHERE " + BIGG_ID + " = ? AND " + NAME + " <> ''"; @@ -244,9 +271,13 @@ public static Optional getCompartmentName(BiGGId biggId) { /** - * @param query - * @param param - * @return + * Executes a SQL query with a single parameter and returns the result as an Optional. + * This method is designed to handle queries that are expected to return a single result. + * If multiple results are found, a severe log is recorded indicating the issue. + * + * @param query The SQL query to be executed. It should contain exactly one placeholder for the parameter. + * @param param The parameter value to be used in the SQL query. 
+ * @return An {@link Optional} containing the result if exactly one result is found, otherwise empty. */ public static Optional singleParamStatement(String query, String param) { Set results = new HashSet<>(); @@ -276,18 +307,25 @@ public static Optional singleParamStatement(String query, String param) /** - * @param biggId - * @return + * Retrieves the name of the component associated with the given BiGG ID from the database. + * This method constructs a SQL query to select the component name where the BiGG ID matches + * and the name is not an empty string. + * + * @param biggId The BiGGId object containing the abbreviation of the component. + * @return An {@link Optional} containing the name of the component if found, otherwise empty. */ public static Optional getComponentName(BiGGId biggId) { String query = "SELECT " + NAME + " FROM " + COMPONENT + " WHERE " + BIGG_ID + " = ? AND " + NAME + " <> ''"; return singleParamStatement(query, biggId.getAbbreviation()); } - /** - * @param biggId - * @return + * Retrieves the type of the component associated with the given BiGG ID from the database. + * This method constructs a SQL query to select the component type where the BiGG ID matches + * and the name field is not an empty string. + * + * @param biggId The BiGGId object containing the abbreviation of the component. + * @return An {@link Optional} containing the type of the component if found, otherwise empty. */ public static Optional getComponentType(BiGGId biggId) { String query = "SELECT " + TYPE + " FROM " + COMPONENT + " WHERE " + BIGG_ID + " = ? AND " + NAME + " <> ''"; @@ -296,11 +334,12 @@ public static Optional getComponentType(BiGGId biggId) { /** - * Here we get all possible MIRIAM annotation for Gene Labels, but we ignore - * all those entries that are not MIRIAM-compliant for now. + * Retrieves all possible MIRIAM-compliant gene identifiers from the database based on a given label. 
+ * This method queries the database for gene identifiers that match the provided label and are compliant + * with MIRIAM standards. Non-compliant entries are ignored. * - * @param label - * @return + * @param label The label used to query gene identifiers. + * @return A TreeSet containing unique, sorted MIRIAM-compliant gene identifiers. */ public static TreeSet getGeneIds(String label) { TreeSet results = new TreeSet<>(); @@ -338,8 +377,14 @@ public static TreeSet getGeneIds(String label) { /** - * @param label - * @return + * Retrieves the gene name from the database based on a given label. + * This method constructs a SQL query to fetch the synonym of a gene that matches the given label, + * ensuring that the gene is associated with a valid data source and genome region, and that the + * synonym is not empty. The query specifically looks for entries where the data source's BIGG ID + * matches the REFSEQ naming convention. + * + * @param label The label used to query the gene name, typically a BIGG ID. + * @return An {@link Optional} containing the gene name if found, otherwise an empty {@link Optional}. */ public static Optional getGeneName(String label) { String query = "SELECT s." + SYNONYM + "\n" + "FROM " + DATA_SOURCE + " d, " + SYNONYM + " s, " + GENOME_REGION @@ -350,9 +395,14 @@ public static Optional getGeneName(String label) { /** - * @param reactionId - * @param modelId - * @return + * Retrieves formatted gene reaction rules for a specific reaction and model from the database. + * This method constructs a SQL query to fetch and format the gene reaction rules associated with + * the given reaction ID and model ID. The formatting includes replacing logical operators 'or' and 'and' + * with '||' and '&&', respectively, and substituting certain characters to comply with SBML standards. + * + * @param reactionId The ID of the reaction for which gene reaction rules are to be retrieved. + * @param modelId The ID of the model associated with the reaction. 
+ * @return A list of formatted gene reaction rules as strings. */ public static List getGeneReactionRule(String reactionId, String modelId) { return getReactionRules("SELECT REPLACE(REPLACE(RTRIM(REPLACE(REPLACE(mr." + GENE_REACTION_RULE @@ -365,10 +415,14 @@ public static List getGeneReactionRule(String reactionId, String modelId /** - * @param query - * @param reactionId - * @param modelId - * @return + * Executes a provided SQL query to retrieve gene reaction rules from the database. + * This method prepares a statement with the given query, setting the specified reactionId and modelId as parameters. + * It then executes the query and collects the results into a list of strings. + * + * @param query The SQL query string that retrieves gene reaction rules, expecting two placeholders for parameters. + * @param reactionId The ID of the reaction to be used as the first parameter in the SQL query. + * @param modelId The ID of the model to be used as the second parameter in the SQL query. + * @return A list of strings where each string is a gene reaction rule retrieved based on the given IDs. */ public static List getReactionRules(String query, String reactionId, String modelId) { List results = new ArrayList<>(); @@ -389,10 +443,14 @@ public static List getReactionRules(String query, String reactionId, Str return results; } - + /** - * @param abbreviation - * @return + * Retrieves the organism associated with a given BiGG model abbreviation from the database. + * This method constructs and executes a SQL query that joins the GENOME and MODEL tables to find the organism + * corresponding to the specified model abbreviation. + * + * @param abbreviation The abbreviation of the model for which the organism is to be retrieved. + * @return An Optional containing the organism name if found, otherwise an empty Optional. */ public static Optional getOrganism(String abbreviation) { String query = "SELECT g." + ORGANISM + " FROM " + GENOME + " g, " + MODEL + " m WHERE m." 
+ GENOME_ID + " = g." @@ -402,8 +460,12 @@ public static Optional getOrganism(String abbreviation) { /** - * @param abbreviation - * @return + * Retrieves a list of publications associated with a given BiGG model abbreviation from the database. + * This method constructs and executes a SQL query that joins the PUBLICATION, PUBLICATION_MODEL, and MODEL tables + * to find the publications related to the specified model abbreviation. + * + * @param abbreviation The abbreviation of the model for which the publications are to be retrieved. + * @return A list of pairs where each pair consists of a publication type and its corresponding ID. */ public static List> getPublications(String abbreviation) { List> results = new LinkedList<>(); @@ -429,20 +491,28 @@ public static List> getPublications(String abbreviation) { /** - * @param abbreviation - * @return + * Retrieves the name of a reaction based on its BiGG ID abbreviation, ensuring the name is not empty. + * This method constructs and executes a SQL query that selects the reaction name from the REACTION table + * where the BIGG_ID matches the specified abbreviation and the name is not an empty string. + * + * @param abbreviation The abbreviation of the reaction for which the name is to be retrieved. + * @return An Optional containing the reaction name if found and not empty, otherwise an empty Optional. */ public static Optional getReactionName(String abbreviation) { String query = "SELECT " + NAME + " FROM " + REACTION + " WHERE " + BIGG_ID + " = ? AND " + NAME + " <> ''"; return singleParamStatement(query, abbreviation); } - + /** - * @param biggId - * @param includeAnyURI - * @param isReaction - * @return a set of external source together with external id. + * Retrieves a set of resource URLs for a given BiGG ID, optionally filtering to include only those containing 'identifiers.org'. 
+ * This method constructs and executes a SQL query to fetch URLs from the database based on the type of BiGG ID (reaction or component). + * It then filters these URLs based on the 'includeAnyURI' parameter. + * + * @param biggId The BiGG ID object containing the abbreviation of the model component or reaction. + * @param includeAnyURI If true, all URLs are included; if false, only URLs containing 'identifiers.org' are included. + * @param isReaction If true, the BiGG ID is treated as a reaction; if false, it is treated as a component. + * @return A sorted set of URLs as strings, potentially filtered by the 'identifiers.org' domain. */ public static Set getResources(BiGGId biggId, boolean includeAnyURI, boolean isReaction) { String type = isReaction ? REACTION : COMPONENT; @@ -477,8 +547,13 @@ public static Set getResources(BiGGId biggId, boolean includeAnyURI, boo /** - * @param isReaction - * @return + * Constructs a SQL query condition based on whether the subject is a reaction or a component. + * This method dynamically generates part of a SQL WHERE clause. If the subject is a reaction, + * it matches the type strictly with 'REACTION'. If it's not a reaction, it matches the type + * with either 'COMPONENT' or 'COMPARTMENTALIZED_COMPONENT'. + * + * @param isReaction A boolean indicating if the subject is a reaction (true) or not (false). + * @return A string representing a SQL WHERE clause condition based on the type of subject. */ private static String getTypeQuery(boolean isReaction) { if (isReaction) { @@ -490,8 +565,12 @@ private static String getTypeQuery(boolean isReaction) { /** - * @param abbreviation - * @return + * Retrieves the taxonomic identifier (taxon ID) for a given model based on its abbreviation. + * This method queries the database to find the taxon ID associated with the model's abbreviation. + * If multiple taxon IDs are found for the same abbreviation, a severe log message is generated. 
+ * + * @param abbreviation The abbreviation of the model for which the taxon ID is being queried. + * @return An {@link Optional} containing the taxon ID if found; otherwise, an empty {@link Optional}. */ public static Optional getTaxonId(String abbreviation) { Integer result = null; @@ -519,10 +598,16 @@ public static Optional getTaxonId(String abbreviation) { /** - * @param id: - * Model id present in BiGG - * @return Accession that can be resolved as using either https://identifiers.org/refseq:{$id}, - * https://www.ncbi.nlm.nih.gov/nuccore/{$id} or https://www.ncbi.nlm.nih.gov/assembly/{$id} + * Retrieves the genome accession for a given model ID from the BiGG database. + * The accession can be used to construct URLs for accessing genomic data from various sources. + * The URLs can be formed using the accession ID with the following patterns: + * - https://identifiers.org/refseq:{$id} + * - https://www.ncbi.nlm.nih.gov/nuccore/{$id} + * - https://www.ncbi.nlm.nih.gov/assembly/{$id} + * + * @param id The model ID present in BiGG. + * @return The accession string which can be appended to the base URLs mentioned above. + * If the query fails or no accession is found, an empty string is returned. */ public static String getGenomeAccesion(String id) { String query = "SELECT g." + ACCESSION_VALUE + " FROM " + GENOME + " g, " + MODEL + " m WHERE m." + BIGG_ID @@ -533,7 +618,7 @@ public static String getGenomeAccesion(String id) { PreparedStatement pStatement = connection.prepareStatement(query); pStatement.setString(1, id); ResultSet resultSet = pStatement.executeQuery(); - // should always be exactly one entry, as we check beforehand, if we have a BiGG model id + // There should always be exactly one entry, as the presence of a BiGG model ID is verified beforehand. 
if (resultSet.next()) { result = resultSet.getString(1); } @@ -542,14 +627,19 @@ public static String getGenomeAccesion(String id) { } catch (SQLException e) { logger.warning(format(MESSAGES.getString("GENOME_ACCESSION_FAIL"), id)); } - // Will be non empty, as query always returns exactly one result + // The result should be non-empty as the query is expected to always return exactly one result. return result; } /** - * @param table - * @return + * Retrieves a set of unique BiGG IDs from a specified table in the database. + * This method queries the database for all unique BiGG IDs in the specified table and returns them as a set. + * The IDs are ordered by their natural ordering in the database. + * + * @param table The name of the database table from which to retrieve BiGG IDs. + * @return A Set of strings containing unique BiGG IDs from the specified table. If an SQL error occurs, + * the returned set will be empty. */ public static Set getOnce(String table) { Set biggIds = new LinkedHashSet<>(); @@ -571,11 +661,14 @@ public static Set getOnce(String table) { /** - * Get charge for unknown model id + * Retrieves the charge associated with a specific component in a given compartment when the model ID is unknown. + * This method executes a SQL query to find a distinct charge value for a component based on its BiGG ID and + * the compartment's BiGG ID. The method ensures that the charge value is not empty and returns it if it is unique. * - * @param componentId - * @param compartmentId - * @return + * @param componentId The BiGG ID of the component. + * @param compartmentId The BiGG ID of the compartment. + * @return An {@link Optional} containing the charge if it is unique and present; otherwise, an empty {@link Optional}. + * If multiple unique charge values are found, a warning is logged. */ public static Optional getChargeByCompartment(String componentId, String compartmentId) { String query = "SELECT DISTINCT mcc." 
+ CHARGE + " FROM " + MCC + " mcc, " + COMPARTMENTALIZED_COMPONENT + " cc, " @@ -595,10 +688,14 @@ public static Optional getChargeByCompartment(String componentId, Strin /** - * @param query - * @param componentId - * @param compartmentOrModelId - * @return + * Executes a SQL query to retrieve distinct charge values based on the provided query string. + * This method prepares and executes a SQL statement using the provided component ID and compartment or model ID. + * It collects the results into a set, ensuring that only non-null and non-empty values are included. + * + * @param query The SQL query string to execute, expecting placeholders for componentId and compartmentOrModelId. + * @param componentId The BiGG ID of the component, used to replace the first placeholder in the query. + * @param compartmentOrModelId The BiGG ID of the compartment or model, used to replace the second placeholder in the query. + * @return A Set of strings containing distinct charge values from the query results. If no valid results are found, returns an empty set. */ private static Set runChargeQuery(String query, String componentId, String compartmentOrModelId) { Set results = new HashSet<>(); @@ -621,11 +718,14 @@ private static Set runChargeQuery(String query, String componentId, Stri /** - * Get charge for known model id + * Retrieves the charge for a given component and model from the database. + * This method executes a SQL query to select distinct charge values associated with the specified component ID + * and model ID. It ensures that the charge value is not null. * - * @param componentId - * @param modelId - * @return + * @param componentId The BiGG ID of the component. + * @param modelId The BiGG ID of the model. + * @return An Optional containing the charge if exactly one distinct charge is found, otherwise an empty Optional. + * If multiple distinct charges are found, a warning is logged. 
*/ public static Optional getCharge(String componentId, String modelId) { String query = "SELECT DISTINCT mcc." + CHARGE + "\n FROM " + COMPONENT + " c,\n" + COMPARTMENTALIZED_COMPONENT @@ -645,8 +745,12 @@ public static Optional getCharge(String componentId, String modelId) { /** - * @param reactionId - * @return + * Determines if a given reaction ID corresponds to a pseudoreaction in the database. + * A pseudoreaction is typically used to represent non-biochemical data flows such as biomass accumulation, + * demand reactions, or exchange reactions. + * + * @param reactionId The BiGG ID of the reaction to be checked. + * @return true if the reaction is a pseudoreaction, false otherwise. */ public static boolean isPseudoreaction(String reactionId) { String query = "SELECT " + PSEUDOREACTION + " FROM " + REACTION + " WHERE " + BIGG_ID + " = ?"; @@ -656,16 +760,22 @@ public static boolean isPseudoreaction(String reactionId) { /** - * @param synonym - * @param type - * @param dataSourceId - * @return String + * Retrieves the BiGG ID associated with a given synonym and type from the specified data source. + * This method constructs a SQL query based on the type of biological entity (species, reaction, or gene product) + * and executes it to fetch the corresponding BiGG ID from the database. + * + * @param dataSourceId The ID of the data source where the synonym is registered. + * @param synonym The synonym used to identify the entity in the data source. + * @param type The type of the entity, which can be species, reaction, or gene product. + * @return An Optional containing the BiGG ID if exactly one unique ID is found, otherwise an empty Optional. */ public static Optional getBiggIdFromSynonym(String dataSourceId, String synonym, String type) { Set results = new HashSet<>(); String query; + // Common part of the SQL query used for all types String sharedQuerySubstring = DATA_SOURCE + " d, " + SYNONYM + " s" + " WHERE " + "d." + BIGG_ID + " = ? AND d." + ID + " = s." 
+ DATA_SOURCE_ID + " AND s." + SYNONYM_COL + " = ? AND s." + OME_ID; + // Constructing specific query based on the type switch (type) { case TYPE_SPECIES: query = "SELECT " + "c." + BIGG_ID + " FROM " + COMPONENT + " c, " + sharedQuerySubstring + " = c." + ID; @@ -692,6 +802,7 @@ public static Optional getBiggIdFromSynonym(String dataSourceId, String } catch (SQLException exc) { logger.warning(Utils.getMessage(exc)); } + // Filtering and returning the result results = results.stream().filter(biggId -> biggId != null && !biggId.isEmpty()).collect(Collectors.toSet()); if (results.size() == 1) { return Optional.of(results.iterator().next()); @@ -701,11 +812,23 @@ public static Optional getBiggIdFromSynonym(String dataSourceId, String } + /** + * Represents a reaction from an external data source mapped to the BiGG database, including its compartment details. + * "Foreign" in this context refers to the origin of the reaction data from a source outside of the primary BiGG database schema, + * typically involving cross-referencing with external databases or data sources. + */ public static class ForeignReaction { - public final String reactionId; - public final String compartmentId; - public final String compartmentName; - + public final String reactionId; // The BiGG ID of the reaction. + public final String compartmentId; // The BiGG ID of the compartment. + public final String compartmentName; // The name of the compartment. + + /** + * Constructs a new ForeignReaction instance. + * + * @param reactionId The BiGG ID of the reaction. + * @param compartmentId The BiGG ID of the compartment. + * @param compartmentName The name of the compartment. 
+ */ public ForeignReaction(String reactionId, String compartmentId, String compartmentName) { this.reactionId = reactionId; this.compartmentId = compartmentId; @@ -714,14 +837,19 @@ public ForeignReaction(String reactionId, String compartmentId, String compartme } /** - * @param synonym - * @param dataSourceId - * @return String + * Retrieves a collection of ForeignReaction objects for a given synonym and data source ID. + * This method queries the database to find reactions and their associated compartment details + * based on the provided synonym and data source ID. The term "foreign" indicates that the reactions + * are identified using external data sources, which are then mapped to corresponding entities in the BiGG database. + * + * @param dataSourceId The ID of the data source where the synonym is registered. + * @param synonym The synonym used to identify the reaction in the data source. + * @return A collection of ForeignReaction objects containing the reaction and compartment details. */ - public static Collection getBiggIdsForReactionForeignId(String dataSourceId, - String synonym) { + public static Collection getBiggIdsForReactionForeignId(String dataSourceId, String synonym) { Set results = new HashSet<>(); + // SQL query to fetch reaction and compartment details var query = "SELECT R.BIGG_ID AS REACTION_BIGG_ID, " + "C.BIGG_ID AS COMPARTMENT_BIGG_ID, " + "C.NAME AS COMPARTMENT_NAME " diff --git a/src/main/java/edu/ucsd/sbrg/db/DBConfig.java b/src/main/java/edu/ucsd/sbrg/db/DBConfig.java index b832825d..b3405323 100644 --- a/src/main/java/edu/ucsd/sbrg/db/DBConfig.java +++ b/src/main/java/edu/ucsd/sbrg/db/DBConfig.java @@ -2,21 +2,33 @@ import de.zbit.util.prefs.SBProperties; + +/** + * This class provides configuration and initialization methods for database connections. + * It supports operations for two specific databases: ADB and BiGG. 
+ * It includes methods to initialize these databases based on command line arguments + * and to check if the necessary parameters are provided before establishing connections. + */ public class DBConfig { + /** + * Initializes the ADB database if the conditions are met. + * It checks if annotation with ADB is required and if ADB is not already in use. + * + * @param args Command line arguments provided for database configuration. + * @param annotateWithADB Flag indicating whether to annotate with ADB. + */ public static void initADB(SBProperties args, boolean annotateWithADB) { - if (annotateWithADB && ! AnnotateDB.inUse()) { + if (annotateWithADB && !AnnotateDB.inUse()) { initADB(args); } } - /** - * Sets DB to use, depending on provided arguments: - * If annotateWithBigg is true and all arguments are provided, PostgreSQL is used + * Private helper method to initialize the ADB database connection. + * It retrieves database connection parameters from the provided arguments and initializes the connection if all parameters are valid. * - * @param args: - * Arguments from Commandline + * @param args Command line arguments containing database connection parameters. */ private static void initADB(SBProperties args) { String name = args.getProperty(ADBOptions.DBNAME); @@ -33,19 +45,24 @@ private static void initADB(SBProperties args) { } } - + /** + * Initializes the BiGG database if the conditions are met. + * It checks if annotation with BiGG is required and if BiGG is not already in use. + * + * @param args Command line arguments provided for database configuration. + * @param annotateWithBiGG Flag indicating whether to annotate with BiGG. + */ public static void initBiGG(SBProperties args, boolean annotateWithBiGG) { if (annotateWithBiGG && !BiGGDB.inUse()) { initBiGG(args); } } - /** - * If annotateWithBigg is true and all arguments are provided, connection is established, else + * Private helper method to initialize the BiGG database connection. 
+ * It retrieves database connection parameters from the provided arguments and initializes the connection if all parameters are valid. * - * @param args: - * Arguments from Commandline + * @param args Command line arguments containing database connection parameters. */ private static void initBiGG(SBProperties args) { String name = args.getProperty(BiGGDBOptions.DBNAME); @@ -62,10 +79,11 @@ private static void initBiGG(SBProperties args) { } } - /** - * @param string - * @return + * Utility method to check if a string is neither null nor empty. + * + * @param string The string to check. + * @return true if the string is not null and not empty, false otherwise. */ private static boolean iStrNotNullOrEmpty(String string) { return !(string == null || string.isEmpty()); diff --git a/src/main/java/edu/ucsd/sbrg/miriam/CompactEntry.java b/src/main/java/edu/ucsd/sbrg/miriam/CompactEntry.java index 5ea20ec7..084a450f 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/CompactEntry.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/CompactEntry.java @@ -13,7 +13,7 @@ public class CompactEntry { private final String prefix; private final String sampleId; private final boolean deprecated; - private final boolean namespaceEmbeddedInLui; + private final boolean namespaceEmbeddedInLui; // LUI = Local Unique Identifiers private final List resources; private CompactEntry(Namespace namespace) { @@ -68,6 +68,11 @@ public boolean isDeprecated() { } + /** + * Checks if the namespace is embedded in the locally unique identifier (LUI). + * + * @return {@code true} if the namespace is embedded in the LUI, {@code false} otherwise. 
+ */ public boolean isNamespaceEmbeddedInLui() { return namespaceEmbeddedInLui; } diff --git a/src/main/java/edu/ucsd/sbrg/miriam/CompactResource.java b/src/main/java/edu/ucsd/sbrg/miriam/CompactResource.java index ec213261..fa4e23eb 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/CompactResource.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/CompactResource.java @@ -2,15 +2,49 @@ import edu.ucsd.sbrg.miriam.models.Resource; +/** + * Represents a compact version of a Resource with essential fields only. + * This class is used to store and manage a minimal set of data attributes + * derived from a Resource object, which are necessary for specific operations + * or functionalities within the application. + */ public class CompactResource { + /** + * Unique identifier for the resource. + */ private final long id; + + /** + * Code representing the provider of the resource. + */ private final String providerCode; + + /** + * Sample identifier associated with the resource. + */ private final String sampleId; + + /** + * URL pattern that can be used to access the resource online. + */ private final String urlPattern; + + /** + * Flag indicating whether the resource is deprecated. + */ private final boolean deprecated; + + /** + * Flag indicating whether the resource is officially recognized or supported. + */ private final boolean official; + /** + * Private constructor that initializes a CompactResource object using a Resource instance. + * + * @param resource The Resource object from which to extract properties. + */ private CompactResource(Resource resource) { id = resource.getId(); providerCode = resource.getProviderCode(); @@ -20,37 +54,66 @@ private CompactResource(Resource resource) { official = resource.isOfficial(); } - + /** + * Factory method to create a CompactResource instance from a Resource object. + * + * @param resource The Resource object to convert. + * @return A new instance of CompactResource containing the essential fields from the given Resource. 
+ */ public static CompactResource fromResource(Resource resource) { return new CompactResource(resource); } - + /** + * Gets the unique identifier for this resource. + * + * @return The unique identifier. + */ public long getId() { return id; } - + /** + * Gets the provider code of this resource. + * + * @return The provider code. + */ public String getProviderCode() { return providerCode; } - + /** + * Gets the sample identifier of this resource. + * + * @return The sample identifier. + */ public String getSampleId() { return sampleId; } - + /** + * Gets the URL pattern of this resource. + * + * @return The URL pattern. + */ public String getUrlPattern() { return urlPattern; } - + /** + * Checks if this resource is deprecated. + * + * @return True if the resource is deprecated, otherwise false. + */ public boolean isDeprecated() { return deprecated; } - + /** + * Checks if this resource is officially recognized or supported. + * + * @return True if the resource is official, otherwise false. + */ public boolean isOfficial() { return official; } diff --git a/src/main/java/edu/ucsd/sbrg/miriam/Entries.java b/src/main/java/edu/ucsd/sbrg/miriam/Entries.java index d060c1bf..4c40199c 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/Entries.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/Entries.java @@ -13,6 +13,16 @@ import de.zbit.util.ResourceManager; + +/** + * The singleton {@code Entries} class serves as a central repository for managing and querying MIRIAM namespaces and their associated resources. + * This class provides functionality to initialize from a list of compact entries, + * retrieve collections based on URLs or provider codes, and perform pattern matching to resolve URLs to specific MIRIAM collections. + * + * MIRIAM namespaces are used to standardize the annotation of biological models, ensuring that each annotated element + * is described using a consistent and recognizable format.
This class helps in managing these namespaces and provides + * methods to query and retrieve information about them. + */ public class Entries { /** @@ -35,7 +45,7 @@ static class Root { /** * Container for reduced representation tree structure. - * Each children corresponds to a MIRIAM namespace + * Each child corresponds to a MIRIAM namespace * * @param entries * entries to convert into children {@link Namespace}s @@ -124,29 +134,32 @@ public static Entries getInstance() { /** - * Tries to match a query URL uniquely to retrieve the correct MIRIAM collection for additional annotation + * Attempts to find a unique match for a given URL within the MIRIAM collections by comparing against namespace + * and resource URL patterns. This method is used to determine the correct MIRIAM collection for additional annotation. * - * @param query - * URL to match against identifiers.org namespace and resource URLPatterns - * @return Empty optional if either no match is found or the match is not unique (a message is logged in the second - * case), else the Optional contains the match + * @param query The URL to be matched against the identifiers.org namespace and resource URL patterns. + * @return A list of {@link Node} objects that match the query URL. If no matches are found, the list will be empty. + * If more than one match is found, a log message is generated indicating the URL could not be uniquely resolved. 
*/ public List getMatchForUrl(String query) { - // strip protocol from query URL for better matching + // Remove the protocol part of the URL to improve matching accuracy if (query.startsWith("http://") || query.startsWith("https://")) { query = query.replaceAll("^https?://", ""); } List matches = new ArrayList<>(); + // Iterate over all namespaces to find matches for (Node namespace : root.children) { if (namespace.isMatch(query)) { matches.add(namespace); } + // Iterate over all resources within the namespace to find matches for (Node resource : ((Namespace) namespace).getLeaves()) { if (resource.isMatch(query)) { matches.add(resource); } } } + // Log a message if more than one match is found, indicating non-uniqueness if (matches.size() > 1) { logger.info(format("Could not resolve MIRIAM collection for URL {0} uniquely", query)); } @@ -155,32 +168,48 @@ public List getMatchForUrl(String query) { /** - * Retrieve provider code based on collection name + * Retrieves the provider code associated with a given collection name from the {@code PROVIDER_FOR_COLLECTION} map. + * In the context of MIRIAM, a provider refers to an entity + * or database that supplies the data for a specific collection. This method returns the provider code, which is a unique + * identifier for the provider associated with the specified collection. If the collection name does not have an associated + * provider code, an empty string is returned. * - * @param collection - * collection name used as key in lookup - * @return provider code, if present, else an empty {@link String} + * @param collection The name of the collection for which the provider code is to be retrieved. + * @return The provider code as a String, or an empty string if no provider code is associated with the collection. */ public String getProviderForCollection(String collection) { return PROVIDER_FOR_COLLECTION.getOrDefault(collection, ""); } + /** + * Retrieves the regex pattern associated with a given collection name. 
+ * If the collection name does not have an associated pattern, an empty string is returned. + * + * @param collection The name of the collection for which the pattern is to be retrieved. + * @return The regex pattern as a String, or an empty string if no pattern is associated with the collection. + */ public String getPattern(String collection) { return PATTERN_FOR_COLLECTION.getOrDefault(collection, ""); } /** - * {@link String#replaceAll(String, String)} unescapes second string, i.e. it is not usable to insert a regex into a - * string - * This static method replaces the id placeholder tag with the correct regex for matching + * Replaces the identifier placeholder "{$id}" in a URL pattern with a specified regex pattern. + * This method is designed to facilitate the matching of URLs against a dynamic regex pattern that represents + * an identifier within a MIRIAM namespace or its child resources. * - * @param url - * URL Pattern for MIRIAM namespace or child thereof - * @param pattern - * Identifier RegEx for matching - * @return url with id placeholder tag replaced with pattern + * The method first attempts to find the "{$id}" placeholder within the provided URL. If found, it splits the URL + * around this placeholder and reassembles it with the given regex pattern in place of the placeholder. If the + * placeholder is not found, the URL is returned as is, but quoted to ensure it is treated as a literal string in regex + * operations. + * + * Note: The placeholder "{$id}" can optionally be surrounded by curly braces, which are considered during the + * replacement but do not affect the functionality. + * + * @param url The URL pattern containing the "{$id}" placeholder. This pattern represents a MIRIAM namespace or a related child namespace. + * @param pattern The regex pattern that should replace the "{$id}" placeholder in the URL pattern. + * @return A string representing the URL with the "{$id}" placeholder replaced by the provided regex pattern. 
If no placeholder is found, the URL is returned unchanged but quoted. */ static String replaceIdTag(String url, String pattern) { Pattern id = Pattern.compile("\\{?\\{\\$id}}?"); @@ -198,16 +227,15 @@ static String replaceIdTag(String url, String pattern) { /** - * {@link String#replaceAll(String, String)} unescapes second string, i.e. it is not usable to insert a regex into a - * string - * This static method replaces the id placeholder tag with the correct regex for matching also dealing with namespace - * prefixes in the RegEx pattern + * Replaces the identifier placeholder in a URL pattern with a specified regex pattern, considering a namespace prefix. + * This method is useful when the URL pattern contains placeholders that need to be dynamically replaced with + * identifier patterns for pattern matching operations. The method also handles namespace prefixes which are part of + * the placeholder in the URL. * - * @param url - * URL Pattern for MIRIAM namespace or child thereof - * @param pattern - * Identifier RegEx for matching - * @return url with id placeholder tag replaced with pattern + * @param url The URL pattern containing the placeholder for the identifier. This pattern represents a MIRIAM namespace or a related child namespace. + * @param pattern The regex pattern that should replace the identifier placeholder in the URL pattern. + * @param prefix The namespace prefix that might precede the identifier placeholder in the URL pattern. + * @return A string representing the URL with the identifier placeholder replaced by the provided regex pattern. If no placeholder is found, the URL is returned unchanged. */ static String replaceIdTagWithPrefix(String url, String pattern, String prefix) { Pattern id = Pattern.compile(prefix + "\\{?\\{\\$id}}?"); @@ -225,7 +253,11 @@ static String replaceIdTagWithPrefix(String url, String pattern, String prefix) /** - * @return number of namespaces + * Retrieves the number of namespaces managed by this Entries instance. 
+ * This method accesses the 'children' list of the 'root' object, which contains + * all the Namespace instances, and returns its size. + * + * @return The total count of Namespace instances contained in the 'children' list of 'root'. */ public int size() { return root.children.size(); diff --git a/src/main/java/edu/ucsd/sbrg/miriam/Namespace.java b/src/main/java/edu/ucsd/sbrg/miriam/Namespace.java index 5be65348..9dbbbf06 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/Namespace.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/Namespace.java @@ -7,8 +7,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; + /** - * Corresponds to a MIRIAM namespace entry, reduced to data needed by ModelPolisher + * Represents a MIRIAM namespace entry, encapsulating the essential data required by ModelPolisher. + * A namespace in MIRIAM is a collection of identifiers under a common context, typically representing + * a specific database or a type of biological entity. This class provides methods to manage and interact + * with these identifiers, including pattern matching, URL resolution, and checking deprecation status. */ class Namespace implements Node { @@ -29,6 +33,8 @@ class Namespace implements Node { /** + * Retrieves the list of resources associated with this namespace. + * * @return List of resources for a namespace */ public List getLeaves() { @@ -54,11 +60,16 @@ public String getSampleId() { } + /** + * Determines if the namespace is embedded within the locally unique identifier (LUI). + * + * @return {@code true} if the namespace is embedded in the LUI, {@code false} otherwise. 
+ */ public boolean isNamespaceEmbeddedInLui() { return entry.isNamespaceEmbeddedInLui(); } - + public boolean matchesPattern(String query) { return Pattern.compile(getPattern()).matcher(query).matches(); } @@ -91,15 +102,25 @@ public String getURLWithPattern() { return "identifiers.org/" + getPrefix() + "/" + idPattern; } - + /** + * Resolves the full URL for a given identifier based on whether the namespace is embedded in the LUI. + * If the namespace is embedded, it extracts the prefix pattern from the namespace pattern and constructs the URL. + * If the namespace is not embedded, it directly appends the identifier to the base URL. + * + * @param id The identifier to be resolved. + * @return The fully resolved URL as a String. + * @throws IllegalStateException if the prefix cannot be extracted when expected. + */ @Override public String resolveID(String id) { if (isNamespaceEmbeddedInLui()) { + // Compile a pattern to extract the prefix from the namespace pattern. Pattern prefixPattern = Pattern.compile("\\(?[\\w\\\\]+?[:]\\)?"); + // Strip the ^ and $ anchors from the namespace pattern before searching it for the prefix. Matcher prefixMatcher = prefixPattern.matcher(getPattern().replaceAll("\\^|\\$", "")); if (prefixMatcher.find()) { String pattern = prefixMatcher.group(); - // Quickfix for escaped colon in CCO and possibly others + // Quickfix for CCO and possibly others: the colon may be escaped in the pattern, so unescape it. if (pattern.endsWith("\\:")) { pattern = pattern.substring(0, pattern.length() - 2) + ":"; } @@ -107,6 +128,7 @@ public String resolveID(String id) { } throw new IllegalStateException("Could not extract prefix, this should not happen."); } else { + // Construct the URL directly if the namespace is not embedded in the LUI.
return "https://identifiers.org/" + getPrefix() + "/" + id; } } diff --git a/src/main/java/edu/ucsd/sbrg/miriam/Registry.java b/src/main/java/edu/ucsd/sbrg/miriam/Registry.java index abddb585..4fb5bea4 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/Registry.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/Registry.java @@ -20,6 +20,14 @@ import edu.ucsd.sbrg.bigg.BiGGId; import edu.ucsd.sbrg.miriam.models.Miriam; +/** + * The {@code Registry} class serves as a central hub for managing and processing identifiers related to the MIRIAM registry. + * MIRIAM is a standard for annotating computational models in biology with machine-readable information. + * + * This class provides static methods and utilities to handle, validate, + * and correct resource URLs based on the MIRIAM standards. It ensures that identifiers and URLs conform to recognized formats and corrects common errors in identifiers from various + * biological databases. The class also initializes necessary resources and configurations at the start through a static block. + */ public class Registry { /** @@ -31,38 +39,66 @@ public class Registry { */ private static final transient ResourceBundle MESSAGES = ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); /* - * Static initializer for Miriam, read registry once and convert to compact representation + * Static initializer block for the Registry class. + * This block is executed once when the Registry class is loaded. + * It performs the following operations: + * 1. Retrieves an instance of Miriam from the RegistryProvider. + * 2. Converts the namespaces fetched from Miriam into a list of CompactEntry objects. + * This conversion only retains essential information needed for the ModelPolisher, optimizing memory usage. + * 3. Initializes the Entries class with this list of CompactEntry objects. + * 4. Closes the RegistryProvider to free up resources. 
*/ static { + // Fetch the Miriam instance from the RegistryProvider Miriam miriam = RegistryProvider.getInstance().getMiriam(); - // convert namespaces to CompactEntries, holding only the necessary information for ModelPolisher + // Convert the namespaces to CompactEntries and initialize Entries Entries.initFromList( miriam.getNamespaces().values().parallelStream().map(CompactEntry::fromNamespace).collect(Collectors.toList())); - // Close unneeded resources + // Close the RegistryProvider to release resources RegistryProvider.close(); } + /** + * Checks and processes a given resource URL to ensure it conforms to expected formats and corrections. + * This method handles specific cases such as URLs containing "omim", "ncbigi", and "reactome". + * It also processes general identifiers.org URLs and other alternative formats. + * + * @param resource The URL to be checked and potentially modified. + * @return An {@link Optional} containing the processed URL if valid, or empty if the URL should be skipped. 
+ */ public static Optional checkResourceUrl(String resource) { - // remove trailing whitespaces + // Remove trailing whitespaces from the URL resource = resource.stripTrailing(); + + // Check if the URL starts with the OMIM prefix which is known to be invalid if (resource.startsWith("https://identifiers.org/omim")) { - // omim is present in BiGGDB, but is not valid, skip + // OMIM is present in BiGGDB, but is not valid, skip return Optional.empty(); } - // no longer supported by identifiers.org, but should still resolve, keep and fix missing id prefix + + // Handle URLs containing the NCBI GI identifier which needs prefix correction if (resource.contains("ncbigi")) { return Optional.of(handleNCBIGI(resource)); } + + // Correct the Reactome ID if necessary if (resource.contains("reactome")) { resource = fixReactomeId(resource); } + + // Compile a pattern to match identifiers.org URLs Pattern identifiersURL = Pattern.compile("(?:https?://)?identifiers.org/(?:(?.*?)/)?(?.*)"); Matcher urlMatcher = identifiersURL.matcher(resource); + + // Check if the URL matches the identifiers.org pattern and handle accordingly if (urlMatcher.matches()) { resource = handleIdentifiersURL(resource, urlMatcher); } else { + // Handle alternative URL formats that do not match identifiers.org pattern resource = handleAlternativeURL(resource); } + + // Return the processed URL if it is not null, otherwise return an empty Optional if (resource != null) { return Optional.of(resource); } else { @@ -72,9 +108,12 @@ public static Optional checkResourceUrl(String resource) { /** - * @param resource - * annotation URL - * @return resource with corrected NCBI GI id + * Processes a resource URL to correct the NCBI GI identifier by ensuring it is properly prefixed. + * If the identifier is found to start with "gi:", it is corrected to "GI:". If the identifier lacks + * any prefix, "GI:" is prepended to it. + * + * @param resource The URL containing the NCBI GI identifier. 
+ * @return The URL with the corrected NCBI GI identifier. */ private static String handleNCBIGI(String resource) { String[] split = resource.split("/"); @@ -91,97 +130,120 @@ private static String handleNCBIGI(String resource) { /** - * @param resource - * annotation url - * @return resource with corrected reactome id + * Corrects the Reactome ID in the given resource URL if it follows the pattern "R-ALL-REACT_". + * This method checks if the resource contains the specified pattern and attempts to correct it by + * ensuring it starts with "R-ALL-" followed by digits. If the pattern is not found or if the ID + * after "R-ALL-REACT_" is missing, the original resource URL is returned unchanged. + * + * @param resource The URL containing the Reactome ID to be corrected. + * @return The URL with the corrected Reactome ID, or the original URL if no correction was needed. */ private static String fixReactomeId(String resource) { String identifier = ""; if (resource.contains("R-ALL-REACT_")) { - String[] splits = identifier.split("R-ALL-REACT_"); - if(splits.length !=2){ - // missing id, is handled down the line, just return original resource here + String[] splits = resource.split("R-ALL-REACT_"); + if(splits.length != 2){ + // If the ID is missing after "R-ALL-REACT_", return the original resource return resource; } identifier = splits[1]; if (Character.isDigit(identifier.charAt(0))) { + // Ensure the identifier starts with "R-ALL-" followed by the digits identifier = "R-ALL-" + identifier; } + // Replace the faulty part of the URL with the corrected identifier return resource.replaceAll("R-ALL-REACT_.*$", identifier); } + // Return the original resource if it does not contain the "R-ALL-REACT_" pattern return resource; } /** - * Create and check identifiers.org URLs + * Processes an identifiers.org URL to ensure it conforms to the expected format and corrects common errors in the + * provider code or identifier. 
This method uses a {@link Matcher} to extract the provider and identifier from the URL. + * It then validates and possibly corrects the provider code, checks the identifier against a known pattern for the provider, + * and reconstructs the URL if necessary. * - * @param resource - * identifiers.org URL to be checked - * @param urlMatcher - * {@link Matcher} used to verify valid identifiers.org URL used to extract provider and and id - * @return Valid identifiers.org URL, uncorrected URL if collection is unknown or {@code null}, if id can not be - * corrected + * @param resource The identifiers.org URL to be checked and corrected. + * @param urlMatcher A {@link Matcher} used to extract the 'provider' and 'id' from the URL. + * @return A corrected identifiers.org URL if possible, the original URL if the provider's collection is unknown, + * or {@code null} if the identifier cannot be corrected. */ public static String handleIdentifiersURL(String resource, Matcher urlMatcher) { + // Extract provider and identifier using the Matcher String provider = urlMatcher.group("provider"); provider = provider == null ? 
"" : provider; String identifier = urlMatcher.group("id"); - // handle case mismatch in provider code + + // Normalize provider code to lowercase if it is in uppercase if (provider.matches("[A-Z]+")) { provider = provider.toLowerCase(); } + + // Singleton instance of Entries to access collections and providers Entries entries = Entries.getInstance(); - // Get provider by checking for uniquely matching Regex + + // Attempt to find a matching provider if the extracted one is empty if (provider.isEmpty()) { Node collection = entries.getCollection(resource); if (collection != null) { provider = entries.getProviderForCollection(collection.getName()); } } - // handle cases where provider and collection name have been mixed up + + // Correct cases where provider and collection names might have been confused if (entries.getCollectionForProvider(provider).isEmpty() && !entries.getProviderForCollection(provider).isEmpty()) { provider = entries.getProviderForCollection(provider); } else if (entries.getCollectionForProvider(provider).isEmpty()) { logger.severe(format(MESSAGES.getString("UNCAUGHT_URI"), resource)); return resource; } + + // Reconstruct the URL with the possibly corrected provider resource = createURI(provider, identifier); String collection = entries.getCollectionForProvider(provider); String regexp = entries.getPattern(collection); + + // Validate the identifier against the expected pattern boolean correct = checkPattern(identifier, regexp); String report_resource = resource; if (!correct) { logger.info(format(MESSAGES.getString("PATTERN_MISMATCH_INFO"), identifier, regexp, collection)); resource = fixResource(resource, identifier); } + + // Log and handle cases where the URL could not be corrected if (resource == null) { logger.warning(format(MESSAGES.getString("CORRECTION_FAILED_DROP"), report_resource, collection)); } else { logger.finer(format("Added resource {0}", resource)); } + return resource; } /** - * Retrieve identifiers.org URL from non 
identifiers.org URL, if possible + * Attempts to convert a non-identifiers.org URL into a corresponding identifiers.org URL by extracting + * the identifier from the original URL and matching it to a known collection in the MIRIAM registry. + * If a matching collection is found and an identifier is successfully extracted, the method constructs + * and returns an identifiers.org URL using the provider associated with the collection. If no matching + * collection is found or the identifier cannot be extracted, the original URL is returned. * - * @param resource - * non identifiers.org URL - * @return identifiers.org URL, if retrieval was possible, else original resource + * @param resource The original non-identifiers.org URL to be converted. + * @return A valid identifiers.org URL if possible, otherwise the original URL. */ public static String handleAlternativeURL(String resource) { Entries entries = Entries.getInstance(); Node collection = entries.getCollection(resource); if (collection == null) { - // can't retrieve a corresponding identifiers.org URL in this case, so return the original URL instead - // Some fixing might be possible, however this is not implemented for now, as non identifiers URL identifier - // extraction without knowing the proper collection is not trivial + // No corresponding collection found in the registry, return the original URL return resource; } Optional identifier = collection.extractId(resource); if (identifier.isPresent()) { + // A valid identifier was extracted, construct the identifiers.org URL String provider = entries.getProviderForCollection(collection.getName()); resource = createURI(provider, identifier); } @@ -210,9 +272,14 @@ public static String createShortURI(Object id) { /** - * @param resource - * @param identifier - * @return + * Attempts to correct a given resource URL based on its identifier. 
This method handles various cases + * specific to different databases like ChEBI, EC-Code, GO, HMDB, KEGG, Reactome, RefSeq, and Rhea. + * If the identifier is empty or if no specific conditions are met for the databases, the method returns null. + * Otherwise, it returns the corrected resource URL. + * + * @param resource The original resource URL that may need correction. + * @param identifier The identifier that may dictate the specific corrections needed. + * @return The corrected resource URL, or null if no correction was possible. */ private static String fixResource(String resource, String identifier) { if(identifier.isEmpty()){ @@ -253,9 +320,13 @@ private static String fixResource(String resource, String identifier) { /** - * @param resource - * @param identifier - * @return + * Adjusts the ChEBI identifier in the resource string by ensuring it is correctly prefixed with "CHEBI:". + * If the identifier consists solely of digits, it is prefixed with "CHEBI:" and the resource string is updated. + * This method logs the addition of the prefix. + * + * @param resource The original resource string that may contain the ChEBI identifier. + * @param identifier The ChEBI identifier that may need to be prefixed. + * @return The updated resource string with the correctly prefixed ChEBI identifier, or the original resource if no change was needed. */ private static String fixChEBI(String resource, String identifier) { if (Pattern.compile("\\d+").matcher(identifier).matches()) { @@ -278,9 +349,13 @@ public static String replace(String resource, String pattern, String replacement /** - * @param resource - * @param identifier - * @return + * Adjusts an EC (Enzyme Commission) code in the resource string by ensuring it has the correct number of segments. + * EC codes should have four parts separated by three dots (e.g., 1.2.3.4). If the provided identifier has fewer than + * three dots, this method appends the necessary number of ".-" to make up the difference. 
+ * + * @param resource The original resource string that may contain the EC code. + * @param identifier The EC code that may be incomplete. + * @return The updated resource string with the corrected EC code, or null if the identifier has no dots. */ private static String fixECCode(String resource, String identifier) { int missingDots = identifier.length() - identifier.replace(".", "").length(); @@ -291,11 +366,13 @@ private static String fixECCode(String resource, String identifier) { return replace(resource, identifier, identifier + ".-".repeat(Math.max(0, 3 - missingDots))); } - /** - * @param resource - * @param identifier - * @return + * Ensures that the given identifier for a Gene Ontology (GO) term is correctly prefixed with "GO:" if it is not already. + * If the identifier lacks the prefix, it is added, and the resource string is updated to include this prefixed identifier. + * + * @param resource The original resource string that may contain the unprefixed identifier. + * @param identifier The GO identifier that may or may not start with "GO:". + * @return The updated resource string with the identifier correctly prefixed, or the original resource if no change was needed. */ private static String fixGO(String resource, String identifier) { if (!identifier.toLowerCase().startsWith("go:")) { @@ -307,12 +384,19 @@ private static String fixGO(String resource, String identifier) { /** - * @param resource - * @param identifier - * @return + * Adjusts the KEGG collection type in the resource string based on the identifier provided. + * This method first normalizes the identifier by capitalizing the first letter. It then checks + * the starting character of the identifier to determine the specific KEGG collection type. + * If the identifier starts with 'D', it is associated with "kegg.drug", and if it starts with 'G', + * it is associated with "kegg.glycan". The resource string is updated to reflect the correct + * collection type. 
+ * + * @param resource The original resource string that may contain an incorrect KEGG collection type. + * @param identifier The identifier that determines the correct KEGG collection type. + * @return The updated resource string with the correct KEGG collection type. */ private static String fixKEGGCollection(String resource, String identifier) { - // identifiers with case mismatch exist, correct them + // Correct case mismatch in identifiers char first = identifier.charAt(0); identifier = Character.toUpperCase(first) + identifier.substring(1); if (identifier.startsWith("D")) { @@ -327,9 +411,17 @@ private static String fixKEGGCollection(String resource, String identifier) { /** - * @param resource - * @param identifier - * @return + * Adjusts and validates a Reactome identifier, then constructs a URI if the identifier is valid. + * + * This method processes the given identifier to ensure it conforms to expected Reactome formatting. + * If the identifier starts with "R-ALL-REACT_", it strips this prefix and uses the remaining part. + * If the identifier starts with a digit after any initial processing, it prepends "R-ALL-" to it. + * Finally, it checks if the processed identifier matches the Reactome pattern. If it does, it creates + * and returns a URI using the 'reactome' namespace; otherwise, it returns null. + * + * @param resource The original resource string (not directly used in the current implementation). + * @param identifier The identifier that needs to be processed and validated. + * @return A string representing the URI if the identifier is valid, otherwise null. 
*/ private static String fixReactome(String resource, String identifier) { if (identifier.startsWith("R-ALL-REACT_")) { @@ -347,10 +439,12 @@ private static String fixReactome(String resource, String identifier) { /** - * Run all object annotations to possibly fix IDs and/or obtain additional identifiers.org URL + * Processes the annotations of an SBML entity to potentially correct identifiers and/or retrieve additional identifiers.org URLs. + * This method iterates over all Controlled Vocabulary (CV) Terms in the provided Annotation object. For each resource URL in a CV Term, + * it checks and possibly corrects the URL or adds new URLs from identifiers.org. It then updates the CV Term with the corrected and/or + * additional URLs. * - * @param annotation - * {@link Annotation} present on an SBML entity to process + * @param annotation The {@link Annotation} object associated with an SBML entity that contains CV Terms to be processed. */ public static void processResources(Annotation annotation) { for (CVTerm term : annotation.getListOfCVTerms()) { @@ -358,39 +452,42 @@ public static void processResources(Annotation annotation) { for (String resource : term.getResources()) { Optional checkedResource = Registry.checkResourceUrl(resource); if (checkedResource.isEmpty()) { - // could not verify, keep annotation for now + // The resource URL could not be verified, so it is retained as is. resources.add(resource); } else { String newResource = checkedResource.get(); if (newResource.equals(resource)) { - // no changes + // The resource URL is correct and requires no changes. resources.add(resource); } else if (newResource.contains("identifiers.org") && !resource.contains("identifiers.org")) { - // identifiers.org URL was obtained + // A new identifiers.org URL has been obtained, add both the original and new URL. resources.add(resource); resources.add(newResource); } else { - // some errors were corrected + // Corrections were made to the resource URL. 
resources.add(newResource); } } } - // remove old resources + // Remove all existing resources from the CV Term. for (int i = 0; i < term.getResourceCount(); i++) { term.removeResource(i); } - // add fixed/additional resources + // Add the updated set of resources, sorted alphabetically, back to the CV Term. term.addResources(resources.stream().sorted().toArray(String[]::new)); } } /** - * Extracts provider and id from identifiers.org url + * Extracts the provider code and identifier from a valid identifiers.org URL. + * This method parses the URL to separate the namespace prefix and the specific identifier. + * It uses a regular expression to capture these parts from the URL. * - * @param url - * vallid identifiers.org URL - * @return provider code and id + * @param url A valid identifiers.org URL from which the provider code and identifier are to be extracted. + * @return A list containing the provider code and identifier, if the URL is valid and matches the expected format. + * The list will contain the provider code as the first element and the identifier as the second element. + * If the URL does not match the expected format, the returned list will be empty. */ public static List getPartsFromIdentifiersURI(String url) { List parts = new ArrayList<>(); diff --git a/src/main/java/edu/ucsd/sbrg/miriam/RegistryParser.java b/src/main/java/edu/ucsd/sbrg/miriam/RegistryParser.java index 4c3033e0..1c2a7a40 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/RegistryParser.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/RegistryParser.java @@ -12,18 +12,40 @@ import edu.ucsd.sbrg.miriam.models.Namespace; import edu.ucsd.sbrg.miriam.models.Root; +/** + * The {@code RegistryParser} class is a singleton that provides functionality to parse the MIRIAM registry + * from a JSON file and convert it into a {@code Miriam} object. This class ensures that only one instance + * of the parser is created and used throughout the application. 
+ */ public class RegistryParser { private static final Logger logger = Logger.getLogger(RegistryParser.class.getName()); + + /** + * Singleton instance of {@code RegistryParser}. + */ private static RegistryParser parser; + + /** + * InputStream to read the MIRIAM registry JSON file. + */ private static InputStream registry; + /** + * Private constructor to prevent instantiation from outside this class. + * Initializes the InputStream for the MIRIAM registry JSON file. + */ private RegistryParser() { super(); registry = RegistryParser.class.getResourceAsStream("IdentifiersOrg-Registry.json"); } - + /** + * Provides the singleton instance of {@code RegistryParser}. + * If the instance is not already created, it initializes a new one. + * + * @return The singleton instance of {@code RegistryParser}. + */ public static RegistryParser getInstance() { if (parser == null) { parser = new RegistryParser(); @@ -31,7 +53,13 @@ public static RegistryParser getInstance() { return parser; } - + /** + * Parses the MIRIAM registry JSON file into a {@code Miriam} object. + * It reads the JSON structure, extracts namespaces, and maps them by their prefixes. + * + * @return A {@code Miriam} object initialized with the parsed namespaces. + * @throws IOException If there is an error reading the JSON file. + */ Miriam parse() throws IOException { logger.fine("Parsing MIRIAM registry"); ObjectMapper mapper = new ObjectMapper(); diff --git a/src/main/java/edu/ucsd/sbrg/miriam/RegistryProvider.java b/src/main/java/edu/ucsd/sbrg/miriam/RegistryProvider.java index 1a1df840..cb370045 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/RegistryProvider.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/RegistryProvider.java @@ -10,7 +10,9 @@ public class RegistryProvider { private static Miriam miriam; /** - * + * Private constructor for the singleton RegistryProvider class. + * It initializes the Miriam instance by parsing data using the RegistryParser. 
+ * If an IOException occurs during parsing, it prints the stack trace. */ private RegistryProvider() { super(); @@ -21,9 +23,11 @@ private RegistryProvider() { } } - /** - * @return + * Provides access to the singleton instance of RegistryProvider. + * If the instance does not exist, it creates a new one. + * + * @return The singleton instance of RegistryProvider. */ public static RegistryProvider getInstance() { if (provider == null) { @@ -32,18 +36,19 @@ public static RegistryProvider getInstance() { return provider; } - /** - * + * Resets the singleton instance of RegistryProvider and its associated Miriam instance to null. + * This method can be used to release resources or reinitialize the instances. */ public static void close() { provider = null; miriam = null; } - /** - * @return + * Retrieves the current Miriam instance. + * + * @return The current Miriam instance. */ public Miriam getMiriam() { return miriam; diff --git a/src/main/java/edu/ucsd/sbrg/miriam/models/Miriam.java b/src/main/java/edu/ucsd/sbrg/miriam/models/Miriam.java index 59ebbebf..ef95769f 100644 --- a/src/main/java/edu/ucsd/sbrg/miriam/models/Miriam.java +++ b/src/main/java/edu/ucsd/sbrg/miriam/models/Miriam.java @@ -2,29 +2,56 @@ import java.util.Map; +/** + * The {@code Miriam} class is a singleton that manages a collection of namespaces. + * It provides a centralized access point to namespaces mapped by their prefixes. + * This class ensures that only one instance of itself is created and used throughout the application. + */ public class Miriam { + /** + * The singleton instance of {@code Miriam}. + */ private static final Miriam miriam = new Miriam(); + + /** + * A map of namespace prefixes to their corresponding {@code Namespace} objects. + */ private static Map namespaces; private Miriam() { super(); } - + /** + * Initializes the singleton instance with a map of namespaces. + * + * @param namespaces A map of namespace prefixes to their corresponding {@code Namespace} objects. 
+ * @return The singleton instance of {@code Miriam}. + */ public static Miriam initFrom(Map namespaces) { Miriam.namespaces = namespaces; return miriam; } - + /** + * Provides the singleton instance of {@code Miriam}. + * + * @return The singleton instance of {@code Miriam}. + * @throws IllegalStateException if the singleton instance has not been initialized. + */ public Miriam getInstance() { if (miriam == null) { - throw new IllegalStateException(); + throw new IllegalStateException("Instance not initialized."); } return miriam; } + /** + * Retrieves the map of namespaces. + * + * @return A map of namespace prefixes to their corresponding {@code Namespace} objects. + */ public Map getNamespaces(){ return namespaces; } diff --git a/src/main/java/edu/ucsd/sbrg/util/CombineArchive.java b/src/main/java/edu/ucsd/sbrg/util/CombineArchive.java index 950feac9..59c42634 100644 --- a/src/main/java/edu/ucsd/sbrg/util/CombineArchive.java +++ b/src/main/java/edu/ucsd/sbrg/util/CombineArchive.java @@ -29,6 +29,23 @@ import static java.text.MessageFormat.format; +/** + * The {@code CombineArchive} class provides functionality to create a COMBINE archive from an SBML document. + * It supports writing an RDF glossary derived from the SBML document and packaging both the SBML file and its + * RDF glossary into a single COMBINE archive file. This class handles the creation, formatting, and management + * of files necessary for the archive, ensuring they adhere to the COMBINE specification. + * + *

<p>Key functionalities include:</p>
+ * <ul>
+ *   <li>Generating an RDF glossary from the SBML document annotations.</li>
+ *   <li>Writing the RDF glossary to a file with proper formatting using JTidy.</li>
+ *   <li>Creating a COMBINE archive that includes the SBML file and the RDF glossary.</li>
+ *   <li>Handling file operations such as checking for existing files, deleting, and writing new files.</li>
+ * </ul>
+ *
+ * <p>This class is essential for users looking to export SBML models and their annotations in a standardized
+ * archive format that can be easily shared and processed by various bioinformatics tools.</p>

    + */ public class CombineArchive { /** @@ -61,24 +78,35 @@ public void write() throws IOException, XMLStreamException { /** - * @throws XMLStreamException: - * propagated from {@link #getGlossary(SBMLDocument)} and #TidySBMLWriter.write - * @throws IOException: - * propagated from {@link #writeTidyRDF(File, String)} + * Writes the RDF glossary to a file. The glossary is generated from the SBML document associated with this instance. + * The RDF file is named based on the output file's name with "_glossary.rdf" appended. + * + * @throws XMLStreamException if there is an error in generating the glossary from the SBML document. + * @throws IOException if there is an error writing the glossary to the file system. */ private void writeGlossary() throws XMLStreamException, IOException { + // Generate the glossary from the SBML document String glossary = getGlossary(doc); + // Determine the location for the RDF file String glossaryLocation = output.getAbsolutePath().substring(0, output.getAbsolutePath().lastIndexOf('.')) + "_glossary.rdf"; + // Log the location where the RDF file will be written logger.info(format(MESSAGES.getString("WRITE_RDF_FILE_INFO"), glossaryLocation)); + // Write the glossary to the specified RDF file writeTidyRDF(new File(glossaryLocation), glossary); } /** - * @param doc: - * SBMLDocument to produce glossary for - * @return Glossary as XMLString or empty string, if either model is null or has no children + * Generates an RDF glossary for the given SBMLDocument. This method parses the document's model + * and its components (species, reactions, compartments, and gene products) to construct an RDF + * representation of annotations. If the model or any essential components are not annotated, + * it returns an empty string. + * + * @param doc The SBMLDocument for which the glossary is to be generated. + * @return A string containing the RDF glossary in XML format, or an empty string if the model + * or required annotations are missing. 
+ * @throws XMLStreamException If there is an error during the XML processing. */ private String getGlossary(SBMLDocument doc) throws XMLStreamException { SBMLRDFAnnotationParser rdfParser = new SBMLRDFAnnotationParser(); @@ -120,31 +148,38 @@ private String getGlossary(SBMLDocument doc) throws XMLStreamException { /** + * Creates a COMBINE archive containing the SBML model and its associated RDF glossary. + * The method first checks if an existing archive file is present and deletes it if found. + * It then creates a new archive, adds the SBML model and RDF glossary as entries, and finally packs and closes the archive. + * After packing, it cleans up the original files used in the archive. */ private void writeCombineArchive() { try { + // Determine the base file path without extension String baseLocation = output.getAbsolutePath().substring(0, output.getAbsolutePath().lastIndexOf('.')); + // Specify the locations for the glossary RDF and the COMBINE archive String glossaryLocation = baseLocation + "_glossary.rdf"; String combineArcLocation = baseLocation + ".zip"; - // check if archive file exists and delete + + // Check if the COMBINE archive file already exists and attempt to delete it File caFile = new File(combineArcLocation); - if (caFile.exists()) { - if (!caFile.delete()) { - logger.severe(format("Failed to delete archive file \"{0}\"", caFile.getPath())); - } + if (caFile.exists() && !caFile.delete()) { + logger.severe(format("Failed to delete existing archive file \"{0}\"", caFile.getPath())); } - // build and pack archive + + // Create a new COMBINE archive and add entries for the model XML and glossary RDF de.unirostock.sems.cbarchive.CombineArchive ca = new de.unirostock.sems.cbarchive.CombineArchive(caFile); File outputXML = new File(output.getAbsolutePath()); File outputRDF = new File(glossaryLocation); ca.addEntry(outputXML, "model.xml", new URI("http://identifiers.org/combine.specifications/sbml"), true); - ca.addEntry(outputRDF, "glossary.rdf", - 
// generated from https://sems.uni-rostock.de/trac/combine-ext/wiki/CombineFormatizer - new URI("http://purl.org/NET/mediatypes/application/rdf+xml"), true); + ca.addEntry(outputRDF, "glossary.rdf", new URI("http://purl.org/NET/mediatypes/application/rdf+xml"), true); logger.info(format(MESSAGES.getString("WRITE_RDF_FILE_INFO"), combineArcLocation)); + + // Pack and close the archive ca.pack(); ca.close(); - // clean up original of packed files + + // Delete the original files that were packed into the archive boolean rdfDeleted = outputRDF.delete(); boolean outputXMLDeleted = outputXML.delete(); logger.info(format(MESSAGES.getString("DELETE_FILE"), outputXML.getParent(), outputXMLDeleted)); @@ -157,8 +192,12 @@ private void writeCombineArchive() { /** - * @param outputFile, - * rdfString + * Writes a formatted RDF string to a file using the JTidy library to ensure the RDF content is well-formed XML. + * This method configures the Tidy parser for XML output and sets various formatting options to enhance readability. + * + * @param outputFile The file to which the formatted RDF content will be written. + * @param rdfString The RDF content in string format that needs to be formatted and written to the file. + * @throws FileNotFoundException if the outputFile cannot be opened for writing. */ private void writeTidyRDF(File outputFile, String rdfString) throws FileNotFoundException { Tidy tidy = new Tidy(); // obtain a new Tidy instance diff --git a/src/main/java/edu/ucsd/sbrg/util/GPRParser.java b/src/main/java/edu/ucsd/sbrg/util/GPRParser.java index a8fc7398..938a1949 100644 --- a/src/main/java/edu/ucsd/sbrg/util/GPRParser.java +++ b/src/main/java/edu/ucsd/sbrg/util/GPRParser.java @@ -33,6 +33,20 @@ import de.zbit.util.Utils; import edu.ucsd.sbrg.bigg.BiGGId; +/** + * The {@code GPRParser} class provides methods to parse gene product associations (GPRs) from gene reaction rules + * and integrate them into SBML models using JSBML. 
It supports converting textual gene reaction rules into structured + * {@link Association} objects, handling logical operators, and merging associations into existing models. + * It also includes utilities for converting associations to the FBC v2 format and managing gene product references. + * + *

<p>This class is designed to be used in scenarios where gene reaction rules need to be parsed from various formats
+ * and integrated into computational models in a structured and standardized form. It provides comprehensive support
+ * for handling complex logical structures in gene product associations, such as nested AND/OR conditions.</p>
+ *
+ * <p>Utility methods in this class are static, allowing direct invocation without needing an instance of {@code GPRParser}.
+ * This class heavily relies on the JSBML library to manipulate elements of SBML files, particularly those related to
+ * the FBC (Flux Balance Constraints) package.</p>

    + */ public class GPRParser { /** @@ -59,8 +73,14 @@ public static void clearAssociationMap() { /** - * @param r - * @param geneReactionRule + * Parses the gene product association (GPR) from a gene reaction rule string and associates it with a given reaction. + * This method first converts the gene reaction rule string into an Association object using a formula parser. + * If the conversion is successful and the Association object is not null, it further processes the association + * by parsing it into the reaction's gene product association. + * + * @param r The reaction to which the gene product association will be linked. + * @param geneReactionRule The gene reaction rule string representing the association of gene products. + * @param omitGenericTerms Flag indicating whether to omit generic terms (e.g., SBO terms) in the association. */ public static void parseGPR(Reaction r, String geneReactionRule, boolean omitGenericTerms) { if ((geneReactionRule != null) && (geneReactionRule.length() > 0)) { @@ -80,10 +100,16 @@ public static void parseGPR(Reaction r, String geneReactionRule, boolean omitGen /** - * @param ast - * @param reactionId - * @param model - * @return + * Converts an ASTNode representing a gene product association into an Association object. + * This method handles the logical structure of the gene product association, creating appropriate + * logical operators (AND, OR) based on the ASTNode type. It also manages the inclusion of SBO terms + * if they are not omitted. + * + * @param ast The ASTNode to be converted, representing the logical structure of the gene product association. + * @param reactionId The ID of the reaction associated with this gene product association. + * @param model The SBML model to which the reaction belongs, used to determine the level and version for new elements. + * @param omitGenericTerms A boolean flag indicating whether to omit SBO terms in the resulting Association. 
+ * @return An Association object representing the gene product association, which could be a LogicalOperator or a direct GeneProductRef. */ public static Association convertToAssociation(ASTNode ast, String reactionId, Model model, boolean omitGenericTerms) { @@ -104,7 +130,7 @@ public static Association convertToAssociation(ASTNode ast, String reactionId, M for (ASTNode child : ast.getListOfNodes()) { Association tmp = convertToAssociation(child, reactionId, model, omitGenericTerms); if (tmp.getClass().equals(operator.getClass())) { - // flatten binary trees to compact representation + // Flatten binary trees to compact representation LogicalOperator lo = (LogicalOperator) tmp; for (int i = lo.getAssociationCount() - 1; i >= 0; i--) { operator.addAssociation(lo.removeAssociation(i)); @@ -120,36 +146,48 @@ public static Association convertToAssociation(ASTNode ast, String reactionId, M /** - * @param identifier - * @param reactionId - * @param model - * @return + * Creates a GeneProductRef instance for a given identifier within a specific reaction context in the model. + * This method first checks if the identifier exists in the model, either with or without a "G_" prefix. + * If the identifier does not exist, it attempts to create a new GeneProduct in the model. + * If the identifier exists, it updates the existing GeneProduct's ID. + * + * @param identifier The identifier for the gene product, which may or may not start with "G_". + * @param reactionId The ID of the reaction associated with this gene product. + * @param model The SBML model containing the reaction and potentially the gene product. + * @return A GeneProductRef object linked to the gene product identified or created. */ public static GeneProductRef createGPR(String identifier, String reactionId, Model model) { - // TODO: check if this could return an empty gpr in real cases + // Determine the SBML document level and version for creating new elements. 
int level = model.getLevel(), version = model.getVersion(); GeneProductRef gpr = new GeneProductRef(level, version); - // check if this id exists in the model + + // Normalize the identifier to include "G_" prefix if missing. String oldId = identifier.startsWith("G_") ? identifier : "G_" + identifier; boolean containsOldId = !model.containsUniqueNamedSBase(oldId); + + // Attempt to create or find the GeneProduct using a standardized identifier. BiGGId.createGeneId(identifier).map(BiGGId::toBiGGId).ifPresent(id -> { if (!model.containsUniqueNamedSBase(id)) { GeneProduct gp; + // Check if the old ID exists, if so, retrieve the GeneProduct, otherwise use the new ID. if (containsOldId) { gp = (GeneProduct) model.findUniqueNamedSBase(oldId); } else { gp = (GeneProduct) model.findUniqueNamedSBase(id); } + // If the GeneProduct does not exist, create a new one and log a warning. if (gp == null) { logger.warning(format(MESSAGES.getString("CREATE_MISSING_GPR"), id, reactionId)); FBCModelPlugin fbcPlug = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); gp = fbcPlug.createGeneProduct(id); gp.setLabel(id); } else { + // If the GeneProduct exists, update its ID and log the update. logger.info(format(MESSAGES.getString("UPDATE_GP_ID"), gp.getId(), id)); gp.setId(id); } } + // Set the GeneProduct reference in the GeneProductRef. gpr.setGeneProduct(id); }); return gpr; @@ -157,9 +195,13 @@ public static GeneProductRef createGPR(String identifier, String reactionId, Mod /** - * @param r - * @param association - * @param omitGenericTerms + * Parses the Gene Product Representation (GPR) for a given reaction and updates the reaction's gene product association. + * If the reaction does not have an existing gene product association, a new one is created and set. + * If an association already exists and it is not equivalent to the provided association, the associations are merged. + * + * @param r The reaction for which the GPR is being parsed. 
+ * @param association The association to be parsed and potentially merged into the reaction's gene product association. + * @param omitGenericTerms A boolean flag indicating whether generic terms should be omitted during the merging process. */ private static void parseGPR(Reaction r, Association association, boolean omitGenericTerms) { FBCReactionPlugin plugin = (FBCReactionPlugin) r.getPlugin(FBCConstants.shortLabel); @@ -207,12 +249,19 @@ private static boolean areEqual(Association gpa1, Association gpa2) { /** - * @return + * Converts an Association object into a human-readable string representation. + * This method handles different types of associations including GeneProductRef, And, and Or. + * For And and Or associations, it recursively calls itself to handle nested associations. + * + * @param association The Association object to be converted into a string. + * @return A string representation of the Association object. */ public static String stringify(Association association) { if (association instanceof GeneProductRef) { + // Directly return the gene product identifier for GeneProductRef instances. return ((GeneProductRef) association).getGeneProduct(); } else if (association instanceof And) { + // Handle the 'And' type association by iterating over its children. List children = ((And) association).getListOfAssociations(); int numChildren = ((And) association).getAssociationCount(); StringBuilder sb = new StringBuilder(); @@ -222,6 +271,7 @@ public static String stringify(Association association) { } return sb.toString(); } else { + // Handle the 'Or' type association by iterating over its children. 
List children = ((Or) association).getListOfAssociations(); int numChildren = ((Or) association).getAssociationCount(); StringBuilder sb = new StringBuilder(); @@ -235,10 +285,14 @@ public static String stringify(Association association) { /** - * @param r - * @param association - * @param plugin - * @param omitGenericTerms + * Merges a new association into an existing gene product association for a reaction. + * This method handles the merging of associations by considering different types of associations (AND, OR, GeneProductRef). + * It ensures that duplicate gene products are not added and maintains the logical structure of the association. + * + * @param r The reaction for which the gene product association is being merged. + * @param association The new association to merge into the existing gene product association. + * @param plugin The FBCReactionPlugin instance associated with the reaction. + * @param omitGenericTerms Flag indicating whether to omit generic terms in the association. */ private static void mergeAssociation(Reaction r, Association association, FBCReactionPlugin plugin, boolean omitGenericTerms) { @@ -295,7 +349,12 @@ private static void mergeAssociation(Reaction r, Association association, FBCRea /** - * @param reaction + * Converts gene product associations from a given reaction to the FBC v2 format. + * This method processes the non-RDF annotations of the reaction's model to update or create + * gene product associations according to the FBC v2 specification. + * + * @param reaction The reaction whose gene product associations are to be converted. + * @param omitGenericTerms A boolean flag indicating whether to omit generic terms (SBO terms) in the association. 
*/ public static void convertAssociationsToFBCV2(Reaction reaction, boolean omitGenericTerms) { Model model = reaction.getModel(); @@ -330,8 +389,13 @@ public static void convertAssociationsToFBCV2(Reaction reaction, boolean omitGen /** - * @param association - * @return + * Processes an XMLNode representing a gene product association and converts it into a list of Association objects. + * This method handles the logical operators "and" and "or", as well as gene product references. + * + * @param association The XMLNode representing the gene product association. + * @param model The SBML model to which the association belongs. + * @param omitGenericTerms A boolean flag indicating whether to omit generic terms (SBO terms) in the association. + * @return A list of Association objects representing the processed gene product association. */ private static List processAssociation(XMLNode association, Model model, boolean omitGenericTerms) { int level = model.getLevel(), version = model.getVersion(); diff --git a/src/main/java/edu/ucsd/sbrg/util/SBMLUtils.java b/src/main/java/edu/ucsd/sbrg/util/SBMLUtils.java index ae3cdb70..4e025de2 100644 --- a/src/main/java/edu/ucsd/sbrg/util/SBMLUtils.java +++ b/src/main/java/edu/ucsd/sbrg/util/SBMLUtils.java @@ -40,14 +40,16 @@ public class SBMLUtils { */ public static final String SUBSYSTEM_LINK = "SUBSYSTEM_LINK"; /** - * HashMap holding all gene product references in a model for updating + * A static map that holds references to all gene products within a model, facilitating quick updates. */ private static Map geneProductReferences = new HashMap<>(); /** - * Apply updated GeneID to geneProductReferenece + * Updates the reference of a gene product in the geneProductReferences map using the gene product's ID. + * If the gene product ID starts with "G_", it strips this prefix before updating. + * If the map is initially empty, it initializes the map with the current model's reactions. 
* - * @param gp + * @param gp The GeneProduct whose reference needs to be updated. */ public static void updateGeneProductReference(GeneProduct gp) { if (geneProductReferences.isEmpty()) { @@ -63,9 +65,11 @@ public static void updateGeneProductReference(GeneProduct gp) { } } - /** - * @param reactions + * Initializes the geneProductReferences map by traversing through all reactions and their associated gene products. + * It handles both direct gene product references and nested logical operators that might contain gene product references. + * + * @param reactions The list of reactions to scan for gene product associations. */ private static void initGPRMap(ListOf reactions) { for (Reaction r : reactions) { @@ -84,9 +88,8 @@ private static void initGPRMap(ListOf reactions) { } } - /** - * + * Clears the geneProductReferences map, removing all entries. */ public static void clearGPRMap() { geneProductReferences = new HashMap<>(); @@ -94,7 +97,12 @@ public static void clearGPRMap() { /** - * @param association + * Recursively processes nested associations to map gene products to their references. + * This method traverses through each child of the given association. If the child is a + * LogicalOperator, it recursively processes it. If the child is a GeneProductRef, it + * adds it to the geneProductReferences map. + * + * @param association The association to process, which can contain nested associations. 
*/ private static void processNested(Association association) { for (int idx = 0; idx < association.getChildCount(); idx++) { @@ -102,18 +110,21 @@ private static void processNested(Association association) { if (child instanceof LogicalOperator) { processNested((Association) child); } else { - // has to GeneProductReference + // This child is assumed to be a GeneProductRef GeneProductRef gpr = (GeneProductRef) child; geneProductReferences.put(gpr.getGeneProduct(), gpr); } } } - /** - * @param oldId - * @param newId - * @param fbcModelPlug + * Updates the reaction reference ID from an old ID to a new ID within the FluxObjectives of a given FBCModelPlugin. + * This method iterates through all objectives and their associated flux objectives in the model plugin, + * replacing the old reaction ID with the new one wherever found. + * + * @param oldId The old reaction ID to be replaced. + * @param newId The new reaction ID to replace the old one. + * @param fbcModelPlug The FBCModelPlugin containing the objectives and flux objectives to be updated. */ public static void updateReactionRef(String oldId, String newId, FBCModelPlugin fbcModelPlug) { if ((fbcModelPlug != null) && fbcModelPlug.isSetListOfObjectives()) { @@ -132,18 +143,22 @@ public static void updateReactionRef(String oldId, String newId, FBCModelPlugin /** - * Add a direct link from the reaction to the member pointing to that - * reaction. + * Establishes a link between a reaction and a subsystem member by setting the member's reference to the reaction. + * Additionally, it ensures that the reaction maintains a set of all members linked to it. If the set does not exist, + * it is created and the member is added to it. * - * @param r - * @param member + * @param r The reaction object to which the member should be linked. + * @param member The subsystem member that should be linked to the reaction. 
*/ @SuppressWarnings("unchecked") public static void createSubsystemLink(Reaction r, Member member) { + // Set the member's reference ID to the reaction. member.setIdRef(r); + // Check if the reaction has an existing set of members; if not, create one. if (r.getUserObject(SUBSYSTEM_LINK) == null) { r.putUserObject(SUBSYSTEM_LINK, new HashSet()); } + // Add the member to the reaction's set of linked members. ((Set) r.getUserObject(SUBSYSTEM_LINK)).add(member); } } diff --git a/src/main/java/edu/ucsd/sbrg/util/UpdateListener.java b/src/main/java/edu/ucsd/sbrg/util/UpdateListener.java index 0441c1fc..4600be1e 100644 --- a/src/main/java/edu/ucsd/sbrg/util/UpdateListener.java +++ b/src/main/java/edu/ucsd/sbrg/util/UpdateListener.java @@ -24,9 +24,17 @@ import java.util.Set; import java.util.logging.Logger; + /** - * This class keeps track of changes to the model and tries to keep cross - * references etc. consistent. + * The {@code UpdateListener} class implements the {@link TreeNodeChangeListener} to monitor and respond to changes + * within an SBML model's structure. This class specifically handles updates to + * identifiers (IDs) of model elements like reactions and gene products, ensuring that all references remain consistent + * across the model. It also manages the addition of new nodes to the model, particularly focusing on gene product + * references, and maintains a mapping from gene identifiers to their associated gene product references. + * + * The {@link TreeNodeChangeListener} base class provides the interface for receiving notifications when changes occur + * to any node within a tree structure, which in the context of SBML, corresponds to elements within the model's + * hierarchical structure. 
* * @author Andreas Dräger */ @@ -41,46 +49,55 @@ public class UpdateListener implements TreeNodeChangeListener { */ private static final transient ResourceBundle MESSAGES = ResourceManager.getBundle("edu.ucsd.sbrg.polisher.Messages"); /** - * Stores links from geneIds to {@link Association} objects where these are - * used. + * A map that maintains associations between gene identifiers and sets of {@link GeneProductRef} objects. + * This map is used to track which gene products are associated with specific gene identifiers throughout the model. */ private final Map> geneIdToAssociation; /** - * + * Constructs an {@code UpdateListener} instance and initializes the {@code geneIdToAssociation} map. */ public UpdateListener() { geneIdToAssociation = new HashMap<>(); } - /* - * (non-Javadoc) - * @see java.beans.PropertyChangeListener#propertyChange(java.beans. - * PropertyChangeEvent) + /** + * Responds to property change events, specifically focusing on changes to the ID property of tree nodes. + * This method handles the update of IDs within the model, ensuring that all references to the old ID + * are updated to the new ID across various components such as reactions and gene products. + * + * @param evt The property change event that contains information about the old and new values of the property. */ @SuppressWarnings("unchecked") @Override public void propertyChange(PropertyChangeEvent evt) { + // Check if the property change is related to the ID of a node. if (evt.getPropertyName().equals(TreeNodeChangeEvent.id)) { String oldId = (String) evt.getOldValue(); + // Proceed only if the node already had an ID (the old value is non-null). if (oldId != null) { // There is only a need to do some further change if the id is updated // to a new id. String newId = (String) evt.getNewValue(); NamedSBase nsb = (NamedSBase) evt.getSource(); + // Handle the ID change for reactions. 
if (nsb instanceof Reaction) { Reaction r = (Reaction) nsb; Model model = r.getModel(); FBCModelPlugin fbcModelPlug = (FBCModelPlugin) model.getPlugin(FBCConstants.shortLabel); + // Update reaction references in the FBC model plugin. SBMLUtils.updateReactionRef(oldId, newId, fbcModelPlug); + // Update subsystem references if any. Set subsystems = (Set) r.getUserObject(SBMLUtils.SUBSYSTEM_LINK); if (subsystems != null) { for (Member m : subsystems) { m.setIdRef(newId); } } - } else if (nsb instanceof GeneProduct) { + } + // Handle the ID change for gene products. + else if (nsb instanceof GeneProduct) { Set geneRefs = geneIdToAssociation.remove(oldId); if (geneRefs != null) { for (GeneProductRef ref : geneRefs) { @@ -88,7 +105,9 @@ public void propertyChange(PropertyChangeEvent evt) { } geneIdToAssociation.put(newId, geneRefs); } - } else { + } + // Log a severe message if the ID change cannot be handled. + else { logger.severe( MessageFormat.format(MESSAGES.getString("ID_CHANGE_WARNING"), nsb.getElementName(), oldId, newId)); } @@ -97,10 +116,13 @@ public void propertyChange(PropertyChangeEvent evt) { } - /* - * (non-Javadoc) - * @see org.sbml.jsbml.util.TreeNodeChangeListener#nodeAdded(javax.swing.tree. - * TreeNode) + /** + * Handles the event when a new node is added to the TreeNode structure. + * Specifically, when a GeneProductRef node is added, this method updates the + * geneIdToAssociation map to include this new association. It ensures that each + * gene product ID is mapped to a set of its associated GeneProductRefs. + * + * @param node The TreeNode that has been added. Only GeneProductRef instances are registered in the map. */ @Override public void nodeAdded(TreeNode node) { @@ -108,17 +130,20 @@ public void nodeAdded(TreeNode node) { // being added. if (node instanceof GeneProductRef) { GeneProductRef gpr = (GeneProductRef) node; + // Retrieve or create a set of GeneProductRefs associated with the gene product ID. 
Set geneRefs = geneIdToAssociation.get(gpr.getGeneProduct()); if (geneRefs == null) { - geneRefs = new HashSet(); + geneRefs = new HashSet<>(); geneIdToAssociation.put(gpr.getGeneProduct(), geneRefs); } geneRefs.add(gpr); + // The following commented code would link the gene product instance directly to its associations. // GeneProduct gene = gpr.getGeneProductInstance(); // if (gene != null) { - // gene.putUserObject("ASSOCIATION_LINK", geneRefs); + // gene.putUserObject("ASSOCIATION_LINK", geneRefs); // } } + // Log the addition of the node. logger.fine(node.toString()); } diff --git a/src/test/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishingTest.java b/src/test/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishingTest.java index e0a27553..2583fd1e 100644 --- a/src/test/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishingTest.java +++ b/src/test/java/edu/ucsd/sbrg/bigg/polishing/ModelPolishingTest.java @@ -119,6 +119,11 @@ public void biomassIsUsedToInferFluxObjectives() { .collect(Collectors.toSet())); } + /** + * Test to ensure that objectives without any flux objectives are removed from the model. + * This test initializes a model with a single objective that has no flux objectives set, + * runs the polishing process, and then checks that the objective count is zero. + */ @Test public void emptyObjectivesAreRemoved() { var m = new Model(3,2);