From f42b784e73c589378998fb24829722a610df0d2b Mon Sep 17 00:00:00 2001 From: Anushya Muruganujan Date: Mon, 4 Mar 2024 15:30:42 -0800 Subject: [PATCH 1/4] For #2246 --- ontobio/io/assocparser.py | 1 + ontobio/io/gafparser.py | 7 +++++ ontobio/model/association.py | 54 +++++++++++++++++++----------------- tests/test_assoc_writer.py | 2 +- tests/test_gafparser.py | 5 ++-- 5 files changed, 40 insertions(+), 29 deletions(-) diff --git a/ontobio/io/assocparser.py b/ontobio/io/assocparser.py index 28728900..ce91af76 100644 --- a/ontobio/io/assocparser.py +++ b/ontobio/io/assocparser.py @@ -347,6 +347,7 @@ class Report(object): VIOLATES_GO_RULE = "Violates GO Rule" RULE_PASS = "Passes GO Rule" INVALID_REFERENCES = "Only one reference per ID space allowed" + INVALID_SUBJECT_TYPE = "Invalid subject type" def __init__(self, group="unknown", dataset="unknown", config=None): self.messages = [] diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py index 306bfde8..451d65ed 100644 --- a/ontobio/io/gafparser.py +++ b/ontobio/io/gafparser.py @@ -385,6 +385,9 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u DB_OBJECT_SYMBOL = 2 TAXON_INDEX = 12 REFERENCE_INDEX = 5 + DEFAULT_SUBJECT_TYPE = 'gene_product' + STR_DEFAULT_SUBJECT_TYPE_CURIE = str(association.map_gp_type_label_to_curie(DEFAULT_SUBJECT_TYPE)) + if gaf_line[DB_INDEX] == "": report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1) return assocparser.ParseResult(source_line, [], True, report=report) @@ -417,7 +420,11 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u interacting_taxon = parsed_taxons_result.parsed[1] if len(parsed_taxons_result.parsed) == 2 else None subject_curie = association.Curie(gaf_line[0], gaf_line[1]) + type_label = gaf_line[11] subject = association.Subject(subject_curie, gaf_line[2], [gaf_line[9]], gaf_line[10].split("|"), [association.map_gp_type_label_to_curie(gaf_line[11])], taxon) + # Output warnig, if system is defaulting to gene_product + if DEFAULT_SUBJECT_TYPE != type_label and len(subject.type) == 1 and STR_DEFAULT_SUBJECT_TYPE_CURIE == str(subject.type[0]): + report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "defaulting to 'gene_product'", taxon=gaf_line[TAXON_INDEX], rule=1) gpi_entity = bio_entities.get(subject_curie) if gpi_entity is not None and subject != gpi_entity: subject = gpi_entity diff --git a/ontobio/model/association.py b/ontobio/model/association.py index 968dffab..53d32b85 100644 --- a/ontobio/model/association.py +++ b/ontobio/model/association.py @@ -210,21 +210,20 @@ def fullname_field(self, max=None) -> str: # =============================================================================== __default_entity_type_to_curie_mapping = bidict.bidict({ "protein_coding_gene": Curie.from_str("SO:0001217"), + "protein": Curie.from_str("PR:000000001"), + "gene_product": Curie.from_str("CHEBI:33695"), "snRNA": Curie.from_str("SO:0000274"), "ncRNA": Curie.from_str("SO:0000655"), "rRNA": Curie.from_str("SO:0000252"), "mRNA": Curie.from_str("SO:0000234"), - "lnc_RNA": Curie.from_str("SO:0001877"), "lincRNA": Curie.from_str("SO:0001463"), "tRNA": Curie.from_str("SO:0000253"), "snoRNA": Curie.from_str("SO:0000275"), "miRNA": Curie.from_str("SO:0000276"), - "RNA": Curie.from_str("SO:0000356"), "scRNA": Curie.from_str("SO:0000013"), "piRNA": Curie.from_str("SO:0001035"), "tmRNA": Curie.from_str("SO:0000584"), - "SRP_RNA": Curie.from_str("SO:0000590"), - "primary_transcript": Curie.from_str("SO:0000185"), + "SRP_RNA": Curie.from_str("SO:0000590"), "ribozyme": Curie.from_str("SO:0000374"), "telomerase_RNA": Curie.from_str("SO:0000390"), "RNase_P_RNA": Curie.from_str("SO:0000386"), @@ -232,32 +231,35 @@ def fullname_field(self, max=None) -> str: "RNase_MRP_RNA": Curie.from_str("SO:0000385"), "guide_RNA": Curie.from_str("SO:0000602"), "hammerhead_ribozyme": Curie.from_str("SO:0000380"), - "protein": Curie.from_str("PR:000000001"), "pseudogene": Curie.from_str("SO:0000336"), - "gene": Curie.from_str("SO:0000704"), - "biological region": Curie.from_str("SO:0001411"), - "protein_complex": Curie.from_str("GO:0032991"), - "transcript": Curie.from_str("SO:0000673"), - "gene_product": Curie.from_str("CHEBI:33695"), + "protein_complex": Curie.from_str("GO:0032991"), "antisense_lncRNA": Curie.from_str("SO:0001904"), - "transposable_element_gene": Curie.from_str("SO:0000111"), "gene_segment": Curie.from_str("SO:3000000"), "genetic_marker": Curie.from_str("SO:0001645"), - "lincRNA_gene": Curie.from_str("SO:0001641"), - "lncRNA_gene": Curie.from_str("SO:0002127"), - "miRNA_gene": Curie.from_str("SO:0001265"), - "ncRNA_gene": Curie.from_str("SO:0001263"), - "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"), - "RNase_P_RNA_gene": Curie.from_str("SO:0001639"), - "rRNA_gene": Curie.from_str("SO:0001637"), - "scRNA_gene": Curie.from_str("SO:0001266"), - "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"), - "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"), - "snoRNA_gene": Curie.from_str("SO:0001267"), - "snRNA_gene": Curie.from_str("SO:0001268"), - "SRP_RNA_gene": Curie.from_str("SO:0001269"), - "telomerase_RNA_gene": Curie.from_str("SO:0001643"), - "tRNA_gene": Curie.from_str("SO:0001272") + "biological region": Curie.from_str("SO:0001411"), + "transposable_element_gene": Curie.from_str("SO:0000111") + # "RNA": Curie.from_str("SO:0000356"), + # "lincRNA_gene": Curie.from_str("SO:0001641"), + # "lncRNA_gene": Curie.from_str("SO:0002127"), + # "miRNA_gene": Curie.from_str("SO:0001265"), + # "ncRNA_gene": Curie.from_str("SO:0001263"), + # "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"), + # "RNase_P_RNA_gene": Curie.from_str("SO:0001639"), + # "rRNA_gene": Curie.from_str("SO:0001637"), + # "scRNA_gene": Curie.from_str("SO:0001266"), + # "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"), + # "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"), + # "snoRNA_gene": Curie.from_str("SO:0001267"), + # "snRNA_gene": Curie.from_str("SO:0001268"), + # "SRP_RNA_gene": Curie.from_str("SO:0001269"), + # "telomerase_RNA_gene": Curie.from_str("SO:0001643"), + # "tRNA_gene": Curie.from_str("SO:0001272"), + # "lnc_RNA": Curie.from_str("SO:0001877"), + # "primary_transcript": Curie.from_str("SO:0000185"), + # "gene": Curie.from_str("SO:0000704"), + # "transcript": Curie.from_str("SO:0000673"), + + }) def map_gp_type_label_to_curie(type_label: str) -> Curie: diff --git a/tests/test_assoc_writer.py b/tests/test_assoc_writer.py index eb0bc030..80fd6dd5 100644 --- a/tests/test_assoc_writer.py +++ b/tests/test_assoc_writer.py @@ -195,7 +195,7 @@ def test_gaf_2_2_extensions(): def test_full_gaf_2_2_write(): - line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t" + line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene_product\ttaxon:6239\t20060302\tWB\t\t" parser = gafparser.GafParser() parser.version = "2.2" out = io.StringIO() diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py index 9c907408..9efad4d2 100644 --- a/tests/test_gafparser.py +++ b/tests/test_gafparser.py @@ -216,7 +216,7 @@ def test_qualifiers_gaf(): def test_gaf_2_1_unknown_qualifier(): - line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene", "taxon:83333", "20081208", "EcoliWiki"] + line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"] parsed = gafparser.to_association(line) assert parsed.skipped is True @@ -582,7 +582,8 @@ def test_gaf_gpi_bridge(): association = gafparser.to_association(gaf, qualifier_parser=assocparser.Qualifier2_2()).associations[0] bridge = gafgpibridge.GafGpiBridge() entity = bridge.convert_association(association) - assert entity.get("type") == ["gene"] + #gene gets updated to gene_product!!!! + assert entity.get("type") == ["gene_product"] if __name__ == "__main__": From 610b113777598bef50e938fcf1affe624ed38667 Mon Sep 17 00:00:00 2001 From: Anushya Muruganujan Date: Mon, 10 Jun 2024 18:47:08 -0700 Subject: [PATCH 2/4] For #2246 --- ontobio/io/gafparser.py | 3 +++ ontobio/model/association.py | 45 ++++++++++++++++++------------------ tests/test_gafparser.py | 29 +++++++++++++++++++++++ 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py index 451d65ed..44b9d9a8 100644 --- a/ontobio/io/gafparser.py +++ b/ontobio/io/gafparser.py @@ -425,6 +425,9 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u # Output warnig, if system is defaulting to gene_product if DEFAULT_SUBJECT_TYPE != type_label and len(subject.type) == 1 and STR_DEFAULT_SUBJECT_TYPE_CURIE == str(subject.type[0]): report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "defaulting to 'gene_product'", taxon=gaf_line[TAXON_INDEX], rule=1) + if association.map_gp_type_label_to_repair_curie(type_label) is not None: + report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "has been repaired", taxon=gaf_line[TAXON_INDEX], rule=1) + gpi_entity = bio_entities.get(subject_curie) if gpi_entity is not None and subject != gpi_entity: subject = gpi_entity diff --git a/ontobio/model/association.py b/ontobio/model/association.py index 53d32b85..33a9d113 100644 --- a/ontobio/model/association.py +++ b/ontobio/model/association.py @@ -212,17 +212,23 @@ def fullname_field(self, max=None) -> str: "protein_coding_gene": Curie.from_str("SO:0001217"), "protein": Curie.from_str("PR:000000001"), "gene_product": Curie.from_str("CHEBI:33695"), + "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"), "snRNA": Curie.from_str("SO:0000274"), "ncRNA": Curie.from_str("SO:0000655"), "rRNA": Curie.from_str("SO:0000252"), "mRNA": Curie.from_str("SO:0000234"), + "pre_miRNA": Curie.from_str("SO:0001244"), "lincRNA": Curie.from_str("SO:0001463"), + "lncRNA": Curie.from_str("SO:0001877"), "tRNA": Curie.from_str("SO:0000253"), "snoRNA": Curie.from_str("SO:0000275"), "miRNA": Curie.from_str("SO:0000276"), "scRNA": Curie.from_str("SO:0000013"), "piRNA": Curie.from_str("SO:0001035"), + "pre_miRNA": Curie.from_str("SO:0001244"), "tmRNA": Curie.from_str("SO:0000584"), + "scaRNA": Curie.from_str("SO:0002095"), + "siRNA": Curie.from_str("SO:0000646"), "SRP_RNA": Curie.from_str("SO:0000590"), "ribozyme": Curie.from_str("SO:0000374"), "telomerase_RNA": Curie.from_str("SO:0000390"), @@ -230,36 +236,23 @@ def fullname_field(self, max=None) -> str: "antisense_RNA": Curie.from_str("SO:0000644"), "RNase_MRP_RNA": Curie.from_str("SO:0000385"), "guide_RNA": Curie.from_str("SO:0000602"), + "vault_RNA": Curie.from_str("SO:0000404"), + "Y_RNA": Curie.from_str("SO:0000405"), "hammerhead_ribozyme": Curie.from_str("SO:0000380"), "pseudogene": Curie.from_str("SO:0000336"), + "pseudogenic_transcript": Curie.from_str("SO:0000516"), "protein_complex": Curie.from_str("GO:0032991"), "antisense_lncRNA": Curie.from_str("SO:0001904"), + "antisense_lncRNA_gene": Curie.from_str("SO:0002182"), "gene_segment": Curie.from_str("SO:3000000"), "genetic_marker": Curie.from_str("SO:0001645"), "biological region": Curie.from_str("SO:0001411"), "transposable_element_gene": Curie.from_str("SO:0000111") - # "RNA": Curie.from_str("SO:0000356"), - # "lincRNA_gene": Curie.from_str("SO:0001641"), - # "lncRNA_gene": Curie.from_str("SO:0002127"), - # "miRNA_gene": Curie.from_str("SO:0001265"), - # "ncRNA_gene": Curie.from_str("SO:0001263"), - # "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"), - # "RNase_P_RNA_gene": Curie.from_str("SO:0001639"), - # "rRNA_gene": Curie.from_str("SO:0001637"), - # "scRNA_gene": Curie.from_str("SO:0001266"), - # "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"), - # "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"), - # "snoRNA_gene": Curie.from_str("SO:0001267"), - # "snRNA_gene": Curie.from_str("SO:0001268"), - # "SRP_RNA_gene": Curie.from_str("SO:0001269"), - # "telomerase_RNA_gene": Curie.from_str("SO:0001643"), - # "tRNA_gene": Curie.from_str("SO:0001272"), - # "lnc_RNA": Curie.from_str("SO:0001877"), - # "primary_transcript": Curie.from_str("SO:0000185"), - # "gene": Curie.from_str("SO:0000704"), - # "transcript": Curie.from_str("SO:0000673"), - - +}) + +# =============================================================================== +__repair_entity_type_to_curie_mapping = bidict.bidict({ + "lnc_RNA": Curie.from_str("SO:0001877") }) def map_gp_type_label_to_curie(type_label: str) -> Curie: @@ -269,9 +262,13 @@ def map_gp_type_label_to_curie(type_label: str) -> Curie: This is a measure to upgrade the pseudo-labels into proper Curies. Present here are the existing set of labels in current use, and how they should be mapped into CURIEs. + Repair Sequence Ontology (SO) labels if possible """ # normalized_label = type_label.translate() global __default_entity_type_to_curie_mapping + global __repair_entity_type_to_curie_mapping + if type_label not in __default_entity_type_to_curie_mapping and type_label in __repair_entity_type_to_curie_mapping: + return __repair_entity_type_to_curie_mapping.get(type_label) return __default_entity_type_to_curie_mapping.get(type_label, __default_entity_type_to_curie_mapping["gene_product"]) def gp_type_label_to_curie(type: Curie) -> str: @@ -281,6 +278,10 @@ def gp_type_label_to_curie(type: Curie) -> str: global __default_entity_type_to_curie_mapping return __default_entity_type_to_curie_mapping.inverse.get(type, "gene_product") +def map_gp_type_label_to_repair_curie(type_label: str) -> Curie: + global __repair_entity_type_to_curie_mapping + return __repair_entity_type_to_curie_mapping.get(type_label) + @dataclass(unsafe_hash=True) class Term: """ diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py index 9efad4d2..9cf13a2b 100644 --- a/tests/test_gafparser.py +++ b/tests/test_gafparser.py @@ -471,7 +471,36 @@ def test_obsolete_replair_of_withfrom(): assoc_result = p.parse_line(obsolete_no_replacement_line) assert assoc_result.associations == [] assert p.report.to_report_json()["messages"]["gorule-0000020"][0]["obj"] == "GO:0016458" + + +def test_invalid_db_type(): + #gene_product gets mapped to gene_product + line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"] + parsed = gafparser.to_association(line) + assoc = parsed.associations[0] + assert assoc.subject.type == [association.map_gp_type_label_to_curie('gene_product')] + #protein gets mapped to protein + line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "protein", "taxon:83333", "20081208", "EcoliWiki"] + parsed = gafparser.to_association(line) + assoc = parsed.associations[0] + assert assoc.subject.type == [association.map_gp_type_label_to_curie('protein')] + + #Unhandled types get mapped to 'gene_product' + line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "invalid_gene_product", "taxon:83333", "20081208", "EcoliWiki"] + parsed = gafparser.to_association(line) + assoc = parsed.associations[0] + assert assoc.subject.type == [association.map_gp_type_label_to_curie('gene_product')] + assert parsed.report.to_report_json()["messages"]["gorule-0000001"][0]["type"] == parsed.report.INVALID_SUBJECT_TYPE + + #'lnc_RNA' gets repaired to 'lncRNA' + line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "lnc_RNA", "taxon:83333", "20081208", "EcoliWiki"] + parsed = gafparser.to_association(line) + assoc = parsed.associations[0] + assert assoc.subject.type == [association.map_gp_type_label_to_curie('lncRNA')] + assert parsed.report.to_report_json()["messages"]["gorule-0000001"][0]["type"] == parsed.report.INVALID_SUBJECT_TYPE + + def test_subject_extensions_bad_curie(): """ From fe46253f3e881d40242563d45a395b139ad55450 Mon Sep 17 00:00:00 2001 From: Anushya Muruganujan Date: Tue, 11 Jun 2024 09:55:19 -0700 Subject: [PATCH 3/4] For #2246 --- ontobio/model/association.py | 44 +++++++++++++++++++++++++----------- tests/test_gafparser.py | 3 +-- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/ontobio/model/association.py b/ontobio/model/association.py index 9f0f1543..66fbfded 100644 --- a/ontobio/model/association.py +++ b/ontobio/model/association.py @@ -209,45 +209,63 @@ def fullname_field(self, max=None) -> str: # =============================================================================== __default_entity_type_to_curie_mapping = bidict.bidict({ + "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"), "protein_coding_gene": Curie.from_str("SO:0001217"), - "protein": Curie.from_str("PR:000000001"), - "gene_product": Curie.from_str("CHEBI:33695"), - "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"), "snRNA": Curie.from_str("SO:0000274"), "ncRNA": Curie.from_str("SO:0000655"), "rRNA": Curie.from_str("SO:0000252"), "mRNA": Curie.from_str("SO:0000234"), - "pre_miRNA": Curie.from_str("SO:0001244"), "lincRNA": Curie.from_str("SO:0001463"), - "lncRNA": Curie.from_str("SO:0001877"), + "lncRNA": Curie.from_str("SO:0001877"), "tRNA": Curie.from_str("SO:0000253"), "snoRNA": Curie.from_str("SO:0000275"), "miRNA": Curie.from_str("SO:0000276"), + "RNA": Curie.from_str("SO:0000356"), "scRNA": Curie.from_str("SO:0000013"), "piRNA": Curie.from_str("SO:0001035"), "pre_miRNA": Curie.from_str("SO:0001244"), "tmRNA": Curie.from_str("SO:0000584"), "scaRNA": Curie.from_str("SO:0002095"), - "siRNA": Curie.from_str("SO:0000646"), - "SRP_RNA": Curie.from_str("SO:0000590"), + "siRNA": Curie.from_str("SO:0000646"), + "SRP_RNA": Curie.from_str("SO:0000590"), + "primary_transcript": Curie.from_str("SO:0000185"), "ribozyme": Curie.from_str("SO:0000374"), "telomerase_RNA": Curie.from_str("SO:0000390"), "RNase_P_RNA": Curie.from_str("SO:0000386"), "antisense_RNA": Curie.from_str("SO:0000644"), "RNase_MRP_RNA": Curie.from_str("SO:0000385"), "guide_RNA": Curie.from_str("SO:0000602"), - "vault_RNA": Curie.from_str("SO:0000404"), - "Y_RNA": Curie.from_str("SO:0000405"), "hammerhead_ribozyme": Curie.from_str("SO:0000380"), + "protein": Curie.from_str("PR:000000001"), "pseudogene": Curie.from_str("SO:0000336"), "pseudogenic_transcript": Curie.from_str("SO:0000516"), - "protein_complex": Curie.from_str("GO:0032991"), + "gene": Curie.from_str("SO:0000704"), + "biological region": Curie.from_str("SO:0001411"), + "protein_complex": Curie.from_str("GO:0032991"), + "transcript": Curie.from_str("SO:0000673"), + "gene_product": Curie.from_str("CHEBI:33695"), "antisense_lncRNA": Curie.from_str("SO:0001904"), - "antisense_lncRNA_gene": Curie.from_str("SO:0002182"), + "antisense_lncRNA_gene": Curie.from_str("SO:0002182"), + "transposable_element_gene": Curie.from_str("SO:0000111"), "gene_segment": Curie.from_str("SO:3000000"), "genetic_marker": Curie.from_str("SO:0001645"), - "biological region": Curie.from_str("SO:0001411"), - "transposable_element_gene": Curie.from_str("SO:0000111") + "lincRNA_gene": Curie.from_str("SO:0001641"), + "lncRNA_gene": Curie.from_str("SO:0002127"), + "miRNA_gene": Curie.from_str("SO:0001265"), + "ncRNA_gene": Curie.from_str("SO:0001263"), + "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"), + "RNase_P_RNA_gene": Curie.from_str("SO:0001639"), + "rRNA_gene": Curie.from_str("SO:0001637"), + "scRNA_gene": Curie.from_str("SO:0001266"), + "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"), + "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"), + "snoRNA_gene": Curie.from_str("SO:0001267"), + "snRNA_gene": Curie.from_str("SO:0001268"), + "SRP_RNA_gene": Curie.from_str("SO:0001269"), + "telomerase_RNA_gene": Curie.from_str("SO:0001643"), + "tRNA_gene": Curie.from_str("SO:0001272"), + "vault_RNA": Curie.from_str("SO:0000404"), + "Y_RNA": Curie.from_str("SO:0000405") }) # =============================================================================== diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py index bf212d9c..45aeb6f3 100644 --- a/tests/test_gafparser.py +++ b/tests/test_gafparser.py @@ -611,8 +611,7 @@ def test_gaf_gpi_bridge(): association = gafparser.to_association(gaf, qualifier_parser=assocparser.Qualifier2_2()).associations[0] bridge = gafgpibridge entity = bridge.convert_association(association) - #gene gets updated to gene_product!!!! - assert entity.get("type") == ["gene_product"] + assert entity.get("type") == ["gene"] if __name__ == "__main__": From 0f913982dee9d13734c5958f65d31e00ae5e34fe Mon Sep 17 00:00:00 2001 From: Anushya Muruganujan Date: Tue, 11 Jun 2024 10:12:23 -0700 Subject: [PATCH 4/4] For #2246 --- tests/test_assoc_writer.py | 2 +- tests/test_gafparser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_assoc_writer.py b/tests/test_assoc_writer.py index c1aa37c9..6d175af2 100644 --- a/tests/test_assoc_writer.py +++ b/tests/test_assoc_writer.py @@ -201,7 +201,7 @@ def test_gaf_2_2_extensions(): def test_full_gaf_2_2_write(): - line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene_product\ttaxon:6239\t20060302\tWB\t\t" + line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t" parser = gafparser.GafParser() parser.version = "2.2" out = io.StringIO() diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py index 45aeb6f3..a1c300a9 100644 --- a/tests/test_gafparser.py +++ b/tests/test_gafparser.py @@ -216,7 +216,7 @@ def test_qualifiers_gaf(): def test_gaf_2_1_unknown_qualifier(): - line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"] + line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene", "taxon:83333", "20081208", "EcoliWiki"] parsed = gafparser.to_association(line) assert parsed.skipped is True