From f42b784e73c589378998fb24829722a610df0d2b Mon Sep 17 00:00:00 2001
From: Anushya Muruganujan <muruganu@usc.edu>
Date: Mon, 4 Mar 2024 15:30:42 -0800
Subject: [PATCH 1/4] For #2246: warn when an unrecognized GAF subject type defaults to gene_product

---
 ontobio/io/assocparser.py    |  1 +
 ontobio/io/gafparser.py      |  7 +++++
 ontobio/model/association.py | 54 +++++++++++++++++++-----------------
 tests/test_assoc_writer.py   |  2 +-
 tests/test_gafparser.py      |  5 ++--
 5 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/ontobio/io/assocparser.py b/ontobio/io/assocparser.py
index 28728900..ce91af76 100644
--- a/ontobio/io/assocparser.py
+++ b/ontobio/io/assocparser.py
@@ -347,6 +347,7 @@ class Report(object):
     VIOLATES_GO_RULE = "Violates GO Rule"
     RULE_PASS = "Passes GO Rule"
     INVALID_REFERENCES = "Only one reference per ID space allowed"
+    INVALID_SUBJECT_TYPE = "Invalid subject type"
 
     def __init__(self, group="unknown", dataset="unknown", config=None):
         self.messages = []
diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py
index 306bfde8..451d65ed 100644
--- a/ontobio/io/gafparser.py
+++ b/ontobio/io/gafparser.py
@@ -385,6 +385,9 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
     DB_OBJECT_SYMBOL = 2
     TAXON_INDEX = 12
     REFERENCE_INDEX = 5
+    DEFAULT_SUBJECT_TYPE = 'gene_product'
+    STR_DEFAULT_SUBJECT_TYPE_CURIE = str(association.map_gp_type_label_to_curie(DEFAULT_SUBJECT_TYPE))
+    
     if gaf_line[DB_INDEX] == "":
         report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
         return assocparser.ParseResult(source_line, [], True, report=report)
@@ -417,7 +420,11 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
 
     interacting_taxon = parsed_taxons_result.parsed[1] if len(parsed_taxons_result.parsed) == 2 else None
     subject_curie = association.Curie(gaf_line[0], gaf_line[1])
+    type_label = gaf_line[11]
     subject = association.Subject(subject_curie, gaf_line[2], [gaf_line[9]], gaf_line[10].split("|"), [association.map_gp_type_label_to_curie(gaf_line[11])], taxon)
+    # Output warnig, if system is defaulting to gene_product
+    if DEFAULT_SUBJECT_TYPE != type_label and len(subject.type) == 1 and STR_DEFAULT_SUBJECT_TYPE_CURIE == str(subject.type[0]):
+        report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "defaulting to 'gene_product'", taxon=gaf_line[TAXON_INDEX], rule=1)
     gpi_entity = bio_entities.get(subject_curie)
     if gpi_entity is not None and subject != gpi_entity:
         subject = gpi_entity
diff --git a/ontobio/model/association.py b/ontobio/model/association.py
index 968dffab..53d32b85 100644
--- a/ontobio/model/association.py
+++ b/ontobio/model/association.py
@@ -210,21 +210,20 @@ def fullname_field(self, max=None) -> str:
 # ===============================================================================
 __default_entity_type_to_curie_mapping = bidict.bidict({
     "protein_coding_gene": Curie.from_str("SO:0001217"),
+    "protein": Curie.from_str("PR:000000001"),
+    "gene_product": Curie.from_str("CHEBI:33695"),
     "snRNA": Curie.from_str("SO:0000274"),
     "ncRNA": Curie.from_str("SO:0000655"),
     "rRNA": Curie.from_str("SO:0000252"),
     "mRNA": Curie.from_str("SO:0000234"),
-    "lnc_RNA": Curie.from_str("SO:0001877"),
     "lincRNA": Curie.from_str("SO:0001463"),
     "tRNA": Curie.from_str("SO:0000253"),
     "snoRNA": Curie.from_str("SO:0000275"),
     "miRNA": Curie.from_str("SO:0000276"),
-    "RNA": Curie.from_str("SO:0000356"),
     "scRNA": Curie.from_str("SO:0000013"),
     "piRNA": Curie.from_str("SO:0001035"),
     "tmRNA": Curie.from_str("SO:0000584"),
-    "SRP_RNA": Curie.from_str("SO:0000590"),
-    "primary_transcript": Curie.from_str("SO:0000185"),
+    "SRP_RNA": Curie.from_str("SO:0000590"),    
     "ribozyme": Curie.from_str("SO:0000374"),
     "telomerase_RNA": Curie.from_str("SO:0000390"),
     "RNase_P_RNA": Curie.from_str("SO:0000386"),
@@ -232,32 +231,35 @@ def fullname_field(self, max=None) -> str:
     "RNase_MRP_RNA": Curie.from_str("SO:0000385"),
     "guide_RNA": Curie.from_str("SO:0000602"),
     "hammerhead_ribozyme": Curie.from_str("SO:0000380"),
-    "protein": Curie.from_str("PR:000000001"),
     "pseudogene": Curie.from_str("SO:0000336"),
-    "gene": Curie.from_str("SO:0000704"),
-    "biological region": Curie.from_str("SO:0001411"),
-    "protein_complex": Curie.from_str("GO:0032991"),
-    "transcript": Curie.from_str("SO:0000673"),
-    "gene_product": Curie.from_str("CHEBI:33695"),
+    "protein_complex": Curie.from_str("GO:0032991"),    
     "antisense_lncRNA": Curie.from_str("SO:0001904"),
-    "transposable_element_gene": Curie.from_str("SO:0000111"),
     "gene_segment": Curie.from_str("SO:3000000"),
     "genetic_marker": Curie.from_str("SO:0001645"),
-    "lincRNA_gene": Curie.from_str("SO:0001641"),
-    "lncRNA_gene": Curie.from_str("SO:0002127"),
-    "miRNA_gene": Curie.from_str("SO:0001265"),
-    "ncRNA_gene": Curie.from_str("SO:0001263"),
-    "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"),
-    "RNase_P_RNA_gene": Curie.from_str("SO:0001639"),
-    "rRNA_gene": Curie.from_str("SO:0001637"),
-    "scRNA_gene": Curie.from_str("SO:0001266"),
-    "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"),
-    "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"),
-    "snoRNA_gene": Curie.from_str("SO:0001267"),
-    "snRNA_gene": Curie.from_str("SO:0001268"),
-    "SRP_RNA_gene": Curie.from_str("SO:0001269"),
-    "telomerase_RNA_gene": Curie.from_str("SO:0001643"),
-    "tRNA_gene": Curie.from_str("SO:0001272")
+    "biological region": Curie.from_str("SO:0001411"),
+    "transposable_element_gene": Curie.from_str("SO:0000111")
+    # "RNA": Curie.from_str("SO:0000356"),         
+    # "lincRNA_gene": Curie.from_str("SO:0001641"),
+    # "lncRNA_gene": Curie.from_str("SO:0002127"),
+    # "miRNA_gene": Curie.from_str("SO:0001265"),
+    # "ncRNA_gene": Curie.from_str("SO:0001263"),
+    # "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"),
+    # "RNase_P_RNA_gene": Curie.from_str("SO:0001639"),
+    # "rRNA_gene": Curie.from_str("SO:0001637"),
+    # "scRNA_gene": Curie.from_str("SO:0001266"),
+    # "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"),
+    # "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"),
+    # "snoRNA_gene": Curie.from_str("SO:0001267"),
+    # "snRNA_gene": Curie.from_str("SO:0001268"),
+    # "SRP_RNA_gene": Curie.from_str("SO:0001269"),
+    # "telomerase_RNA_gene": Curie.from_str("SO:0001643"),
+    # "tRNA_gene": Curie.from_str("SO:0001272"),
+    #  "lnc_RNA": Curie.from_str("SO:0001877"),
+    # "primary_transcript": Curie.from_str("SO:0000185"),
+    # "gene": Curie.from_str("SO:0000704"),
+    # "transcript": Curie.from_str("SO:0000673"),
+    
+   
 })
 
 def map_gp_type_label_to_curie(type_label: str) -> Curie:
diff --git a/tests/test_assoc_writer.py b/tests/test_assoc_writer.py
index eb0bc030..80fd6dd5 100644
--- a/tests/test_assoc_writer.py
+++ b/tests/test_assoc_writer.py
@@ -195,7 +195,7 @@ def test_gaf_2_2_extensions():
 
 
 def test_full_gaf_2_2_write():
-    line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"
+    line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene_product\ttaxon:6239\t20060302\tWB\t\t"
     parser = gafparser.GafParser()
     parser.version = "2.2"
     out = io.StringIO()
diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py
index 9c907408..9efad4d2 100644
--- a/tests/test_gafparser.py
+++ b/tests/test_gafparser.py
@@ -216,7 +216,7 @@ def test_qualifiers_gaf():
 
 
 def test_gaf_2_1_unknown_qualifier():
-    line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene", "taxon:83333", "20081208", "EcoliWiki"]
+    line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"]
     parsed = gafparser.to_association(line)
 
     assert parsed.skipped is True
@@ -582,7 +582,8 @@ def test_gaf_gpi_bridge():
     association = gafparser.to_association(gaf, qualifier_parser=assocparser.Qualifier2_2()).associations[0]
     bridge = gafgpibridge.GafGpiBridge()
     entity = bridge.convert_association(association)
-    assert entity.get("type") == ["gene"]
+    #gene gets updated to gene_product!!!!
+    assert entity.get("type") == ["gene_product"]
 
 
 if __name__ == "__main__":

From 610b113777598bef50e938fcf1affe624ed38667 Mon Sep 17 00:00:00 2001
From: Anushya Muruganujan <muruganu@usc.edu>
Date: Mon, 10 Jun 2024 18:47:08 -0700
Subject: [PATCH 2/4] For #2246: repair the 'lnc_RNA' subject type and add more SO entity types

---
 ontobio/io/gafparser.py      |  3 +++
 ontobio/model/association.py | 45 ++++++++++++++++++------------------
 tests/test_gafparser.py      | 29 +++++++++++++++++++++++
 3 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py
index 451d65ed..44b9d9a8 100644
--- a/ontobio/io/gafparser.py
+++ b/ontobio/io/gafparser.py
@@ -425,6 +425,9 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
     # Output warnig, if system is defaulting to gene_product
     if DEFAULT_SUBJECT_TYPE != type_label and len(subject.type) == 1 and STR_DEFAULT_SUBJECT_TYPE_CURIE == str(subject.type[0]):
         report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "defaulting to 'gene_product'", taxon=gaf_line[TAXON_INDEX], rule=1)
+    if association.map_gp_type_label_to_repair_curie(type_label) is not None:
+        report.warning(source_line, Report.INVALID_SUBJECT_TYPE, type_label, "has been repaired", taxon=gaf_line[TAXON_INDEX], rule=1)
+             
     gpi_entity = bio_entities.get(subject_curie)
     if gpi_entity is not None and subject != gpi_entity:
         subject = gpi_entity
diff --git a/ontobio/model/association.py b/ontobio/model/association.py
index 53d32b85..33a9d113 100644
--- a/ontobio/model/association.py
+++ b/ontobio/model/association.py
@@ -212,17 +212,23 @@ def fullname_field(self, max=None) -> str:
     "protein_coding_gene": Curie.from_str("SO:0001217"),
     "protein": Curie.from_str("PR:000000001"),
     "gene_product": Curie.from_str("CHEBI:33695"),
+    "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"),    
     "snRNA": Curie.from_str("SO:0000274"),
     "ncRNA": Curie.from_str("SO:0000655"),
     "rRNA": Curie.from_str("SO:0000252"),
     "mRNA": Curie.from_str("SO:0000234"),
+    "pre_miRNA": Curie.from_str("SO:0001244"),
     "lincRNA": Curie.from_str("SO:0001463"),
+    "lncRNA": Curie.from_str("SO:0001877"),    
     "tRNA": Curie.from_str("SO:0000253"),
     "snoRNA": Curie.from_str("SO:0000275"),
     "miRNA": Curie.from_str("SO:0000276"),
     "scRNA": Curie.from_str("SO:0000013"),
     "piRNA": Curie.from_str("SO:0001035"),
+    "pre_miRNA": Curie.from_str("SO:0001244"),    
     "tmRNA": Curie.from_str("SO:0000584"),
+    "scaRNA": Curie.from_str("SO:0002095"),
+    "siRNA": Curie.from_str("SO:0000646"),         
     "SRP_RNA": Curie.from_str("SO:0000590"),    
     "ribozyme": Curie.from_str("SO:0000374"),
     "telomerase_RNA": Curie.from_str("SO:0000390"),
@@ -230,36 +236,23 @@ def fullname_field(self, max=None) -> str:
     "antisense_RNA": Curie.from_str("SO:0000644"),
     "RNase_MRP_RNA": Curie.from_str("SO:0000385"),
     "guide_RNA": Curie.from_str("SO:0000602"),
+    "vault_RNA": Curie.from_str("SO:0000404"),
+    "Y_RNA": Curie.from_str("SO:0000405"),         
     "hammerhead_ribozyme": Curie.from_str("SO:0000380"),
     "pseudogene": Curie.from_str("SO:0000336"),
+    "pseudogenic_transcript": Curie.from_str("SO:0000516"),    
     "protein_complex": Curie.from_str("GO:0032991"),    
     "antisense_lncRNA": Curie.from_str("SO:0001904"),
+    "antisense_lncRNA_gene": Curie.from_str("SO:0002182"),  
     "gene_segment": Curie.from_str("SO:3000000"),
     "genetic_marker": Curie.from_str("SO:0001645"),
     "biological region": Curie.from_str("SO:0001411"),
     "transposable_element_gene": Curie.from_str("SO:0000111")
-    # "RNA": Curie.from_str("SO:0000356"),         
-    # "lincRNA_gene": Curie.from_str("SO:0001641"),
-    # "lncRNA_gene": Curie.from_str("SO:0002127"),
-    # "miRNA_gene": Curie.from_str("SO:0001265"),
-    # "ncRNA_gene": Curie.from_str("SO:0001263"),
-    # "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"),
-    # "RNase_P_RNA_gene": Curie.from_str("SO:0001639"),
-    # "rRNA_gene": Curie.from_str("SO:0001637"),
-    # "scRNA_gene": Curie.from_str("SO:0001266"),
-    # "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"),
-    # "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"),
-    # "snoRNA_gene": Curie.from_str("SO:0001267"),
-    # "snRNA_gene": Curie.from_str("SO:0001268"),
-    # "SRP_RNA_gene": Curie.from_str("SO:0001269"),
-    # "telomerase_RNA_gene": Curie.from_str("SO:0001643"),
-    # "tRNA_gene": Curie.from_str("SO:0001272"),
-    #  "lnc_RNA": Curie.from_str("SO:0001877"),
-    # "primary_transcript": Curie.from_str("SO:0000185"),
-    # "gene": Curie.from_str("SO:0000704"),
-    # "transcript": Curie.from_str("SO:0000673"),
-    
-   
+})
+
+# ===============================================================================
+__repair_entity_type_to_curie_mapping = bidict.bidict({
+    "lnc_RNA": Curie.from_str("SO:0001877")
 })
 
 def map_gp_type_label_to_curie(type_label: str) -> Curie:
@@ -269,9 +262,13 @@ def map_gp_type_label_to_curie(type_label: str) -> Curie:
 
     This is a measure to upgrade the pseudo-labels into proper Curies. Present here are
     the existing set of labels in current use, and how they should be mapped into CURIEs.
+    Labels found only in the repair mapping (e.g. 'lnc_RNA') resolve to their corrected Sequence Ontology (SO) CURIE.
     """
     # normalized_label = type_label.translate()
     global __default_entity_type_to_curie_mapping
+    global __repair_entity_type_to_curie_mapping
+    if type_label not in __default_entity_type_to_curie_mapping and type_label in __repair_entity_type_to_curie_mapping:
+        return __repair_entity_type_to_curie_mapping.get(type_label)
     return __default_entity_type_to_curie_mapping.get(type_label, __default_entity_type_to_curie_mapping["gene_product"])
 
 def gp_type_label_to_curie(type: Curie) -> str:
@@ -281,6 +278,10 @@ def gp_type_label_to_curie(type: Curie) -> str:
     global __default_entity_type_to_curie_mapping
     return __default_entity_type_to_curie_mapping.inverse.get(type, "gene_product")
 
+def map_gp_type_label_to_repair_curie(type_label: str) -> Curie:
+    global __repair_entity_type_to_curie_mapping
+    return __repair_entity_type_to_curie_mapping.get(type_label)
+
 @dataclass(unsafe_hash=True)
 class Term:
     """
diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py
index 9efad4d2..9cf13a2b 100644
--- a/tests/test_gafparser.py
+++ b/tests/test_gafparser.py
@@ -471,7 +471,36 @@ def test_obsolete_replair_of_withfrom():
     assoc_result = p.parse_line(obsolete_no_replacement_line)
     assert assoc_result.associations == []
     assert p.report.to_report_json()["messages"]["gorule-0000020"][0]["obj"] == "GO:0016458"
+    
+    
+def test_invalid_db_type():
+    #gene_product gets mapped to gene_product
+    line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"]
+    parsed = gafparser.to_association(line)
+    assoc = parsed.associations[0]
+    assert assoc.subject.type == [association.map_gp_type_label_to_curie('gene_product')]
 
+    #protein gets mapped to protein
+    line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "protein", "taxon:83333", "20081208", "EcoliWiki"]
+    parsed = gafparser.to_association(line)
+    assoc = parsed.associations[0]
+    assert assoc.subject.type == [association.map_gp_type_label_to_curie('protein')]
+    
+    #Unhandled types get mapped to 'gene_product'
+    line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "invalid_gene_product", "taxon:83333", "20081208", "EcoliWiki"]
+    parsed = gafparser.to_association(line)
+    assoc = parsed.associations[0]
+    assert assoc.subject.type == [association.map_gp_type_label_to_curie('gene_product')]
+    assert parsed.report.to_report_json()["messages"]["gorule-0000001"][0]["type"] == parsed.report.INVALID_SUBJECT_TYPE
+    
+    #'lnc_RNA' gets repaired to 'lncRNA'
+    line = ["UniProtKB", "P0AFI2", "parC", "", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "lnc_RNA", "taxon:83333", "20081208", "EcoliWiki"]
+    parsed = gafparser.to_association(line)
+    assoc = parsed.associations[0]
+    assert assoc.subject.type == [association.map_gp_type_label_to_curie('lncRNA')]                
+    assert parsed.report.to_report_json()["messages"]["gorule-0000001"][0]["type"] == parsed.report.INVALID_SUBJECT_TYPE
+    
+ 
 
 def test_subject_extensions_bad_curie():
     """

From fe46253f3e881d40242563d45a395b139ad55450 Mon Sep 17 00:00:00 2001
From: Anushya Muruganujan <muruganu@usc.edu>
Date: Tue, 11 Jun 2024 09:55:19 -0700
Subject: [PATCH 3/4] For #2246: restore gene, transcript and *_gene types in the type-to-CURIE mapping

---
 ontobio/model/association.py | 44 +++++++++++++++++++++++++-----------
 tests/test_gafparser.py      |  3 +--
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/ontobio/model/association.py b/ontobio/model/association.py
index 9f0f1543..66fbfded 100644
--- a/ontobio/model/association.py
+++ b/ontobio/model/association.py
@@ -209,45 +209,63 @@ def fullname_field(self, max=None) -> str:
 
 # ===============================================================================
 __default_entity_type_to_curie_mapping = bidict.bidict({
+    "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"),
     "protein_coding_gene": Curie.from_str("SO:0001217"),
-    "protein": Curie.from_str("PR:000000001"),
-    "gene_product": Curie.from_str("CHEBI:33695"),
-    "autocatalytically_spliced_intron": Curie.from_str("SO:0000588"),    
     "snRNA": Curie.from_str("SO:0000274"),
     "ncRNA": Curie.from_str("SO:0000655"),
     "rRNA": Curie.from_str("SO:0000252"),
     "mRNA": Curie.from_str("SO:0000234"),
-    "pre_miRNA": Curie.from_str("SO:0001244"),
     "lincRNA": Curie.from_str("SO:0001463"),
-    "lncRNA": Curie.from_str("SO:0001877"),    
+    "lncRNA": Curie.from_str("SO:0001877"),     
     "tRNA": Curie.from_str("SO:0000253"),
     "snoRNA": Curie.from_str("SO:0000275"),
     "miRNA": Curie.from_str("SO:0000276"),
+    "RNA": Curie.from_str("SO:0000356"),
     "scRNA": Curie.from_str("SO:0000013"),
     "piRNA": Curie.from_str("SO:0001035"),
     "pre_miRNA": Curie.from_str("SO:0001244"),    
     "tmRNA": Curie.from_str("SO:0000584"),
     "scaRNA": Curie.from_str("SO:0002095"),
-    "siRNA": Curie.from_str("SO:0000646"),         
-    "SRP_RNA": Curie.from_str("SO:0000590"),    
+    "siRNA": Curie.from_str("SO:0000646"),        
+    "SRP_RNA": Curie.from_str("SO:0000590"),
+    "primary_transcript": Curie.from_str("SO:0000185"),
     "ribozyme": Curie.from_str("SO:0000374"),
     "telomerase_RNA": Curie.from_str("SO:0000390"),
     "RNase_P_RNA": Curie.from_str("SO:0000386"),
     "antisense_RNA": Curie.from_str("SO:0000644"),
     "RNase_MRP_RNA": Curie.from_str("SO:0000385"),
     "guide_RNA": Curie.from_str("SO:0000602"),
-    "vault_RNA": Curie.from_str("SO:0000404"),
-    "Y_RNA": Curie.from_str("SO:0000405"),         
     "hammerhead_ribozyme": Curie.from_str("SO:0000380"),
+    "protein": Curie.from_str("PR:000000001"),
     "pseudogene": Curie.from_str("SO:0000336"),
     "pseudogenic_transcript": Curie.from_str("SO:0000516"),    
-    "protein_complex": Curie.from_str("GO:0032991"),    
+    "gene": Curie.from_str("SO:0000704"),
+    "biological region": Curie.from_str("SO:0001411"),
+    "protein_complex": Curie.from_str("GO:0032991"),
+    "transcript": Curie.from_str("SO:0000673"),
+    "gene_product": Curie.from_str("CHEBI:33695"),
     "antisense_lncRNA": Curie.from_str("SO:0001904"),
-    "antisense_lncRNA_gene": Curie.from_str("SO:0002182"),  
+    "antisense_lncRNA_gene": Curie.from_str("SO:0002182"),    
+    "transposable_element_gene": Curie.from_str("SO:0000111"),
     "gene_segment": Curie.from_str("SO:3000000"),
     "genetic_marker": Curie.from_str("SO:0001645"),
-    "biological region": Curie.from_str("SO:0001411"),
-    "transposable_element_gene": Curie.from_str("SO:0000111")
+    "lincRNA_gene": Curie.from_str("SO:0001641"),
+    "lncRNA_gene": Curie.from_str("SO:0002127"),
+    "miRNA_gene": Curie.from_str("SO:0001265"),
+    "ncRNA_gene": Curie.from_str("SO:0001263"),
+    "RNase_MRP_RNA_gene": Curie.from_str("SO:0001640"),
+    "RNase_P_RNA_gene": Curie.from_str("SO:0001639"),
+    "rRNA_gene": Curie.from_str("SO:0001637"),
+    "scRNA_gene": Curie.from_str("SO:0001266"),
+    "sense_intronic_ncRNA_gene": Curie.from_str("SO:0002184"),
+    "sense_overlap_ncRNA_gene": Curie.from_str("SO:0002183"),
+    "snoRNA_gene": Curie.from_str("SO:0001267"),
+    "snRNA_gene": Curie.from_str("SO:0001268"),
+    "SRP_RNA_gene": Curie.from_str("SO:0001269"),
+    "telomerase_RNA_gene": Curie.from_str("SO:0001643"),
+    "tRNA_gene": Curie.from_str("SO:0001272"),
+    "vault_RNA": Curie.from_str("SO:0000404"),
+    "Y_RNA": Curie.from_str("SO:0000405")    
 })
 
 # ===============================================================================
diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py
index bf212d9c..45aeb6f3 100644
--- a/tests/test_gafparser.py
+++ b/tests/test_gafparser.py
@@ -611,8 +611,7 @@ def test_gaf_gpi_bridge():
     association = gafparser.to_association(gaf, qualifier_parser=assocparser.Qualifier2_2()).associations[0]
     bridge = gafgpibridge
     entity = bridge.convert_association(association)
-    #gene gets updated to gene_product!!!!
-    assert entity.get("type") == ["gene_product"]
+    assert entity.get("type") == ["gene"]
 
 
 if __name__ == "__main__":

From 0f913982dee9d13734c5958f65d31e00ae5e34fe Mon Sep 17 00:00:00 2001
From: Anushya Muruganujan <muruganu@usc.edu>
Date: Tue, 11 Jun 2024 10:12:23 -0700
Subject: [PATCH 4/4] For #2246: switch test fixtures back to the 'gene' subject type

---
 tests/test_assoc_writer.py | 2 +-
 tests/test_gafparser.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_assoc_writer.py b/tests/test_assoc_writer.py
index c1aa37c9..6d175af2 100644
--- a/tests/test_assoc_writer.py
+++ b/tests/test_assoc_writer.py
@@ -201,7 +201,7 @@ def test_gaf_2_2_extensions():
 
 
 def test_full_gaf_2_2_write():
-    line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene_product\ttaxon:6239\t20060302\tWB\t\t"
+    line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"
     parser = gafparser.GafParser()
     parser.version = "2.2"
     out = io.StringIO()
diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py
index 45aeb6f3..a1c300a9 100644
--- a/tests/test_gafparser.py
+++ b/tests/test_gafparser.py
@@ -216,7 +216,7 @@ def test_qualifiers_gaf():
 
 
 def test_gaf_2_1_unknown_qualifier():
-    line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene_product", "taxon:83333", "20081208", "EcoliWiki"]
+    line = ["UniProtKB", "P0AFI2", "parC", "Contributes_to", "GO:0003916", "PMID:1334483", "IDA", "", "F", "", "", "gene", "taxon:83333", "20081208", "EcoliWiki"]
     parsed = gafparser.to_association(line)
 
     assert parsed.skipped is True