From a300b8e3d21c9dda0560a3c98457e5beaf15d5dc Mon Sep 17 00:00:00 2001 From: Mihai Surdeanu Date: Thu, 4 Jul 2024 05:31:29 -0700 Subject: [PATCH] updated data to combined hexa dependencies --- encoder/src/main/python/averaging_trainer.py | 8 ++++++-- encoder/src/main/python/clu_trainer.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/encoder/src/main/python/averaging_trainer.py b/encoder/src/main/python/averaging_trainer.py index 6990431..dce21f0 100644 --- a/encoder/src/main/python/averaging_trainer.py +++ b/encoder/src/main/python/averaging_trainer.py @@ -124,9 +124,13 @@ def print_some_params(self, model: TokenClassificationModel, msg: str) -> None: ShortTaskDef("NER", "conll-ner/", "train.txt", "dev.txt", "test.txt"), ShortTaskDef("POS", "pos/", "train.txt", "dev.txt", "test.txt"), ShortTaskDef("Chunking", "chunking/", "train.txt", "test.txt", "test.txt"), + #ShortTaskDef("Hexa Term", "deps-wsj/", "train.labels.hexaterms", "dev.labels.hexaterms", "test.labels.hexaterms"), + #ShortTaskDef("Hexa NonTerm", "deps-wsj/", "train.labels.hexanonterms", "dev.labels.hexanonterms", "test.labels.hexanonterms") + ShortTaskDef("Hexa Term", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels.hexaterms", "test.labels.hexaterms", "test.labels.hexaterms"), # dev is included in train + ShortTaskDef("Hexa NonTerm", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels.hexanonterms", "test.labels.hexanonterms", "test.labels.hexanonterms") # dev is included in train #ShortTaskDef("Deps Head", "deps-wsj/", "train.heads", "dev.heads", "test.heads"), #ShortTaskDef("Deps Label", "deps-wsj/", "train.labels", "dev.labels", "test.labels", dual_mode=True) - ShortTaskDef("Deps Head", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.heads", "test.heads", "test.heads"), - ShortTaskDef("Deps Label", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels", "test.labels", "test.labels", dual_mode=True) + #ShortTaskDef("Deps Head", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.heads", "test.heads", "test.heads"), + #ShortTaskDef("Deps Label", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels", "test.labels", "test.labels", dual_mode=True) ]) AveragingTrainer(tokenizer).train(tasks) diff --git a/encoder/src/main/python/clu_trainer.py b/encoder/src/main/python/clu_trainer.py index d1073fe..6bafef7 100644 --- a/encoder/src/main/python/clu_trainer.py +++ b/encoder/src/main/python/clu_trainer.py @@ -90,8 +90,12 @@ def compute_metrics(self, eval_pred: EvalPrediction) -> Dict[str, float]: ShortTaskDef("NER", "conll-ner/", "train.txt", "dev.txt", "test.txt"), ShortTaskDef("POS", "pos/", "train.txt", "dev.txt", "test.txt"), ShortTaskDef("Chunking", "chunking/", "train.txt", "test.txt", "test.txt"), # this dataset has no dev - ShortTaskDef("Deps Head", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.heads", "test.heads", "test.heads"), # dev is included in train - ShortTaskDef("Deps Label", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels", "test.labels", "test.labels", dual_mode=True) # dev is included in train + #ShortTaskDef("Hexa Term", "deps-wsj/", "train.labels.hexaterms", "dev.labels.hexaterms", "test.labels.hexaterms"), + #ShortTaskDef("Hexa NonTerm", "deps-wsj/", "train.labels.hexanonterms", "dev.labels.hexanonterms", "test.labels.hexanonterms"), + ShortTaskDef("Hexa Term", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels.hexaterms", "test.labels.hexaterms", "test.labels.hexaterms"), # dev is included in train + ShortTaskDef("Hexa NonTerm", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels.hexanonterms", "test.labels.hexanonterms", "test.labels.hexanonterms") # dev is included in train + #ShortTaskDef("Deps Head", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.heads", "test.heads", "test.heads"), # dev is included in train + #ShortTaskDef("Deps Label", "deps-combined/", "wsjtrain-wsjdev-geniatrain-geniadev.labels", "test.labels", "test.labels", dual_mode=True) # dev is included in train #ShortTaskDef("Deps Head", "deps-wsj/", "train.heads", "dev.heads", "test.heads"), #ShortTaskDef("Deps Label", "deps-wsj/", "train.labels", "dev.labels", "test.labels", dual_mode=True) ])