From 66322532540ff8eb7e5d3c9a6f2f51435cc06efd Mon Sep 17 00:00:00 2001
From: Jiyang
Date: Mon, 4 Dec 2023 13:26:42 -0600
Subject: [PATCH] Config files for running models

---
 python/configs/codeT5.yaml  | 55 +++++++++++++++++++++++++++++++++++++
 python/configs/coditT5.yaml | 55 +++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 python/configs/codeT5.yaml
 create mode 100644 python/configs/coditT5.yaml

diff --git a/python/configs/codeT5.yaml b/python/configs/codeT5.yaml
new file mode 100644
index 0000000..1d09f62
--- /dev/null
+++ b/python/configs/codeT5.yaml
@@ -0,0 +1,55 @@
+data:
+  batch_size: 1
+  eval_batch_size: 1
+
+model:
+  pretrained_model: Salesforce/codet5-base
+  pretrained_tokenizer: Salesforce/codet5-base
+  skip_special_token_when_generate: False
+  beam_size: 20
+
+trainer:
+  auto_select_gpus: true
+  gpus: -1
+  strategy: ddp
+  # find_unused_parameters: false
+  precision: 16
+
+  # max_steps: 50_000
+  # fast_dev_run: true
+  max_epochs: 30
+  accumulate_grad_batches: 4 # effective batch size 1*4(gpu)*4(accumulate) = 16
+
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: bleu/val
+        mode: max
+        min_delta: 0
+        patience: 5
+        verbose: true
+    # - class_path: pytorch_lightning.callbacks.StochasticWeightAveraging # Incompatible with EarlyStopping
+    - class_path: pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor
+      init_args:
+        logging_interval: step
+
+optimizer:
+  class_path: transformers.optimization.AdamW
+  init_args:
+    lr: 0.00005
+    eps: 1e-8
+    weight_decay: 0.01
+
+lr_scheduler:
+  class_path: torch.optim.lr_scheduler.OneCycleLR
+  init_args:
+    max_lr: 0.00005
+    pct_start: 0.1
+    div_factor: 1
+    total_steps: 30
+    anneal_strategy: linear
+
+ckpt:
+  save_top_k: 1
+  monitor: bleu/val
+  mode: max
diff --git a/python/configs/coditT5.yaml b/python/configs/coditT5.yaml
new file mode 100644
index 0000000..64a2bf8
--- /dev/null
+++ b/python/configs/coditT5.yaml
@@ -0,0 +1,55 @@
+data:
+  batch_size: 1
+  eval_batch_size: 1
+
+model:
+  pretrained_model: ../models/pretrain/model/
+  pretrained_tokenizer: ../models/codeT5Tokenizer
+  beam_size: 20
+  skip_special_token_when_generate: False
+
+trainer:
+  auto_select_gpus: true
+  gpus: -1
+  strategy: ddp
+  # find_unused_parameters: false
+  precision: 16
+
+  # max_steps: 50_000
+  # fast_dev_run: true
+  max_epochs: 30
+  accumulate_grad_batches: 12 # effective batch size 1*4(gpu)*12(accumulate) = 48
+
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: bleu/val
+        mode: max
+        min_delta: 0
+        patience: 5
+        verbose: true
+    # - class_path: pytorch_lightning.callbacks.StochasticWeightAveraging # Incompatible with EarlyStopping
+    - class_path: pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor
+      init_args:
+        logging_interval: step
+
+optimizer:
+  class_path: transformers.optimization.AdamW
+  init_args:
+    lr: 0.00005
+    eps: 1e-8
+    weight_decay: 0.01
+
+lr_scheduler:
+  class_path: torch.optim.lr_scheduler.OneCycleLR
+  init_args:
+    max_lr: 0.00005
+    pct_start: 0.1
+    div_factor: 1
+    total_steps: 50
+    anneal_strategy: linear
+
+ckpt:
+  save_top_k: 1
+  monitor: bleu/val
+  mode: max
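
Note on the accumulate_grad_batches comments: under DDP, the effective batch size is per-device batch size * number of GPU processes * accumulate_grad_batches. Both comments assume a 4-GPU machine (gpus: -1 only means "use all visible GPUs"), which gives 1*4*4 = 16 for codeT5.yaml and 1*4*12 = 48 for coditT5.yaml; on a machine with a different GPU count the effective batch size changes accordingly.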
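
Both files use the class_path / init_args convention (as in jsonargparse/LightningCLI) for the optimizer and LR scheduler sections. As a rough illustration only, not code from this repository, the sketch below shows how such an entry can be resolved into an object; the instantiate helper and the hard-coded specs are hypothetical and merely mirror the values in codeT5.yaml.

import importlib

import torch


def instantiate(spec, **extra_kwargs):
    # Resolve a {class_path, init_args} mapping into an object.
    module_name, class_name = spec["class_path"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**{**spec.get("init_args", {}), **extra_kwargs})


# Values copied from codeT5.yaml. transformers.optimization.AdamW is deprecated
# in recent transformers releases; torch.optim.AdamW is a drop-in alternative.
optimizer_spec = {
    "class_path": "transformers.optimization.AdamW",
    "init_args": {"lr": 0.00005, "eps": 1e-8, "weight_decay": 0.01},
}
scheduler_spec = {
    "class_path": "torch.optim.lr_scheduler.OneCycleLR",
    "init_args": {
        "max_lr": 0.00005,
        "pct_start": 0.1,
        "div_factor": 1,
        "total_steps": 30,
        "anneal_strategy": "linear",
    },
}

# A toy parameter stands in for the real CodeT5 model.
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = instantiate(optimizer_spec, params=params)
scheduler = instantiate(scheduler_spec, optimizer=optimizer)
print(type(optimizer).__name__, type(scheduler).__name__)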