From 94a45d2ab74b86a7a405127a332fddf37ea56bdd Mon Sep 17 00:00:00 2001 From: Enrique Manjavacas Date: Thu, 9 May 2019 17:07:43 +0200 Subject: [PATCH] fixes and refactors --- pie/data/dataset.py | 5 ++--- pie/models/base_model.py | 8 +++++--- pie/models/decoder.py | 43 ++++++++++++++++++++-------------------- pie/models/embedding.py | 26 +----------------------- pie/models/highway.py | 39 ++++++++++++++++++++++++++++++++++++ pie/scripts/evaluate.py | 2 +- pie/scripts/optimize.py | 21 ++++++++++++-------- pie/scripts/train.py | 6 +++++- pie/trainer.py | 2 +- 9 files changed, 89 insertions(+), 63 deletions(-) create mode 100644 pie/models/highway.py diff --git a/pie/data/dataset.py b/pie/data/dataset.py index b2ce1f0..800167b 100644 --- a/pie/data/dataset.py +++ b/pie/data/dataset.py @@ -303,9 +303,8 @@ def from_settings(cls, settings, tasks=None): for task in settings.tasks: if tasks is not None and task['settings']['target'] not in tasks: - logging.warning( - "Ignoring task [{}]: no available data".format(task['target'])) - continue + raise ValueError("No available data for task [{}]".format( + task['settings']['target'])) le.add_task(task['name'], level=task['level'], **task['settings']) return le diff --git a/pie/models/base_model.py b/pie/models/base_model.py index ea37b04..3f5f536 100644 --- a/pie/models/base_model.py +++ b/pie/models/base_model.py @@ -27,7 +27,10 @@ class BaseModel(nn.Module): def __init__(self, label_encoder, tasks, *args, **kwargs): self.label_encoder = label_encoder # prepare input task data from task settings - self.tasks = {task['name']: task for task in tasks if not task.get('read_only')} + if isinstance(tasks, list): + tasks = {task['name']: task for task in tasks} + # drop read-only tasks + self.tasks = {t: task for t, task in tasks.items() if not task.get('read_only')} super().__init__() def loss(self, batch_data): @@ -189,8 +192,7 @@ def load(fpath): # load state_dict model.load_state_dict( - torch.load(tar.extractfile('state_dict.pt'), - map_location='cpu')) + torch.load(tar.extractfile('state_dict.pt'), map_location='cpu')) model.eval() diff --git a/pie/models/decoder.py b/pie/models/decoder.py index fa0be77..3bd4e72 100644 --- a/pie/models/decoder.py +++ b/pie/models/decoder.py @@ -10,37 +10,38 @@ from .beam_search import Beam from .attention import Attention +from .highway import Highway -class Highway(nn.Module): +class ConditionEmbedding(nn.Module): """ - Highway network + Embed tags and project onto a fixed-size tag embedding """ - def __init__(self, in_features, num_layers, act='relu'): - self.in_features = in_features - - self.act = act + def __init__(self, label_encoders, emb_dim, out_features, dropout=0): + self.dropout = dropout super().__init__() - self.layers = nn.ModuleList( - [nn.Linear(in_features, in_features*2) for _ in range(num_layers)]) + self.embs = nn.ModuleDict({ + le.name: nn.Embedding(len(le), emb_dim, padding_idx=le.get_pad()) + for le in label_encoders}) + self.proj = nn.Linear(len(label_encoders) * emb_dim, out_features) self.init() def init(self): - for layer in self.layers: - initialization.init_linear(layer) - # bias gate to let information go untouched - nn.init.constant_(layer.bias[self.in_features:], 1.) + for emb in self.embs.values(): + initialization.init_embeddings(emb) + initialization.init_linear(self.proj) - def forward(self, inp): - current = inp - for layer in self.layers: - inp, gate = layer(current).chunk(2, dim=-1) - inp, gate = getattr(F, self.act)(inp), F.sigmoid(gate) - current = gate * current + (1 - gate) * inp + def forward(self, **conds): + """t (seq_len x batch) or (batch), tlen""" + embs = torch.cat( + [emb(conds[name]) for name, emb in sorted(self.embs.items())], + dim=-1) - return current + embs = F.dropout(embs, p=self.dropout, training=self.training) + + return self.proj(embs) class LinearDecoder(nn.Module): @@ -131,7 +132,7 @@ def init(self): nn.init.normal_(self.end_transition) def forward(self, enc_outs): - "get logits of the input features" + """get logits of the input features""" # (seq_len x batch x vocab) if self.highway is not None: enc_outs = self.highway(enc_outs) @@ -211,7 +212,7 @@ def loss(self, logits, targets, lengths): def predict(self, enc_outs, lengths): # (seq_len x batch x vocab) - logits = self.projection(enc_outs) + logits = self(enc_outs) seq_len, _, vocab = logits.size() start_tag, end_tag = vocab, vocab + 1 diff --git a/pie/models/embedding.py b/pie/models/embedding.py index d9978ac..d47891d 100644 --- a/pie/models/embedding.py +++ b/pie/models/embedding.py @@ -7,31 +7,7 @@ from pie import initialization from .lstm import CustomBiLSTM - - -class Highway(torch.nn.Module): - def __init__(self, input_dim, num_layers=1, activation=torch.nn.functional.relu): - super(Highway, self).__init__() - - self.layers = torch.nn.ModuleList( - [torch.nn.Linear(input_dim, input_dim * 2) for _ in range(num_layers)]) - self.activation = activation - - for layer in self.layers: - layer.bias[input_dim:].data.fill_(1) - - def forward(self, inputs): - current_input = inputs - - for layer in self.layers: - projected_input = layer(current_input) - linear_part = current_input - nonlinear_part, gate = projected_input.chunk(2, dim=-1) - nonlinear_part = self.activation(nonlinear_part) - gate = torch.sigmoid(gate) - current_input = gate * linear_part + (1 - gate) * nonlinear_part - - return current_input +from .highway import Highway class CNNEmbedding(nn.Module): diff --git a/pie/models/highway.py b/pie/models/highway.py new file mode 100644 index 0000000..e74db14 --- /dev/null +++ b/pie/models/highway.py @@ -0,0 +1,39 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from pie import initialization + + +class Highway(nn.Module): + """ + Highway network + """ + def __init__(self, in_features, num_layers, act='relu'): + self.in_features = in_features + + self.act = act + super().__init__() + + self.layers = nn.ModuleList( + [nn.Linear(in_features, in_features*2) for _ in range(num_layers)]) + + self.init() + + def init(self): + for layer in self.layers: + initialization.init_linear(layer) + # bias gate to let information go untouched + nn.init.constant_(layer.bias[self.in_features:], 1.) + + def forward(self, inp): + current = inp + for layer in self.layers: + inp, gate = layer(current).chunk(2, dim=-1) + inp, gate = getattr(F, self.act)(inp), F.sigmoid(gate) + current = gate * current + (1 - gate) * inp + + return current + + diff --git a/pie/scripts/evaluate.py b/pie/scripts/evaluate.py index e410f4c..586a86e 100644 --- a/pie/scripts/evaluate.py +++ b/pie/scripts/evaluate.py @@ -13,7 +13,6 @@ def run(model_path, test_path, train_path, model = BaseModel.load(model_path).to(device) if model_info: print(model) - if hasattr(model, '_settings'): # new models should all have _settings settings = model._settings elif settings: @@ -27,6 +26,7 @@ def run(model_path, test_path, train_path, settings.batch_size = batch_size settings.buffer_size = buffer_size settings.device = device + settings.shuffle = False # avoid shuffling trainset = None if train_path: diff --git a/pie/scripts/optimize.py b/pie/scripts/optimize.py index 310084d..17356ec 100644 --- a/pie/scripts/optimize.py +++ b/pie/scripts/optimize.py @@ -7,7 +7,7 @@ import scipy.stats as stats from pie import utils -from pie.settings import settings_from_file, Settings +from pie import settings # available distributions @@ -60,6 +60,8 @@ def parse_opt(obj, opt_key): raise ValueError("Unknown distribution: ", v[opt_key]) else: opt[param] = parse_opt(v, opt_key) + else: + opt[param] = v return opt @@ -85,6 +87,8 @@ def sample_from_config(opt): output[param] = sample_from_config(dist) elif isinstance(dist, list): output[param] = [sample_from_config(d) for d in dist] + elif isinstance(dist, (str, float, int, bool)): + output[param] = dist # no sampling else: output[param] = dist.rvs() @@ -98,23 +102,24 @@ def run(config, opt, n_iter): print() print("::: Starting optimization run {} :::".format(i + 1)) print() - sampled_config = sample_from_config(opt) - merged = utils.recursive_merge(dict(config), sampled_config, overwrite=True) - print(yaml.dump(dict(config))) - print(yaml.dump(merged)) - train.run(Settings(merged)) + sampled = sample_from_config(opt) + merged = settings.Settings( + utils.recursive_merge(dict(config), sampled, overwrite=True)) + print("::: Sampled config :::") + print(yaml.dump(dict(merged))) + train.run(settings.check_settings(settings.merge_task_defaults(merged))) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument('config_path', default='config.json') - parser.add_argument('opt_path') + parser.add_argument('opt_path', help='Path to optimization file (see opt.json)') parser.add_argument('--n_iter', type=int, default=20) args = parser.parse_args() with utils.shutup(): - config = settings_from_file(args.config_path) + config = settings.settings_from_file(args.config_path) opt = read_opt(args.opt_path) diff --git a/pie/scripts/train.py b/pie/scripts/train.py index a577958..054fc2d 100644 --- a/pie/scripts/train.py +++ b/pie/scripts/train.py @@ -32,6 +32,10 @@ def get_fname_infix(settings): def run(settings): + # read settings if input is path + if isinstance(settings, str): + settings = settings_from_file(settings) + # seeding now = datetime.now() seed = now.hour * 10000 + now.minute * 100 + now.second @@ -190,4 +194,4 @@ def run(settings): parser = argparse.ArgumentParser() parser.add_argument('config_path', nargs='?', default='config.json') args = parser.parse_args() - run(settings_from_file(args.config_path)) + run(args.config_path) diff --git a/pie/trainer.py b/pie/trainer.py index bc7a219..5118f7e 100644 --- a/pie/trainer.py +++ b/pie/trainer.py @@ -230,7 +230,7 @@ def evaluate(self, dataset): total_losses, total_batches = collections.defaultdict(float), 0 # get all tasks - tasks = list(self.model.label_encoder.tasks) + tasks = list(self.model.tasks) for batch in tqdm.tqdm(dataset.batch_generator()): total_batches += 1