Skip to content

Commit

Permalink
fixes and refactors
Browse files Browse the repository at this point in the history
  • Loading branch information
emanjavacas committed May 9, 2019
1 parent eb6e6f6 commit 94a45d2
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 63 deletions.
5 changes: 2 additions & 3 deletions pie/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,8 @@ def from_settings(cls, settings, tasks=None):

for task in settings.tasks:
if tasks is not None and task['settings']['target'] not in tasks:
logging.warning(
"Ignoring task [{}]: no available data".format(task['target']))
continue
raise ValueError("No available data for task [{}]".format(

This comment has been minimized.

Copy link
@PonteIneptique

PonteIneptique May 9, 2019

Contributor

Thanks for this correction. I should have proposed a PR for that a long time ago...

task['settings']['target']))
le.add_task(task['name'], level=task['level'], **task['settings'])

return le
Expand Down
8 changes: 5 additions & 3 deletions pie/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ class BaseModel(nn.Module):
def __init__(self, label_encoder, tasks, *args, **kwargs):
self.label_encoder = label_encoder
# prepare input task data from task settings
self.tasks = {task['name']: task for task in tasks if not task.get('read_only')}
if isinstance(tasks, list):
tasks = {task['name']: task for task in tasks}
# drop read-only tasks
self.tasks = {t: task for t, task in tasks.items() if not task.get('read_only')}
super().__init__()

def loss(self, batch_data):
Expand Down Expand Up @@ -189,8 +192,7 @@ def load(fpath):

# load state_dict
model.load_state_dict(
torch.load(tar.extractfile('state_dict.pt'),
map_location='cpu'))
torch.load(tar.extractfile('state_dict.pt'), map_location='cpu'))

model.eval()

Expand Down
43 changes: 22 additions & 21 deletions pie/models/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,38 @@

from .beam_search import Beam
from .attention import Attention
from .highway import Highway


class Highway(nn.Module):
class ConditionEmbedding(nn.Module):
"""
Highway network
Embed tags and project onto a fixed-size tag embedding
"""
def __init__(self, in_features, num_layers, act='relu'):
self.in_features = in_features

self.act = act
def __init__(self, label_encoders, emb_dim, out_features, dropout=0):
self.dropout = dropout
super().__init__()

self.layers = nn.ModuleList(
[nn.Linear(in_features, in_features*2) for _ in range(num_layers)])
self.embs = nn.ModuleDict({
le.name: nn.Embedding(len(le), emb_dim, padding_idx=le.get_pad())
for le in label_encoders})
self.proj = nn.Linear(len(label_encoders) * emb_dim, out_features)

self.init()

def init(self):
for layer in self.layers:
initialization.init_linear(layer)
# bias gate to let information go untouched
nn.init.constant_(layer.bias[self.in_features:], 1.)
for emb in self.embs.values():
initialization.init_embeddings(emb)
initialization.init_linear(self.proj)

def forward(self, inp):
current = inp
for layer in self.layers:
inp, gate = layer(current).chunk(2, dim=-1)
inp, gate = getattr(F, self.act)(inp), F.sigmoid(gate)
current = gate * current + (1 - gate) * inp
def forward(self, **conds):
"""t (seq_len x batch) or (batch), tlen"""
embs = torch.cat(
[emb(conds[name]) for name, emb in sorted(self.embs.items())],
dim=-1)

return current
embs = F.dropout(embs, p=self.dropout, training=self.training)

return self.proj(embs)


class LinearDecoder(nn.Module):
Expand Down Expand Up @@ -131,7 +132,7 @@ def init(self):
nn.init.normal_(self.end_transition)

def forward(self, enc_outs):
"get logits of the input features"
"""get logits of the input features"""
# (seq_len x batch x vocab)
if self.highway is not None:
enc_outs = self.highway(enc_outs)
Expand Down Expand Up @@ -211,7 +212,7 @@ def loss(self, logits, targets, lengths):

def predict(self, enc_outs, lengths):
# (seq_len x batch x vocab)
logits = self.projection(enc_outs)
logits = self(enc_outs)
seq_len, _, vocab = logits.size()
start_tag, end_tag = vocab, vocab + 1

Expand Down
26 changes: 1 addition & 25 deletions pie/models/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,7 @@
from pie import initialization

from .lstm import CustomBiLSTM


class Highway(torch.nn.Module):
    """
    Stack of highway layers.

    Each layer is a single linear map producing ``2 * input_dim`` values:
    the first half is passed through ``activation`` (the transformed
    candidate), the second half through a sigmoid (the gate). The layer
    output is ``gate * input + (1 - gate) * candidate``, so the feature
    size is preserved.

    Parameters
    ----------
    input_dim : size of the last dimension of the input (and output)
    num_layers : number of stacked highway layers
    activation : callable applied to the candidate half of each projection
    """
    def __init__(self, input_dim, num_layers=1, activation=torch.nn.functional.relu):
        super().__init__()

        projections = []
        for _ in range(num_layers):
            projections.append(torch.nn.Linear(input_dim, input_dim * 2))
        self.layers = torch.nn.ModuleList(projections)
        self.activation = activation

        # set the gate half of every bias to 1 so that a fresh layer
        # leans towards carrying its input through
        for linear in self.layers:
            linear.bias.data[input_dim:].fill_(1)

    def forward(self, inputs):
        """Run ``inputs`` through every highway layer and return the result."""
        hidden = inputs

        for linear in self.layers:
            candidate, gate = linear(hidden).chunk(2, dim=-1)
            candidate = self.activation(candidate)
            gate = torch.sigmoid(gate)
            # gated mix of the untouched input and the transformed candidate
            hidden = gate * hidden + (1 - gate) * candidate

        return hidden
from .highway import Highway


class CNNEmbedding(nn.Module):
Expand Down
39 changes: 39 additions & 0 deletions pie/models/highway.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

import torch
import torch.nn as nn
import torch.nn.functional as F

from pie import initialization


class Highway(nn.Module):
    """
    Highway network

    A stack of gated layers. Each layer projects its input onto
    ``2 * in_features`` values; the first half is passed through the
    activation (candidate), the second half through a sigmoid (gate), and
    the layer returns ``gate * input + (1 - gate) * candidate``. The size
    of the last dimension is therefore preserved.

    Parameters
    ----------
    in_features : int, size of the last dimension of the input
    num_layers : int, number of stacked highway layers
    act : str or callable, activation applied to the candidate. A string is
        looked up by name in ``torch.nn.functional`` (e.g. 'relu'); a
        callable is used as is.
    """
    def __init__(self, in_features, num_layers, act='relu'):
        super().__init__()

        self.in_features = in_features
        self.act = act

        self.layers = nn.ModuleList(
            [nn.Linear(in_features, in_features * 2) for _ in range(num_layers)])

        self.init()

    def init(self):
        """(Re-)initialize all layers and bias the gates towards carrying the input."""
        for layer in self.layers:
            initialization.init_linear(layer)
            # bias gate to let information go untouched
            nn.init.constant_(layer.bias[self.in_features:], 1.)

    def forward(self, inp):
        """
        Apply all highway layers to `inp` (last dimension must be `in_features`)
        and return a tensor of the same shape.

        Raises AttributeError if `act` is a string that does not name a
        function in ``torch.nn.functional``.
        """
        # resolve the activation once rather than once per layer
        act_fn = self.act if callable(self.act) else getattr(F, self.act)

        current = inp
        for layer in self.layers:
            candidate, gate = layer(current).chunk(2, dim=-1)
            # torch.sigmoid replaces the deprecated F.sigmoid alias
            candidate, gate = act_fn(candidate), torch.sigmoid(gate)
            current = gate * current + (1 - gate) * candidate

        return current


2 changes: 1 addition & 1 deletion pie/scripts/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def run(model_path, test_path, train_path,
model = BaseModel.load(model_path).to(device)
if model_info:
print(model)

if hasattr(model, '_settings'): # new models should all have _settings
settings = model._settings
elif settings:
Expand All @@ -27,6 +26,7 @@ def run(model_path, test_path, train_path,
settings.batch_size = batch_size
settings.buffer_size = buffer_size
settings.device = device
settings.shuffle = False # avoid shuffling

trainset = None
if train_path:
Expand Down
21 changes: 13 additions & 8 deletions pie/scripts/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import scipy.stats as stats

from pie import utils
from pie.settings import settings_from_file, Settings
from pie import settings


# available distributions
Expand Down Expand Up @@ -60,6 +60,8 @@ def parse_opt(obj, opt_key):
raise ValueError("Unknown distribution: ", v[opt_key])
else:
opt[param] = parse_opt(v, opt_key)
else:
opt[param] = v

return opt

Expand All @@ -85,6 +87,8 @@ def sample_from_config(opt):
output[param] = sample_from_config(dist)
elif isinstance(dist, list):
output[param] = [sample_from_config(d) for d in dist]
elif isinstance(dist, (str, float, int, bool)):
output[param] = dist # no sampling
else:
output[param] = dist.rvs()

Expand All @@ -98,23 +102,24 @@ def run(config, opt, n_iter):
print()
print("::: Starting optimization run {} :::".format(i + 1))
print()
sampled_config = sample_from_config(opt)
merged = utils.recursive_merge(dict(config), sampled_config, overwrite=True)
print(yaml.dump(dict(config)))
print(yaml.dump(merged))
train.run(Settings(merged))
sampled = sample_from_config(opt)
merged = settings.Settings(
utils.recursive_merge(dict(config), sampled, overwrite=True))
print("::: Sampled config :::")
print(yaml.dump(dict(merged)))
train.run(settings.check_settings(settings.merge_task_defaults(merged)))


if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('config_path', default='config.json')
parser.add_argument('opt_path')
parser.add_argument('opt_path', help='Path to optimization file (see opt.json)')
parser.add_argument('--n_iter', type=int, default=20)
args = parser.parse_args()

with utils.shutup():
config = settings_from_file(args.config_path)
config = settings.settings_from_file(args.config_path)

opt = read_opt(args.opt_path)

Expand Down
6 changes: 5 additions & 1 deletion pie/scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ def get_fname_infix(settings):


def run(settings):
# read settings if input is path
if isinstance(settings, str):
settings = settings_from_file(settings)

# seeding
now = datetime.now()
seed = now.hour * 10000 + now.minute * 100 + now.second
Expand Down Expand Up @@ -190,4 +194,4 @@ def run(settings):
parser = argparse.ArgumentParser()
parser.add_argument('config_path', nargs='?', default='config.json')
args = parser.parse_args()
run(settings_from_file(args.config_path))
run(args.config_path)
2 changes: 1 addition & 1 deletion pie/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def evaluate(self, dataset):
total_losses, total_batches = collections.defaultdict(float), 0

# get all tasks
tasks = list(self.model.label_encoder.tasks)
tasks = list(self.model.tasks)

for batch in tqdm.tqdm(dataset.batch_generator()):
total_batches += 1
Expand Down

0 comments on commit 94a45d2

Please sign in to comment.