Allow custom model when given by the user. #72

Open · wants to merge 4 commits into master
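This PR threads an optional torch_model_class argument through NN_train and NN_eval so that a user-supplied network can replace the built-in architecture; when the argument is omitted, both functions fall back to the new aifeynman.model.DefaultSimpleNet. A minimal sketch of the intended call pattern (MyNet and the data paths are illustrative assumptions; only the torch_model_class keyword comes from this diff):

import torch
import torch.nn as nn

from aifeynman.S_NN_train import NN_train

# Hypothetical user-defined architecture: any callable that takes the number
# of input variables and returns an nn.Module fits the new parameter.
class MyNet(nn.Module):
    def __init__(self, ni):
        super().__init__()
        self.linear1 = nn.Linear(ni, 256)
        self.linear2 = nn.Linear(256, 1)

    def forward(self, x):
        return self.linear2(torch.tanh(self.linear1(x)))

# Train on a whitespace-separated data file (last column is the target),
# substituting MyNet for the default network.
model = NN_train("data/", "example_data.txt", torch_model_class=MyNet)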
2 changes: 1 addition & 1 deletion aifeynman/RPN_to_eq.py
@@ -17,7 +17,7 @@ def RPN_to_eq(expr):
             elif i == "0":
                 stack = np.append(stack,"0")
             elif i == "1":
-                stack = np.append(stack,"1")
+                stack = np.append(stack,"1")
             else:
                 stack = np.append(stack,"x" + str(ord(i)-97))
         elif i in operations_2:
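For context, the touched line sits inside a stack-based RPN decoder: digit tokens are pushed as literals, letters map to variables x0, x1, ..., and binary operators pop two operands. A self-contained sketch of that stack discipline (simplified; the operator set and output formatting are assumptions, not the library's exact code):

def rpn_to_infix(expr):
    # Operands are pushed; a binary operator pops two operands and pushes the
    # combined subexpression, so one full expression remains at the end.
    ops = {"+", "-", "*", "/"}
    stack = []
    for tok in expr:
        if tok in ops:
            b, a = stack.pop(), stack.pop()
            stack.append("(" + a + tok + b + ")")
        elif tok in "01":
            stack.append(tok)
        else:
            # Letters map to variables: 'a' -> x0, 'b' -> x1, ...
            stack.append("x" + str(ord(tok) - 97))
    return stack[0]

print(rpn_to_infix("ab+"))  # prints (x0+x1)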
30 changes: 9 additions & 21 deletions aifeynman/S_NN_eval.py
@@ -1,4 +1,5 @@
 from __future__ import print_function
+from typing import Any, Callable, Optional
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -12,6 +13,8 @@
 from matplotlib import pyplot as plt
 import time
 
+from aifeynman.model import DefaultSimpleNet
+
 is_cuda = torch.cuda.is_available()
 
 bs = 2048
@@ -40,7 +43,7 @@ def rmse_loss(pred, targ):
     return torch.sqrt(F.mse_loss(pred, targ))/denom
 
 
-def NN_eval(pathdir,filename):
+def NN_eval(pathdir,filename, torch_model_class: Optional[Callable[[Any], nn.Module]]=None):
     try:
         n_variables = np.loadtxt(pathdir+filename, dtype='str').shape[1]-1
         variables = np.loadtxt(pathdir+filename, usecols=(0,))
@@ -76,35 +79,20 @@ def NN_eval(pathdir,filename):
         else:
             factors_val = factors_val
         factors_val = factors_val.float()
-        product_val = torch.from_numpy(f_dependent[int(5*len(variables)/6):int(len(variables))])
+        product_val = torch.from_numpy(f_dependent[int(5*len(variables)/6):int(len(variables))])
         if is_cuda:
             product_val = product_val.cuda()
         else:
             product_val = product_val
         product_val = product_val.float()
-
-        class SimpleNet(nn.Module):
-            def __init__(self, ni):
-                super().__init__()
-                self.linear1 = nn.Linear(ni, 128)
-                self.linear2 = nn.Linear(128, 128)
-                self.linear3 = nn.Linear(128, 64)
-                self.linear4 = nn.Linear(64,64)
-                self.linear5 = nn.Linear(64,1)
-
-            def forward(self, x):
-                x = F.tanh(self.linear1(x))
-                x = F.tanh(self.linear2(x))
-                x = F.tanh(self.linear3(x))
-                x = F.tanh(self.linear4(x))
-                x = self.linear5(x)
-                return x
+
+        Net = torch_model_class or DefaultSimpleNet
 
         if is_cuda:
-            model = SimpleNet(n_variables).cuda()
+            model = Net(n_variables).cuda()
         else:
-            model = SimpleNet(n_variables)
+            model = Net(n_variables)
 
         model.load_state_dict(torch.load("results/NN_trained_models/models/"+filename+".h5"))
         model.eval()
         return(rmse_loss(model(factors_val),product_val),model)
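Because NN_eval rebuilds the network before loading the saved state dict, a caller who trained with a custom class must pass the same class here; otherwise the checkpoint keys will not match the default architecture. A minimal usage sketch under that assumption (MyNet as in the example above; the path follows the results/NN_trained_models/ convention visible in the hunk):

from aifeynman.S_NN_eval import NN_eval

# Re-instantiate the architecture used at training time, load the checkpoint
# from results/NN_trained_models/models/<filename>.h5, and get validation RMSE.
rmse, model = NN_eval("data/", "example_data.txt", torch_model_class=MyNet)
print(float(rmse))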
41 changes: 12 additions & 29 deletions aifeynman/S_NN_train.py
@@ -1,18 +1,17 @@
 from __future__ import print_function
+from typing import Any, Callable, Optional
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import pandas as pd
 import numpy as np
-import torch
-from torch.utils import data
-import pickle
-from matplotlib import pyplot as plt
 import torch.utils.data as utils
 import time
 import os
 
+from aifeynman.model import DefaultSimpleNet
+
 bs = 2048
 wd = 1e-2
 
@@ -40,7 +39,7 @@ def rmse_loss(pred, targ):
     denom = torch.sqrt(denom.sum()/len(denom))
     return torch.sqrt(F.mse_loss(pred, targ))/denom
 
-def NN_train(pathdir, filename, epochs=1000, lrs=1e-2, N_red_lr=4, pretrained_path=""):
+def NN_train(pathdir, filename, epochs=1000, lrs=1e-2, N_red_lr=4, pretrained_path="", torch_model_class: Optional[Callable[[Any], nn.Module]]=None):
     try:
         os.mkdir("results/NN_trained_models/")
     except:
@@ -54,9 +53,8 @@ def NN_train(pathdir, filename, epochs=1000, lrs=1e-2, N_red_lr=4, pretrained_pa
     n_variables = np.loadtxt(pathdir+"%s" %filename, dtype='str').shape[1]-1
     variables = np.loadtxt(pathdir+"%s" %filename, usecols=(0,))
 
-    # epochs = 200*n_variables
+    epochs = 200*n_variables
     if len(variables)<5000:
-        print('WARNING: tripling epochs since len(variables)<5000...')
         epochs = epochs*3
 
     if n_variables==0 or n_variables==1:
@@ -84,30 +82,15 @@ def NN_train(pathdir, filename, epochs=1000, lrs=1e-2, N_red_lr=4, pretrained_pa
     product = product
     product = product.float()
 
-    class SimpleNet(nn.Module):
-        def __init__(self, ni):
-            super().__init__()
-            self.linear1 = nn.Linear(ni, 128)
-            self.linear2 = nn.Linear(128, 128)
-            self.linear3 = nn.Linear(128, 64)
-            self.linear4 = nn.Linear(64,64)
-            self.linear5 = nn.Linear(64,1)
-
-        def forward(self, x):
-            x = F.tanh(self.linear1(x))
-            x = F.tanh(self.linear2(x))
-            x = F.tanh(self.linear3(x))
-            x = F.tanh(self.linear4(x))
-            x = self.linear5(x)
-            return x
+    Net = torch_model_class or DefaultSimpleNet
 
     my_dataset = utils.TensorDataset(factors,product) # create your datset
     my_dataloader = utils.DataLoader(my_dataset, batch_size=bs, shuffle=True) # create your dataloader
 
     if is_cuda:
-        model_feynman = SimpleNet(n_variables).cuda()
+        model_feynman = Net(n_variables).cuda()
     else:
-        model_feynman = SimpleNet(n_variables)
+        model_feynman = Net(n_variables)
 
     if pretrained_path!="":
         model_feynman.load_state_dict(torch.load(pretrained_path))
@@ -120,18 +103,18 @@ def forward(self, x):
         model_feynman.train()
         for i, data in enumerate(my_dataloader):
             optimizer_feynman.zero_grad()
-
+
             if is_cuda:
                 fct = data[0].float().cuda()
                 prd = data[1].float().cuda()
             else:
                 fct = data[0].float()
                 prd = data[1].float()
-
+
             loss = rmse_loss(model_feynman(fct),prd)
             loss.backward()
             optimizer_feynman.step()
-
+
             '''
             # Early stopping
             if epoch%20==0 and epoch>0:
@@ -145,7 +128,7 @@ def forward(self, x):
             torch.save(model_feynman.state_dict(), "results/NN_trained_models/models/" + filename + ".h5")
             check_es_loss = loss
         '''
-        torch.save(model_feynman.state_dict(), "results/NN_trained_models/models/" + filename + ".h5")
+        torch.save(model_feynman.state_dict(), "results/NN_trained_models/models/" + filename + ".h5")
         lrs = lrs/10
 
     return model_feynman
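aifeynman/model.py itself is not among the hunks shown here; judging from the inline SimpleNet that this file deletes, DefaultSimpleNet is presumably equivalent to the following (a reconstruction, not the PR's verbatim code; torch.tanh stands in for the deprecated F.tanh used in the removed lines):

import torch
import torch.nn as nn

# Presumed shape of DefaultSimpleNet, mirroring the removed inline SimpleNet:
# four tanh hidden layers (128, 128, 64, 64) and a linear scalar output.
class DefaultSimpleNet(nn.Module):
    def __init__(self, ni):
        super().__init__()
        self.linear1 = nn.Linear(ni, 128)
        self.linear2 = nn.Linear(128, 128)
        self.linear3 = nn.Linear(128, 64)
        self.linear4 = nn.Linear(64, 64)
        self.linear5 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = torch.tanh(self.linear2(x))
        x = torch.tanh(self.linear3(x))
        x = torch.tanh(self.linear4(x))
        return self.linear5(x)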
2 changes: 1 addition & 1 deletion aifeynman/S_compositionality.py
@@ -62,7 +62,7 @@ def check_compositionality(pathdir,filename,model,express,mu,sigma,nu=10):
         else:
             i = i + 1
 
-
+
     if i==len(data[0:1000]) and np.mean(list_z)<mu:
         return (1,express,np.mean(list_z),np.std(list_z))
     else:
10 changes: 5 additions & 5 deletions aifeynman/S_gen_sym.py
@@ -23,7 +23,7 @@ def check_gen_sym(pathdir,filename,model,gen_sym_idx,express,mu,sigma,nu=10):
     data = np.loadtxt(pathdir+filename)[:,gen_sym_idx]
     # Turn the equation from RPN to normal mathematical expression
     eq = RPN_to_eq(express)
-
+
     # Get the variables appearing in the equation
     possible_vars = ["x%s" %i for i in np.arange(0,30,1)]
     variables = []
@@ -72,12 +72,12 @@ def check_gen_sym(pathdir,filename,model,gen_sym_idx,express,mu,sigma,nu=10):
                 error = error.detach().numpy()
                 list_z = np.append(list_z,np.log2(1+abs(error)*2**30))
                 z = np.sqrt(len(list_z))*(np.mean(list_z)-mu)/sigma
-
+
                 i = i + 1
             else:
                 i = i + 1
 
-
+
     if i==len(data[0:1000]) and np.mean(list_z)<mu:
         return (1,express,np.mean(list_z),np.std(list_z))
     else:
@@ -134,12 +134,12 @@ def add_gen_sym_on_pareto(PA1,PA, gen_sym_idx, express):
         exp1 = PA1[i][2]
         temp_list = copy.deepcopy(gen_sym_idx)
         bf_eq = math_eq
-
+
         while(len(temp_list)>1):
             for j in range(len(possible_vars)-len(temp_list),temp_list[-1]-len(temp_list)+1,-1):
                 exp1 = exp1.replace(possible_vars[j],possible_vars[j+1])
             temp_list = np.delete(temp_list,-1)
-
+
         # replace variables in bf_eq
         arr_idx = np.flip(np.arange(0,len(gen_sym_idx),1), axis=0)
         actual_idx = np.flip(gen_sym_idx, axis=0)
2 changes: 1 addition & 1 deletion aifeynman/S_get_number_DL.py
@@ -1,6 +1,6 @@
 # Calculates the complexity of a number to be used for the Pareto frontier
 
-import numpy as np
+import numpy as np
 
 def get_number_DL(n):
     epsilon = 1e-10
10 changes: 5 additions & 5 deletions aifeynman/S_gradient_decomposition.py
@@ -31,7 +31,7 @@ def powerset_atleast_2(iterable, max_subset_size):
     return r
 
 def evaluate_derivatives(model, s, pts):
-
+
     pts = pts.clone().detach()
     try:
         device = 'cuda' if model.is_cuda else 'cpu'
@@ -59,7 +59,7 @@ def evaluate_derivatives_andrew(model, s, pts):
         pts = pts.cuda()
         model = model.cuda()
         grad_weights = grad_weights.cuda()
-
+
     pts.requires_grad_(True)
     outs = model(pts)
     grad = torch.autograd.grad(outs, pts, grad_outputs=grad_weights, create_graph=True)[0]
@@ -74,7 +74,7 @@ def forward(self, X):
 
 def draw_samples(X, y, model, s, NUM_SAMPLES, point = None):
     '''
-    Draw samples by sampling each dimension independently,
+    Draw samples by sampling each dimension independently,
    keeping the positions at s fixed to given point if exists,
    sampled point if not.
    '''
@@ -205,7 +205,7 @@ def filter_decompositions_relative_scoring(X, y, model, max_subset_size=None, vi
             bench_scores.append(score)
         snr = signal_to_noise(hypot_scores, bench_scores)
         # penalizes larger decompositions
-        snr -= np.log10(2)*len(s)
+        snr -= np.log10(2)*len(s)
         results.append((snr, s))
         print((snr, s))
         if visualize:
@@ -254,7 +254,7 @@ def identify_decompositions(pathdir,filename, model, max_subset_size=2, visualiz
     data = np.loadtxt(pathdir+filename)
     X = torch.Tensor(data[:, :-1])
     y = torch.Tensor(data[:, [-1]])
-    # Return best decomposition
+    # Return best decomposition
     all_scores = filter_decompositions_relative_scoring(X, y, model, visualize=visualize)
     assert(all_scores)
     best_decomposition = all_scores[0][1]
8 changes: 4 additions & 4 deletions aifeynman/S_polyfit_utils.py
@@ -17,7 +17,7 @@ def as_tall(x):
 
 
 def multipolyfit(xs, y, deg):
-
+
     y = asarray(y).squeeze()
     rows = y.shape[0]
     xs = asarray(xs)
@@ -28,17 +28,17 @@ def multipolyfit(xs, y, deg):
         xs = np.reshape(xs,(len(xs),1))
 
     xs = hstack((ones((xs.shape[0], 1), dtype=xs.dtype) , xs))
-
+
     generators = [basis_vector(num_covariates+1, i) for i in range(num_covariates+1)]
-
+
     # All combinations of degrees
     powers = map(sum, itertools.combinations_with_replacement(generators, deg))
-
+
     # Raise data to specified degree pattern, stack in order
     A = hstack(asarray([as_tall((xs**p).prod(1)) for p in powers]))
     params = lsqr(A, y)[0] # get the best params of the fit
     rms = lsqr(A, y)[4] # get the rms params of the fit
 
     return (params, rms)
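The hunk above happens to show the whole fitting routine: a design matrix of all monomials up to degree deg is built from exponent basis vectors via combinations_with_replacement, then solved with scipy's lsqr. A small usage sketch (the data is made up for illustration):

import numpy as np

from aifeynman.S_polyfit_utils import multipolyfit

# Recover y = 1 + 2*x0 + 3*x0*x1 with a degree-2 multivariate polynomial fit;
# the target lies in the span of the degree-2 monomial basis (1, x0, x1,
# x0**2, x0*x1, x1**2), so the fit should be essentially exact.
rng = np.random.default_rng(0)
xs = rng.uniform(-1.0, 1.0, size=(200, 2))
y = 1.0 + 2.0 * xs[:, 0] + 3.0 * xs[:, 0] * xs[:, 1]

params, rms = multipolyfit(xs, y, deg=2)
print(params)  # coefficients ordered by the combinations_with_replacement basis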
2 changes: 1 addition & 1 deletion aifeynman/S_remove_input_neuron.py
@@ -19,7 +19,7 @@
 
 def remove_input_neuron(net,n_inp,idx_neuron,ct_median,save_filename):
     removed_weights = net.linear1.weight[:,idx_neuron]
-    # Remove the weights associated with the removed input neuron
+    # Remove the weights associated with the removed input neuron
     t = torch.transpose(net.linear1.weight,0,1)
     preserved_ids = torch.LongTensor(np.array(list(set(range(n_inp)) - set([idx_neuron]))))
     t = nn.Parameter(t[preserved_ids, :])