Skip to content

Commit

Permalink
Merge pull request #177 from timokau/pareto-fixup
Browse files Browse the repository at this point in the history
Pareto: Generate unique centroids, documentation, test
  • Loading branch information
timokau authored Nov 20, 2020
2 parents e995124 + 02d6578 commit f266087
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 27 deletions.
102 changes: 75 additions & 27 deletions csrank/dataset_reader/choicefunctions/choice_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def make_globular_pareto_choices(
n_objects=10,
seed=42,
cluster_spread=1.0,
cluster_size=10,
**kwargs,
):
def pareto_front(X, signs=None):
Expand All @@ -41,45 +40,94 @@ def pareto_front(X, signs=None):
)
return pareto

def sample_unit_ball(n_inst=10000, n_features=2, rng=None, radius=1.0):
rng = check_random_state(rng)
X = rng.randn(n_inst, n_features)
u = rng.uniform(size=n_inst)[:, None]
X /= np.linalg.norm(X, axis=1, ord=2)[:, None]
X *= radius * u
return X
def sample_from_unit_ball(n_points, dimension, radius, random_state):
"""Sample points from a ball (uniform direction, uniform radial distance).
def make_randn_pareto_choices(
n_instances=10000, n_features=2, n_objects=10, data_seed=None, center=0.0
The ball has radius `radius` and is centered at the origin.
Parameters
----------
n_points : int
The number of points to sample.
dimension : int
The dimension of the space.
radius : float
The radius of the ball.
random_state: np.random.RandomState
A numpy random state.
Returns
-------
numpy array of shape (n_points, dimension)
A list of points sampled from the ball.
"""
# Sample a random direction for each point
directions = random_state.randn(n_points, dimension)
# Normalize each direction vector to have length 1 (euclidean
# norm).
directions /= np.linalg.norm(directions, axis=1, ord=2)[:, None]

# Sample a length (as a fraction of the radius) uniformly for each
# point.
u = random_state.uniform(size=n_points)[:, None]
lengths = u * radius

return directions * lengths

def sample_pareto_from_isometric_normal(
n_points, dimension, center, random_state
):
"""Generate random objects from a d-dimensional isometric normal distribution.
"""Generate a Pareto problem from random objects.
Objects are drawn from a d-dimensional isometric normal
distribution.
This should be the easiest possible Pareto-problem, since the model can learn
a latent-utility which scores how likely a point is on the front (independent
of the other points)."""
rand = check_random_state(data_seed)
X = rand.randn(n_instances, n_objects, n_features)
Y = np.empty((n_instances, n_objects), dtype=bool)
for i in range(n_instances):
Y[i] = pareto_front(X[i])
of the other points).
Parameters
----------
n_points : int
The number of points to sample.
dimension : int
The dimension of the space.
center : scalar or numpy array
An offset that will be added to every point.
random_state: np.random.RandomState
A numpy random state.
Returns
-------
X: numpy array of shape (n_points, dimension)
A list of points sampled from the d-dimensional isometric
normal distribution.
Y: numpy array of shape (n_points,)
A binary flag array indicating whether or not the corresponding
point is part of the Pareto front.
"""
X = random_state.randn(n_points, dimension)
Y = pareto_front(X)
return X + center, Y

rand = check_random_state(seed)
X = np.empty((n_instances, n_objects, n_features))
Y = np.empty((n_instances, n_objects), dtype=int)
for i in range(int(n_instances / cluster_size)):
center = sample_unit_ball(
n_inst=1, n_features=n_features, rng=rand, radius=cluster_spread
for i in range(n_instances):
center = sample_from_unit_ball(
n_points=1,
dimension=n_features,
radius=cluster_spread,
random_state=rand,
)
x, y = make_randn_pareto_choices(
n_instances=cluster_size,
n_features=n_features,
n_objects=n_objects,
data_seed=rand,
x, y = sample_pareto_from_isometric_normal(
n_points=n_objects,
dimension=n_features,
center=center,
random_state=rand,
)
X[i * cluster_size : (i + 1) * cluster_size] = x
Y[i * cluster_size : (i + 1) * cluster_size] = y
X[i] = x
Y[i] = y
return X, Y

def make_latent_linear_choices(
Expand Down
25 changes: 25 additions & 0 deletions csrank/tests/test_pareto_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np

from csrank import ChoiceDatasetGenerator


def test_pareto_problem_generation():
    """A simple sanity check for Pareto problem generation.

    Generates a tiny train/test split with the "pareto" dataset type and
    verifies that the feature and label arrays have the requested shapes
    and that the labels only contain the binary flags 0 and 1.
    """
    gen = ChoiceDatasetGenerator(
        dataset_type="pareto",
        random_state=42,
        n_train_instances=11,
        n_test_instances=1,
        n_objects=3,
        n_features=2,
    )
    X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split()

    # Features: (n_instances, n_objects, n_features);
    # labels: (n_instances, n_objects).
    assert X_train.shape == (11, 3, 2)
    assert Y_train.shape == (11, 3)
    assert X_test.shape == (1, 3, 2)
    # The test labels must match the test features, just like the train
    # labels match the train features.
    assert Y_test.shape == (1, 3)

    def is_binary_array(a):
        """Return True if every entry of `a` is either 0 or 1."""
        return np.logical_or(a == 0, a == 1).all()

    assert is_binary_array(Y_train)
    assert is_binary_array(Y_test)

0 comments on commit f266087

Please sign in to comment.