From 64a019166188256a64eac3bdb5c0cc32a76b97e8 Mon Sep 17 00:00:00 2001
From: Viacheslav Borovitskiy
Date: Wed, 11 Sep 2024 09:15:58 +0100
Subject: [PATCH] Compute kernel eigenvalues in log-space to scale to higher
 dimensions of the HypercubeGraph space. Not all tests are currently passing.

---
 .../feature_maps/deterministic.py           |  33 +++--
 .../feature_maps/random_phase.py            |  56 +++++---
 geometric_kernels/kernels/karhunen_loeve.py | 124 +++++++++++++++---
 geometric_kernels/kernels/matern_kernel.py  |  43 ++++--
 geometric_kernels/kernels/product.py        |   6 +-
 geometric_kernels/spaces/circle.py          |   8 +-
 geometric_kernels/spaces/eigenfunctions.py  | 110 ++++++++++++++--
 geometric_kernels/spaces/hypercube_graph.py |  88 ++++++++++---
 geometric_kernels/spaces/hypersphere.py     |   8 +-
 geometric_kernels/spaces/lie_groups.py      |   8 +-
 .../test_matern_karhunenloeve_kernel.py     |   2 +-
 tests/spaces/test_hypercube_graph.py        |  12 +-
 tests/spaces/test_lie_groups.py             |   3 +-
 tests/test_dtypes.py                        |   8 +-
 14 files changed, 399 insertions(+), 110 deletions(-)

diff --git a/geometric_kernels/feature_maps/deterministic.py b/geometric_kernels/feature_maps/deterministic.py
index 873a57d0..99945179 100644
--- a/geometric_kernels/feature_maps/deterministic.py
+++ b/geometric_kernels/feature_maps/deterministic.py
@@ -22,6 +22,13 @@ class DeterministicFeatureMapCompact(FeatureMap):
         A :class:`~.spaces.DiscreteSpectrumSpace` space.
     :param num_levels:
         Number of levels in the kernel approximation.
+
+    .. note::
+        This feature map does not support a "`log_spectrum` mode", unlike the
+        :class:`~.feature_maps.RandomPhaseFeatureMapCompact` feature map or
+        the :class:`~.kernels.MaternKarhunenLoeveKernel` kernel.
+        The reason is simple: if the number of eigenfunctions per level
+        becomes large, this feature map is infeasible anyway.
     """

     def __init__(self, space: DiscreteSpectrumSpace, num_levels: int):
@@ -36,8 +43,9 @@ def __call__(
         self,
         X: B.Numeric,
         params: Dict[str, B.Numeric],
+        *,
         normalize: Optional[bool] = None,
-        **kwargs,
+        key: Optional[B.RandomState] = None,
     ) -> Tuple[None, B.Numeric]:
         """
         :param X:
@@ -48,15 +56,24 @@ def __call__(
             Normalize to have unit average variance (if omitted or None,
             follows the standard behavior of
             :class:`~.kernels.MaternKarhunenLoeveKernel`).
-        :param ``**kwargs``:
-            Unused.
+        :param key:
+            Random state, either `np.random.RandomState`,
+            `tf.random.Generator`, `torch.Generator` or `jax.tensor` (which
+            represents a random state).
+
+            Keyword-only. Only accepted for compatibility with randomized
+            feature maps. If provided, the key is returned as the first
+            element of the tuple.

         :return:
-            `Tuple(None, features)` where `features` is an [N, O] array, N
-            is the number of inputs and O is the dimension of the feature map.
+            `Tuple(key, features)` where `features` is an [N, O] array, N
+            is the number of inputs and O is the dimension of the feature map,
+            while `key` is the `key` keyword argument, unchanged.

         .. note::
-            The first element of the returned tuple is the simple None and
+            The first element of the returned tuple is the value of the `key`
+            keyword argument, unchanged. When this argument is omitted (which is
+            expected to be the typical use case), it is a simple None and
             should be ignored. It is only there to support the abstract
             interface: for some other subclasses of :class:`FeatureMap`,
             this first element may be an updated random key.
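The updated call contract can be exercised as follows; a minimal sketch against this patch (the `Circle` space, shapes, and default parameters are illustrative, not part of the diff):

    import numpy as np

    from geometric_kernels.feature_maps.deterministic import (
        DeterministicFeatureMapCompact,
    )
    from geometric_kernels.spaces import Circle

    space = Circle()
    feature_map = DeterministicFeatureMapCompact(space, num_levels=5)
    params = feature_map.kernel.init_params()  # default nu and lengthscale

    X = np.random.uniform(0.0, 2.0 * np.pi, size=(10, 1))
    key, features = feature_map(X, params)  # no key given ...
    assert key is None                      # ... so None is passed through
    # features has shape [N, O] with N = 10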
@@ -72,9 +89,9 @@ def __call__( spectrum = spectrum / normalizer weights = B.transpose(B.power(spectrum, 0.5)) # [1, M] - eigenfunctions = self.kernel.eigenfunctions(X, **params) # [N, M] + eigenfunctions = self.kernel.eigenfunctions(X) # [N, M] features = B.cast(B.dtype(params["lengthscale"]), eigenfunctions) * B.cast( B.dtype(params["lengthscale"]), weights ) # [N, M] - return None, features + return key, features diff --git a/geometric_kernels/feature_maps/random_phase.py b/geometric_kernels/feature_maps/random_phase.py index 79a55111..9b0b4788 100644 --- a/geometric_kernels/feature_maps/random_phase.py +++ b/geometric_kernels/feature_maps/random_phase.py @@ -12,6 +12,8 @@ :mod:`geometric_kernels.feature_maps.rejection_sampling`. """ +from math import log + import lab as B from beartype.typing import Dict, Optional, Tuple @@ -46,7 +48,11 @@ def __init__( self.space = space self.num_levels = num_levels self.num_random_phases = num_random_phases - self.kernel = MaternKarhunenLoeveKernel(space, num_levels) + self.kernel = MaternKarhunenLoeveKernel( + space, + num_levels, + normalize=False, # RandomPhaseFeatureMapCompact uses its own normalization. + ) def __call__( self, @@ -54,8 +60,7 @@ def __call__( params: Dict[str, B.Numeric], *, key: B.RandomState, - normalize: Optional[bool] = None, - **kwargs, + normalize: bool = True, ) -> Tuple[B.RandomState, B.Numeric]: """ :param X: @@ -80,11 +85,7 @@ def __call__( does this for you. :param normalize: - Normalize to have unit average variance (if omitted - or None, follows the standard behavior of - :class:`kernels.MaternKarhunenLoeveKernel`). - :param ``**kwargs``: - Unused. + Normalize to have constant unit variance. Defaults to `True`. :return: `Tuple(key, features)` where `features` is an [N, O] array, N @@ -93,12 +94,11 @@ def __call__( state (generator) for any other backends. """ key, random_phases = self.space.random(key, self.num_random_phases) # [O, D] - eigenvalues = self.kernel.eigenvalues_laplacian - spectrum = self.kernel._spectrum( - eigenvalues, - nu=params["nu"], - lengthscale=params["lengthscale"], + log_spectrum = self.kernel.eigenvalues( + params, + normalize=True, # Will be overridden by the normalization below but helps with numerical stability. 
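+            # The values returned here are log-eigenvalues; they are
+            # exponentiated further below, after reweighting.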
+ log_spectrum_on=True, ) if is_complex(X): @@ -106,12 +106,32 @@ def __call__( else: dtype = B.dtype(params["lengthscale"]) - weights = B.power(spectrum, 0.5) # [L, 1] + log_weights = 0.5 * log_spectrum # [L, 1] + + eigenfunctions = self.kernel.eigenfunctions + if hasattr(eigenfunctions, "log_num_eigenfunctions_per_level"): + log_num_eigenfunctions_per_level = ( + eigenfunctions.log_num_eigenfunctions_per_level + ) # [L] + else: + log_num_eigenfunctions_per_level = list( + map(log, eigenfunctions.num_eigenfunctions_per_level) + ) # [L] + + log_num_eigenfunctions_per_level = B.cast( + B.dtype(params["lengthscale"]), + from_numpy(params["lengthscale"], log_num_eigenfunctions_per_level)[ + :, None + ], + ) # [L, 1] + + log_weights = log_weights + log_num_eigenfunctions_per_level + weights = B.exp(log_weights) # [L, 1] random_phases_b = B.cast(dtype, from_numpy(X, random_phases)) - phi_product = self.kernel.eigenfunctions.phi_product( - X, random_phases_b, **params + phi_product = self.kernel.eigenfunctions.normalized_phi_product( # type: ignore[attr-defined] + X, random_phases_b ) # [N, O, L] embedding = B.cast(dtype, phi_product) # [N, O, L] @@ -121,7 +141,6 @@ def __call__( if is_complex(features): features = B.concat(B.real(features), B.imag(features), axis=1) - normalize = normalize or (normalize is None and self.kernel.normalize) if normalize: normalizer = B.sqrt(B.sum(features**2, axis=-1, squeeze=False)) features = features / normalizer @@ -165,7 +184,6 @@ def __call__( *, key: B.RandomState, normalize: Optional[bool] = True, - **kwargs, ) -> Tuple[B.RandomState, B.Numeric]: """ :param X: @@ -189,8 +207,6 @@ def __call__( :param normalize: Normalize to have unit average variance (`True` by default). - :param ``**kwargs``: - Unused. :return: `Tuple(key, features)` where `features` is an [N, O] array, N is the number of inputs and O is the dimension of the feature map; diff --git a/geometric_kernels/kernels/karhunen_loeve.py b/geometric_kernels/kernels/karhunen_loeve.py index 5157f30a..0cc2b4b9 100644 --- a/geometric_kernels/kernels/karhunen_loeve.py +++ b/geometric_kernels/kernels/karhunen_loeve.py @@ -3,6 +3,9 @@ kernel for discrete spectrum spaces, subclasses of :class:`DiscreteSpectrumSpace`. """ +import logging +from math import log + import lab as B import numpy as np from beartype.typing import Dict, Optional @@ -50,20 +53,25 @@ class MaternKarhunenLoeveKernel(BaseGeometricKernel): :param num_levels: Number of levels to include in the summation. :param normalize: - Whether to normalize kernel to have unit average variance. + Whether to normalize kernel to have unit average variance. Keyword-only. + :param log_spectrum_on: + If True, the spectrum is computed in the log domain. Keyword-only. """ def __init__( self, space: DiscreteSpectrumSpace, num_levels: int, + *, normalize: bool = True, + log_spectrum_on=False, ): super().__init__(space) self.num_levels = num_levels # in code referred to as `L`. 
self._eigenvalues_laplacian = self.space.get_eigenvalues(self.num_levels) self._eigenfunctions = self.space.get_eigenfunctions(self.num_levels) self.normalize = normalize + self.log_spectrum_on = log_spectrum_on @property def space(self) -> DiscreteSpectrumSpace: @@ -94,7 +102,12 @@ def init_params(self) -> Dict[str, B.NPNumeric]: return params def _spectrum( - self, s: B.Numeric, nu: B.Numeric, lengthscale: B.Numeric + self, + s: B.Numeric, + nu: B.Numeric, + lengthscale: B.Numeric, + *, + log_spectrum_on: Optional[bool] = None, ) -> B.Numeric: """ The spectrum of the Matérn kernel with hyperparameters `nu` and @@ -106,25 +119,40 @@ def _spectrum( The smoothness parameter of the kernel. :param lengthscale: The length scale parameter of the kernel. + :param log_spectrum_on: + If True, the spectrum is computed in the log domain. Keyword-only. + If not provided or None, the value of `self.log_spectrum_on` is used. :return: - The spectrum of the Matérn kernel. + The spectrum of the Matérn kernel (or the log thereof, if + `log_spectrum_on` is set to True). """ assert lengthscale.shape == (1,) assert nu.shape == (1,) + if log_spectrum_on is None: + log_spectrum_on = self.log_spectrum_on + # Note: 1.0 in safe_nu can be replaced by any finite positive value safe_nu = B.where(nu == np.inf, B.cast(B.dtype(lengthscale), np.r_[1.0]), nu) # for nu == np.inf - spectral_values_nu_infinite = B.exp( - -(lengthscale**2) / 2.0 * B.cast(B.dtype(lengthscale), s) - ) + if log_spectrum_on: + spectral_values_nu_infinite = ( + -(lengthscale**2) / 2.0 * B.cast(B.dtype(lengthscale), s) + ) + else: + spectral_values_nu_infinite = B.exp( + -(lengthscale**2) / 2.0 * B.cast(B.dtype(lengthscale), s) + ) # for nu < np.inf power = -safe_nu - self.space.dimension / 2.0 base = 2.0 * safe_nu / lengthscale**2 + B.cast(B.dtype(safe_nu), s) - spectral_values_nu_finite = base**power + if log_spectrum_on: + spectral_values_nu_finite = B.log(base) * power + else: + spectral_values_nu_finite = base**power return B.where( nu == np.inf, spectral_values_nu_infinite, spectral_values_nu_finite @@ -145,7 +173,11 @@ def eigenvalues_laplacian(self) -> B.Numeric: return self._eigenvalues_laplacian def eigenvalues( - self, params: Dict[str, B.Numeric], normalize: Optional[bool] = None + self, + params: Dict[str, B.Numeric], + normalize: Optional[bool] = None, + *, + log_spectrum_on: Optional[bool] = None, ) -> B.Numeric: """ Eigenvalues of the kernel. @@ -159,6 +191,9 @@ def eigenvalues( If None, uses `self.normalize` to decide. Defaults to None. + :param log_spectrum_on: + If True, the spectrum is computed in the log domain. Keyword-only. + If not provided or None, the value of `self.log_spectrum_on` is used. :return: An [L, 1]-shaped array. 
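For intuition, the finite-`nu` branch of `_spectrum` combined with the normalization introduced below can be mirrored in plain NumPy. This is a standalone sketch with invented names, not the library code (which routes through `lab` and uses `self.space.dimension`):

    import numpy as np
    from scipy.special import logsumexp

    def log_matern_spectrum(lam, nu, lengthscale, dim, log_num_per_level):
        # Finite-nu branch: log S(lambda_l) = power * log(2 nu / kappa^2 + lambda_l),
        # with power = -nu - dim / 2, never leaving the log domain.
        power = -nu - dim / 2.0
        log_s = power * np.log(2.0 * nu / lengthscale**2 + lam)
        # Normalizing by sum_l d_l * S(lambda_l) becomes a logsumexp against
        # log(d_l), which is what the normalization in `eigenvalues` does below.
        return log_s - logsumexp(log_s + log_num_per_level)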
@@ -168,24 +203,51 @@ def eigenvalues( assert "nu" in params assert params["nu"].shape == (1,) + if log_spectrum_on is None: + log_spectrum_on = self.log_spectrum_on + spectral_values = self._spectrum( self.eigenvalues_laplacian, nu=params["nu"], lengthscale=params["lengthscale"], + log_spectrum_on=log_spectrum_on, ) + normalize = normalize or (normalize is None and self.normalize) + if normalize: - normalizer = B.sum( - spectral_values - * B.cast( - B.dtype(spectral_values), - from_numpy( - spectral_values, - self.eigenfunctions.num_eigenfunctions_per_level, - )[:, None], + + if log_spectrum_on: + if hasattr(self.eigenfunctions, "log_num_eigenfunctions_per_level"): + num_eigenfunctions_per_level = ( + self.eigenfunctions.log_num_eigenfunctions_per_level + ) + else: + logging.getLogger(__name__).warning( + "Using log_spectrum_on on a space without" + " log_num_eigenfunctions_per_level." + ) + + num_eigenfunctions_per_level = list( + map(log, self.eigenfunctions.num_eigenfunctions_per_level) + ) + else: + num_eigenfunctions_per_level = ( + self.eigenfunctions.num_eigenfunctions_per_level ) + + num_eigenfunctions_per_level = B.cast( + B.dtype(spectral_values), + from_numpy(spectral_values, num_eigenfunctions_per_level)[:, None], ) - return spectral_values / normalizer + + if log_spectrum_on: + normalizer = B.logsumexp(spectral_values + num_eigenfunctions_per_level) + return spectral_values - normalizer + else: + normalizer = B.sum(spectral_values * num_eigenfunctions_per_level) + return spectral_values / normalizer + return spectral_values def K( @@ -196,7 +258,19 @@ def K( assert "nu" in params assert params["nu"].shape == (1,) - weights = B.cast(B.dtype(params["nu"]), self.eigenvalues(params)) # [L, 1] + if "log_spectrum_on" in kwargs: + log_spectrum_on = kwargs["log_spectrum_on"] + kwargs["log_weights_on"] = log_spectrum_on + kwargs.pop("log_spectrum_on") + else: + log_spectrum_on = self.log_spectrum_on + if log_spectrum_on: + kwargs["log_weights_on"] = True + + weights = B.cast( + B.dtype(params["nu"]), + self.eigenvalues(params, log_spectrum_on=log_spectrum_on), + ) # [L, 1] Phi = self.eigenfunctions K = Phi.weighted_outerproduct(weights, X, X2, **kwargs) # [N, N2] if is_complex(K): @@ -210,7 +284,19 @@ def K_diag(self, params, X: B.Numeric, **kwargs) -> B.Numeric: assert "nu" in params assert params["nu"].shape == (1,) - weights = B.cast(B.dtype(params["nu"]), self.eigenvalues(params)) # [L, 1] + if "log_spectrum_on" in kwargs: + log_spectrum_on = kwargs["log_spectrum_on"] + kwargs["log_weights_on"] = log_spectrum_on + kwargs.pop("log_spectrum_on") + else: + log_spectrum_on = self.log_spectrum_on + if log_spectrum_on: + kwargs["log_weights_on"] = True + + weights = B.cast( + B.dtype(params["nu"]), + self.eigenvalues(params, log_spectrum_on=log_spectrum_on), + ) # [L, 1] Phi = self.eigenfunctions K_diag = Phi.weighted_outerproduct_diag(weights, X, **kwargs) # [N,] if is_complex(K_diag): diff --git a/geometric_kernels/kernels/matern_kernel.py b/geometric_kernels/kernels/matern_kernel.py index d0ef1cb0..bc295449 100644 --- a/geometric_kernels/kernels/matern_kernel.py +++ b/geometric_kernels/kernels/matern_kernel.py @@ -7,6 +7,8 @@ Unless you know exactly what you are doing, use :class:`MaternGeometricKernel`. 
""" +import logging + from plum import dispatch, overload from geometric_kernels.feature_maps import ( @@ -72,16 +74,7 @@ def default_feature_map( @overload def feature_map_from_kernel(kernel: MaternKarhunenLoeveKernel): - if isinstance(kernel.space, CompactMatrixLieGroup): - # Because `CompactMatrixLieGroup` does not currently support explicit - # eigenfunction computation (they only support addition theorem). - return RandomPhaseFeatureMapCompact( - kernel.space, - kernel.num_levels, - MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES, - ) - else: - return DeterministicFeatureMapCompact(kernel.space, kernel.num_levels) + return feature_map_from_space(kernel.space, kernel.num_levels) @overload @@ -132,13 +125,29 @@ def feature_map_from_space(space: DiscreteSpectrumSpace, num: int): space, num, MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES ) elif isinstance(space, Hypersphere): - num_computed_levels = space.num_computed_levels + num_computed_levels = space.get_eigenfunctions(num).num_computed_levels # type: ignore[attr-defined] + # Individual spherical harmonics are only computed for lower dimensions. + # For higher dimensions, we use random phase features. if num_computed_levels > 0: return DeterministicFeatureMapCompact(space, min(num, num_computed_levels)) else: return RandomPhaseFeatureMapCompact( - space, num, MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES + space, num, int(MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES / num) ) + elif isinstance(space, HypercubeGraph) and space.dim > 10: + # The number of eigenfunctions grows exponentially with the dimension, + # thus we only use the eigenfunction-based feature map for low + # dimensions, switching to random phase features for higher dimensions. + logging.getLogger(__name__).warning( + "Using RandomPhaseFeatureMapCompact. For HypercubeGraph, this" + " approximate feature map will probably be of poor quality." + " However, the eigenfunction-based feature map is going to be" + " infeasible because the number of eigenfunctions at level l" + " grows too quickly with dim (as dim^l). Any port in a storm." 
+ ) + return RandomPhaseFeatureMapCompact( + space, num, int(MaternGeometricKernel._DEFAULT_NUM_RANDOM_PHASES / num) + ) else: return DeterministicFeatureMapCompact(space, num) @@ -201,7 +210,9 @@ def default_num(space: DiscreteSpectrumSpace) -> int: MaternGeometricKernel._DEFAULT_NUM_EIGENFUNCTIONS, space.num_vertices ) elif isinstance(space, HypercubeGraph): - return min(MaternGeometricKernel._DEFAULT_NUM_LEVELS, space.dim + 1) + return min( + MaternGeometricKernel._DEFAULT_NUM_LEVELS_HYPERCUBE_GRAPH, space.dim + 1 + ) else: return MaternGeometricKernel._DEFAULT_NUM_LEVELS @@ -256,6 +267,7 @@ class MaternGeometricKernel: _DEFAULT_NUM_EIGENFUNCTIONS = 1000 _DEFAULT_NUM_LEVELS = 25 + _DEFAULT_NUM_LEVELS_HYPERCUBE_GRAPH = 15 _DEFAULT_NUM_LEVELS_LIE_GROUP = 20 _DEFAULT_NUM_RANDOM_PHASES = 3000 @@ -324,7 +336,10 @@ def __new__( kernel: BaseGeometricKernel if isinstance(space, DiscreteSpectrumSpace): num = num or default_num(space) - kernel = MaternKarhunenLoeveKernel(space, num, normalize=normalize) + log_spectrum_on = isinstance(space, HypercubeGraph) + kernel = MaternKarhunenLoeveKernel( + space, num, normalize=normalize, log_spectrum_on=log_spectrum_on + ) if return_feature_map: feature_map = default_feature_map(kernel=kernel) diff --git a/geometric_kernels/kernels/product.py b/geometric_kernels/kernels/product.py index 5f3a576d..7df3a210 100644 --- a/geometric_kernels/kernels/product.py +++ b/geometric_kernels/kernels/product.py @@ -121,7 +121,7 @@ def K(self, params: Dict[str, B.Numeric], X, X2=None, **kwargs) -> B.Numeric: return B.prod( B.stack( *[ - kernel.K(p, X, X2) + kernel.K(p, X, X2, **kwargs) for kernel, X, X2, p in zip(self.kernels, Xs, X2s, params_list) ], axis=-1, @@ -129,14 +129,14 @@ def K(self, params: Dict[str, B.Numeric], X, X2=None, **kwargs) -> B.Numeric: axis=-1, ) - def K_diag(self, params, X): + def K_diag(self, params, X, **kwargs) -> B.Numeric: Xs = project_product(X, self.dimension_indices, self.element_shapes) params_list = params_to_params_list(params) return B.prod( B.stack( *[ - kernel.K_diag(p, X) + kernel.K_diag(p, X, **kwargs) for kernel, X, p in zip(self.kernels, Xs, params_list) ], axis=-1, diff --git a/geometric_kernels/spaces/circle.py b/geometric_kernels/spaces/circle.py index 4edee885..e1ccb3d4 100644 --- a/geometric_kernels/spaces/circle.py +++ b/geometric_kernels/spaces/circle.py @@ -34,6 +34,8 @@ def __init__(self, num_levels: int): self._num_levels = num_levels def __call__(self, X: B.Numeric, **kwargs) -> B.Numeric: + if kwargs: + raise ValueError("This method does not support additional arguments.") N = B.shape(X)[0] theta = X const = 2.0**0.5 @@ -71,12 +73,12 @@ def _addition_theorem( represent the points in a given space. Defaults to None, in which case X is used for X2. - :param ``**kwargs``: - Any additional parameters. :return: An array of shape [N, N2, L]. """ + if kwargs: + raise ValueError("This method does not support additional arguments.") theta1, theta2 = X, X2 angle_between = theta1[:, None, :] - theta2[None, :, :] # [N, N2, 1] freqs = B.range(B.dtype(X), self.num_levels) # [L] @@ -95,6 +97,8 @@ def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: :return: Array `result`, such that result[n, l] = 1 if l = 0 or 2 otherwise. 
""" + if kwargs: + raise ValueError("This method does not support additional arguments.") N = X.shape[0] ones = B.ones(B.dtype(X), N, self.num_levels) # [N, L] value = ones * B.cast( diff --git a/geometric_kernels/spaces/eigenfunctions.py b/geometric_kernels/spaces/eigenfunctions.py index 8bdb9055..ddd14b8d 100644 --- a/geometric_kernels/spaces/eigenfunctions.py +++ b/geometric_kernels/spaces/eigenfunctions.py @@ -23,7 +23,12 @@ import lab as B from beartype.typing import List, Optional -from geometric_kernels.lab_extras import complex_like, is_complex, take_along_axis +from geometric_kernels.lab_extras import ( + complex_like, + from_numpy, + is_complex, + take_along_axis, +) class Eigenfunctions(abc.ABC): @@ -252,6 +257,85 @@ def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: """ raise NotImplementedError + def normalized_phi_product( + self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs + ) -> B.Numeric: + r""" + Computes the level-wise sums of outer products of eigenfunctions, + normalized by the number of eigenfunctions per level: + + .. math:: d_l^{-1} \sum_{s=1}^{d_l} f_{l s}(x_1) f_{l s}(x_2) + + for all $x_1$ in `X`, all $x_2$ in `X2`, and $0 \leq l < L$. + + **Motivation:** for some spaces (e.g. :class:`~.spaces.HypercubeGraph`), + the values $d_l$ can be extremely large, not even fitting int64. In this + case, the usual :meth:`phi_product` can give extremely large outputs, + leading to numerical instabilities. This method is provided to avoid + this issue. Downstream, it allows computing the weighted sums of form + + .. math:: w_l \sum_{s=1}^{d_l} f_{l s}(x_1) f_{l s}(x_2) + + in a numerically stable way, by first computing the renormalized + weights $w_l' = \exp(\log(w_l) + \log(d_l))$ in the log space, and + then multiplying the output of this method by $w_l'$. + + :param X: + The first of the two batches of points to evaluate the normalized + phi product at. An array of shape [N, ], where N is the + number of points and is the shape of the arrays that + represent the points in a given space. + :param X2: + The second of the two batches of points to evaluate the normalized + phi product at. An array of shape [N2, ], where N2 is the + number of points and is the shape of the arrays that + represent the points in a given space. + + Defaults to None, in which case X is used for X2. + :param ``**kwargs``: + Any additional parameters. + + :return: + An array of shape [N, N2, L]. + + .. note:: + By default, a trivial implementation is provided that simply + divides the output of :meth:`phi_product` by the number of + eigenfunctions per level. The individual subclasses should + consider implementing this method in a more efficient way. + """ + + num_eigenfunctions_per_level = B.cast( + B.dtype(X), from_numpy(X, self.num_eigenfunctions_per_level)[None, :] + ) + + return self.phi_product(X, X2, **kwargs) / num_eigenfunctions_per_level + + def normalized_phi_product_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: + r""" + Computes the diagonals of the matrices + ``normalized_phi_product(X, X, **kwargs)[:, :, l]``, $0 \leq l < L$. + + :param X: + As in :meth:`normalized_phi_product`. + :param ``**kwargs``: + As in :meth:`normalized_phi_product`. + + :return: + An array of shape [N, L]. + + .. note:: + By default, a trivial implementation is provided that simply + divides the output of :meth:`phi_product_diag` by the number of + eigenfunctions per level. The individual subclasses should + consider implementing this method in a more efficient way. 
+ """ + num_eigenfunctions_per_level = B.cast( + B.dtype(X), from_numpy(X, self.num_eigenfunctions_per_level)[None, :] + ) + + return self.phi_product_diag(X, **kwargs) / num_eigenfunctions_per_level + class EigenfunctionsFromEigenvectors(Eigenfunctions): """ @@ -274,11 +358,13 @@ def weighted_outerproduct( X2: Optional[B.Numeric] = None, # type: ignore **kwargs, ) -> B.Numeric: - Phi_X = self.__call__(X, **kwargs) # [N, J] + if kwargs: + raise ValueError("This method does not support additional arguments.") + Phi_X = self.__call__(X) # [N, J] if X2 is None: Phi_X2 = Phi_X else: - Phi_X2 = self.__call__(X2, **kwargs) # [N2, J] + Phi_X2 = self.__call__(X2) # [N2, J] Phi_X = B.cast(B.dtype(weights), Phi_X) Phi_X2 = B.cast(B.dtype(weights), Phi_X2) @@ -289,21 +375,27 @@ def weighted_outerproduct( def weighted_outerproduct_diag( self, weights: B.Numeric, X: B.Numeric, **kwargs ) -> B.Numeric: - Phi_X = self.__call__(X, **kwargs) # [N, J] + if kwargs: + raise ValueError("This method does not support additional arguments.") + Phi_X = self.__call__(X) # [N, J] Kx = B.sum(B.transpose(weights) * Phi_X**2, axis=1) # [N,] return Kx def phi_product( self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs ) -> B.Numeric: + if kwargs: + raise ValueError("This method does not support additional arguments.") if X2 is None: X2 = X - Phi_X = self.__call__(X, **kwargs) # [N, J] - Phi_X2 = self.__call__(X2, **kwargs) # [N2, J] + Phi_X = self.__call__(X) # [N, J] + Phi_X2 = self.__call__(X2) # [N2, J] return B.einsum("nl,ml->nml", Phi_X, Phi_X2) # [N, N2, J] def phi_product_diag(self, X: B.Numeric, **kwargs): - Phi_X = self.__call__(X, **kwargs) # [N, J] + if kwargs: + raise ValueError("This method does not support additional arguments.") + Phi_X = self.__call__(X) # [N, J] return Phi_X**2 def __call__(self, X: B.Numeric, **kwargs) -> B.Numeric: @@ -313,13 +405,13 @@ def __call__(self, X: B.Numeric, **kwargs) -> B.Numeric: :param X: Indices, an array of shape [N, 1]. - :param ``**kwargs``: - Ignored. :return: An array of shape [N, J], whose element with index (n, j) corresponds to the X[n]-th element of the j-th eigenvector. """ + if kwargs: + raise ValueError("This method does not support additional arguments.") indices = B.cast(B.dtype_int(X), X) Phi = take_along_axis(self.eigenvectors, indices, axis=0) return Phi diff --git a/geometric_kernels/spaces/hypercube_graph.py b/geometric_kernels/spaces/hypercube_graph.py index 0f58b7a6..4d6d974d 100644 --- a/geometric_kernels/spaces/hypercube_graph.py +++ b/geometric_kernels/spaces/hypercube_graph.py @@ -10,7 +10,7 @@ import numpy as np from beartype.typing import List, Optional -from geometric_kernels.lab_extras import dtype_double, float_like +from geometric_kernels.lab_extras import dtype_double, float_like, from_numpy from geometric_kernels.spaces.base import DiscreteSpectrumSpace from geometric_kernels.spaces.eigenfunctions import ( Eigenfunctions, @@ -51,6 +51,8 @@ def __init__(self, dim: int, num_levels: int) -> None: self._num_eigenfunctions: Optional[int] = None # To be computed when needed. 
def __call__(self, X: B.Bool, **kwargs) -> B.Float: + if kwargs: + raise ValueError("This method does not support additional arguments.") return B.stack( *[ walsh_function(self.dim, list(cur_combination), X) @@ -60,10 +62,11 @@ def __call__(self, X: B.Bool, **kwargs) -> B.Float: axis=1, ) - def _addition_theorem( + def normalized_phi_product( self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs ) -> B.Numeric: - + if kwargs: + raise ValueError("This method does not support additional arguments.") if X2 is None: X2 = X @@ -83,32 +86,64 @@ def _addition_theorem( kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized - values.append( - comb(self.dim, level) * cur_kravchuk_normalized[..., None] # [N, N2, 1] - ) + values.append(cur_kravchuk_normalized[..., None]) # [N, N2, 1] return B.concat(*values, axis=-1) # [N, N2, L] - def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: - """ - These are certain easy to compute constants. - """ + def _addition_theorem( + self, X: B.Numeric, X2: Optional[B.Numeric] = None, **kwargs + ) -> B.Numeric: + if kwargs: + raise ValueError("This method does not support additional arguments.") + + num_eigenfunctions_per_level = B.cast( + B.dtype(X), from_numpy(X, self.num_eigenfunctions_per_level)[None, :] + ) + + return self.normalized_phi_product(X, X2) * num_eigenfunctions_per_level + + def normalized_phi_product_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: + if kwargs: + raise ValueError("This method does not support additional arguments.") + # These are certain easy to compute constants. values = [ - comb(self.dim, level) * B.ones(float_like(X), *X.shape[:-1], 1) # [N, 1] - for level in range(self.num_levels) + B.ones(float_like(X), *X.shape[:-1], 1) # [N, 1] + for _ in range(self.num_levels) ] return B.concat(*values, axis=1) # [N, L] + def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: + if kwargs: + raise ValueError("This method does not support additional arguments.") + num_eigenfunctions_per_level = B.cast( + B.dtype(X), from_numpy(X, self.num_eigenfunctions_per_level)[None, :] + ) + + return self.normalized_phi_product_diag(X) * num_eigenfunctions_per_level + def weighted_outerproduct( self, weights: B.Numeric, X: B.Numeric, X2: Optional[B.Numeric] = None, # type: ignore + *, + log_weights_on: bool = False, # If True, weights are log weights. **kwargs, ) -> B.Numeric: + if kwargs: + raise ValueError( + "This method does not support keyword arguments beyond log_weights_on." + ) + if X2 is None: X2 = X + # Instead of multiplying weights by binomial coefficients, we sum their + # logs and then exponentiate the result for numerical stability. + # Furthermore, we save the computed Kravchuk polynomials for next iterations. + if not log_weights_on: + weights = B.log(weights) + hamming_distances = hamming_distance(X, X2) result = B.zeros(B.dtype(weights), X.shape[0], X2.shape[0]) # [N, N2] @@ -124,24 +159,33 @@ def weighted_outerproduct( kravchuk_normalized_j_minus_2 = kravchuk_normalized_j_minus_1 kravchuk_normalized_j_minus_1 = cur_kravchuk_normalized - # Instead of multiplying weights by binomial coefficients, we sum their - # logs and then exponentiate the result for numerical stability. - # Furthermore, we save the computed Kravchuk polynomials for next iterations. 
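+            # `weights` holds log-weights at this point; adding log C(dim, level)
+            # and exponentiating once avoids forming the huge binomial counts.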
             result += (
-                B.exp(B.log(weights[level]) + log_binomial(self.dim, level))
+                B.exp(weights[level] + log_binomial(self.dim, level))
                 * cur_kravchuk_normalized
             )

         return result  # [N, N2]

     def weighted_outerproduct_diag(
-        self, weights: B.Numeric, X: B.Numeric, **kwargs
+        self,
+        weights: B.Numeric,
+        X: B.Numeric,
+        *,
+        log_weights_on: bool = False,  # If True, weights are log weights.
+        **kwargs,
     ) -> B.Numeric:
+        if kwargs:
+            raise ValueError(
+                "This method does not support keyword arguments beyond log_weights_on."
+            )
         # Instead of multiplying weights by binomial coefficients, we sum their
         # logs and then exponentiate the result for numerical stability.
+        if not log_weights_on:
+            weights = B.log(weights)
+
         result = sum(
-            B.exp(B.log(weights[level]) + log_binomial(self.dim, level))
+            B.exp(weights[level] + log_binomial(self.dim, level))
             * B.ones(float_like(X), *X.shape[:-1], 1)
             for level in range(self.num_levels)
         )  # [N, 1]
@@ -162,6 +206,10 @@ def num_levels(self) -> int:
     def num_eigenfunctions_per_level(self) -> List[int]:
         return [comb(self.dim, level) for level in range(self.num_levels)]

+    @property
+    def log_num_eigenfunctions_per_level(self) -> List[float]:
+        return [log_binomial(self.dim, level) for level in range(self.num_levels)]
+

 class HypercubeGraph(DiscreteSpectrumSpace):
     r"""
@@ -234,7 +282,9 @@ def get_repeated_eigenvalues(self, num: int) -> B.Numeric:
         eigenfunctions = WalshFunctions(self.dim, num)

         eigenvalues = chain(
-            B.squeeze(eigenvalues_per_level),
+            eigenvalues_per_level[
+                :, 0
+            ],  # This is better than squeeze because we want to keep the 0th dim even if it's equal to 1.
             eigenfunctions.num_eigenfunctions_per_level,
         )  # [J,]
         return B.reshape(eigenvalues, -1, 1)  # [J, 1]
diff --git a/geometric_kernels/spaces/hypersphere.py b/geometric_kernels/spaces/hypersphere.py
index f1674102..f3255b03 100644
--- a/geometric_kernels/spaces/hypersphere.py
+++ b/geometric_kernels/spaces/hypersphere.py
@@ -57,6 +57,8 @@ def num_computed_levels(self) -> int:
         return num

     def __call__(self, X: B.Numeric, **kwargs) -> B.Numeric:
+        if kwargs:
+            raise ValueError("This method does not support additional arguments.")
         return self._spherical_harmonics(X)

     def _addition_theorem(
@@ -81,12 +83,12 @@ def _addition_theorem(
             represent the points in a given space.

             Defaults to None, in which case X is used for X2.
-        :param ``**kwargs``:
-            Any additional parameters.

         :return:
             An array of shape [N, N2, L].
         """
+        if kwargs:
+            raise ValueError("This method does not support additional arguments.")
         values = [
             level.addition(X, X2)[..., None]  # [N, N2, 1]
             for level in self._spherical_harmonics.harmonic_levels
@@ -97,6 +99,8 @@ def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric:
         """
         These are certain easy to compute constants.
         """
+        if kwargs:
+            raise ValueError("This method does not support additional arguments.")
         values = [
             level.addition_at_1(X)  # [N, 1]
             for level in self._spherical_harmonics.harmonic_levels
diff --git a/geometric_kernels/spaces/lie_groups.py b/geometric_kernels/spaces/lie_groups.py
index 9a79efa4..d7292c26 100644
--- a/geometric_kernels/spaces/lie_groups.py
+++ b/geometric_kernels/spaces/lie_groups.py
@@ -224,12 +224,12 @@ def _addition_theorem(
             An [N2, n, n]-shaped array, a batch of N2 matrices of size nxn.

             Defaults to None, in which case X is used for X2.
-        :param ``**kwargs``:
-            Any additional parameters.

         :return:
             An array of shape [N, N2, L].
""" + if kwargs: + raise ValueError("This method does not support additional arguments.") if X2 is None: X2 = X diff = self._difference(X, X2) @@ -247,12 +247,12 @@ def _addition_theorem_diag(self, X: B.Numeric, **kwargs) -> B.Numeric: :param X: As in :meth:`_addition_theorem`. - :param ``**kwargs``: - As in :meth:`_addition_theorem`. :return: An array of shape [N, L]. """ + if kwargs: + raise ValueError("This method does not support additional arguments.") ones = B.ones(B.dtype(X), *X.shape[:-2], 1) values = [ repr_dim * repr_dim * ones # [N, 1], because chi(X*inv(X))=repr_dim diff --git a/tests/kernels/test_matern_karhunenloeve_kernel.py b/tests/kernels/test_matern_karhunenloeve_kernel.py index 1d87ecb2..7f11819a 100644 --- a/tests/kernels/test_matern_karhunenloeve_kernel.py +++ b/tests/kernels/test_matern_karhunenloeve_kernel.py @@ -28,7 +28,7 @@ def test_eigenfunctions(kernel: MaternKarhunenLoeveKernel): Phi = kernel.eigenfunctions X = np.random.randint(low=0, high=kernel.space.num_vertices, size=(num_data, 1)) - assert Phi(X, lengthscale=np.r_[0.93]).shape == (num_data, _TRUNCATION_LEVEL) + assert Phi(X).shape == (num_data, _TRUNCATION_LEVEL) def test_K_shapes(kernel: MaternKarhunenLoeveKernel): diff --git a/tests/spaces/test_hypercube_graph.py b/tests/spaces/test_hypercube_graph.py index 8b5d6389..0038443a 100644 --- a/tests/spaces/test_hypercube_graph.py +++ b/tests/spaces/test_hypercube_graph.py @@ -95,7 +95,7 @@ def test_weighted_outerproduct_with_addition_theorem(inputs, backend): _, eigenfunctions, X, X2 = inputs num_levels = eigenfunctions.num_levels - weights = np.random.rand(num_levels, 1) + weights = np.random.rand(num_levels, 1) ** 2 + 0.01 # Must be positive. chained_weights = chain( weights.squeeze(), eigenfunctions.num_eigenfunctions_per_level ) @@ -104,6 +104,8 @@ def test_weighted_outerproduct_with_addition_theorem(inputs, backend): Phi_X2 = eigenfunctions(X2) result = einsum("ni,ki,i->nk", Phi_X, Phi_X2, chained_weights) + np.seterr(invalid="raise") + # Check that `weighted_outerproduct`, which is based on the addition theorem, # returns the same result as the direct computation involving individual # eigenfunctions. @@ -117,7 +119,7 @@ def test_weighted_outerproduct_with_addition_theorem_one_input(inputs, backend): _, eigenfunctions, X, _ = inputs num_levels = eigenfunctions.num_levels - weights = np.random.rand(num_levels, 1) + weights = np.random.rand(num_levels, 1) ** 2 + 0.01 # Must be positive. result = eigenfunctions.weighted_outerproduct(weights, X, X) @@ -137,7 +139,7 @@ def test_weighted_outerproduct_diag(inputs, backend): _, eigenfunctions, X, _ = inputs num_levels = eigenfunctions.num_levels - weights = np.random.rand(num_levels, 1) + weights = np.random.rand(num_levels, 1) ** 2 + 0.01 # Must be positive. result = np.diag(eigenfunctions.weighted_outerproduct(weights, X, X)) @@ -159,7 +161,7 @@ def test_weighted_outerproduct_against_phi_product(inputs, backend): sum_phi_phi_for_level = eigenfunctions.phi_product(X, X2) - weights = np.random.rand(num_levels, 1) + weights = np.random.rand(num_levels, 1) ** 2 + 0.01 # Must be positive. result = B.einsum("id,...nki->...nk", weights, sum_phi_phi_for_level) # Check that `weighted_outerproduct`, which for HypercubeGraph has a @@ -177,7 +179,7 @@ def test_weighted_outerproduct_diag_against_phi_product(inputs, backend): phi_product_diag = eigenfunctions.phi_product_diag(X) - weights = np.random.rand(num_levels, 1) + weights = np.random.rand(num_levels, 1) ** 2 + 0.01 # Must be positive. 
result = B.einsum("id,ni->n", weights, phi_product_diag) # [N,] # Check that `weighted_outerproduct_diag`, which for HypercubeGraph has a diff --git a/tests/spaces/test_lie_groups.py b/tests/spaces/test_lie_groups.py index 863580d8..5aa16ee2 100644 --- a/tests/spaces/test_lie_groups.py +++ b/tests/spaces/test_lie_groups.py @@ -111,13 +111,14 @@ def test_characters_orthogonal(group_and_eigf): def test_feature_map(group_and_eigf): + np.seterr(invalid="raise") group, eigenfunctions = group_and_eigf order = eigenfunctions.num_levels dtype = get_dtype(group) key = B.create_random_state(dtype, seed=0) kernel = MaternKarhunenLoeveKernel(group, order, normalize=True) - param = dict(lengthscale=np.array([10]), nu=np.array([1.5])) + param = dict(lengthscale=np.array([10.0]), nu=np.array([1.5])) feature_order = 5000 feature_map = RandomPhaseFeatureMapCompact(group, order, feature_order) diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 5d347744..5ab973a8 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -22,6 +22,7 @@ Mesh, SymmetricPositiveDefiniteMatrices, ) +from geometric_kernels.spaces.eigenfunctions import EigenfunctionsWithAdditionTheorem def to_typed_ndarray(value, dtype): @@ -171,9 +172,10 @@ def test_feature_map_dtype(kl_spacepoint, dtype, backend): feature_map(point, params) # make sure it runs - key = B.create_random_state(B.dtype(point), seed=1234) - feature_map = RandomPhaseFeatureMapCompact(space, num_levels) - feature_map(point, params, key=key) + if isinstance(space.get_eigenfunctions(1), EigenfunctionsWithAdditionTheorem): + key = B.create_random_state(B.dtype(point), seed=1234) + feature_map = RandomPhaseFeatureMapCompact(space, num_levels) + feature_map(point, params, key=key) @pytest.fixture(params=["naive", "rs"])
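End to end, the patch targets usage like the following sketch (hypothetical: it assumes `HypercubeGraph` and `MaternGeometricKernel` are exported via the library's public API as elsewhere in this diff, and that boolean [N, dim] arrays encode hypercube vertices):

    import numpy as np

    from geometric_kernels.kernels import MaternGeometricKernel
    from geometric_kernels.spaces import HypercubeGraph

    # At dimension 100, level l has C(100, l) eigenfunctions, so normalizing
    # the spectrum in the linear domain overflows quickly; per this patch,
    # MaternGeometricKernel switches on log_spectrum_on for this space.
    space = HypercubeGraph(100)
    kernel = MaternGeometricKernel(space)

    params = kernel.init_params()
    params["nu"] = np.array([2.5])
    params["lengthscale"] = np.array([2.0])

    rng = np.random.RandomState(0)
    X = rng.binomial(1, 0.5, size=(5, space.dim)).astype(bool)
    K = kernel.K(params, X)  # [5, 5] kernel matrix, computed without overflow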