# Rewrite of the original file in DeepXDE: https://github.com/lululxvi/deepxde
# ==============================================================================
import importlib.util
import numbers
import warnings
from collections.abc import Iterable
import brainstate
import numpy as np
from scipy import spatial
from scipy.stats.distributions import randint, rv_discrete, uniform
from .transformers import (
CategoricalEncoder,
Identity,
LabelEncoder,
LogN,
Normalize,
Pipeline,
StringEncoder,
)
sklearn_installed = importlib.util.find_spec("sklearn")
__all__ = ["sample"]
[docs]
def check_random_state(*args, **kwargs):
    """Forward to ``sklearn.utils.check_random_state``, importing it lazily.

    All positional and keyword arguments are passed through unchanged.

    Raises
    ------
    ImportError
        If the module-level ``sklearn_installed`` flag is falsy.
    """
    if sklearn_installed:
        # Imported under an alias so the wrapper's own name is not shadowed.
        from sklearn.utils import check_random_state as _sklearn_check_random_state

        return _sklearn_check_random_state(*args, **kwargs)
    raise ImportError(
        "scikit-learn must be installed to use "
        "the `check_random_state` function."
    )
[docs]
def sample(n_samples, dimension, sampler="pseudo"):
    """Generate pseudorandom or quasirandom samples in [0, 1]^dimension.

    Args:
        n_samples (int): The number of samples.
        dimension (int): Space dimension.
        sampler (string): One of the following: "pseudo" (pseudorandom), "LHS" (Latin
            hypercube sampling), "Halton" (Halton sequence), "Hammersley" (Hammersley
            sequence), or "Sobol" (Sobol sequence).

    Returns:
        Whatever ``pseudorandom`` or ``quasirandom`` returns for the request.

    Raises:
        ValueError: If ``sampler`` is not one of the names listed above.
    """
    if sampler == "pseudo":
        return pseudorandom(n_samples, dimension)
    if sampler in ["LHS", "Halton", "Hammersley", "Sobol"]:
        return quasirandom(n_samples, dimension, sampler)
    # The f-prefix matters: without it the message contains the literal text
    # "f{sampler}" instead of the offending sampler name.
    raise ValueError(f"{sampler} sampling is not available.")
[docs]
def pseudorandom(n_samples, dimension):
    """Draw ``n_samples`` pseudorandom points with ``dimension`` coordinates each.

    The result is cast to the dtype returned by ``brainstate.environ.dftype()``.
    """
    # The global NumPy random state is used on purpose: if a random seed is
    # set, rng-based code always returns the same random numbers, which may
    # not be what we expect.
    shape = (n_samples, dimension)
    points = np.random.random(size=shape)
    return points.astype(brainstate.environ.dftype())
[docs]
def quasirandom(n_samples, dimension, sampler):
    """Generate quasirandom samples in [0, 1]^dimension using scikit-optimize.

    Args:
        n_samples (int): The number of samples to return.
        dimension (int): Space dimension.
        sampler (string): "LHS", "Halton", "Hammersley" or "Sobol".

    Returns:
        Array built from the generated points (after dropping the skipped
        leading points), cast to ``brainstate.environ.dftype()``.

    Raises:
        ValueError: If ``sampler`` is not one of the supported names.
    """
    # Validate first so an unknown name gives a clear error instead of an
    # AttributeError on a ``str`` further down (and before importing skopt).
    if sampler not in ("LHS", "Halton", "Hammersley", "Sobol"):
        raise ValueError(f"{sampler} sampling is not available.")

    import skopt

    # Certain points should be removed:
    # - Boundary points such as [..., 0, ...]
    # - Special points [0, 0, 0, ...] and [0.5, 0.5, 0.5, ...], which cause error in
    #   Hypersphere.random_points() and Hypersphere.random_boundary_points()
    skip = 0
    if sampler == "LHS":
        generator = skopt.sampler.Lhs()
    elif sampler == "Halton":
        # 1st point: [0, 0, ...]
        generator = skopt.sampler.Halton(min_skip=1, max_skip=1)
    elif sampler == "Hammersley":
        # 1st point: [0, 0, ...]
        # (the skopt class used here is spelled ``Hammersly``)
        if dimension == 1:
            generator = skopt.sampler.Hammersly(min_skip=1, max_skip=1)
        else:
            generator = skopt.sampler.Hammersly()
            skip = 1
    else:  # "Sobol"
        # 1st point: [0, 0, ...], 2nd point: [0.5, 0.5, ...]
        generator = skopt.sampler.Sobol(randomize=False)
        skip = 1 if dimension < 3 else 2

    space = [(0.0, 1.0)] * dimension
    # Generate ``skip`` extra points and drop the leading ones.
    points = generator.generate(space, n_samples + skip)[skip:]
    return np.asarray(points, dtype=brainstate.environ.dftype())
[docs]
class InitialPointGenerator:
    """Base class for initial point generators."""

    def generate(self, dimensions, n_samples, random_state=None):
        """Generate initial points; subclasses must override this method.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def set_params(self, **params):
        """Set the parameters of this initial point generator.

        Parameters
        ----------
        **params : dict
            Generator parameters. Each key is set as an attribute on the
            instance.

        Returns
        -------
        self : object
            Generator instance.
        """
        # An empty ``params`` simply skips the loop.
        for key, value in params.items():
            setattr(self, key, value)
        return self
def _random_permute_matrix(h, random_state=None):
    """Return a copy of ``h`` in which every column is independently shuffled.

    Parameters
    ----------
    h : array with a two-element ``shape``
        Matrix whose columns are permuted.
    random_state : passed to ``check_random_state``
        Seed or generator used to draw one row permutation per column.

    Raises
    ------
    ImportError
        If the module-level ``sklearn_installed`` flag is falsy.
    """
    if not sklearn_installed:
        raise ImportError(
            "scikit-learn must be installed to use "
            "the `random_state` parameter."
        )
    rng = check_random_state(random_state)
    shuffled = np.zeros_like(h)
    n_rows, n_cols = h.shape
    for col in range(n_cols):
        # A fresh row order is drawn for each column.
        row_order = rng.permutation(range(n_rows))
        shuffled[:, col] = h[row_order, col]
    return shuffled
[docs]
def check_dimension(dimension, transform=None):
    """Turn a provided dimension description into a dimension object.

    Checks that the provided dimension falls into one of the
    supported types. For a list of supported types, look at
    the documentation of ``dimension`` below.

    If ``dimension`` is already a ``Dimension`` instance, return it.

    Parameters
    ----------
    dimension : Dimension
        Search space Dimension.
        Each search dimension can be defined either as:

        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).
        - a 2-, 3- or 4-tuple, for `Real` and `Integer` dimensions, of
          the form ``(low, high [, prior [, base]])`` (values in square
          brackets are optional). If both ``low`` and ``high`` are integral
          numbers (as per the `number.Integral`), a `Integer` dimension is
          returned, else a `Real` dimension is returned.
        - any iterable for `Categorical` dimension

        .. note::
            For a transitionary period, the old behavior is retained. This
            means tuple, list and array currently all undergo dimension
            inference as describe in the tuple entry above. If no `Integer`
            or `Real` dimension can be inferred, a `Categorical` is returned.
            This behavior will be tightened to the above description in an
            upcoming version, and a warning is raised if the upcoming inference
            would differ from the current behavior.

    transform : "identity", "normalize", "string", "label", "onehot" optional
        - For `Categorical` dimensions, the following transformations are
          supported.

          - "onehot" (default) one-hot transformation of the original space.
          - "label" integer transformation of the original space
          - "string" string transformation of the original space.
          - "identity" same as the original space.

        - For `Real` and `Integer` dimensions, the following transformations
          are supported.

          - "identity", (default) the transformed space is the same as the
            original space.
          - "normalize", the transformed space is scaled to be between 0 and 1.

    Returns
    -------
    dimension : Dimension
        Dimension instance, as inferred by the legacy rules.
    """
    legacy_dim = _check_dimension_old(dimension, transform=transform)

    # Run the upcoming inference as a dry run: any exception or warning it
    # produces is captured and only reported, never propagated.
    try:
        with warnings.catch_warnings(record=True) as captured:
            upcoming_dim = _check_dimension(dimension, transform=transform)
    except Exception as err:
        upcoming_dim = f"<{err.__class__.__name__}: {err}>"

    if upcoming_dim != legacy_dim:
        suffix = ""
        if captured:
            rendered = "; ".join(
                f"<{w.filename}:{w.lineno}: " f"{w.category}: {w.message}>"
                for w in captured
            )
            suffix = f" (with warnings: {rendered})"
        warnings.warn(
            f"Dimension {dimension!r} was inferred to {legacy_dim}. In "
            "upcoming versions of scikit-optimize, it will be "
            f"inferred to {upcoming_dim}{suffix}. See the "
            "documentation of the check_dimension function for the "
            "upcoming API."
        )

    # The legacy result is always the one returned.
    return legacy_dim
def _check_dimension(dimension, transform=None):
    """Infer a dimension object using the upcoming (stricter) rules.

    ``Dimension`` instances are returned unchanged. A 2- to 4-tuple whose
    optional prior is a ``str`` and optional base is an ``int`` becomes an
    ``Integer`` (both bounds integral) or a ``Real`` (both bounds real). Any
    other iterable becomes a ``Categorical``.

    Raises
    ------
    ValueError
        If ``dimension`` is neither a ``Dimension`` nor an iterable.
    """
    if isinstance(dimension, Dimension):
        return dimension

    if isinstance(dimension, tuple) and 2 <= len(dimension) <= 4:
        low, high, *extra = dimension
        # Check that optional distribution and base have correct types
        prior_ok = not extra or isinstance(extra[0], str)
        base_ok = len(extra) < 2 or isinstance(extra[1], int)
        if prior_ok and base_ok:
            both_integral = isinstance(low, numbers.Integral) and isinstance(
                high, numbers.Integral
            )
            if both_integral:
                # Infer an Integer if both bounds are Integral
                return Integer(int(low), int(high), *extra, transform=transform)
            both_real = isinstance(low, numbers.Real) and isinstance(
                high, numbers.Real
            )
            if both_real:
                # Infer a Real if both bounds are Real numbers
                return Real(float(low), float(high), *extra, transform=transform)

        # warn if falling back on Categorical for tuples that look like they
        # might be an error, because there is more than one type in them
        if len({type(item) for item in dimension}) > 1:
            warnings.warn(
                f"{dimension!r} was inferred to a Categorical "
                "object, but looks like a tuple for an Integer or "
                "Real dimension that was miss-spelled. Pass a list "
                "or a Categorical object to suppress this warning.",
                UserWarning,
            )

    if isinstance(dimension, Iterable):
        return Categorical(dimension, transform=transform)

    # Everything that reaches this point is unsupported.
    raise ValueError(
        f"Invalid dimension {dimension!r}. See the "
        "documentation of check_dimension for supported values."
    )
def _check_dimension_old(dimension, transform=None):
    """Infer a dimension object using the legacy (currently effective) rules.

    ``Dimension`` instances are returned unchanged. Lists, tuples and arrays
    are classified by their length and element types into ``Integer``,
    ``Real`` or ``Categorical``.

    Raises
    ------
    ValueError
        If ``dimension`` is not a ``Dimension``, list, tuple or ndarray, if
        it is empty, or if it is a two-element sequence that is neither
        categorical-looking nor all-numeric.
    """

    def _invalid():
        # Built lazily so ``repr(dimension)`` is only evaluated on failure.
        return ValueError(
            f"Invalid dimension {dimension!r}. See the "
            "documentation of check_dimension for supported "
            "values."
        )

    def _all_are(kind, items):
        return all(isinstance(item, kind) for item in items)

    if isinstance(dimension, Dimension):
        return dimension

    if not isinstance(dimension, (list, tuple, np.ndarray)):
        raise _invalid()

    size = len(dimension)

    # A `Dimension` described by a single value is assumed to be
    # a `Categorical` dimension. This can be used in `BayesSearchCV`
    # to define subspaces that fix one value, e.g. to choose the
    # trainer type, see "sklearn-gridsearchcv-replacement.py"
    # for examples.
    if size == 1:
        return Categorical(dimension, transform=transform)

    if size == 2:
        # Strings and booleans are never treated as numeric bounds.
        if any(isinstance(item, (str, bool, np.bool_)) for item in dimension):
            return Categorical(dimension, transform=transform)
        if _all_are(numbers.Integral, dimension):
            return Integer(*map(int, dimension), transform=transform)
        if _all_are(numbers.Real, dimension):
            return Real(*map(float, dimension), transform=transform)
        raise _invalid()

    if size == 3:
        # The prior test stays on the right of ``and`` so it is only
        # evaluated when the bounds already have the expected numeric type.
        if _all_are(numbers.Integral, dimension[:2]) and dimension[2] in [
            "uniform",
            "log-uniform",
        ]:
            return Integer(
                *map(int, dimension[:2]), *dimension[2:], transform=transform
            )
        if _all_are(numbers.Real, dimension[:2]) and dimension[2] in [
            "uniform",
            "log-uniform",
        ]:
            return Real(*map(float, dimension[:2]), *dimension[2:], transform=transform)
        return Categorical(dimension, transform=transform)

    if size == 4:
        if (
            _all_are(numbers.Integral, dimension[:2])
            and dimension[2] == "log-uniform"
            and isinstance(dimension[3], int)
        ):
            return Integer(
                *map(int, dimension[:2]), *dimension[2:], transform=transform
            )
        if (
            _all_are(numbers.Real, dimension[:2])
            and dimension[2] == "log-uniform"
            and isinstance(dimension[3], int)
        ):
            return Real(*map(float, dimension[:2]), *dimension[2:], transform=transform)

    # Four elements that did not match above, or more than four elements.
    if size > 3:
        return Categorical(dimension, transform=transform)

    raise _invalid()
def _transpose_list_array(x):
"""Transposes a list matrix."""
n_dims = len(x)
assert n_dims > 0
n_samples = len(x[0])
rows = [None] * n_samples
for i in range(n_samples):
r = [None] * n_dims
for j in range(n_dims):
r[j] = x[j][i]
rows[i] = r
return rows
# helper class to be able to print [1, ..., 4] instead of [1, '...', 4]
class _Ellipsis:
def __repr__(self):
return '...'
class Dimension:
    """Base class for search space dimensions."""

    # Subclasses overwrite this with their sampling prior.
    prior = None

    def rvs(self, n_samples=1, random_state=None):
        """Draw random samples.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to be drawn.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.
        """
        rng = check_random_state(random_state)
        # ``_rvs`` samples in the warped space; map the draws back afterwards.
        warped = self._rvs.rvs(size=n_samples, random_state=rng)
        return self.inverse_transform(warped)

    def transform(self, X):
        """Transform samples from the original space to a warped space."""
        transformer = self.transformer
        return transformer.transform(X)

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the original
        space."""
        transformer = self.transformer
        return transformer.inverse_transform(Xt)

    def set_transformer(self):
        """Configure the transformer; must be implemented by subclasses."""
        raise NotImplementedError

    @property
    def size(self):
        """Size of the dimension in the original space (1 in this base class)."""
        return 1

    @property
    def transformed_size(self):
        """Size of the dimension in the warped space (1 in this base class)."""
        return 1

    @property
    def bounds(self):
        """Bounds in the original space; must be implemented by subclasses."""
        raise NotImplementedError

    @property
    def is_constant(self):
        """Whether the dimension is constant; must be implemented by subclasses."""
        raise NotImplementedError

    @property
    def transformed_bounds(self):
        """Bounds in the warped space; must be implemented by subclasses."""
        raise NotImplementedError

    @property
    def name(self):
        """Name of the dimension (a string or None)."""
        return self._name

    @name.setter
    def name(self, value):
        if value is not None and not isinstance(value, str):
            raise ValueError("Dimension's name must be either string or None.")
        self._name = value
def _uniform_inclusive(loc=0.0, scale=1.0):
# like scipy.stats.distributions but inclusive of `high`
# XXX scale + 1. might not actually be a float after scale if
# XXX scale is very large.
return uniform(loc=loc, scale=np.nextafter(scale, scale + 1.0))
class Real(Dimension):
    """Search space dimension that can take on any real value.

    Parameters
    ----------
    low : float
        Lower bound (inclusive).

    high : float
        Upper bound (inclusive).

    prior : "uniform" or "log-uniform", default="uniform"
        Distribution to use when sampling random points for this dimension.

        - If `"uniform"`, points are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, points are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    base : int
        The logarithmic base to use for a log-uniform prior.
        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with the dimension, e.g., "learning rate".

    dtype : str or dtype, default=float
        float type which will be used in inverse_transform,
        can be float.
    """

    def __init__(
        self,
        low,
        high,
        prior="uniform",
        base=10,
        transform=None,
        name=None,
        dtype=float,
    ):
        if high <= low:
            raise ValueError(
                "the lower bound {} has to be less than the"
                " upper bound {}".format(low, high)
            )
        if prior not in ["uniform", "log-uniform"]:
            raise ValueError(
                "prior should be 'uniform' or 'log-uniform'" " got {}".format(prior)
            )
        # A non-positive product means zero lies within [low, high] (or is a
        # bound), where the logarithm is undefined.
        if prior == 'log-uniform' and low * high <= 0:
            raise ValueError(
                "search space should not contain 0 when" " using log-uniform prior"
            )
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        # log10(base): dividing a log10 value by this gives the log in `base`.
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        self._rvs = None
        self.transformer = None
        self.transform_ = transform
        if isinstance(self.dtype, str) and self.dtype not in [
            'float',
            'float16',
            'float32',
            'float64',
        ]:
            raise ValueError(
                "dtype must be 'float', 'float16', 'float32'"
                "or 'float64'"
                " got {}".format(self.dtype)
            )
        elif isinstance(self.dtype, type) and not np.issubdtype(
            self.dtype, np.floating
        ):
            raise ValueError(
                "dtype must be a np.floating subtype;" " got {}".format(self.dtype)
            )

        if transform is None:
            transform = "identity"
        self.set_transformer(transform)

    def set_transformer(self, transform="identity"):
        """Define rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize' or 'identity'

        Raises
        ------
        ValueError
            If ``transform`` is neither 'normalize' nor 'identity'.
        """
        self.transform_ = transform

        if self.transform_ not in ["normalize", "identity"]:
            raise ValueError(
                "transform should be 'normalize' or 'identity'"
                " got {}".format(self.transform_)
            )

        # XXX: The _rvs is for sampling in the transformed space.
        # The rvs on Dimension calls inverse_transform on the points sampled
        # using _rvs
        if self.transform_ == "normalize":
            # set upper bound to next float after 1. to make the numbers
            # inclusive of upper edge
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(self.low, self.high)]
                )
            else:
                self.transformer = Pipeline(
                    [
                        LogN(self.base),
                        Normalize(
                            np.log10(self.low) / self.log_base,
                            np.log10(self.high) / self.log_base,
                        ),
                    ]
                )
        else:
            if self.prior == "uniform":
                self._rvs = _uniform_inclusive(self.low, self.high - self.low)
                self.transformer = Identity()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base
                    - np.log10(self.low) / self.log_base,
                )
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        """Equal when type, (approximate) bounds, prior and transform all match."""
        return (
            type(self) is type(other)
            and np.allclose([self.low], [other.low])
            and np.allclose([self.high], [other.high])
            and self.prior == other.prior
            and self.transform_ == other.transform_
        )

    def __repr__(self):
        return "Real(low={}, high={}, prior='{}', transform='{}')".format(
            self.low, self.high, self.prior, self.transform_
        )

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the original
        space.

        Values are clipped to ``[low, high]`` and cast to ``dtype``.
        """
        inv_transform = super().inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        inv_transform = np.clip(inv_transform, self.low, self.high).astype(self.dtype)
        if self.dtype == float or self.dtype == 'float':
            # necessary, otherwise the type is converted to a numpy type
            return getattr(inv_transform, "tolist", lambda: inv_transform)()
        else:
            return inv_transform

    @property
    def bounds(self):
        """``(low, high)`` in the original space."""
        return (self.low, self.high)

    @property
    def is_constant(self):
        """True when the lower and upper bound coincide."""
        return self.low == self.high

    def __contains__(self, point):
        if isinstance(point, list):
            point = np.array(point)
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        """Bounds of the dimension in the warped space.

        ``(0.0, 1.0)`` under "normalize"; otherwise ``(low, high)`` for a
        uniform prior, or their logarithms in base ``base`` for a log-uniform
        prior.
        """
        if self.transform_ == "normalize":
            return 0.0, 1.0
        if self.prior == "uniform":
            return self.low, self.high
        # Divide by log_base so the bounds live in the same space as the
        # ``LogN(self.base)`` transformer and ``_rvs`` set up in
        # ``set_transformer``; for the default base 10 this divides by 1.
        return (
            np.log10(self.low) / self.log_base,
            np.log10(self.high) / self.log_base,
        )

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : float
            First point.

        b : float
            Second point.

        Raises
        ------
        RuntimeError
            If either point lies outside the dimension.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within "
                "the space, not %s and %s." % (a, b)
            )
        return abs(a - b)
class Integer(Dimension):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    low : int
        Lower bound (inclusive).

    high : int
        Upper bound (inclusive).

    prior : "uniform" or "log-uniform", default="uniform"
        Distribution to use when sampling random integers for
        this dimension.

        - If `"uniform"`, integers are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, integers are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    base : int
        The logarithmic base to use for a log-uniform prior.
        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with dimension, e.g., "number of trees".

    dtype : str or dtype, default=np.int64
        integer type which will be used in inverse_transform,
        can be int, np.int16, np.uint32, np.int32, np.int64 (default).
        When set to int, `inverse_transform` returns a list instead of
        a numpy array
    """

    def __init__(
        self,
        low,
        high,
        prior="uniform",
        base=10,
        transform=None,
        name=None,
        dtype=np.int64,
    ):
        if high <= low:
            raise ValueError(
                "the lower bound {} has to be less than the"
                " upper bound {}".format(low, high)
            )
        if prior not in ["uniform", "log-uniform"]:
            raise ValueError(
                "prior should be 'uniform' or 'log-uniform'" " got {}".format(prior)
            )
        # A non-positive product means zero lies within [low, high] (or is a
        # bound), where the logarithm is undefined.
        if prior == 'log-uniform' and low * high <= 0:
            raise ValueError(
                "search space should not contain 0" " when using log-uniform prior"
            )
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        # log10(base): dividing a log10 value by this gives the log in `base`.
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        self.transform_ = transform
        self._rvs = None
        self.transformer = None

        if isinstance(self.dtype, str) and self.dtype not in [
            'int',
            'int8',
            'int16',
            'int32',
            'int64',
            'uint8',
            'uint16',
            'uint32',
            'uint64',
        ]:
            raise ValueError(
                "dtype must be 'int', 'int8', 'int16',"
                "'int32', 'int64', 'uint8',"
                "'uint16', 'uint32', or"
                "'uint64', but got {}".format(self.dtype)
            )
        elif isinstance(self.dtype, type) and self.dtype not in [
            int,
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
        ]:
            raise ValueError(
                "dtype must be 'int', 'np.int8', 'np.int16',"
                "'np.int32', 'np.int64', 'np.uint8',"
                "'np.uint16', 'np.uint32', or"
                "'np.uint64', but got {}".format(self.dtype)
            )

        if transform is None:
            transform = "identity"
        self.set_transformer(transform)

    def set_transformer(self, transform="identity"):
        """Define _rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize' or 'identity'

        Raises
        ------
        ValueError
            If ``transform`` is neither 'normalize' nor 'identity'.
        """
        self.transform_ = transform

        if transform not in ["normalize", "identity"]:
            raise ValueError(
                "transform should be 'normalize' or 'identity'"
                " got {}".format(self.transform_)
            )

        # ``_rvs`` samples in the transformed space; ``Dimension.rvs`` maps the
        # draws back through ``inverse_transform``.
        if self.transform_ == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(self.low, self.high, is_int=True)]
                )
            else:
                self.transformer = Pipeline(
                    [
                        LogN(self.base),
                        Normalize(
                            np.log10(self.low) / self.log_base,
                            np.log10(self.high) / self.log_base,
                        ),
                    ]
                )
        else:
            if self.prior == "uniform":
                # randint excludes its upper argument, hence the +1.
                self._rvs = randint(self.low, self.high + 1)
                self.transformer = Identity()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base
                    - np.log10(self.low) / self.log_base,
                )
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        """Equal when the type and (approximate) bounds match."""
        return (
            type(self) is type(other)
            and np.allclose([self.low], [other.low])
            and np.allclose([self.high], [other.high])
        )

    def __repr__(self):
        return "Integer(low={}, high={}, prior='{}', transform='{}')".format(
            self.low, self.high, self.prior, self.transform_
        )

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the original
        space.

        Values are clipped to ``[low, high]``, rounded and cast to ``dtype``.
        """
        # The concatenation of all transformed dimensions makes Xt to be
        # of type float, hence the required cast back to int.
        inv_transform = super().inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        inv_transform = np.clip(inv_transform, self.low, self.high)
        if self.dtype == int or self.dtype == 'int':
            # necessary, otherwise the type is converted to a numpy type
            value = np.round(inv_transform).astype(self.dtype)
            return getattr(value, "tolist", lambda: value)()
        else:
            return np.round(inv_transform).astype(self.dtype)

    @property
    def bounds(self):
        """``(low, high)`` in the original space."""
        return (self.low, self.high)

    @property
    def is_constant(self):
        """True when the lower and upper bound coincide."""
        return self.low == self.high

    def __contains__(self, point):
        if isinstance(point, list):
            point = np.array(point)
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        """Bounds of the dimension in the warped space.

        ``(0.0, 1.0)`` under "normalize"; otherwise ``(low, high)`` for a
        uniform prior, or their logarithms in base ``base`` for a log-uniform
        prior.
        """
        if self.transform_ == "normalize":
            return 0.0, 1.0
        if self.prior == "uniform":
            return (self.low, self.high)
        # Under the identity transform with a log-uniform prior,
        # ``set_transformer`` installs ``LogN(self.base)`` and samples in log
        # space, so the bounds must be expressed in that space as well.
        return (
            np.log10(self.low) / self.log_base,
            np.log10(self.high) / self.log_base,
        )

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : int
            First point.

        b : int
            Second point.

        Raises
        ------
        RuntimeError
            If either point lies outside the dimension.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within "
                "the space, not %s and %s." % (a, b)
            )
        return abs(a - b)
class Categorical(Dimension):
    """Search space dimension that can take on categorical values.

    Parameters
    ----------
    categories : list, shape=(n_categories,)
        Sequence of possible categories.

    prior : list, shape=(categories,), default=None
        Prior probabilities for each category. By default all categories
        are equally likely.

    transform : "onehot", "string", "identity", "label", default="onehot"
        - "identity", the transformed space is the same as the original
          space.
        - "string",  the transformed space is a string encoded
          representation of the original space.
        - "label", the transformed space is a label encoded
          representation (integer) of the original space.
        - "onehot", the transformed space is a one-hot encoded
          representation of the original space.

    name : str or None
        Name associated with dimension, e.g., "colors".
    """

    def __init__(self, categories, prior=None, transform=None, name=None):
        self.categories = tuple(categories)

        self.name = name

        if transform is None:
            transform = "onehot"
        self.transform_ = transform
        self.transformer = None
        self._rvs = None
        # ``prior`` keeps what the caller passed (possibly None); ``prior_``
        # is the effective probability vector used for sampling.
        self.prior = prior

        if prior is None:
            self.prior_ = np.tile(1.0 / len(self.categories), len(self.categories))
        else:
            self.prior_ = prior
        self.set_transformer(transform)

    def set_transformer(self, transform="onehot"):
        """Define _rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize', 'onehot', 'string', 'label', or 'identity'

        Raises
        ------
        ValueError
            If ``transform`` is not one of the values listed above.
        """
        self.transform_ = transform
        if transform not in ["identity", "onehot", "string", "normalize", "label"]:
            raise ValueError(
                "Expected transform to be 'identity', 'string',"
                "'label' or 'onehot' got {}".format(transform)
            )
        if transform == "onehot":
            self.transformer = CategoricalEncoder()
            self.transformer.fit(self.categories)
        elif transform == "string":
            self.transformer = StringEncoder()
            self.transformer.fit(self.categories)
        elif transform == "label":
            self.transformer = LabelEncoder()
            self.transformer.fit(self.categories)
        elif transform == "normalize":
            self.transformer = Pipeline(
                [
                    LabelEncoder(list(self.categories)),
                    Normalize(0, len(self.categories) - 1, is_int=True),
                ]
            )
        else:
            self.transformer = Identity()
            self.transformer.fit(self.categories)
        if transform == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
        else:
            # XXX check that sum(prior) == 1
            self._rvs = rv_discrete(values=(range(len(self.categories)), self.prior_))

    def __eq__(self, other):
        """Equal when type, categories and (approximate) priors match."""
        return (
            type(self) is type(other)
            and self.categories == other.categories
            and np.allclose(self.prior_, other.prior_)
        )

    def __repr__(self):
        # Long category/prior sequences are abbreviated to the first and last
        # three entries around an ellipsis marker.
        if len(self.categories) > 7:
            cats = self.categories[:3] + (_Ellipsis(),) + self.categories[-3:]
        else:
            cats = self.categories

        if self.prior is not None and len(self.prior) > 7:
            # Convert the slices to lists first so that tuple or array priors
            # can be abbreviated too (``tuple + list`` raises TypeError).
            prior = list(self.prior[:3]) + [_Ellipsis()] + list(self.prior[-3:])
        else:
            prior = self.prior

        return f"Categorical(categories={cats}, prior={prior})"

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the original
        space."""
        inv_transform = super().inverse_transform(Xt)
        # A list result is converted to an array; anything else is returned
        # as the transformer produced it.
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        return inv_transform

    def rvs(self, n_samples=None, random_state=None):
        """Draw random categories.

        Parameters
        ----------
        n_samples : int or None
            Passed as ``size`` to the underlying distribution.

        random_state : int, RandomState instance, or None (default)
            Passed through to the underlying distribution's ``rvs``.
        """
        choices = self._rvs.rvs(size=n_samples, random_state=random_state)

        if isinstance(choices, numbers.Integral):
            # A single drawn index.
            return self.categories[choices]
        elif self.transform_ == "normalize" and isinstance(choices, float):
            return self.inverse_transform([(choices)])
        elif self.transform_ == "normalize":
            return self.inverse_transform(list(choices))
        else:
            return [self.categories[c] for c in choices]

    @property
    def transformed_size(self):
        """Number of columns this dimension occupies in the warped space."""
        if self.transform_ == "onehot":
            size = len(self.categories)
            # when len(categories) == 2, CategoricalEncoder outputs a
            # single value
            return size if size != 2 else 1
        return 1

    @property
    def bounds(self):
        """The tuple of categories."""
        return self.categories

    @property
    def is_constant(self):
        """True when there is at most one category."""
        return len(self.categories) <= 1

    def __contains__(self, point):
        return point in self.categories

    @property
    def transformed_bounds(self):
        """``(0.0, 1.0)`` for a single column, else one such pair per column."""
        if self.transformed_size == 1:
            return 0.0, 1.0
        else:
            return [(0.0, 1.0) for i in range(self.transformed_size)]

    def distance(self, a, b):
        """Compute distance between category `a` and `b`.

        As categories have no order the distance between two points is one
        if a != b and zero otherwise.

        Parameters
        ----------
        a : category
            First category.

        b : category
            Second category.

        Raises
        ------
        RuntimeError
            If either value is not one of the categories.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within"
                " the space, not {} and {}.".format(a, b)
            )
        return 1 if a != b else 0
class Space:
"""Initialize a search space from given specifications.
Parameters
----------
dimensions : list, shape=(n_dims,)
List of search space dimensions.
Each search dimension can be defined either as
- a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
dimensions),
- a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
dimensions),
- as a list of categories (for `Categorical` dimensions), or
- an instance of a `Dimension` object (`Real`, `Integer` or
`Categorical`).
.. note::
The upper and lower bounds are inclusive for `Integer`
dimensions.
constraint : callable or None, default: None
Constraint function. Should take a single list of parameters
(i.e. a point in space) and return True if the point satisfies
the constraints.
If None, the space is not conditionally constrained.
"""
def __init__(self, dimensions, constraint=None):
    """Build the space from dimension specifications.

    Every entry of ``dimensions`` is passed through ``check_dimension``. When
    no constraint is given and ``dimensions`` is itself a ``Space``, that
    space's constraint is reused.
    """
    self.dimensions = list(map(check_dimension, dimensions))
    inherits_constraint = constraint is None and isinstance(dimensions, Space)
    if inherits_constraint:
        constraint = dimensions.constraint
    assert constraint is None or callable(constraint)
    self.constraint = constraint
def __eq__(self, other):
return all([a == b for a, b in zip(self.dimensions, other.dimensions)])
def __repr__(self):
if len(self.dimensions) > 31:
dims = self.dimensions[:15] + [_Ellipsis()] + self.dimensions[-15:]
else:
dims = self.dimensions
return "Space([{}])".format(',\n '.join(map(str, dims)))
def __iter__(self):
return iter(self.dimensions)
@property
def dimension_names(self):
"""Names of all the dimensions in the search-space."""
index = 0
names = []
for dim in self.dimensions:
if dim.name is None:
names.append("X_%d" % index)
else:
names.append(dim.name)
index += 1
return names
@dimension_names.setter
def dimension_names(self, names):
"""Sets the names of all dimension objects via list of names.
Parameters
----------
names : list of str
List of names. Must be the same length as self.dimensions.
"""
if len(names) != len(self.dimensions):
raise ValueError("`names` must be the same length as " "`self.dimensions`.")
for dim, name in zip(self.dimensions, names):
dim.name = name
@property
def is_real(self):
"""Returns true if all dimensions are Real."""
return all([isinstance(dim, Real) for dim in self.dimensions])
def rvs(self, n_samples=1, random_state=None):
    """Draw random samples.

    The samples are in the original space. They need to be transformed
    before being passed to a trainer or minimizer by `space.transform()`.

    Parameters
    ----------
    n_samples : int, default=1
        Number of samples to be drawn from the space.

    random_state : int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    points : list of lists, shape=(n_points, n_dims)
       Points sampled from the space.

    Raises
    ------
    RuntimeError
        If 10000 rounds of sampling did not yield ``n_samples`` points that
        satisfy the constraint.
    """
    rng = check_random_state(random_state)
    accepted = []
    for _attempt in range(10000):
        # Draw one column of ``n_samples`` values per dimension ...
        columns = [
            dim.rvs(n_samples=n_samples, random_state=rng) for dim in self.dimensions
        ]
        # ... turn the columns into rows (points) ...
        candidates = _transpose_list_array(columns)
        # ... and keep only the points allowed by the constraint, if any.
        if self.constraint is not None:
            candidates = [point for point in candidates if self.constraint(point)]
        accepted.extend(candidates)
        if len(accepted) >= n_samples:
            break
    else:
        # The loop ran out of attempts without collecting enough points.
        raise RuntimeError(
            'Could not find enough valid samples in constrained '
            'space. Please check that the constraint allows for '
            'valid samples to be drawn.'
        )
    return accepted[:n_samples]
def set_transformer(self, transform):
"""Sets the transformer of all dimension objects to `transform`
Parameters
----------
transform : str or list of str
Sets all transformer,, when `transform` is a string.
Otherwise, transform must be a list with strings with
the same length as `dimensions`
"""
# Transform
for j in range(self.n_dims):
if isinstance(transform, list):
self.dimensions[j].set_transformer(transform[j])
else:
self.dimensions[j].set_transformer(transform)
def set_transformer_by_type(self, transform, dim_type):
"""Sets the transformer of `dim_type` objects to `transform`
Parameters
----------
transform : str
Sets all transformer of type `dim_type` to `transform`
dim_type : type
Can be `skopt.space.Real`, `skopt.space.Integer` or
`skopt.space.Categorical`
"""
# Transform
for j in range(self.n_dims):
if isinstance(self.dimensions[j], dim_type):
self.dimensions[j].set_transformer(transform)
def get_transformer(self):
"""Returns all transformers as list."""
return [self.dimensions[j].transform_ for j in range(self.n_dims)]
def transform(self, X):
"""Transform samples from the original space into a warped space.
Note: this transformation is expected to be used to project samples
into a suitable space for numerical optimization.
Parameters
----------
X : list of lists, shape=(n_samples, n_dims)
The samples to transform.
Returns
-------
Xt : array of floats, shape=(n_samples, transformed_n_dims)
The transformed samples.
"""
# Pack by dimension
columns = []
for _ in self.dimensions:
columns.append([])
for i in range(len(X)):
for j in range(self.n_dims):
columns[j].append(X[i][j])
# Transform
for j in range(self.n_dims):
columns[j] = self.dimensions[j].transform(columns[j])
# Repack as an array
Xt = np.hstack([np.asarray(c).reshape((len(X), -1)) for c in columns])
return Xt
def inverse_transform(self, Xt):
    """Map samples from the warped space back to the original space.

    Parameters
    ----------
    Xt : array of floats, shape=(n_samples, transformed_n_dims)
        The samples to inverse transform.

    Returns
    -------
    X : list of lists, shape=(n_samples, n_dims)
        The original samples.
    """
    Xt = np.asarray(Xt)
    columns = []
    cursor = 0
    for dim in self.dimensions:
        width = dim.transformed_size
        stop = cursor + width
        # A dimension occupying a single warped column receives a 1-D
        # slice; wider dimensions receive their 2-D block of columns.
        if width == 1:
            piece = Xt[:, cursor]
        else:
            piece = Xt[:, cursor:stop]
        columns.append(dim.inverse_transform(piece))
        cursor = stop

    # Turn the per-dimension columns back into per-sample rows.
    return _transpose_list_array(columns)
@property
def n_dims(self):
    """Number of dimensions of the original (untransformed) space."""
    dims = self.dimensions
    return len(dims)
@property
def transformed_n_dims(self):
    """Number of columns of the warped space.

    This is the sum of ``transformed_size`` over all dimensions.
    """
    return sum(dim.transformed_size for dim in self.dimensions)
@property
def bounds(self):
    """The dimension bounds, in the original space.

    A dimension whose ``size`` is 1 contributes its ``bounds`` as one
    entry; any other dimension contributes each element of its ``bounds``
    as a separate entry.
    """
    collected = []
    for dim in self.dimensions:
        add = collected.append if dim.size == 1 else collected.extend
        add(dim.bounds)
    return collected
def __contains__(self, point):
    """Check that `point` is within the bounds of the space.

    Parameters
    ----------
    point : sequence
        One component per dimension of the space.

    Returns
    -------
    bool
        True when `point` has one component per dimension, every
        component lies in its dimension and, if ``self.constraint`` is
        set, the constraint evaluates to a truthy value for `point`.
    """
    # ``zip`` silently stops at the shorter input, so a point with missing
    # or extra components would otherwise be accepted after checking only
    # a prefix.  Reject such points up front whenever the length is known.
    try:
        n_components = len(point)
    except TypeError:
        # Unsized iterable: fall back to the pairwise check only.
        n_components = None
    if n_components is not None and n_components != len(self.dimensions):
        return False

    for component, dim in zip(point, self.dimensions):
        if component not in dim:
            return False

    if self.constraint is not None:
        return bool(self.constraint(point))
    return True
def __getitem__(self, dimension_names):
    """Look up search-space dimensions by name or by position.

    This allows for dict-like lookup of dimensions, for example:
    `space['foo']` returns ``(index, dimension)`` for the dimension named
    'foo' if it exists, otherwise ``(None, None)`` is returned.

    It also allows for lookup of a list of dimension-names, for example:
    `space[['foo', 'bar']]` returns one ``(index, dimension)`` pair per
    requested name, using ``(None, None)`` for names that do not exist.

    Parameters
    ----------
    dimension_names : str, int, list or tuple
        Name (str) or position (int) of a single search-space dimension,
        or a list/tuple of such names/positions.

    Returns
    -------
    dims : tuple (index, Dimension) or list of tuple (index, Dimension)
        The matching dimension together with its index, or a list of such
        pairs. Entries that cannot be found are ``(None, None)``.

    Raises
    ------
    ValueError
        If `dimension_names` is not a str, int, list or tuple.
    """

    def _get(dimension_name):
        """Return ``(index, dim)`` for the first dimension that matches."""
        for index, dim in enumerate(self.dimensions):
            # A dimension matches by its name or by its position.
            if dimension_name == dim.name or dimension_name == index:
                return (index, dim)
        return (None, None)

    if isinstance(dimension_names, (str, int)):
        # Get a single search-space dimension.
        return _get(dimension_name=dimension_names)

    if isinstance(dimension_names, (list, tuple)):
        # Get a list of search-space dimensions.
        # Note that we do not check whether the names are really strings.
        return [_get(dimension_name=name) for name in dimension_names]

    # The original message was missing the space between "or" and "list".
    msg = (
        "Dimension name should be either string or "
        "list of strings, but got {}."
    )
    raise ValueError(msg.format(type(dimension_names)))
@property
def transformed_bounds(self):
    """The dimension bounds, in the warped space.

    A dimension whose ``transformed_size`` is 1 contributes its
    ``transformed_bounds`` as one entry; any other dimension contributes
    each element of its ``transformed_bounds`` as a separate entry.
    """
    collected = []
    for dim in self.dimensions:
        single_column = dim.transformed_size == 1
        add = collected.append if single_column else collected.extend
        add(dim.transformed_bounds)
    return collected
@property
def is_categorical(self):
    """Whether every dimension of the space is a `Categorical`.

    An empty list of dimensions yields True.
    """
    return all(isinstance(dim, Categorical) for dim in self.dimensions)
@property
def is_partly_categorical(self):
    """Whether at least one dimension of the space is a `Categorical`.

    An empty list of dimensions yields False.
    """
    return any(isinstance(dim, Categorical) for dim in self.dimensions)
@property
def n_constant_dimensions(self):
    """Number of constant dimensions, i.e. those with zero degrees of freedom.

    A dimension is counted when its ``is_constant`` attribute is truthy,
    e.g. an Integer dimension with (0., 0.) as bounds.
    """
    return sum(1 for dim in self.dimensions if dim.is_constant)
def distance(self, point_a, point_b):
    """Compute the distance between two points in this space.

    The result is the sum, over all dimensions, of the per-dimension
    distance between the matching components of the two points.

    Parameters
    ----------
    point_a : array
        First point.

    point_b : array
        Second point.
    """
    total = 0.0
    components = zip(point_a, point_b, self.dimensions)
    for coord_a, coord_b, dim in components:
        total += dim.distance(coord_a, coord_b)
    return total
class Lhs(InitialPointGenerator):
    """Latin hypercube sampling.

    Parameters
    ----------
    lhs_type : str, default='classic'
        - 'classic' - a small random number is added
        - 'centered' - points are set uniformly in each interval

    criterion : str or None, default='maximin'
        When set to None, the LHS is not optimized

        - 'correlation' : optimized LHS by minimizing the correlation
        - 'maximin' : optimized LHS by maximizing the minimal pdist
        - 'ratio' : optimized LHS by minimizing the ratio
          `max(pdist) / min(pdist)`

    iterations : int
        Defines the number of iterations for optimizing LHS
    """

    def __init__(self, lhs_type="classic", criterion="maximin", iterations=1000):
        self.lhs_type = lhs_type
        self.criterion = criterion
        self.iterations = iterations

    def generate(self, dimensions, n_samples, random_state=None):
        """Creates latin hypercube samples.

        Parameters
        ----------
        dimensions : list, shape (n_dims,)
            List of search space dimensions.
            Each search dimension can be defined either as

            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
              dimensions),
            - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
              dimensions),
            - as a list of categories (for `Categorical` dimensions), or
            - an instance of a `Dimension` object (`Real`, `Integer` or
              `Categorical`).

        n_samples : int
            The order of the LHS sequence. Defines the number of samples.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.

        Returns
        -------
        samples
            The selected LHS design mapped back to the original space, i.e.
            whatever `Space.inverse_transform` returns for it (documented
            there as a list of lists of shape (n_samples, n_dims)).

        Raises
        ------
        ValueError
            If `criterion` or `lhs_type` is not one of the supported values.
        """
        rng = check_random_state(random_state)
        space = Space(dimensions)
        transformer = space.get_transformer()
        n_dim = space.n_dims
        space.set_transformer("normalize")
        try:
            # The first draw is the result when no optimisation is requested
            # and the fallback when no candidate is accepted by the search.
            h_opt = self._lhs_normalized(n_dim, n_samples, rng)
            # A single sample has no pairwise distances or correlations to
            # optimise, so the search is skipped in that case as well.
            if self.criterion is not None and n_samples != 1:
                h_opt = self._optimize(h_opt, n_dim, n_samples, rng)
            # The search works on normalized designs only; the selected one
            # is mapped back to the original space exactly once.
            return space.inverse_transform(h_opt)
        finally:
            # Restore the previous transformers even when an error is raised,
            # so dimension objects possibly shared with the caller are not
            # left with the "normalize" transformer.
            space.set_transformer(transformer)

    def _optimize(self, h_opt, n_dim, n_samples, rng):
        """Return the best normalized design found for `self.criterion`.

        `self.iterations` fresh designs are drawn; `h_opt` is returned
        unchanged when none of them is accepted.
        """
        if self.criterion == "correlation":
            mincorr = np.inf
            for _ in range(self.iterations):
                # Generate a random LHS
                h = self._lhs_normalized(n_dim, n_samples, rng)
                r = np.corrcoef(np.array(h).T)
                # Entries equal to exactly 1 (presumably the diagonal) are
                # ignored when comparing against the best value so far.
                off_diagonal = np.abs(r[r != 1])
                if len(off_diagonal) > 0 and np.max(off_diagonal) < mincorr:
                    mincorr = np.max(np.abs(r - np.eye(r.shape[0])))
                    h_opt = h.copy()
        elif self.criterion == "maximin":
            maxdist = 0
            # Maximize the minimum distance between points
            for _ in range(self.iterations):
                h = self._lhs_normalized(n_dim, n_samples, rng)
                d = spatial.distance.pdist(np.array(h), 'euclidean')
                smallest = np.min(d)
                if maxdist < smallest:
                    maxdist = smallest
                    h_opt = h.copy()
        elif self.criterion == "ratio":
            minratio = np.inf
            # Minimize the ratio max(pdist) / min(pdist)
            for _ in range(self.iterations):
                h = self._lhs_normalized(n_dim, n_samples, rng)
                p = spatial.distance.pdist(np.array(h), 'euclidean')
                smallest = np.min(p)
                # Coincident points would divide by zero; use a tiny
                # denominator so such designs get a very large ratio.
                if smallest == 0:
                    ratio = np.max(p) / 1e-8
                else:
                    ratio = np.max(p) / smallest
                if minratio > ratio:
                    minratio = ratio
                    h_opt = h.copy()
        else:
            raise ValueError("Wrong criterion. Got {}".format(self.criterion))
        return h_opt

    def _lhs_normalized(self, n_dim, n_samples, random_state):
        """Draw one latin hypercube design in the unit hypercube.

        The unit interval is split into `n_samples` equal cells. Each column
        gets one value per cell (the cell centre for 'centered', a uniformly
        drawn position inside the cell for 'classic') and the result is
        passed through `_random_permute_matrix`.

        Raises
        ------
        ValueError
            If `self.lhs_type` is neither 'centered' nor 'classic'.
        """
        rng = check_random_state(random_state)
        edges = np.linspace(0, 1, n_samples + 1)
        # Column vectors so that they broadcast over the n_dim columns.
        widths = np.diff(edges)[:, np.newaxis]
        lower = edges[:n_samples, np.newaxis]
        # Drawn for every lhs_type so the random stream consumed per call
        # does not depend on the type.
        u = rng.rand(n_samples, n_dim)
        if self.lhs_type == "centered":
            h = np.zeros_like(u)
            h[:, :] = widths / 2.0 + lower
        elif self.lhs_type == "classic":
            h = u * widths + lower
        else:
            raise ValueError(f"Wrong lhs_type. Got {self.lhs_type}")
        return _random_permute_matrix(h, random_state=rng)