Source code for nnabla.initializer

# Copyright (c) 2017 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from . import random


class BaseInitializer(object):
    """Abstract base for parameter initializers.

    An initializer is a callable object: it receives a shape and returns a
    :obj:`numpy.ndarray` of that shape.
    """

    def __call__(self, shape):
        """Generates an array with an initializer.

        Args:
            shape (:obj:`tuple` of :obj:`int`): Shape of the
                :obj:`numpy.ndarray` to create.

        Returns:
            :obj:`numpy.ndarray` : Array.

        Raises:
            NotImplementedError: Always, in this base class.

        Note:
            Subclasses of :class:`~nnabla.initializer.BaseInitializer` must
            override this method.

        """
        raise NotImplementedError()
class NormalInitializer(BaseInitializer):
    r"""Initializer that samples from a zero-mean normal distribution.

    .. math::
        \mathbf x \sim {\cal N} (\mathbf 0 | \sigma^2 \mathbf I)

    Args:
        sigma (float): :math:`\sigma`.
        rng (numpy.random.RandomState): Random number generator. When
            omitted, the module-level ``random.prng`` is used.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        w = I.NormalInitializer(5e-5)
        b = I.NormalInitializer(0.0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    """

    def __init__(self, sigma=1.0, rng=None):
        # Fall back to the shared generator only when none was supplied.
        self.rng = random.prng if rng is None else rng
        self.sigma = sigma

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '{}({})'.format(cls_name, self.sigma)

    def __call__(self, shape):
        # Draw standard-normal samples, then scale them by sigma.
        samples = self.rng.randn(*shape)
        return samples * self.sigma
class UniformInitializer(BaseInitializer):
    r"""Initializer that samples from a uniform distribution.

    .. math::
        \mathbf x \sim {\cal U} (a, b)

    Args:
        lim (:obj:`tuple` of :obj:`float`): A tuple of two floats,
            :math:`(a, b)`.
        rng (numpy.random.RandomState): Random number generator. When
            omitted, the module-level ``random.prng`` is used.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        w = I.UniformInitializer() # this generates uniform distribution within the default range of (-1,1)
        b = I.UniformInitializer((-0.5,0.5))
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    """

    def __init__(self, lim=(-1, 1), rng=None):
        # Fall back to the shared generator only when none was supplied.
        self.rng = random.prng if rng is None else rng
        self.lim = lim

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '{}({!r})'.format(cls_name, self.lim)

    def __call__(self, shape):
        lower = self.lim[0]
        upper = self.lim[1]
        return self.rng.uniform(lower, upper, size=shape)
class UniformIntInitializer(BaseInitializer):
    r"""Initializer that samples from an integer uniform distribution.

    .. math::
        \mathbf x \sim {\cal U} ([a, b))

    Args:
        lim (:obj:`tuple` of :obj:`int`): A tuple of two ints,
            :math:`(a, b)`.
        rng (numpy.random.RandomState): Random number generator. When
            omitted, the module-level ``random.prng`` is used.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        w = I.UniformIntInitializer() # this generates uniform integer distribution within the default range of (0,10)
        b = I.UniformIntInitializer((-1,1))
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    """

    def __init__(self, lim=(0, 10), rng=None):
        # Fall back to the shared generator only when none was supplied.
        self.rng = random.prng if rng is None else rng
        self.lim = lim

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '{}({!r})'.format(cls_name, self.lim)

    def __call__(self, shape):
        lower = self.lim[0]
        upper = self.lim[1]
        return self.rng.randint(lower, upper, size=shape)
class RangeInitializer(BaseInitializer):
    r"""Generates an array with sequence of numbers.

    .. math::
        \mathbf x[i] = start + step * i

    The index :math:`i` runs over the last axis of the requested shape; the
    resulting sequence is broadcast across all leading axes.

    Args:
        start (int): A start value.
        step (int): A step value.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.initializer as I

        x = nn.Variable([100])
        x.d = I.RangeInitializer(0, 1)(x.shape)

    """

    # NOTE: the docstring above is a raw string because it contains the
    # LaTeX command ``\mathbf``; in a plain string ``\m`` is an invalid
    # escape sequence.

    def __init__(self, start=0, step=1):
        self.start = start
        self.step = step

    def __repr__(self):
        # Mirrors the repr style of the other parameterised initializers.
        return '{}({}, {})'.format(
            self.__class__.__name__, repr(self.start), repr(self.step))

    def __call__(self, shape):
        # Build the sequence for the last axis only ...
        a = np.arange(0, shape[-1], 1)
        # ... then broadcast it over the leading axes. Per the NumPy
        # documentation, np.broadcast_to returns a read-only view, so copy
        # the result before modifying it in place.
        return np.broadcast_to(self.start + a * self.step, shape)
class ConstantInitializer(BaseInitializer):
    """Generates a constant valued array.

    Args:
        value (float): A constant value.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        w = I.ConstantInitializer(0.1)
        b = I.ConstantInitializer() # this generates constant valued array of default value 0
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    """

    def __init__(self, value=0):
        self.value = value

    def __repr__(self):
        # Mirrors the repr style of the other parameterised initializers.
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __call__(self, shape):
        # Scale an all-ones array so every element equals ``value``.
        return np.ones(shape) * self.value
class OrthogonalInitializer(BaseInitializer):
    r"""Generates an orthogonal matrix weights proposed by Saxe et al.

    The requested shape is flattened to two dimensions
    ``(shape[0], prod(shape[1:]))``, a Gaussian matrix of that size is
    decomposed by a reduced SVD, and the factor whose shape matches the
    flattened shape is reshaped back and scaled by ``gain``.

    Args:
        gain (float): scaling factor which should be decided depending on a
            type of units.
        rng (numpy.random.RandomState): Random number generator. When
            omitted, the module-level ``random.prng`` is used.

    Example:

    .. code-block:: python

        import numpy as np
        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        w = I.OrthogonalInitializer(np.sqrt(2.0))
        b = I.ConstantInitializer(0.0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    References:
        * `Saxe, et al. Exact solutions to the nonlinear dynamics of
          learning in deep linear neural networks.
          <https://arxiv.org/abs/1312.6120>`_

    """

    def __init__(self, gain=1.0, rng=None):
        # Fall back to the shared generator only when none was supplied.
        self.rng = random.prng if rng is None else rng
        self.gain = gain

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '{}({})'.format(cls_name, self.gain)

    def __call__(self, shape):
        # Collapse every axis after the first into a single column axis.
        rows = shape[0]
        cols = int(np.prod(shape[1:]))
        flat_shape = (rows, cols)

        gaussian = self.rng.normal(0.0, 1.0, flat_shape)
        left, _, right = np.linalg.svd(gaussian, full_matrices=False)

        # With a reduced SVD, one of the two factors has the flattened
        # shape; prefer the left one when it matches.
        if left.shape == flat_shape:
            basis = left
        else:
            basis = right

        # Cast to float32 first, then apply the gain.
        return basis.reshape(shape).astype('float32') * self.gain
def calc_normal_std_he_forward(inmaps, outmaps, kernel=(1, 1)):
    r"""Calculates the standard deviation proposed by He et al.

    .. math::
        \sigma = \sqrt{\frac{2}{NK}}

    Args:
        inmaps (int): Map size of an input Variable, :math:`N`.
        outmaps (int): Map size of an output Variable, :math:`M`.
            Not used by this (forward) variant.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel spatial shape.
            In above definition, :math:`K` is the product of shape dimensions.
            In Affine, the default value should be used.

    Returns:
        The standard deviation :math:`\sigma`.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        s = I.calc_normal_std_he_forward(x.shape[1],64)
        w = I.NormalInitializer(s)
        b = I.ConstantInitializer(0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    References:
        * `He, et al. Delving Deep into Rectifiers: Surpassing Human-Level
          Performance on ImageNet Classification.
          <https://arxiv.org/abs/1502.01852>`_

    """
    kernel_size = np.prod(kernel)
    fan_in = kernel_size * inmaps
    return np.sqrt(2. / fan_in)
def calc_normal_std_he_backward(inmaps, outmaps, kernel=(1, 1)):
    r"""Calculates the standard deviation of He et al. (backward case).

    .. math::
        \sigma = \sqrt{\frac{2}{MK}}

    Args:
        inmaps (int): Map size of an input Variable, :math:`N`.
            Not used by this (backward) variant.
        outmaps (int): Map size of an output Variable, :math:`M`.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel spatial shape.
            In above definition, :math:`K` is the product of shape dimensions.
            In Affine, the default value should be used.

    Returns:
        The standard deviation :math:`\sigma`.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        s = I.calc_normal_std_he_backward(x.shape[1],64)
        w = I.NormalInitializer(s)
        b = I.ConstantInitializer(0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    References:
        * `He, et al. Delving Deep into Rectifiers: Surpassing Human-Level
          Performance on ImageNet Classification.
          <https://arxiv.org/abs/1502.01852>`_

    """
    kernel_size = np.prod(kernel)
    fan_out = kernel_size * outmaps
    return np.sqrt(2. / fan_out)
def calc_normal_std_glorot(inmaps, outmaps, kernel=(1, 1)):
    r"""Calculates the standard deviation proposed by Glorot et al.

    Note:
        We have updated the definition as following from v.1.2. It may affect the
        behavior of existing scripts that rely on the default initialization.

    .. math::
        \sigma = \sqrt{\frac{2}{K(N + M)}}

    Args:
        inmaps (int): Map size of an input Variable, :math:`N`.
        outmaps (int): Map size of an output Variable, :math:`M`.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel spatial shape.
            In above definition, :math:`K` is the product of shape dimensions.
            In Affine, the default value should be used.

    Returns:
        The standard deviation :math:`\sigma`.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        s = I.calc_normal_std_glorot(x.shape[1],64)
        w = I.NormalInitializer(s)
        b = I.ConstantInitializer(0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    References:
        * `Glorot and Bengio. Understanding the difficulty of training deep
          feedforward neural networks
          <http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf>`_

    """
    kernel_size = np.prod(kernel)
    denominator = kernel_size * (inmaps + outmaps)
    return np.sqrt(2. / denominator)
def calc_uniform_lim_glorot(inmaps, outmaps, kernel=(1, 1)):
    r"""Calculates the lower bound and the upper bound of the uniform distribution proposed by Glorot et al.

    Note:
        We have updated the definition as following from v.1.3. It may affect the
        behavior of existing scripts that rely on the default initialization.

    .. math::

        b &= \sqrt{\frac{6}{K(N + M)}}\\
        a &= -b

    Args:
        inmaps (int): Map size of an input Variable, :math:`N`.
        outmaps (int): Map size of an output Variable, :math:`M`.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel spatial shape.
            In above definition, :math:`K` is the product of shape dimensions.
            In Affine, the default value should be used.

    Returns:
        A 2-tuple ``(a, b)``: the lower and upper bound, with ``a == -b``.

    Example:

    .. code-block:: python

        import nnabla as nn
        import nnabla.parametric_functions as PF
        import nnabla.initializer as I

        x = nn.Variable([60,1,28,28])
        lb,ub= I.calc_uniform_lim_glorot(x.shape[1],64)
        w = I.UniformInitializer((lb,ub))
        b = I.ConstantInitializer(0)
        h = PF.convolution(x, 64, [3, 3], w_init=w, b_init=b, pad=[1, 1], name='conv')

    References:
        * `Glorot and Bengio. Understanding the difficulty of training deep
          feedforward neural networks
          <http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf>`_

    """
    kernel_size = np.prod(kernel)
    denominator = kernel_size * (inmaps + outmaps)
    bound = np.sqrt(6. / denominator)
    # The interval is symmetric around zero.
    return -bound, bound