Source code for nnabla.experimental.parametric_function_class.batch_normalization

# Copyright (c) 2017 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import nnabla as nn
import nnabla.functions as F
from nnabla.parameter import get_parameter_or_create, get_parameter
from nnabla.initializer import (
    calc_uniform_lim_glorot,
    ConstantInitializer, NormalInitializer, UniformInitializer)

from .module import Module


[docs]class BatchNormalization(Module): """ Batch normalization layer. .. math:: \\begin{array}{lcl} \\mu &=& \\frac{1}{M} \\sum x_i\\\\ \\sigma^2 &=& \\frac{1}{M} \\left(\\sum x_i - \\mu\\right)^2\\\\ \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon }}\\\\ y_i &= & \\hat{x}_i \\gamma + \\beta. \\end{array} where :math:`x_i, y_i` are the inputs. In testing, the mean and variance computed by moving average calculated during training are used. Args: inp (~nnabla.Variable): N-D array of input. axes (:obj:`tuple` of :obj:`int`): Mean and variance for each element in ``axes`` are calculated using elements on the rest axes. For example, if an input is 4 dimensions, and ``axes`` is ``[1]``, batch mean is calculated as ``np.mean(inp.d, axis=(0, 2, 3), keepdims=True)`` (using numpy expression as an example). decay_rate (float): Decay rate of running mean and variance. eps (float): Tiny value to avoid zero division by std. batch_stat (bool): Use mini-batch statistics rather than running ones. output_stat (bool): Output batch mean and variance. fix_parameters (bool): When set to `True`, the beta and gamma will not be updated. param_init (dict): Parameter initializers can be set with a dict. A key of the dict must be ``'beta'``, ``'gamma'``, ``'mean'`` or ``'var'``. A value of the dict must be an :obj:`~nnabla.initializer.Initializer` or a :obj:`numpy.ndarray`. E.g. ``{'beta': ConstantIntializer(0), 'gamma': np.ones(gamma_shape) * 2}``. Returns: :class:`~nnabla.Variable`: N-D array. References: - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167 The shape of parameters has the same number of dimensions with the input data, and the shapes in ``axes`` has the same dimensions with the input, while the rest has ``1``. If an input is 4-dim and ``axes=[1]``, the parameter shape will be ``param_shape = np.mean(inp.d, axis=(0, 2, 3), keepdims=True).shape`` (using numpy expression as an example). """ def __init__(self, n_features, n_dims, axes=[1], decay_rate=0.9, eps=1e-5, batch_stat=True, output_stat=False, fix_parameters=False, param_init=None): assert len(axes) == 1 shape_stat = [1 for _ in range(n_dims)] shape_stat[axes[0]] = n_features if param_init is None: param_init = {} beta_init = param_init.get('beta', ConstantInitializer(0)) gamma_init = param_init.get('gamma', ConstantInitializer(1)) mean_init = param_init.get('mean', ConstantInitializer(0)) var_init = param_init.get('var', ConstantInitializer(1)) beta = nn.Variable.from_numpy_array( beta_init(shape_stat)).apply(need_grad=not fix_parameters) gamma = nn.Variable.from_numpy_array(gamma_init( shape_stat)).apply(need_grad=not fix_parameters) mean = nn.Variable.from_numpy_array(mean_init(shape_stat)) var = nn.Variable.from_numpy_array(var_init(shape_stat)) self.beta = beta self.gamma = gamma self.mean = mean self.var = var self.axes = axes self.decay_rate = decay_rate self.eps = eps self.output_stat = output_stat def __call__(self, inp, test=False): return F.batch_normalization(inp, self.beta, self.gamma, self.mean, self.var, self.axes, self.decay_rate, self.eps, not test, self.output_stat)
[docs]class BatchNorm1d(BatchNormalization): """ Batch normalization layer for 3d-Array or 3d-Variable. This is typically used together with `Conv1d`. .. math:: \\begin{array}{lcl} \\mu &=& \\frac{1}{M} \\sum x_i\\\\ \\sigma^2 &=& \\frac{1}{M} \\left(\\sum x_i - \\mu\\right)^2\\\\ \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon }}\\\\ y_i &= & \\hat{x}_i \\gamma + \\beta. \\end{array} where :math:`x_i, y_i` are the inputs. In testing, the mean and variance computed by moving average calculated during training are used. Args: inp (~nnabla.Variable): N-D array of input. axes (:obj:`tuple` of :obj:`int`): Mean and variance for each element in ``axes`` are calculated using elements on the rest axes. For example, if an input is 4 dimensions, and ``axes`` is ``[1]``, batch mean is calculated as ``np.mean(inp.d, axis=(0, 2, 3), keepdims=True)`` (using numpy expression as an example). decay_rate (float): Decay rate of running mean and variance. eps (float): Tiny value to avoid zero division by std. batch_stat (bool): Use mini-batch statistics rather than running ones. output_stat (bool): Output batch mean and variance. fix_parameters (bool): When set to `True`, the beta and gamma will not be updated. param_init (dict): Parameter initializers can be set with a dict. A key of the dict must be ``'beta'``, ``'gamma'``, ``'mean'`` or ``'var'``. A value of the dict must be an :obj:`~nnabla.initializer.Initializer` or a :obj:`numpy.ndarray`. E.g. ``{'beta': ConstantIntializer(0), 'gamma': np.ones(gamma_shape) * 2}``. Returns: :class:`~nnabla.Variable`: N-D array. References: - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167 The shape of parameters has the same number of dimensions with the input data, and the shapes in ``axes`` has the same dimensions with the input, while the rest has ``1``. If an input is 4-dim and ``axes=[1]``, the parameter shape will be ``param_shape = np.mean(inp.d, axis=(0, 2, 3), keepdims=True).shape`` (using numpy expression as an example). """ def __init__(self, n_features, axes=[1], decay_rate=0.9, eps=1e-5, batch_stat=True, output_stat=False, fix_parameters=False, param_init=None): super(BatchNorm1d, self).__init__(n_features, 3, axes=axes, decay_rate=decay_rate, eps=1e-5, batch_stat=batch_stat, output_stat=output_stat, fix_parameters=fix_parameters, param_init=param_init)
[docs]class BatchNorm2d(BatchNormalization): """ Batch normalization layer for 4d-Array or 4d-Variable. This is typically used together with `Conv2d`. .. math:: \\begin{array}{lcl} \\mu &=& \\frac{1}{M} \\sum x_i\\\\ \\sigma^2 &=& \\frac{1}{M} \\left(\\sum x_i - \\mu\\right)^2\\\\ \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon }}\\\\ y_i &= & \\hat{x}_i \\gamma + \\beta. \\end{array} where :math:`x_i, y_i` are the inputs. In testing, the mean and variance computed by moving average calculated during training are used. Args: inp (~nnabla.Variable): N-D array of input. axes (:obj:`tuple` of :obj:`int`): Mean and variance for each element in ``axes`` are calculated using elements on the rest axes. For example, if an input is 4 dimensions, and ``axes`` is ``[1]``, batch mean is calculated as ``np.mean(inp.d, axis=(0, 2, 3), keepdims=True)`` (using numpy expression as an example). decay_rate (float): Decay rate of running mean and variance. eps (float): Tiny value to avoid zero division by std. batch_stat (bool): Use mini-batch statistics rather than running ones. output_stat (bool): Output batch mean and variance. fix_parameters (bool): When set to `True`, the beta and gamma will not be updated. param_init (dict): Parameter initializers can be set with a dict. A key of the dict must be ``'beta'``, ``'gamma'``, ``'mean'`` or ``'var'``. A value of the dict must be an :obj:`~nnabla.initializer.Initializer` or a :obj:`numpy.ndarray`. E.g. ``{'beta': ConstantIntializer(0), 'gamma': np.ones(gamma_shape) * 2}``. Returns: :class:`~nnabla.Variable`: N-D array. References: - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167 The shape of parameters has the same number of dimensions with the input data, and the shapes in ``axes`` has the same dimensions with the input, while the rest has ``1``. If an input is 4-dim and ``axes=[1]``, the parameter shape will be ``param_shape = np.mean(inp.d, axis=(0, 2, 3), keepdims=True).shape`` (using numpy expression as an example). """ def __init__(self, n_features, axes=[1], decay_rate=0.9, eps=1e-5, batch_stat=True, output_stat=False, fix_parameters=False, param_init=None): super(BatchNorm2d, self).__init__(n_features, 4, axes=axes, decay_rate=decay_rate, eps=1e-5, batch_stat=batch_stat, output_stat=output_stat, fix_parameters=fix_parameters, param_init=param_init)
[docs]class BatchNorm3d(BatchNormalization): """ Batch normalization layer for 5d-Array or 5d-Variable. This is typically used together with `Conv3d`. .. math:: \\begin{array}{lcl} \\mu &=& \\frac{1}{M} \\sum x_i\\\\ \\sigma^2 &=& \\frac{1}{M} \\left(\\sum x_i - \\mu\\right)^2\\\\ \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon }}\\\\ y_i &= & \\hat{x}_i \\gamma + \\beta. \\end{array} where :math:`x_i, y_i` are the inputs. In testing, the mean and variance computed by moving average calculated during training are used. Args: inp (~nnabla.Variable): N-D array of input. axes (:obj:`tuple` of :obj:`int`): Mean and variance for each element in ``axes`` are calculated using elements on the rest axes. For example, if an input is 4 dimensions, and ``axes`` is ``[1]``, batch mean is calculated as ``np.mean(inp.d, axis=(0, 2, 3), keepdims=True)`` (using numpy expression as an example). decay_rate (float): Decay rate of running mean and variance. eps (float): Tiny value to avoid zero division by std. batch_stat (bool): Use mini-batch statistics rather than running ones. output_stat (bool): Output batch mean and variance. fix_parameters (bool): When set to `True`, the beta and gamma will not be updated. param_init (dict): Parameter initializers can be set with a dict. A key of the dict must be ``'beta'``, ``'gamma'``, ``'mean'`` or ``'var'``. A value of the dict must be an :obj:`~nnabla.initializer.Initializer` or a :obj:`numpy.ndarray`. E.g. ``{'beta': ConstantIntializer(0), 'gamma': np.ones(gamma_shape) * 2}``. Returns: :class:`~nnabla.Variable`: N-D array. References: - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167 The shape of parameters has the same number of dimensions with the input data, and the shapes in ``axes`` has the same dimensions with the input, while the rest has ``1``. If an input is 4-dim and ``axes=[1]``, the parameter shape will be ``param_shape = np.mean(inp.d, axis=(0, 2, 3), keepdims=True).shape`` (using numpy expression as an example). """ def __init__(self, n_features, axes=[1], decay_rate=0.9, eps=1e-5, batch_stat=True, output_stat=False, fix_parameters=False, param_init=None): super(BatchNorm3d, self).__init__(n_features, 5, axes=axes, decay_rate=decay_rate, eps=1e-5, batch_stat=batch_stat, output_stat=output_stat, fix_parameters=fix_parameters, param_init=param_init)