Source code for nnabla.function_bases

# Copyright 2018,2019,2020,2021 Sony Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# *WARNING*
# THIS FILE IS AUTO-GENERATED BY CODE GENERATOR.
# PLEASE DO NOT EDIT THIS FILE BY HAND!
# If you want to modify this file, edit the following files.
# - python/src/nnabla/function_bases.py.tmpl
# - build-tools/code_generator/generate.py

from __future__ import absolute_import

from .context import get_current_context
from . import function as F
from .auto_forward import get_auto_forward

# Templates for function_api source building.
FUNCTION_API_HEADER = "def {name}{signature}:"
FUNCTION_API_BODY = '''ctx = get_current_context()
return _func_(ctx, {shortsignature})'''

def function_api(func):
    """
    Decorator that wraps a function so that it is called with the current context.
    A few tricks are applied here so that the wrapped function keeps the signature and docstring of the original.
    """
    from .utils.signature_utils import SignatureEx

    name = func.__name__
    doc = func.__doc__
    if doc is None:
        doc = "No docstring."

    # Parsing argspecs
    sig = SignatureEx.from_callable(func)
    sig = sig.drop_arg('ctx', raise_if_not_found=True)
    # Creating signature
    # e.g. (x, weights, biases=None, n_outputs=None)
    signature = '(' + sig.format_argument_signature() + ')' + sig.format_return_annotation()
    # Creating signature without params and defaults
    # e.g. x, weights, biases, n_outputs
    shortsignature = sig.format_caller_argument_signature()

    # Create code by string
    src = (FUNCTION_API_HEADER + '\n' + '\n'.join(map(lambda x: '    ' +
                                                      x, FUNCTION_API_BODY.splitlines()))).format(**locals())

    # Evaluate source code from string
    code = compile(src, "<{name}>".format(**locals()), 'single')
    execdict = dict(_func_=func, get_current_context=get_current_context)
    exec(code, execdict)

    # Get created function.
    newfunc = execdict[name]
    # DOC newfunc.__doc__ = FUNCTION_API_DOC.format(**locals())
    doc += '''

    Note:
        All nnabla functions in :obj:`nnabla.functions` are decorated with the :obj:`nnabla.function_bases.function_api` decorator,
        which queries the current context and passes it into the first argument of the
        original function. The original function always takes a context as the first argument.

    '''
    newfunc.__doc__ = doc
    newfunc.__source__ = src
    newfunc.__function_api_base__ = func
    newfunc.__module__ = __name__
    return newfunc
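
# Illustrative sketch (not part of the generated code): how the decorator above is
# meant to be used. The function name `my_identity` is hypothetical; the generated
# wrapper shown for `__source__` follows the two templates defined above and may
# differ in minor formatting details.
#
# >>> @function_api
# ... def my_identity(ctx, x, n_outputs=-1, outputs=None):
# ...     """Identity."""
# ...     return F.Identity(ctx)(x, n_outputs=n_outputs,
# ...                            auto_forward=get_auto_forward(), outputs=outputs)
# >>> print(my_identity.__source__)
# def my_identity(x, n_outputs=-1, outputs=None):
#     ctx = get_current_context()
#     return _func_(ctx, x, n_outputs, outputs)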





@function_api
def affine(ctx, x, weight, bias=None, base_axis=1, n_outputs=-1, outputs=None):
    r"""
    Affine layer, also known as the fully connected layer. It calculates:

    .. math::
        {\mathbf y} = {\mathbf A} {\mathbf x} + {\mathbf b}.

    where :math:`{\mathbf x}` is the input and :math:`{\mathbf y}` is the output.

    Args:
        x(~nnabla.Variable): Input N-D array with shape (:math:`M_0 \times ... \times M_{B-1} \times D_B \times ... \times D_N`). Dimensions before and after base_axis are flattened as if it were a matrix.
        weight(~nnabla.Variable): Weight matrix with shape (:math:`(D_B \times ... \times D_N) \times L_{0} \times \ldots \times L_{I}`) [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L_{0} \times \ldots \times L_{I}`) [optional][parameter]
        base_axis(int): Base axis of the Affine operation. Dimensions up to base_axis are treated as sample dimensions. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1)`-D array. (:math:`M_0 \times ... \times M_{B-1} \times L_{0} \times \ldots \times L_{I}`)

    """
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Affine(ctx, base_axis)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
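
# Illustrative usage sketch (not part of the generated code); shapes are assumptions
# chosen for the example only. Dimensions after base_axis are flattened (3*28*28 = 2352).
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable.from_numpy_array(np.random.randn(8, 3, 28, 28))
# >>> w = nn.Variable.from_numpy_array(np.random.randn(3 * 28 * 28, 10))
# >>> b = nn.Variable.from_numpy_array(np.zeros(10))
# >>> y = F.affine(x, w, b, base_axis=1)   # ctx is supplied by @function_api
# >>> y.shape
# (8, 10)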
@function_api
def rnn(ctx, x, h, weight_l0, weight=None, bias=None, num_layers=1, nonlinearity='tanh', dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    The RNN function implements an Elman RNN with a nonlinearity applied to the input sequence.
    It is defined as follows:

    .. math::
        {\mathbf h_t} = {\mathbf \tanh}( {\mathbf w_{ih}} *{\mathbf x_t} + {\mathbf b_{ih}} + {\mathbf w_{hh}}* {\mathbf h_{(t-1)}} + {\mathbf b_{hh}}).

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `Jeffrey Elman, Finding Structure in Time.
          <https://crl.ucsd.edu/~elman/Papers/fsit.pdf>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Input N-D array with shape :math:`(D, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Input N-D array with shape :math:`(L-1, D, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Input N-D array with shape :math:`(L, D, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        nonlinearity(string): Type of nonlinearity applied to the input sequence. Must be either tanh or relu. Default is tanh. [default= `'tanh'` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.RNN(ctx, num_layers, nonlinearity, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
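
# Illustrative shape sketch (not part of the generated code): a single-layer,
# unidirectional RNN (L=1, D=1) with assumed sizes T=5, B=2, I=4, H=3.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> T, B, I, L, D, H = 5, 2, 4, 1, 1, 3
# >>> x = nn.Variable((T, B, I))
# >>> h = nn.Variable((L, D, B, H))
# >>> w0 = nn.Variable((D, H, I + H))        # weight_l0
# >>> b = nn.Variable((L, D, H))             # bias
# >>> y, hn = F.rnn(x, h, w0, bias=b, num_layers=L, nonlinearity='tanh')
# >>> y.shape, hn.shape
# ((5, 2, 3), (1, 1, 2, 3))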
@function_api
def lstm(ctx, x, h, c, weight_l0, weight=None, bias=None, num_layers=1, dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    N-Step LSTM layer.

    .. math::
        {\mathbf f_t} &=& {\mathbf \sigma}( {\mathbf W_f} *{\mathbf x_t} + {\mathbf U_f}* {\mathbf h_{(t-1)}} + {\mathbf b_f})\\
        {\mathbf i_t} &=& {\mathbf \sigma}( {\mathbf W_i} *{\mathbf x_t} + {\mathbf U_i}* {\mathbf h_{(t-1)}} + {\mathbf b_i})\\
        {\mathbf o_t} &=& {\mathbf \sigma}( {\mathbf W_o} *{\mathbf x_t} + {\mathbf U_o}* {\mathbf h_{(t-1)}} + {\mathbf b_o})\\
        {\mathbf c_t} &=& {\mathbf f_t}\odot {\mathbf c_{(t-1)}} + {\mathbf i_t}\odot {\mathbf \tanh}({\mathbf W_c}*{\mathbf x_t} + {\mathbf U_c} *{\mathbf h_{(t-1)}} + {\mathbf b_c})\\
        {\mathbf h_t} &=& {\mathbf o_t} \odot {\mathbf \tanh}({\mathbf c_t}).

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `S. Hochreiter and J. Schmidhuber, Long Short-Term Memory.
          <https://www.bioinf.jku.at/publications/older/2604.pdf>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        c(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Weight parameters for the first layer. Shape is :math:`(D, 4, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Weight parameters for the second layer and above. Shape is :math:`(L-1, D, 4, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L`). Shape is :math:`(L, D, 4, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`. Its memory layout can be reshaped as :math:`(T, B, D, H)`.
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`
        ~nnabla.Variable: Output :math:`c_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, c, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.LSTM(ctx, num_layers, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def gru(ctx, x, h, weight_l0, weight=None, bias=None, num_layers=1, dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    N-Step GRU layer.

    .. math::
        {\mathbf r_t} &=& {\mathbf \sigma}( {\mathbf W_r} *{\mathbf x_t} + {\mathbf U_r}* {\mathbf h_{(t-1)}} + {\mathbf b_r})\\
        {\mathbf z_t} &=& {\mathbf \sigma}( {\mathbf W_z} *{\mathbf x_t} + {\mathbf U_z}* {\mathbf h_{(t-1)}} + {\mathbf b_z})\\
        {\mathbf n_t} &=& {\mathbf \tanh}( {\mathbf W_n}{\mathbf x_t}+ {\mathbf b_{in}}+ {\mathbf r_t}\odot( {\mathbf U_n}{\mathbf h_{t-1}}+ {\mathbf b_{hn}})) \\
        {\mathbf h_t} &=& (1- {\mathbf z_t})\odot {\mathbf n_t} + {\mathbf z_t}\odot {\mathbf h_{t-1}}.

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `K. Cho et al., Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation.
          <https://www.aclweb.org/anthology/D14-1179>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Weight parameters for the first layer. Shape is :math:`(D, 3, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Weight parameters for the second layer and above. Shape is :math:`(L-1, D, 3, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L`). Shape is :math:`(L, D, 4, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`. Its memory layout can be reshaped as :math:`(T, B, D, H)`.
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.GRU(ctx, num_layers, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def convolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    N-D Convolution with bias.

    See references for dilated convolution (a.k.a. atrous convolution).

    References:

        * `Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.
          <https://arxiv.org/abs/1606.00915>`_

        * `Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
          <https://arxiv.org/abs/1511.07122>`_

    Note:

        Convolution is a computationally intensive operation that should preferably be run with the `cudnn` backend. NNabla then uses CuDNN library functions to determine and cache the fastest algorithm for the given set of convolution parameters, which results in additional memory consumption which may pose a problem for GPUs with insufficient memory size. In that case, the `NNABLA_CUDNN_WORKSPACE_LIMIT` environment variable can be used to restrict the choice of algorithms to those that fit the given workspace memory limit, expressed in bytes. In some cases it may also be desired to restrict the automatic search to algorithms that produce deterministic (reproducible) results. This can be requested by setting the environment variable `NNABLA_CUDNN_DETERMINISTIC` to a non-zero value.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C' \times C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Convolution(ctx, base_axis, pad, stride, dilation, group, channel_last)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
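
# Illustrative usage sketch (not part of the generated code): a 2-D convolution and
# the spatial output size it produces according to the formula above. Shapes are
# assumptions chosen for the example only.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable((1, 3, 32, 32))            # (B, C, H, W)
# >>> w = nn.Variable((16, 3, 3, 3))             # (C', C, K_1, K_2)
# >>> y = F.convolution(x, w, pad=(1, 1), stride=(2, 2), dilation=(1, 1))
# >>> y.shape                                    # L' = (32 + 2*1 - 1*(3-1) - 1)//2 + 1 = 16
# (1, 16, 16, 16)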
@function_api
def fused_convolution(ctx, x, weight, bias=None, beta=None, gamma=None, mean=None, variance=None, z=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, decay_rate=0.9, eps=1e-05, batch_stat=True, nonlinearity='relu', nonlinearity_args=list(), pad_mode='constant', constant_value=0, n_outputs=-1, outputs=None):
    r"""
    Fused operation of Pad, Convolution, Batch Normalization, Add2 and Activation.

    This is an equivalent operation to the following, but may be more computationally efficient depending on the backend implementation (currently we don't provide an efficient implementation on any backend).

    .. code-block:: python

        h = F.pad(x, *pad_opts)
        h = F.convolution(h, weight, bias, pad=(0, ...), *conv_opts)
        h = F.batch_normalization(h, beta, gamma, mean, variance, *bn_opts)
        y = F.relu(h + z)

    You can optionally disable either of pad, batch normalization, residual addition and activation.

    Args:
        x(~nnabla.Variable): N-D array of input.
        weight(~nnabla.Variable): `weight` in :meth:`~nnabla.functions.convolution`. [parameter]
        bias(~nnabla.Variable): `bias` in :meth:`~nnabla.functions.convolution`. [optional][parameter]
        beta(~nnabla.Variable): `beta` in :meth:`~nnabla.functions.batch_normalization`. [optional][parameter]
        gamma(~nnabla.Variable): `gamma` in :meth:`~nnabla.functions.batch_normalization`. [optional][parameter]
        mean(~nnabla.Variable): `mean` in :meth:`~nnabla.functions.batch_normalization`. [optional]
        variance(~nnabla.Variable): `variance` in :meth:`~nnabla.functions.batch_normalization`. [optional]
        z(~nnabla.Variable): N-D array of a residual input. By specifying None, the activation function will follow immediately after the BN operation. [optional]
        base_axis(int): `base_axis` in :meth:`~nnabla.functions.convolution`. Note that the batch normalization `axes` is determined by this and the `channel_last` option. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): `pad_width` in :meth:`~nnabla.functions.pad`. If `len(pad) == (len(x.shape) - (base_axis+1))`, considered as `pad` in :meth:`~nnabla.functions.convolution`. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): `stride` in :meth:`~nnabla.functions.convolution`. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): `dilation` in :meth:`~nnabla.functions.convolution`. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): `group` in :meth:`~nnabla.functions.convolution`. [default= `1` ]
        channel_last(bool): `channel_last` in :meth:`~nnabla.functions.convolution`. [default= `False` ]
        decay_rate(float): `decay_rate` in :meth:`~nnabla.functions.batch_normalization`. [default= `0.9` ]
        eps(float): `eps` in :meth:`~nnabla.functions.batch_normalization`. [default= `1e-05` ]
        batch_stat(bool): `batch_stat` in :meth:`~nnabla.functions.batch_normalization`. [default= `True` ]
        nonlinearity(string): Activation type as string. The following is a list of available activation types and optional parameters specified as a vector of float by `nonlinearity_args`.

            =============== ===============================
            Activation type Arguments (`nonlinearity_args`)
            =============== ===============================
            identity        No argument
            relu            No argument
            sigmoid         No argument
            tanh            No argument
            leaky_relu      [alpha] (see LeakyReLU doc)
            elu             [alpha] (see ELU doc)
            relu6           No argument
            =============== ===============================

            [default= `'relu'` ]
        nonlinearity_args(repeated float): Optional arguments of nonlinearity as a vector of float. See the description of the `nonlinearity` argument. [default= `list()` ]
        pad_mode(string): `mode` in :meth:`~nnabla.functions.pad`. [default= `'constant'` ]
        constant_value(float): `constant_value` in :meth:`~nnabla.functions.pad`. [default= `0` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    if mean is not None:
        inputs += [mean]
    if variance is not None:
        inputs += [variance]
    if z is not None:
        inputs += [z]
    return F.FusedConvolution(ctx, base_axis, pad, stride, dilation, group, channel_last, decay_rate, eps, batch_stat, nonlinearity, nonlinearity_args, pad_mode, constant_value)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def depthwise_convolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, multiplier=1, n_outputs=-1, outputs=None):
    r"""
    N-D Depthwise Convolution with bias.

    References:

        * `F. Chollet. Xception: Deep Learning with Depthwise Separable Convolutions.
          <https://arxiv.org/abs/1610.02357>`_

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(1 + N)`-D array (:math:`C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        multiplier(int): Number of output feature maps per input feature map. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        The output map size :math:`C'` is :math:`C` multiplied by :math:`m`

        .. math::
            C' = m \times C,

        where :math:`m` is the multiplier.

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.DepthwiseConvolution(ctx, base_axis, pad, stride, dilation, multiplier)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def deconvolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, output_padding=None, n_outputs=-1, outputs=None):
    r"""
    N-D deconvolution, also known as transposed convolution, with bias operates backward convolution (derivative of the output w.r.t. the input) plus channel-wise learned bias.

    The weights are specified in the same manner as :meth:`~nnabla.functions.convolution`, as if it was an ordinary convolution function. The forward operation of :meth:`~nnabla.functions.deconvolution` will then be operationally equivalent to the backward pass of :meth:`~nnabla.functions.convolution`. Therefore, the number of input channels (can be seen as output channels of forward convolution) is specified in the first dimension, and the number of the output channels divided by the number of groups is specified in the second dimension.

    For `stride > 1`, a parameter-wise identical deconvolution on the output of a convolution may not produce the same output shape as the input to the convolution if, due to striding, the convolution did not fully cover the input spatial dimension. The `output_padding` parameter can then be used to appropriately increase the calculated output shape. Note that this is used to find the output shape for the deconvolution operation, but not to add zero-padding to the output.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C \times C' \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]
        output_padding(:obj:`tuple` of :obj:`int`): Additional size added to the output shape. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = s_i (L_i - 1) - 2 p_i + d_i (k_i - 1) + 1,

        where :math:`s_i` is the stride, :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, and :math:`k_i` is the kernel size for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    if output_padding is None:
        output_padding = (0,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Deconvolution(ctx, base_axis, pad, stride, dilation, group, channel_last, output_padding)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def depthwise_deconvolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, divisor=1, n_outputs=-1, outputs=None):
    r"""
    Depthwise deconvolution computes the transposed depthwise convolution with bias for one-dimensional and two-dimensional input data.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(1 + N)`-D array (:math:`C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        divisor(int): Number of input feature maps per output feature map. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        The output map size :math:`C'` is :math:`C` divided by :math:`d`

        .. math::
            C' = \frac{C}{d},

        where :math:`d` is the divisor.

        A spatial size of the output is calculated as

        .. math::
            L'_i = s_i (L_i - 1) - 2 p_i + d_i (k_i - 1) + 1,

        where :math:`s_i` is the stride, :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, and :math:`k_i` is the kernel size for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.DepthwiseDeconvolution(ctx, base_axis, pad, stride, dilation, divisor)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def deformable_convolution(ctx, x, weight, offset, mask=None, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, deformable_group=1, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    2-D Deformable Convolution with bias. Another convolution with fixed output channels must be passed externally to calculate the offsets and mask. Mask should be normalized to :math:`[0,1]` interval.

    .. math::
        \begin{eqnarray}
        y(p) = \sum_{k=1}^{K} w_k \cdot x(p + p_k + \Delta p_k) \cdot \Delta m_k,
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output, :math:`w_k` is the weight, :math:`p` is the pixel location of interest, :math:`p_k` is the fixed displacement e.g., :math:`p_k \in \{(-1, -1), (-1, 0), \ldots (1, 1)\}` for the 2D 3x3 receptive field, :math:`\Delta p_k` is the learnable displacement, and :math:`\Delta m_k` is the learnable scale normalized in :math:`[0, 1]` by a function like the sigmoid. Note that :math:`\Delta p_k` and :math:`\Delta m_k` are sample-dependent, location-dependent, and feature-independent.

    References:

        * `Dai et al., Deformable Convolutional Networks.
          <https://arxiv.org/abs/1703.06211>`_

        * `Zhu et al., Deformable ConvNets v2: More Deformable, Better Results.
          <https://arxiv.org/abs/1811.11168>`_

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C' \times C \times K_1 \times ... \times K_N`). [parameter]
        offset(~nnabla.Variable): Offsets for deformable convolutions. Shape is fixed to :math:`(N, deformable{\_}group \times 2 \times Kh \times Kw, H, W)`. Offsets must be calculated externally through a separate convolution layer.
        mask(~nnabla.Variable): Normalized mask for deformable convolutions v2. Shape is fixed to :math:`(N, deformable{\_}group \times Kh \times Kw, H, W)`. Masks must be calculated externally together with the offsets through a separate convolution layer. [optional]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        deformable_group(int): Number of deformable groups of channels. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight, offset]
    if mask is not None:
        inputs += [mask]
    if bias is not None:
        inputs += [bias]
    return F.DeformableConvolution(ctx, base_axis, pad, stride, dilation, group, deformable_group, channel_last)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def adaptive_separable_convolution(ctx, x, vertical_kernel, horizontal_kernel, n_outputs=-1, outputs=None):
    r"""
    2-D Adaptive Separable Convolution for NCHW (the channel-first tensor). Sample and pixel dependent vertical and horizontal kernels are dynamically generated ones, which are used for approximating a feature-independent 2-D kernel in this function. Thus, the kernel used in this function is dependent on samples and pixels but independent of features.

    If padding is needed, apply the pad function to the input :math:`x` before this function.

    Adaptive separable convolution is formulated as

    .. math::
        \tilde{I}(c, h, w) = \sum_{j, i} K_v(j, h, w) \times K_h(i, h, w) \times I(c, h + j, w + i),

    where :math:`I(c, h, w)` and :math:`\tilde{I}(c, h, w)` are the input and output images at :math:`c`-th channel, :math:`h`-th height, :math:`w`-th width. :math:`K_v(:, h, w)` and :math:`K_h(:, h, w)` are vertical and horizontal 1-D kernels at :math:`h`-th height and :math:`w`-th width.

    References:

        * `Simon Niklaus, Long Mai, Feng Liu, Video Frame Interpolation via Adaptive Separable Convolution.
          <https://arxiv.org/abs/1708.01692>`_

        * `Mart Kartasev, Carlo Rapisarda, Dominik Fay, Implementing Adaptive Separable Convolution for Video Frame Interpolation.
          <https://arxiv.org/abs/1809.07759>`_

    Args:
        x(~nnabla.Variable): :math:`4-D` array (:math:`B \times C \times H \times W`)
        vertical_kernel(~nnabla.Variable): :math:`4-D` array (:math:`B \times K_v \times H \times W`)
        horizontal_kernel(~nnabla.Variable): :math:`4-D` array (:math:`B \times K_h \times H \times W`)

    Returns:
        ~nnabla.Variable: :math:`4-D` array (:math:`B \times C \times H - K_v + 1 \times W - K_h + 1`)

    """
    return F.AdaptiveSeparableConvolution(ctx)(x, vertical_kernel, horizontal_kernel, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def max_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Max pooling. It pools the maximum values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \max_{k_1, k_2 \in K} (x_{i_1 + k_1, i_2 + k_2})

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Maximum values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.MaxPooling(ctx, kernel, stride, ignore_border, pad, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
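
# Illustrative usage sketch (not part of the generated code): 2x2 max pooling with
# the default stride (equal to the kernel) halves each spatial dimension.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable((1, 3, 32, 32))
# >>> F.max_pooling(x, kernel=(2, 2)).shape
# (1, 3, 16, 16)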
@function_api
def average_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, including_pad=True, n_outputs=-1, outputs=None):
    r"""
    Average pooling. It pools the averaged values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \frac{1}{K_1 K_2} \sum_{k1} \sum_{k2} x_{i_1 + k_1, i_2 + k_2}

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]
        including_pad(bool): If true, border padding values are considered for the output. [default= `True` ]

    Returns:
        ~nnabla.Variable: Average values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.AveragePooling(ctx, kernel, stride, ignore_border, pad, channel_last, including_pad)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def global_average_pooling(ctx, x, n_outputs=-1, outputs=None):
    r"""
    .. WARNING::
        This function is experimental support, so please do not actively use it.

    Global average pooling. It pools an averaged value from the whole image.

    Args:
        x(~nnabla.Variable): Input variable.

    Returns:
        ~nnabla.Variable: Average values variable

    """
    return F.GlobalAveragePooling(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sum_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Sum pooling. It pools the summed values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \sum_{k1} \sum_{k2} x_{i_1 + k_1, i_2 + k_2}

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Summed values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.SumPooling(ctx, kernel, stride, ignore_border, pad, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def unpooling(ctx, x, kernel, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Inverse operation of pooling. It spreads the input values:

    .. math::
        y_{k_1 i_1 + j_1, k_2 i_2 + j_2} = x_{i_1, i_2}

    where :math:`x_{i_1, i_2}` is the input and :math:`y_{k_1 i_1 + j_1, k_2 i_2 + j_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Spread values variable

    """
    return F.Unpooling(ctx, kernel, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def embed(ctx, x0, w, n_outputs=-1, outputs=None):
    r"""
    Embed slices of a matrix/tensor with indexing array/tensor.

    Args:
        x0(~nnabla.Variable): Indices with shape :math:`(I_0, ..., I_N)`
        w(~nnabla.Variable): Weights with shape :math:`(W_0, ..., W_M)` [parameter]

    Returns:
        ~nnabla.Variable: Output with shape :math:`(I_0, ..., I_N, W_1, ..., W_M)`

    """
    return F.Embed(ctx)(x0, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
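
# Illustrative usage sketch (not part of the generated code): indices of shape (2, 2)
# gather rows from a weight matrix of shape (5, 8), giving output shape (2, 2, 8).
# The vocabulary size 5 and embedding dimension 8 are assumptions for the example.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> idx = nn.Variable.from_numpy_array(np.array([[0, 2], [1, 1]]))
# >>> w = nn.Variable((5, 8))
# >>> F.embed(idx, w).shape
# (2, 2, 8)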
@function_api
def roi_align(ctx, input, boxes, output_size, spatial_scale=(1.0, 1.0), sampling_ratio=None, channel_last=None, n_outputs=-1, outputs=None):
    r"""
    Map Regions of Interest (RoI) defined by bounding `boxes` to features of `output_size` height and width using bilinear interpolation with `sampling_ratio` points in the interpolation grid.

    >>> import numpy as np, nnabla as nn, nnabla.functions as F
    >>> nn.set_auto_forward(True)
    >>> input = F.pad(F.constant(1, (1, 1, 2, 2)) * 2, (1, 1, 1, 1), "constant", 1)
    >>> print(input.d)
    [[[[1. 1. 1. 1.]
       [1. 2. 2. 1.]
       [1. 2. 2. 1.]
       [1. 1. 1. 1.]]]]
    >>> boxes = nn.Variable.from_numpy_array([[0, 0, 0, 4, 4], [0, 1, 1, 3, 3]])
    >>> output = F.roi_align(input, boxes, (2, 2))
    >>> print(output.d[0])
    [[[1.25 1.25]
      [1.25 1.25]]]
    >>> print(output.d[1])
    [[[2. 2.]
      [2. 2.]]]

    The `spatial_scale` argument tuple may be used to appropriately scale the box coordinates, for example, to scale normalized box coordinate to the input height and width dimensions.

    >>> input = F.reshape(F.arange(1, 13), (1, 1, 3, 4))
    >>> print(input.d)
    [[[[ 1.  2.  3.  4.]
       [ 5.  6.  7.  8.]
       [ 9. 10. 11. 12.]]]]
    >>> boxes = nn.Variable.from_numpy_array([[0, 1/4, 1/3, 3/4, 2/30]])
    >>> output = F.roi_align(input, boxes, (1, 2), spatial_scale=(3, 4))
    >>> print(output.d)
    [[[[6. 7.]]]]

    References:

        * `He et al., Mask R-CNN. <https://arxiv.org/abs/1703.06870v3>`_

    Args:
        input(~nnabla.Variable): N-D array with shape :math:`(N, H, W, C)` or :math:`(N, C, H, W)`.
        boxes(~nnabla.Variable): N-D array with shape :math:`(K, 5)` containing box coordinates in (b, x1, y1, x2, y2) format where b is the batch index. Note that an invalid (out-of-range) batch index will generate an error only when running on CPU; when using a GPU context the batch index values are clipped to the range of input samples.
        output_size(:obj:`tuple` of :obj:`int`): the height and width of the output feature maps.
        spatial_scale(repeated float): Scaling factor from box to input coordinates, as (x, y). [default= `(1.0, 1.0)` ]
        sampling_ratio(int): The number of sampling points used for interpolation. Computed as `ceil((y2 - y1) / output_size[0])` for height and likewise for width if `sampling_ratio <= 0`. [default= `-1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with shape :math:`(K, C, output\_size[0], output\_size[1])` or :math:`(K, output\_size[0], output\_size[1], C)`.

    """
    if sampling_ratio is None:
        sampling_ratio = -1
    if channel_last is None:
        channel_last = False
    return F.RoiAlign(ctx, output_size, spatial_scale, sampling_ratio, channel_last)(input, boxes, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise sigmoid function.

    .. math::
        f(x) = \frac{1}{1 + \exp(-x)},

    Args:
        x(~nnabla.Variable): Input

    Returns:
        ~nnabla.Variable: Output

    """
    return F.Sigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def swish(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise swish function, by Ramachandran et al. (2017).

    .. math::
        y_i = \frac{x_i}{1 + \exp(-x_i)},

    References:

        * `Prajit Ramachandran, Barret Zoph, and Quoc V. Le, Swish: a Self-Gated Activation Function, arXiv:1710.05941 [cs.NE]
          <https://arxiv.org/abs/1710.05941>`_

    Args:
        x(~nnabla.Variable): Input

    Returns:
        ~nnabla.Variable: Output

    """
    return F.Swish(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def tanh(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise hyperbolic tangent (tanh) function.

    .. math::
        y_i = \tanh (x_i)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Tanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def relu(ctx, x, inplace=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise Rectified Linear Unit (ReLU) function.

    .. math::
        y_i = \max (0, x_i)

    Args:
        x(~nnabla.Variable): N-D array
        inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ReLU(ctx, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def leaky_relu(ctx, x, alpha=0.1, inplace=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise Leaky Rectified Linear Unit (ReLU) function.

    It is defined as:

    .. math::
        y_i = \alpha * \min(0, x_i) + \max (0, x_i)

    Args:
        x(~nnabla.Variable): N-D array
        alpha(float): The slope value multiplied to negative numbers. :math:`\alpha` in the definition. [default= `0.1` ]
        inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.LeakyReLU(ctx, alpha, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softmax(ctx, x, axis=None, n_outputs=-1, outputs=None):
    r"""
    Softmax normalization. Calculates

    .. math::
        y_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}

    along the dimension specified by `axis`, where :math:`x_i` is the input and :math:`y_i` is the output.

    Args:
        x(~nnabla.Variable): N-D array. Typically indicates a score.
        axis(int): Axis normalization is taken. [default= `len(x.shape) - 1` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    if axis is None:
        axis = len(x.shape) - 1
    return F.Softmax(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
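
# Illustrative usage sketch (not part of the generated code): softmax over the
# default (last) axis produces rows that sum to one.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> nn.set_auto_forward(True)
# >>> x = nn.Variable.from_numpy_array(np.array([[1.0, 2.0, 3.0]]))
# >>> y = F.softmax(x)
# >>> np.allclose(y.d.sum(axis=-1), 1.0)
# True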
@function_api
def log_softmax(ctx, x, axis=None, n_outputs=-1, outputs=None):
    r"""
    Fused operation of Softmax normalization followed by log, which is defined as

    .. math::
        y_i = \log \frac{\exp(x_i)}{\sum_j \exp(x_j)},

    where :math:`x_i` is the input and :math:`y_i` is the output at the i-th channel.
    An advantage of this fusion is reducing the numerical instability due to the log application.
    The original definition can be rewritten as

    .. math::
        y_i = x_i - \max_j(x_j) - \log\left(\sum_j \exp(x_j - \max_k(x_k))\right).

    It is more stable as a log is always applied to a value :math:`\ge 1`, while a log can be evaluated for 0 in the non-fused operation.

    Also, backward gradient computation is more stable than the original one as it doesn't perform division by x due to a gradient of log. The definition is as following.

    .. math::
        dx_i = dy_i - y_i * \sum_j dy_j

    where :math:`dx_i` and :math:`dy_i` denote gradients of loss wrt :math:`x_i` and :math:`y_i` respectively.

    Args:
        x(~nnabla.Variable): N-D array. Typically indicates a score.
        axis(int): Axis normalization is taken. [default= `len(x.shape) - 1` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    if axis is None:
        axis = len(x.shape) - 1
    return F.LogSoftmax(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
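
# Illustrative numerical sketch (not part of the generated code) of the stability
# argument above: composing log with softmax can underflow to -inf for extreme inputs,
# while the fused log_softmax stays finite. Assumes the non-fused softmax underflows
# for these values.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> nn.set_auto_forward(True)
# >>> x = nn.Variable.from_numpy_array(np.array([[-1000.0, -2000.0]]))
# >>> np.isinf(F.log(F.softmax(x)).d).any()      # exp(-1000) rounds to 0, log(0) = -inf
# True
# >>> np.isinf(F.log_softmax(x).d).any()         # fused form yields [0, -1000]
# False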
@function_api
def elu(ctx, x, alpha=1.0, n_outputs=-1, outputs=None):
    r"""
    Element-wise Exponential Linear Unit (ELU) function.

    .. math::
        y_i= \left\{
            \begin{array}{ll}
            x_i & (x > 0)\\
            \alpha (\exp(x_i) - 1) & (x \leq 0)
            \end{array} \right..

    References:

        * `Clevert et al., Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).
          <http://arxiv.org/abs/1511.07289>`_

    Args:
        x(~nnabla.Variable): N-D array
        alpha(float): Coefficient for negative outputs. :math:`\alpha` in definition [default= `1.0` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ELU(ctx, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def selu(ctx, x, scale=1.05070098735548, alpha=1.673263242354377, n_outputs=-1, outputs=None):
    r"""
    Element-wise Scaled Exponential Linear Unit (SELU) function by Klambauer et al. (2017).

    .. math::
        y_i= \lambda \left\{
            \begin{array}{ll}
            x_i & (x > 0)\\
            \alpha (\exp(x_i) - 1) & (x \leq 0)
            \end{array} \right..

    The coefficients :math:`\lambda` and :math:`\alpha` default to the following values :math:`\lambda_{01}` and :math:`\alpha_{01}`, respectively, provided by Klambauer et al. (2017):

    .. math::
        \begin{array}{lll}
            \lambda_{01} &=& \left( 1 - \operatorname{erfc}\left( \frac{1}{\sqrt{2}} \right) \sqrt{e} \right) \sqrt{2 \pi} \\
            && \left( 2 \operatorname{erfc} \left( \sqrt{2} \right) e^2 + \pi \operatorname{erfc}\left( \frac{1}{\sqrt{2}} \right)^2 e \right. \\
            && \left. - 2(2 + \pi) \operatorname{erfc} \left( \frac{1}{\sqrt{2}} \right) \sqrt{e} + \pi + 2 \right)^{-1/2} \\
            &\approx& 1.0507 \\
            \alpha_{01} &=& - \frac {\sqrt {\frac {2}{\pi}}} {\operatorname{erfc} \left( \frac{1}{\sqrt{2}} \right) \exp \left(\frac {1} {2} \right) - 1} \\
            &\approx& 1.67326
        \end{array}

    References:

        * `Klambauer, G., Unterthiner, T., Mayr, A., & Hochreiter, S. (2017). Self-Normalizing Neural Networks. In Advances in Neural Information Processing Systems (NIPS).
          <https://arxiv.org/abs/1706.02515>`_

    Args:
        x(~nnabla.Variable): N-D array
        scale(float): The coefficient :math:`\lambda` in the definition. [default= `1.05070098735548` ]
        alpha(float): The coefficient :math:`\alpha` in the definition. [default= `1.673263242354377` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SELU(ctx, scale, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
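
# Illustrative check (not part of the generated code): the default `alpha` above can be
# reproduced from the closed-form expression for alpha_01 using only the standard library.
#
# >>> import math
# >>> alpha_01 = -math.sqrt(2 / math.pi) / (math.erfc(1 / math.sqrt(2)) * math.exp(0.5) - 1)
# >>> round(alpha_01, 5)
# 1.67326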
@function_api
def crelu(ctx, x, axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Concatenated Rectified Linear Unit (CReLU) function.
    This function calculates the ReLU of :math:`x` and :math:`-x` , then concatenates the results together at a specified axis, and returns the resulting array.

    References:

        * `Wenling Shang, Kihyuk Sohn, Diogo Almeida, Honglak Lee. Understanding and Improving Convolutional Neural Networks via Concatenated Rectified Linear Units.
          <https://arxiv.org/abs/1603.05201>`_

    Args:
        x(~nnabla.Variable): N-D array.
        axis(int): The ReLU activations of positive inputs and negative inputs are concatenated at axis. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array where axis dimension is doubled by concatenating.

    """
    return F.CReLU(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def celu(ctx, x, alpha=1.0, axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Concatenated Exponential Linear Unit (CELU) function.
    Concatenates ELU outputs of positive and negative inputs together at specified axis.

    Args:
        x(~nnabla.Variable): N-D array.
        alpha(float): Coefficient for negative outputs. :math:`\alpha` in definition. [default= `1.0` ]
        axis(int): The ELU activations of positive inputs and negative inputs are concatenated at axis. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array where axis dimension is doubled by concatenating.

    """
    return F.CELU(ctx, alpha, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def prelu(ctx, x0, x1, base_axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Parametrized Rectified Linear Unit function. Calculates:

    .. math::
        y_i = \max(0, x_i) + w_i \min(0, x_i)

    where negative slope :math:`w` is learned and can vary across channels (an axis specified with `base_axis`).

    Args:
        x0(~nnabla.Variable): (N-D array) Input
        x1(~nnabla.Variable): (N-D array) Weights
        base_axis(int): Dimensions up to base_axis are treated as sample dimensions. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array.

    """
    return F.PReLU(ctx, base_axis)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def gelu(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Gaussian Error Linear Unit (GELU) function.

    .. math::
        GELU(x) = xP(X \leq x) = x \Phi (x)

    which is approximated by

    .. math::
        GELU(x) = 0.5x (1 + \tanh ( \sqrt{2/\pi}(x + 0.044715x^3) ))

    References:

        * `Dan Hendrycks and Kevin Gimpel. Gaussian Error Linear Units (GELUs).
          <https://arxiv.org/abs/1606.08415>`_

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.GELU(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def mish(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Mish activation function.

    .. math::
        Mish(x) = x \tanh(\log(1+\exp(x_i)))

    References:

        * `Diganta Misra. Mish: A Self Regularized Non-Monotonic Neural Activation Function.
          <https://arxiv.org/abs/1908.08681>`_

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Mish(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def relu6(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise ReLU6 function.
    Capping ReLU activation to 6 is often observed to learn sparse features earlier.

    .. math::
        ReLU6(x) = \min(\max(0,x),6)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ReLU6(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def hard_sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Segment-wise linear approximation of sigmoid.
    Preferable when speed of computation is more important than precision.
    Returns :math:`0` if :math:`x < -2.5`.
    Returns :math:`1` if :math:`x > 2.5`.
    Returns :math:`0.2x + 0.5` if :math:`-2.5 <= x <= 2.5`.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.HardSigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def hard_tanh(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise HardTanh function.
    Computationally cheaper than Tanh function.
    Returns :math:`1` if :math:`x > 1`.
    Returns :math:`-1` if :math:`x < -1`.
    Returns :math:`x` otherwise.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.HardTanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def log_sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise LogSigmoid function.

    .. math::
        LogSigmoid(x) = \log(1/(1+\exp(-x_i)))

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.LogSigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softplus(ctx, x, beta=1.0, n_outputs=-1, outputs=None):
    r"""
    Element-wise SoftPlus function.
    Unlike Sigmoid and Tanh that have upper and lower bounds, SoftPlus is only lower-bounded by 0.

    .. math::
        SoftPlus(x) = \frac{1}{\beta} * \log(1+\exp(\beta * x_i))

    Args:
        x(~nnabla.Variable): N-D array
        beta(float): the `beta` value for SoftPlus formulation [default= `1.0` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SoftPlus(ctx, beta)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softsign(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise SoftSign.
    Can be used in place of Tanh function.
    While Tanh converges exponentially, SoftSign converges polynomially.

    .. math::
        SoftSign(x) = x/(1+|x|)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SoftSign(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def tanh_shrink(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise TanhShrink function.

    .. math::
        TanhShrink(x) = x - \tanh(x)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.TanhShrink(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sinc(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise Sinc function.
    Unlike other popular activation functions, it has rises and falls.
    It returns :math:`1` if :math:`x = 0`, and :math:`\sin(x)/x` otherwise.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Sinc(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def fused_batch_normalization(ctx, x, beta, gamma, mean, variance, z=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, nonlinearity='relu', n_outputs=-1, outputs=None):
    r"""
    Batch normalization fused with add2 (adding a residual input) and activation.

    This is an equivalent operation to the following, but is more computationally efficient:

    .. code-block:: python

        h = F.batch_normalization(x, beta, gamma, mean, variance, *opts)
        y = F.relu(h + z)

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned.
        gamma(~nnabla.Variable): N-D array of gamma which is learned.
        mean(~nnabla.Variable): N-D array of running mean (modified during forward execution).
        variance(~nnabla.Variable): N-D array of running variance (modified during forward execution).
        z(~nnabla.Variable): N-D array of a residual input. By specifying None, the activation function will follow immediately after the BN operation. [optional]
        axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ]
        decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ]
        nonlinearity(string): Activation chosen from ('relu'). [default= `'relu'` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x, beta, gamma, mean, variance]
    if z is not None:
        inputs += [z]
    return F.FusedBatchNormalization(ctx, axes, decay_rate, eps, batch_stat, nonlinearity)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def batch_normalization(ctx, x, beta=None, gamma=None, mean=None, variance=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Batch normalization.

    .. math::
        \begin{eqnarray}
          \mu &=& \frac{1}{M} \sum x_i \\
          \sigma^2 &=& \frac{1}{M} \sum \left(x_i - \mu\right)^2 \\
          \hat{x}_i &=& \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\
          y_i &=& \hat{x}_i \gamma + \beta.
        \end{eqnarray}

    At testing time, the mean and variance values used are those that were computed during training by moving average.

    References:

        * `Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
          <https://arxiv.org/abs/1502.03167>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        mean(~nnabla.Variable): N-D array of running mean (modified during forward execution). [optional]
        variance(~nnabla.Variable): N-D array of running variance (modified during forward execution). [optional]
        axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ]
        decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    if mean is not None:
        inputs += [mean]
    if variance is not None:
        inputs += [variance]
    return F.BatchNormalization(ctx, axes, decay_rate, eps, batch_stat, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def group_normalization(ctx, x, beta=None, gamma=None, num_groups=None, channel_axis=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Group Normalization over an input tensor, which is defined as:

    .. math::
        \begin{eqnarray}
          \mu^g &=& \frac{1}{H} \sum_{i=1}^{H} x_i^g \\
          \sigma^g &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^g - \mu^g\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^g}{\sigma^g} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^g` and :math:`\sigma^g` are the mean and std of each group which contains `num_channels / num_groups` channels, and :math:`\gamma` and :math:`\beta` are adaptive gains and biases.

    The input channels, specified by :attr:`channel_axis`, are separated into :attr:`num_groups` groups, and the mean and std are calculated over each group. For example, if the input shape is [B, C, H, W] (= channel_axis=1, batch_axis=0), an input variable is once reshaped to [B, num_groups, C / num_groups, H, W] and standardized by its mean and std whose shapes are [B, num_groups, 1, 1, 1]. Finally, an output variable is reshaped again to the original input shape (= [B, C, H, W] in the case above).

    References:

        * `Yuxin Wu, Kaiming He, Group Normalization.
          <https://arxiv.org/abs/1803.08494>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        num_groups(int): A number of groups. The channel dim of 'x' must be integer multiple of `num_groups`. [default= `1` ]
        channel_axis(int): Channel axis. [default= `1` ]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if num_groups is None:
        num_groups = 1
    if channel_axis is None:
        channel_axis = 1
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.GroupNormalization(ctx, num_groups, channel_axis, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def instance_normalization(ctx, x, beta=None, gamma=None, channel_axis=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Instance Normalization over an input tensor, which is defined as

    .. math::
        \begin{eqnarray}
          \mu^i &=& \frac{1}{H} \sum_{i=1}^{H} x_i^i \\
          \sigma^i &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^i - \mu^i\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^i}{\sigma^i} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^i` and :math:`\sigma^i` are the mean and std of each instance which is separately calculated for each batch and channel, and :math:`\gamma` and :math:`\beta` are adaptive gains and biases.

    If the input shape is [B, C, H, W] (= channel_axis=1, batch_axis=0), the shape of calculated mean and std are [B, C, 1, 1].

    References:

        * `Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky, Instance Normalization: The Missing Ingredient for Fast Stylization.
          <https://arxiv.org/abs/1607.08022>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        channel_axis(int): Channel axis. [default= `1` ]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if channel_axis is None:
        channel_axis = 1
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.InstanceNormalization(ctx, channel_axis, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def layer_normalization(ctx, x, beta=None, gamma=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Layer Normalization over an input tensor, which is defined as

    .. math::
        \begin{eqnarray}
          \mu^l &=& \frac{1}{H} \sum_{i=1}^{H} x_i^l \\
          \sigma^l &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^l - \mu^l\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^l}{\sigma^l} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^l` and :math:`\sigma^l` are the mean and std of each layer which is separately calculated for each batch, and :math:`\beta` and :math:`\gamma` are adaptive biases and gains.

    If the input shape is [B, C, H, W] (= batch_axis=0), the shape of calculated mean and std are [B, 1, 1, 1].

    References:

        * `Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton, Layer Normalization.
          <https://arxiv.org/abs/1607.06450>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.LayerNormalization(ctx, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def norm_normalization(ctx, x, p=None, axes=None, eps=1e-12, n_outputs=-1, outputs=None):
    r"""
    Norm normalization.

    .. math::
        y = \frac{x_i}{\|x\|_p}

    Args:
        x(~nnabla.Variable): N-D array.
        p(float): Order of the norm. [default= `2` ]
        axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced. [default= `range(x.ndim)` ]
        eps(float): Epsilon for the normalization. This `eps` is added before taking the p-th root in the norm computation.
[default= `1e-12` ] Returns: ~nnabla.Variable: N-D array """ if p is None: p = 2 if axes is None: axes = range(x.ndim) return F.NormNormalization(ctx, p, axes, eps)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def sync_batch_normalization(ctx, x, beta, gamma, mean, variance, comm, group=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, n_outputs=-1, outputs=None): r""" Synchronized Batch Normalization: For some tasks (e.g., semantic segmentation), the batch size can be too small and the BatchNormalization layer might not work well. The SyncBatchNormalization layer solves this problem by synchronizing the batch statistics (mean and variance) across multiple processes. .. math:: \begin{eqnarray} \mu &=& \frac{1}{M} \sum x_i \\ \sigma^2 &=& \frac{1}{M} \left(\sum x_i - \mu\right)^2 \\ \hat{x}_i &=& \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ y_i &=& \hat{x}_i \gamma + \beta. \end{eqnarray} References: * Implementing Synchronized Multi-GPU Batch Normalization https://hangzhang.org/PyTorch-Encoding/notes/syncbn.html Note: Since v1.32.0, the gradients of beta and gamma are not synchronized after backward computation (they had been synchronized previously). Users are responsible for synchronizing the gradients of beta and gamma by performing all-reduce, which is naturally done by performing all-reduce for the gradients of all the parameters as is usually done in data-parallel distributed training. Args: x(~nnabla.Variable): N-D array of input. beta(~nnabla.Variable): N-D array of beta which is learned. gamma(~nnabla.Variable): N-D array of gamma which is learned. mean(~nnabla.Variable): N-D array of running mean (modified during forward execution). variance(~nnabla.Variable): N-D array of running variance (modified during forward execution). comm(Communicator): The communicator. group(string): The name of the communicator group. [default= `world` ] axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ] decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ] Returns: ~nnabla.Variable: N-D array """ if group is None: group = 'world' return F.SyncBatchNormalization(ctx, comm, group, axes, decay_rate, eps, batch_stat)(x, beta, gamma, mean, variance, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def tensor_normalization(ctx, x, beta=None, gamma=None, axes=(1,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None): r""" General tensor normalization. Input variable `x` is normalized by mean and std calculated by `x` itself. Mean and variance are calculated along `axes`. For example, if the input shape is (B, C, H, W) and axes is [0, 1], the shapes of the calculated mean and std are (B, C, 1, 1). Args: x(~nnabla.Variable): N-D array of input. beta(~nnabla.Variable): N-D array of beta which is learned. [optional] gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional] axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] no_scale(bool): If `True`, the scale term is omitted. [default= `False` ] no_bias(bool): If `True`, the bias term is omitted.
[default= `False` ] Returns: ~nnabla.Variable: N-D array """ inputs = [x] if beta is not None: inputs += [beta] if gamma is not None: inputs += [gamma] return F.TensorNormalization(ctx, axes, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
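The normalization functions above share a common pattern: affine parameters and running statistics are passed as Variables whose shapes match the normalized axes. The sketch below shows ``batch_normalization`` as a representative example; the parameter shape ``(1, C, 1, 1)`` for ``axes=(1,)`` follows the usual convention and should be treated as an assumption.

.. code-block:: python

    # Hedged usage sketch for batch_normalization on an NCHW input.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    B, C, H, W = 8, 3, 16, 16
    x = nn.Variable.from_numpy_array(np.random.randn(B, C, H, W).astype(np.float32))
    # Assumed parameter/statistics shape: size C along the normalized axis, 1 elsewhere.
    beta = nn.Variable.from_numpy_array(np.zeros((1, C, 1, 1), dtype=np.float32))
    gamma = nn.Variable.from_numpy_array(np.ones((1, C, 1, 1), dtype=np.float32))
    mean = nn.Variable.from_numpy_array(np.zeros((1, C, 1, 1), dtype=np.float32))
    variance = nn.Variable.from_numpy_array(np.ones((1, C, 1, 1), dtype=np.float32))

    y = F.batch_normalization(x, beta, gamma, mean, variance, axes=(1,), batch_stat=True)
    y.forward()
    # Per-channel mean of the output should be close to 0 and std close to 1.
    print(y.d.mean(axis=(0, 2, 3)), y.d.std(axis=(0, 2, 3)))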
[docs]@function_api def weight_normalization(ctx, w, g, dim=0, eps=1e-12, n_outputs=-1, outputs=None): r""" Weight normalization. .. math:: \mathbf{w}_{WN} = g \dfrac{\mathbf{w}}{\|\mathbf{w}\|} where :math:`\mathbf{w}` is the input weights to be normalized. and :math:`g` is learnable multiplication factors each of which is applied to each data at `dim`. References: * `Tim Salimans, Diederik P. Kingma, Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks. <https://arxiv.org/abs/1602.07868>`_ Args: w(~nnabla.Variable): N-D array of learnable weights. g(~nnabla.Variable): 1-D array of learnable scales. dim(int): Output dimension. For the other dimensions, the norms are computed. [default= `0` ] eps(float): Epsilon for the normalization. This `eps` is added before taking the sqrt in the norm computation. [default= `1e-12` ] Returns: ~nnabla.Variable: N-D array """ return F.WeightNormalization(ctx, dim, eps)(w, g, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
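The following sketch applies ``weight_normalization`` to a 2-D weight and checks the row norms implied by the formula above (the interpretation that the norm is taken over all dimensions other than ``dim`` follows the argument description and is otherwise an assumption):

.. code-block:: python

    # Hedged sketch: normalize a (out, in) weight along dim=0 with a scale of 2.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    w = nn.Variable.from_numpy_array(np.random.randn(4, 6).astype(np.float32))
    g = nn.Variable.from_numpy_array(np.full((4,), 2.0, dtype=np.float32))

    w_wn = F.weight_normalization(w, g, dim=0)
    w_wn.forward()
    # Each output row is g_i * w_i / ||w_i||, so its L2 norm should be close to 2.
    print(np.linalg.norm(w_wn.d, axis=1))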
@function_api def weight_standardization(ctx, w, channel_axis=None, eps=1e-05, n_outputs=-1, outputs=None): r""" Applies Weight Standardization over an input weight, which is defined as .. math:: \begin{eqnarray} \mu_{W_i} &=& \frac{1}{I} \sum_{j=1}^{I} W_{ij} \\ \sigma_{W_i} &=& \sqrt{\frac{1}{I} \sum_{i=1}^{I} \left(W_{ij} - \mu_{W_{i}}\right)^2 + \epsilon} \\ \hat{W_{ij}} &=& \frac{W_{ij} - \mu_{W_i}}{\sigma_{W_i}} \\ y &=& \hat{W} \ast x \end{eqnarray} References: * `Siyuan Qiao, Huiyu Wang, Chenxi Liu, Wei Shen, Alan Yuille, Weight Standardization <https://arxiv.org/pdf/1903.10520v1.pdf>`_ Args: w(~nnabla.Variable): N-D array of learnable weights. channel_axis(int): An axis for output channel. Default value is 0 which assumes the weights of convolution. [default= `0` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] Returns: ~nnabla.Variable: N-D array """ if channel_axis is None: channel_axis = 0 return F.WeightStandardization(ctx, channel_axis, eps)(w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def spectral_norm(ctx, w, u, dim=0, itr=1, eps=1e-12, test=False, output_u=False, n_outputs=-1, outputs=None): r""" Spectral Normalization. .. math:: W_{sn} = \frac{W}{\sigma(W)} where :math:`W` is the input matrix, and the :math:`\sigma(W)` is the spectral norm of :math:`W`. The spectral norm is approximately computed by the power iteration. References: Takeru Miyato, Toshiki Kataoka, Masanori Koyama, Yuichi Yoshida, "Spectral Normalization for Generative Adversarial Networks", International Conference on Learning Representations. 2018. Args: w(~nnabla.Variable): N-D array of learnable weights. This is normally network parameter. u(~nnabla.Variable): 1-D array of singular vector. When `test == False`, the data region of `u` will be updated during forward calculation. dim(int): Output dimension. Default is 0. If the dimension is not 0, then the specified dimension becomes the most-left dimension by transposing. [default= `0` ] itr(int): Number of power iterations. Default is 1. [default= `1` ] eps(float): Epsilon for the normalization. This `eps` is added before taking the sqrt in the norm computation. [default= `1e-12` ] test(bool): When in `True`, `u` will not be updated. Default is `False`. [default= `False` ] output_u(bool): Output original `u` or not. `u` is updated when `test == False` but you can get original `u` as output with this option. Default is `False`. [default= `False` ] Returns: ~nnabla.Variable: Spectrally normalized :math:`W_{sn}` with the same shape as :math:`W`. """ return F.SpectralNorm(ctx, dim, itr, eps, test, output_u)(w, u, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def mean_subtraction(ctx, x, rmean, t, base_axis=1, update_running_mean=True, n_outputs=-1, outputs=None): r""" It subtracts the mean of the elements of the input array, and normalizes it to :math:`0`. Preprocessing arrays with this function has the effect of improving accuracy in various tasks such as image classification. At training time, this function is defined as .. math:: \begin{eqnarray} \mu &=& \frac{1}{M} \sum x_i \\ y_i &=& x_i - \mu \end{eqnarray} At testing time, the mean values used are those that were computed during training by moving average. Note: The backward performs an approximated differentiation that takes into account only the latest mini-batch. Args: x(~nnabla.Variable): N-D array of input. 
rmean(~nnabla.Variable): N-D array of running mean (modified during forward execution). t(~nnabla.Variable): Scalar of num of iteration of running mean (modified during forward execution). base_axis(int): Base axis of Mean Subtraction operation. Dimensions up to base_axis is treated as sample dimension. [default= `1` ] update_running_mean(bool): Update running mean during forward execution. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.MeanSubtraction(ctx, base_axis, update_running_mean)(x, rmean, t, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
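``spectral_norm`` above states that :math:`\sigma(W)` is approximated by the power iteration. The plain NumPy sketch below illustrates that approximation only; it is not the library's internal implementation, and the helper name is made up for illustration.

.. code-block:: python

    # Illustrative power iteration for the largest singular value of W.
    import numpy as np

    def estimate_spectral_norm(W, itr=1, eps=1e-12):
        u = np.random.randn(W.shape[0])
        for _ in range(itr):
            v = W.T @ u
            v /= (np.linalg.norm(v) + eps)
            u = W @ v
            u /= (np.linalg.norm(u) + eps)
        # u^T W v approximates the largest singular value sigma(W).
        return u @ W @ v

    W = np.random.randn(8, 5)
    print(estimate_spectral_norm(W, itr=30))
    print(np.linalg.svd(W, compute_uv=False)[0])  # reference value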
[docs]@function_api def clip_grad_by_value(ctx, x, min, max, n_outputs=-1, outputs=None): r"""In forward pass, the function behaves as the identity. In backward pass, .. math:: g_x = \begin{cases} max & (g_y > max) \\ g_y & (otherwise) \\ min & (g_y < min) \end{cases}. A typical case for use is to prevent the gradient explosion through a whole computational graph. For example, if you want to clip gradient values for each feature map, .. code-block:: python x = nn.Variable([16, 3, 32, 32]) min = F.broadcast(nn.Variable.from_numpy_array(np.asarray([-1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) max = F.broadcast(nn.Variable.from_numpy_array(np.asarray([1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) c = F.clip_grad_by_value(x, min=min, max=max) h = PF.convolution(c, 64, (3, 3), pad=(1, 1)) Args: x(~nnabla.Variable): N-D array of input. min(~nnabla.Variable): N-D array of minimum input value by which the gradients of the `y` are clipped. Note that the shape of `min` must be the same as `x`'s and the backward to `min` is not performed. max(~nnabla.Variable): N-D array of maximum input value by which the gradients of the `y` are clipped. Note that the shape of `max` must be the same as `x`'s and the backward to `max` is not performed. Returns: ~nnabla.Variable: N-D array. """ return F.ClipGradByValue(ctx)(x, min, max, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def clip_grad_by_norm(ctx, x, clip_norm=None, axes=None, n_outputs=-1, outputs=None): r""" In the forward pass, the function behaves like the identity. In the backward pass, .. math:: g_x = N \times \frac{g_y}{\|g_y\|_2}. where :math:`g_x` is the gradient w.r.t. the input, :math:`g_y` is the gradient w.r.t. the output, and :math:`N` is `clip_norm`, the value to which the norm of :math:`g_y` is scaled; this is the case when `axes` is not set. When `axes` is set, the norm is computed over `axes`. A typical use case is to prevent gradient explosion through a whole computational graph. For example, if you want to normalize gradient values over the feature axis, .. code-block:: python x = nn.Variable([16, 3, 32, 32]) min = F.broadcast(nn.Variable.from_numpy_array(np.asarray([-1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) c = F.clip_grad_by_norm(x, axes=(1, )) h = PF.convolution(c, 64, (3, 3), pad=(1, 1)) Args: x(~nnabla.Variable): N-D array of input. clip_norm(float): Value to which the norm of the gradient is scaled in the backward pass. [default= `1.0` ] axes(repeated int64): Axes to be reduced. If an empty list is given, all dimensions are reduced to a scalar. This is used in the backward pass where the norm is computed. [default= `range(x.ndim)` ] Returns: ~nnabla.Variable: N-D array. """ if clip_norm is None: clip_norm = 1.0 if axes is None: axes = range(x.ndim) return F.ClipGradByNorm(ctx, clip_norm, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def sum(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduces a matrix along a specified axis with the sum function. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced to scalar. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Sum(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cumsum(ctx, x, axis=None, exclusive=False, reverse=False, n_outputs=-1, outputs=None): r""" Cumulative sum along a given axis. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which the cumulative sum is to be calculated [default= `0` ] exclusive(bool): If True, perform exclusive cumsum [default= `False` ] reverse(bool): If True, perform cumsum in reverse direction [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axis is None: axis = 0 return F.CumSum(ctx, axis, exclusive, reverse)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
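A small sketch of the ``exclusive`` and ``reverse`` options (the values in the comments follow directly from the definitions above):

.. code-block:: python

    # Cumulative sum variants on a 1-D input.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=np.float32))

    y_inc = F.cumsum(x, axis=0)                   # [1, 3, 6, 10]
    y_exc = F.cumsum(x, axis=0, exclusive=True)   # [0, 1, 3, 6]
    y_rev = F.cumsum(x, axis=0, reverse=True)     # [10, 9, 7, 4]
    for y in (y_inc, y_exc, y_rev):
        y.forward()
        print(y.d)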
@function_api def mean(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduces a matrix along a specified axis with the mean function. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Mean(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def max(ctx, x, axes=None, keep_dims=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with max operation. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] with_index(bool): Return values and indices. [default= `False` ] only_index(bool): Return only indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Max(ctx, axes, keep_dims, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def min(ctx, x, axes=None, keep_dims=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with min operation. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] with_index(bool): Return values and indices. [default= `False` ] only_index(bool): Return only indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Min(ctx, axes, keep_dims, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def norm(ctx, x, p=None, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with norm operation. .. math:: y = \|x\|_p = \left( \sum_i |x_i|^p \right)^{\frac{1}{p}} Args: x(~nnabla.Variable): N-D array. p(float): Order of the norm. [default= `2` ] axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced to scalar. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if p is None: p = 2 if axes is None: axes = range(x.ndim) return F.Norm(ctx, p, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def prod(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with product operation. Note: Backward computation is not accurate in a zero value input. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Prod(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
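The reductions above (``sum``, ``mean``, ``max``, ``min``, ``norm``, ``prod``) all accept the axes to reduce and a flag to keep the reduced dimensions. The sketch below passes these arguments positionally to sidestep small keyword-name differences between this low-level module and the convenience wrappers in :obj:`nnabla.functions`:

.. code-block:: python

    # Reduction sketch: axes to reduce, optionally keeping the reduced dims.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(np.arange(24, dtype=np.float32).reshape(2, 3, 4))

    s = F.sum(x, (1,))           # reduce axis 1                  -> shape (2, 4)
    m = F.mean(x, (1, 2), True)  # reduce axes 1 and 2, keep dims -> shape (2, 1, 1)
    for y in (s, m):
        y.forward()
    print(s.d.shape, m.d.shape)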
[docs]@function_api def cumprod(ctx, x, axis=None, exclusive=False, reverse=False, n_outputs=-1, outputs=None): r""" Cumulative product along a given axis. Note: Backward computation is not accurate in a zero value input. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which cumulative product is to be calculated [default= `0` ] exclusive(bool): If True, perform exclusive cumprod [default= `False` ] reverse(bool): If True, perform cumprod in reverse direction [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axis is None: axis = 0 return F.CumProd(ctx, axis, exclusive, reverse)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reduce_sum(ctx, x, n_outputs=-1, outputs=None): r""" Reduction along an axis with sum operation. Note: This is deprecated. Use ``sum`` instead. Args: x(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array """ return F.ReduceSum(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reduce_mean(ctx, x, n_outputs=-1, outputs=None): r""" Reduction by mean along an axis. Note: This is deprecated. Use ``mean`` instead. Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array """ return F.ReduceMean(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise addition. .. math:: y_i = x^{(0)}_i + x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Add2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add_n(ctx, *x, **kw): r""" Element-wise addition. .. math:: y_i = x^{(0)}_i + \ldots + x^{(n-1)}_i Args: *x(~nnabla.Variable): N-D arrays [variadic] Returns: ~nnabla.Variable: N-D array """ assert len(x) >= 1, "add_n must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) return F.AddN(ctx)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def bc_add2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Note: This shouldn't be called by users. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.BcAdd2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sub2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise subtraction. .. math:: y_i = x^{(0)}_i - x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Sub2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise multiplication. .. math:: y_i = x^{(0)}_i x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Mul2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul_n(ctx, *x, **kw): r""" Element-wise multiplication. .. math:: y_i = x^{(0)}_i \cdots x^{(n-1)}_i Args: *x(~nnabla.Variable): N-D arrays [variadic] Returns: ~nnabla.Variable: N-D array """ assert len(x) >= 1, "mul_n must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) return F.MulN(ctx)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def div2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise division. .. math:: y_i = \frac{x^{(0)}_i} {x^{(1)}_i} Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Div2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pow2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise power function. .. math:: y_i = {(x^{(0)}_i)} ^ {x^{(1)}_i} Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Pow2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar addition. .. math:: y_i = x_i + v Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.AddScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar multiplication. .. math:: y_i = v x_i Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MulScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pow_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar power function. .. math:: y_i = (x_i) ^ v Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.PowScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_sub_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar subtraction. .. math:: y_i = v - x_i Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RSubScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_div_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar division. .. math:: y_i = \frac{v}{x_i} Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RDivScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_pow_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar power function. .. math:: y_i = v ^ {x_i} Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RPowScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sign(ctx, x, alpha=1.0, n_outputs=-1, outputs=None): r""" Element-wise sign function. In the forward pass, it is defined as .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ -1 & (x < 0) \\ \alpha & (x = 0) \end{cases}. In the backward pass, it is defined as .. math:: \frac{\partial f(x)}{\partial x} = 1, or in other words, it behaves as the identity function for the gradient in the backward pass. Args: x(~nnabla.Variable): Input alpha(float): Value in case of :math:`x = 0`. [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sign(ctx, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
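The straight-through behavior described above can be observed directly: the forward pass produces the sign while the backward pass propagates the gradient unchanged. A minimal sketch, assuming the standard Variable/gradient API:

.. code-block:: python

    # Forward: sign of x (with alpha at x=0). Backward: identity gradient (STE).
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(
        np.array([-2.0, 0.0, 3.0], dtype=np.float32), need_grad=True)
    y = F.sign(x, alpha=0.0)
    y.forward()
    print(y.d)   # [-1., 0., 1.]

    x.grad.zero()
    y.backward()
    print(x.g)   # [1., 1., 1.] -- the gradient passes straight through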
[docs]@function_api def minimum2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise minimum. .. math:: y_i = \min(x^{(0)}_i, x^{(1)}_i) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array of min value """ return F.Minimum2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def maximum2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise maximum. .. math:: y_i = \max(x^{(0)}_i, x^{(1)}_i) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array of max value """ return F.Maximum2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def minimum_scalar(ctx, x, val=1.0, n_outputs=-1, outputs=None): r""" Element-wise scalar minimum. .. math:: y_i = \min(x_i, v) Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MinimumScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def maximum_scalar(ctx, x, val=1.0, n_outputs=-1, outputs=None): r""" Element-wise scalar maximum. .. math:: y_i = \max (x_i, v) Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MaximumScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_and(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical AND. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \neq 0 \;\&\; x^{(1)}_i \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalAnd(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_or(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical OR. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 0 & (x^{(0)}_i = 0 \;\&\; x^{(1)}_i = 0) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalOr(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_xor(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical XOR. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i = 0 \;\&\; x^{(1)}_i = 0) \\ 1 & (x^{(0)}_i \neq 0 \;\&\; x^{(1)}_i \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalXor(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise 'equal' .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i = x^{(1)}_i) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Equal(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def not_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise 'not equal' .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 0 & (x^{(0)}_i = x^{(1)}_i) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.NotEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \geq x^{(1)}_i) \\ 0 & (x^{(0)}_i < x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.GreaterEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i > x^{(1)}_i) \\ 0 & (x^{(0)}_i \leq x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Greater(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \leq x^{(1)}_i) \\ 0 & (x^{(0)}_i > x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LessEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i < x^{(1)}_i) \\ 0 & (x^{(0)}_i \geq x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Less(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def searchsorted(ctx, sorted_sequence, values, right=None, n_outputs=-1, outputs=None): r""" Finds the indices in the innermost dimension of a sorted sequence at which values must be inserted to keep the sequence sorted. Args: sorted_sequence(~nnabla.Variable): N-D array of the sorted sequence where the search is to be performed. Note that this must be a sorted array. values(~nnabla.Variable): N-D array of search values. right(bool): If True, given a value v, the function returns the index i such that sorted_sequence[i-1] <= v < sorted_sequence[i] (index of the closest upper bound of v). By default, this is False, so the function returns the index i such that sorted_sequence[i-1] < v <= sorted_sequence[i] (index of the closest lower bound of v). [default= `False` ] Returns: ~nnabla.Variable: N-D array containing the required indices """ if right is None: right = False return F.SearchSorted(ctx, right)(sorted_sequence, values, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
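With the default ``right=False``, the returned indices are expected to match NumPy's ``searchsorted`` with ``side='left'``; the comparison below is a sketch of that correspondence rather than a guarantee:

.. code-block:: python

    # Compare F.searchsorted (right=False) with np.searchsorted(..., side='left').
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    ss = nn.Variable.from_numpy_array(np.array([1., 3., 5., 7.], dtype=np.float32))
    v = nn.Variable.from_numpy_array(np.array([0., 3., 6., 8.], dtype=np.float32))

    idx = F.searchsorted(ss, v)
    idx.forward()
    print(idx.d)                                     # expected: [0, 1, 3, 4]
    print(np.searchsorted(ss.d, v.d, side='left'))   # [0, 1, 3, 4]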
[docs]@function_api def logical_and_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical AND with scalar. .. math:: f(x_i,v) = \begin{cases} 1 & (x_i \neq 0 \;\&\; v \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalAndScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_or_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical OR with scalar. .. math:: f(x_i,v) = \begin{cases} 0 & (x_i = 0 \;\&\; v = 0) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalOrScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_xor_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical XOR with scalar. .. math:: f(x_i,v) = \begin{cases} 1 & (x_i = 0 \;\&\; v = 0) \\ 1 & (x_i \neq 0 \;\&\; v \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalXorScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise 'equal' with a scalar .. math:: f(x_i,v) = \begin{cases} 1 & (x_i = v) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.EqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def not_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise 'not equal' with a scalar .. math:: f(x_i,v) = \begin{cases} 0 & (x_i = v) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.NotEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i \geq v) \\ 0 & (x^{(0)}_i < v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.GreaterEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i > v) \\ 0 & (x^{(0)}_i \leq v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.GreaterScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i \leq v) \\ 0 & (x^{(0)}_i > v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LessEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i < v) \\ 0 & (x^{(0)}_i \geq v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LessScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_not(ctx, x0, n_outputs=-1, outputs=None): r""" Element-wise logical NOT operation .. math:: f(x_i) = \begin{cases} 1 & (x_i = 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalNot(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def isnan(ctx, x0, n_outputs=-1, outputs=None): r""" Test element-wise for NaN and return a ``0/1`` array. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.IsNaN(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def isinf(ctx, x0, n_outputs=-1, outputs=None): r""" Test element-wise for ``inf/-inf`` and return a ``0/1`` array. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.IsInf(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reset_nan(ctx, x0, val=0, n_outputs=-1, outputs=None): r""" Replace NaNs with a scalar value specified by ``val``. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ResetNaN(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reset_inf(ctx, x0, val=0, n_outputs=-1, outputs=None): r""" Replace ``-inf/inf`` with a scalar value specified by ``val``. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ResetInf(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def where(ctx, condition, x_true, x_false, n_outputs=-1, outputs=None): r""" Return elements, either from ``x_true`` or ``x_false``, depending on ``condition``. If rank of ``condition`` is higher than those of ``x_true`` and ``x_false``, the first dimensions of ``x_true`` and ``x_false`` must match the dimensions of ``condition``. Example: .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F a = nn.Variable.from_numpy_array(np.random.rand(2, 3)) x = nn.Variable.from_numpy_array(np.random.rand(2, 3, 4)) y = nn.Variable.from_numpy_array(np.random.rand(2, 3, 4)) z = F.where(F.greater_scalar(a, 0.5), x, y) z.forward() # Numpy equivalent z_numpy = np.where(a.d > 0.5, x.d, y.d) assert np.allclose(z_numpy, z.d) Args: condition(~nnabla.Variable): N-d array. For all i, when ``condition[i] == true``, yield ``x_true[i]``, otherwise ``x_false[i]``. x_true(~nnabla.Variable): N-d array with higher or equal rank to ``condition``. x_false(~nnabla.Variable): N-d array with higher or equal rank to ``condition``. Returns: ~nnabla.Variable: N-D array with the same shape as condition """ return F.Where(ctx)(condition, x_true, x_false, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def constant(ctx, val=0, shape=[], n_outputs=-1, outputs=None): r""" Generate a constant-valued array. Args: val(float): Constant value. [default= `0` ] shape(:obj:`tuple` of :obj:`int`): Shape of the output array. [default= `[]` ] Returns: ~nnabla.Variable: N-D array where all values are the specified constant. """ return F.Constant(ctx, val, shape)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def arange(ctx, start, stop, step=1, n_outputs=-1, outputs=None): r""" Generate a range of values within the half-open interval ``[start, stop)`` (the interval including start but excluding stop) with `step` increments. Args: start(float): Start value. stop(float): End value. step(float): Step value. [default= `1` ] Returns: ~nnabla.Variable: 1-D array with the generated values. """ return F.Arange(ctx, start, stop, step)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def linspace(ctx, start, stop, num, n_outputs=-1, outputs=None): r""" Generate a one-dimensional vector/tensor of size `num` whose values are evenly spaced from `start` to `stop`, inclusive. Args: start(float): Start value. stop(float): End value. num(int): Size of the constructed vector/tensor. Returns: ~nnabla.Variable: 1-D array with the generated values. """ return F.Linspace(ctx, start, stop, num)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
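``constant``, ``arange`` and ``linspace`` generate new arrays without any input Variable; a short sketch:

.. code-block:: python

    # Array generation: a filled constant, a half-open range, and evenly spaced values.
    import nnabla as nn
    import nnabla.functions as F

    c = F.constant(0.5, (2, 3))    # 2x3 array filled with 0.5
    a = F.arange(0, 5)             # [0, 1, 2, 3, 4] (stop is excluded)
    lin = F.linspace(0, 1, 5)      # 5 evenly spaced values from 0 to 1
    for y in (c, a, lin):
        y.forward()
        print(y.d)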
[docs]@function_api def abs(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise absolute value function. .. math:: y_i = |x_i| Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise absolute variable """ return F.Abs(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def exp(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise natural exponential function. .. math:: y_i = \exp(x_i). Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise exp variable """ return F.Exp(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def log(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise natural logarithm function. .. math:: y_i = \ln(x_i). Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise log variable """ return F.Log(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def identity(ctx, x, n_outputs=-1, outputs=None): r""" Identity function. .. math:: y = x Args: x(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array """ return F.Identity(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_matmul(ctx, a, b, transpose_a=False, transpose_b=False, n_outputs=-1, outputs=None): r""" Batch matrix multiplication. Two batches of matrices are multiplied, one pair for each sample in the batch. A batch of matrices is composed as [..., P, Q] where the last two dimensions compose matrix dimensions, and the first dimensions up to the third last dimension are considered as batch samples. These batch dimensions are internally broadcast when the size of a dimension is 1. Example: .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np nn.set_auto_forward(True) # Same batch size a = nn.Variable.from_numpy_array(np.random.rand(2, 2, 3, 4)) b = nn.Variable.from_numpy_array(np.random.rand(2, 2, 4, 3)) c = F.batch_matmul(a, b) # Different batch size with the broadcast a = nn.Variable.from_numpy_array(np.random.rand(2, 1, 3, 4)) b = nn.Variable.from_numpy_array(np.random.rand(1, 3, 4, 3)) c = F.batch_matmul(a, b) .. WARNING:: Since version 1.13, the behavior of the batch dimensions has changed: they are now broadcast internally when the size of a dimension is 1. Accordingly, this function no longer supports different batch dimensions between the two inputs even if the total sample size of each input is the same. Args: a(~nnabla.Variable): N-D array with >= 2-dim. The last two dimensions will be treated as a matrix. b(~nnabla.Variable): N-D array with >= 2-dim. The last two dimensions will be treated as a matrix. The product of the size of the 0-th dimension through the size of the third last dimension must be the same as that of the input ``a``. transpose_a(bool): Transpose the last two axes of ``a`` in matrix multiplication. [default= `False` ] transpose_b(bool): Transpose the last two axes of ``b`` in matrix multiplication. [default= `False` ] Returns: ~nnabla.Variable: Output of sample-wise matrix multiplication in a batch. When ``a`` has a shape of [N, P, Q], ``b`` has a shape of [N, Q, R], and the transpose options are all False, the output will have a shape of [N, P, R]. """ return F.BatchMatmul(ctx, transpose_a, transpose_b)(a, b, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def round(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise round function. In the forward pass, this function simply computes `round` to the nearest integer value. .. math:: y_i = round(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Round(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def ceil(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise ceil function. In the forward pass, this function simply returns the smallest integer which is not less than the input. .. math:: y_i = ceil(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Ceil(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def floor(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise floor function. In the forward pass, this function simply returns the largest integer which is not greater than the input. .. math:: y_i = floor(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Floor(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sin(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise sine (sin) function. .. math:: y_i = \sin (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sin(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cos(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise cosine (cos) function. .. math:: y_i = \cos (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Cos(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def tan(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise tangent (tan) function. .. math:: y_i = \tan (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Tan(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sinh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic sine (sinh) function. .. math:: y_i = \sinh (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sinh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cosh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic cosine (cosh) function. .. math:: y_i = \cosh (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Cosh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def asin(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arcsine (asin) function. .. math:: y_i = \arcsin (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ASin(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def acos(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arccosine (acos) function. .. math:: y_i = \arccos (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ACos(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atan(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arctangent (atan) function. .. math:: y_i = \arctan (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ATan(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atan2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise arctangent (atan) function with 2 input variables. .. math:: y_i = \arctan2 (x_{i1}, x_{i2}) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as input variables """ return F.ATan2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
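Following the usual ``atan2(y, x)`` argument convention (an assumption here, since the formula above only indexes the two inputs), the result should agree with NumPy's ``arctan2``:

.. code-block:: python

    # Element-wise two-argument arctangent compared against NumPy.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    y = nn.Variable.from_numpy_array(np.array([1.0, -1.0, 0.0], dtype=np.float32))
    x = nn.Variable.from_numpy_array(np.array([1.0, 1.0, -2.0], dtype=np.float32))

    z = F.atan2(y, x)
    z.forward()
    print(z.d)
    print(np.arctan2(y.d, x.d))  # expected to agree element-wise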
[docs]@function_api def asinh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arcsine (asinh) function. .. math:: y_i = \text{arcsinh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ASinh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def acosh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arccosine (acosh) function. .. math:: y_i = \text{arccosh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ACosh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atanh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arctangent (atanh) function. .. math:: y_i = \text{arctanh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ATanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def erf(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise Error function. .. math:: y_i = \text{erf} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Erf(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def concatenate(ctx, *x, **kw): r""" Concatenate a variable number of input arrays along the specified axis. Args: *x(~nnabla.Variable): N-D arrays. [variadic] axis(int): Axis [default= `len(x[0].shape) - 1` ] Returns: ~nnabla.Variable: Concatenated variable """ assert len(x) >= 1, "concatenate must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) axis = kw.pop('axis', len(x[0].shape) - 1) return F.Concatenate(ctx, axis)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
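A minimal usage sketch for `concatenate` (illustrative only, assuming the public `nnabla.functions` interface):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
a = nn.Variable.from_numpy_array(np.ones((2, 3)))
b = nn.Variable.from_numpy_array(np.zeros((2, 5)))
y = F.concatenate(a, b, axis=1)  # join along the second axis
print(y.shape)  # (2, 8)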
@function_api def split(ctx, x, axis=0, n_outputs=-1, outputs=None): r""" Split arrays at the specified axis. Note: This function should not be called directly when constructing models. Instead, use :meth:`nnabla.functions.split` which automatically sets `n_outputs` from the input's shape and axis. Args: x(~nnabla.Variable): N-D array axis(int): Axis [default= `0` ] Returns: ~nnabla.Variable: list of N-D arrays [variadic][parameter] """ return F.Split(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def stack(ctx, *x, **kw): r""" Joins two or more arrays on a new axis. Note: Unlike :meth:`nnabla.functions.concatenate` , which joins arrays on an existing axis, Stack joins arrays on a new axis. Args: *x(~nnabla.Variable): N-D arrays. The sizes of all the arrays to be stacked must be the same. [variadic] axis(int): The axis on which to stack arrays. Axis indices take on values 0, 1, 2, and so on from the left. For example, to stack four (3,28,28) inputs on the second axis, specify 1. In this case, the output size will be (3,4,28,28). [default= `0` ] Returns: ~nnabla.Variable: Output """ assert len(x) >= 1, "stack must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) axis = kw.pop('axis', 0) return F.Stack(ctx, axis)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
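A short sketch contrasting `stack` with `concatenate` (illustrative, assuming the public `nnabla.functions` interface):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
a = nn.Variable.from_numpy_array(np.ones((3, 4)))
b = nn.Variable.from_numpy_array(np.zeros((3, 4)))
print(F.stack(a, b, axis=0).shape)        # (2, 3, 4) -- new leading axis
print(F.concatenate(a, b, axis=0).shape)  # (6, 4)    -- existing axis grows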
[docs]@function_api def slice(ctx, x, start=None, stop=None, step=None, n_outputs=-1, outputs=None): r""" Slice arrays along specified axis. Args: x(~nnabla.Variable): N-D array start(repeated int64): Start indices for each axis [default= `(0,) * len(x.shape)` ] stop(repeated int64): Stop indices for each axis [default= `tuple(x.shape)` ] step(repeated int64): Step indices for each axis [default= `(1,) * len(x.shape)` ] Returns: ~nnabla.Variable: Sliced N-D array """ if start is None: start = (0,) * len(x.shape) if stop is None: stop = tuple(x.shape) if step is None: step = (1,) * len(x.shape) return F.Slice(ctx, start, stop, step)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
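A minimal sketch of `slice` (illustrative; `start`, `stop` and `step` cover every axis of the input):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.slice(x, start=(0, 1, 0), stop=(2, 3, 4), step=(1, 1, 2))
print(y.shape)  # (2, 2, 2)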
[docs]@function_api def pad(ctx, x, pad_width, mode='constant', constant_value=0, n_outputs=-1, outputs=None): r""" Pad the input N-D array `x` over the number of dimensions given by half the length of the `pad_width` iterable, where every two values in `pad_width` determine the before and after pad size of an axis. The `pad_width` iterable must hold an even number of non-negative values which may cover all or fewer dimensions of the input variable `x`. If `pad_width` covers fewer dimensions, then it applies to the innermost dimensions of `x`. .. code-block:: python x = nn.Variable.from_numpy_array(np.ones((2, 3, 4))) assert F.pad(x, (1, 1, 2, 2)).shape == (2, 5, 8) Padding is performed according to the requested `mode`: constant Pads with a value given by the keyword argument `constant_value`. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'constant', constant_value = -1) y.forward() assert np.all(y.d == np.array([-1, -1, -1, 1, 2, 3, 4, -1, -1, -1])) reflect Pads with the reflection of the vector mirrored on the first and last values of the vector along each axis. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'reflect') y.forward() assert np.all(y.d == np.array([4, 3, 2, 1, 2, 3, 4, 3, 2, 1])) repeat Pads with the edge value of the vector along each axis. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'repeat') y.forward() assert np.all(y.d == np.array([1, 1, 1, 1, 2, 3, 4, 4, 4, 4])) Args: x(~nnabla.Variable): N-D array pad_width(repeated int64): Iterable of *before* and *after* pad values. mode(string): Padding mode string. [default= `'constant'` ] constant_value(float): Fill value if mode is `constant`. [default= `0` ] Returns: ~nnabla.Variable: Padded N-D array with the same number of dimensions as the input. .. code-block:: python x = nn.Variable((3, 3, 4, 2)) # a shape like (B, C, H, W) # 1-D padding: last dim by 1 left and 2 on the right side assert F.pad(x, (1, 2)).shape == (3, 3, 4, 5) # 2-D padding: last dim by (1, 1) and 2nd to last by (2, 2) assert F.pad(x, (2, 2, 1, 1)).shape == (3, 3, 8, 4) # 3-D padding: dims C by (0, 1), H by (2, 1), and W by (3, 3) assert F.pad(x, (0, 1, 2, 1, 3, 3)).shape == (3, 4, 7, 8) """ return F.Pad(ctx, pad_width, mode, constant_value)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def transpose(ctx, x, axes, n_outputs=-1, outputs=None): r""" Transposes tensor dimensions. Args: x(~nnabla.Variable): N-D array axes(repeated int64): Source axis indices for each axis. Returns: ~nnabla.Variable: Transposed N-D array. """ return F.Transpose(ctx, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
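A minimal sketch of `transpose` (illustrative), permuting the axes of a (2, 3, 4) array:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.transpose(x, (0, 2, 1))  # swap the last two axes
print(y.shape)                 # (2, 4, 3)
assert np.allclose(y.d, np.transpose(x.d, (0, 2, 1)))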
[docs]@function_api def broadcast(ctx, x, shape, n_outputs=-1, outputs=None): r""" Broadcasting ND-array to the specified shape. Args: x(~nnabla.Variable): N-D array shape(:obj:`tuple` of :obj:`int`): Shape broadcasted to. The size must be the same in axis where ``x``'s shape is not 1. Returns: ~nnabla.Variable: Broadcasted N-D array """ return F.Broadcast(ctx, shape)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
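A minimal sketch of `broadcast` (illustrative); size-1 axes are expanded to the requested shape:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(3).reshape(3, 1))
y = F.broadcast(x, (3, 4))  # replicate along the size-1 axis
print(y.shape)  # (3, 4)
assert np.allclose(y.d, np.broadcast_to(x.d, (3, 4)))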
[docs]@function_api def broadcast_to(ctx, x, y, axis=None, n_outputs=-1, outputs=None): r""".. WARNING:: This function is experimental support, so please do not actively use it. Broadcasting ND-array to the specified buffer. Args: x(~nnabla.Variable): N-D array y(~nnabla.Variable): N-D array axis(int): Target axis to start broadcasting. If this is not set, broadcast will try to fit y to x starting from the last dimension [default= `-1` ] Returns: ~nnabla.Variable: Broadcasted N-D array """ if axis is None: axis = -1 return F.BroadcastTo(ctx, axis)(x, y, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def tile(ctx, x, reps, n_outputs=-1, outputs=None): r""" Forward input `x` repeated the number of times given by `reps`. If `reps` is a sequence, the output has dimension of ``d = max(len(reps), x.ndim)`` and either `x` is promoted to be d-dimensional by prepending new axes or `reps` is promoted to x.ndim by prepending 1's. Args: x(~nnabla.Variable): N-D array reps(repeated int64): The number of repetitions of `x` along each axis. Returns: ~nnabla.Variable: N-D array """ return F.Tile(ctx, reps)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
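A minimal sketch of `tile` (illustrative), matching the NumPy `np.tile` semantics described above:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[1, 2, 3], [4, 5, 6]]))
y = F.tile(x, (2, 1))  # repeat twice along the first axis
print(y.shape)  # (4, 3)
assert np.allclose(y.d, np.tile(x.d, (2, 1)))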
[docs]@function_api def one_hot(ctx, x, shape, n_outputs=-1, outputs=None): r""" This function creates one-hot vector based on input indices. The range [-shape[i], -1] of input indices are regarded as [0, shape[i]-1], and an input index outside [-shape[i], shape[i]-1] generates a vector filled with zero. Example: .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np labels = nn.Variable.from_numpy_array(np.array([[9], [4], [5], [-9], [10]])) print(labels.shape) # (5, 1) num_class = 10 y_train = F.one_hot(labels, shape=(num_class, )) y_train.forward() print(y_train.shape) # (5, 10) print(y_train.d) # [[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.] # [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] # [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] # Can also be used for ndarray. labels = nn.Variable.from_numpy_array(np.array([[1, 7], [4, 7], [8, 6], [5, 0], [2, 6]])) print(labels.shape) # (5, 2) num_class_1, num_class_2 = 10, 8 y_train = F.one_hot(labels, shape=(num_class_1, num_class_2)) y_train.forward() print(y_train.shape) # (5, 10, 8) print(y_train.d) # [[[0. 0. 0. 0. 0. 0. 0. 0.] [[0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 1. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] ... [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.]], [0. 0. 0. 0. 0. 0. 0. 0.]]] Args: x(~nnabla.Variable): N-D array representing label's indice. shape(:obj:`tuple` of :obj:`int`): Number of classes. When nd-labels are given, dimensions must match. See the example above. Returns: ~nnabla.Variable: N-D array one-hot vector/tensor. """ return F.OneHot(ctx, shape)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def flip(ctx, x, axes=None, n_outputs=-1, outputs=None): r""" Reverses the order of elements of the specified dimension of an array. Args: x(~nnabla.Variable): N-D array axes(repeated int64): The index of the dimension to reverse the order of the elements. Axis indices take on values 0, 1, 2, and so on from the left. For example, to flip 100 RGB images of 32 (W) by 24 (H), i.e. shape (100,3,24,32), vertically and horizontally, specify (2,3). [default= `[len(x.shape) - 1]` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = [len(x.shape) - 1] return F.Flip(ctx, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def shift(ctx, x, shifts=None, border_mode='nearest', n_outputs=-1, outputs=None): r""" Shifts the array elements by the specified amount. Args: x(~nnabla.Variable): N-D array. shifts(repeated int64): The amount to shift elements. For example, to shift image data to the right by 2 pixels and up 3 pixels, specify (-3,2). [default= `(0,) * len(x.shape)` ] border_mode(string): Specify how to process the ends of arrays whose values will be undetermined as a result of shifting. nearest: The data at the ends of the original array is copied and used. reflect: Original data reflected at the ends of the original array is used. [default= `'nearest'` ] Returns: ~nnabla.Variable: N-D array. """ if shifts is None: shifts = (0,) * len(x.shape) return F.Shift(ctx, shifts, border_mode)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def sort(ctx, x, axis=-1, reverse=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Sorts the elements of `x` along a given `axis` in ascending order by value. A negative `axis` counts from the last dimension of `x`, so the default of -1 sorts along the last dimension. If `reverse` is True, then the elements are sorted in descending order. If `with_index` is True, result is a tuple ``(sorted, indices)`` or only ``indices`` if `only_index` is True. Setting `only_index` to True implies that `with_index` is also True. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which to sort. [default= `-1` ] reverse(bool): Sort in descending order. [default= `False` ] with_index(bool): Return sorted values and index. [default= `False` ] only_index(bool): Return only the sort index. [default= `False` ] Returns: ~nnabla.Variable: list of N-D arrays [variadic][parameter] """ return F.Sort(ctx, axis, reverse, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
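A minimal sketch of `sort` (illustrative, via the user-facing wrapper that returns values and indices when `with_index=True`, as the docstring above describes):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[3.0, 1.0, 2.0], [0.0, 5.0, 4.0]]))
sorted_x, indices = F.sort(x, axis=1, with_index=True)
print(sorted_x.d)     # [[1. 2. 3.] [0. 4. 5.]]
print(indices.shape)  # (2, 3) -- original positions of the sorted values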
[docs]@function_api def reshape(ctx, x, shape, inplace=True, n_outputs=-1, outputs=None): r""" Reshapes the input variable in-place. It does not create a copy of the variable. The output variable (y) has a new shape but points to the same data as the input variable (x). This means that if the data in the output variable (y) is modified, the data in the input variable (x) also gets modified since the reshape was done in-place. Note: This function has the same behavior as the :meth:`nnabla.Variable.reshape` method. Args: x(~nnabla.Variable): N-D array. shape(:obj:`tuple` of :obj:`int`): Dimensions for each axis. ``-1`` can be specified only in one shape dimension. The value is calculated from the size of the array and remaining dimensions. inplace(bool): The output array is shared with the input array if True. [default= `True` ] Returns: ~nnabla.Variable: Reshaped N-D array """ return F.Reshape(ctx, shape, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
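A minimal sketch of `reshape` (illustrative), using `-1` to let one dimension be inferred:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.reshape(x, (2, -1))  # the -1 dimension is inferred as 12
print(y.shape)  # (2, 12)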
[docs]@function_api def shape(ctx, x, start=None, end=None, n_outputs=-1, outputs=None): r""" Get the shape of a tensor. Optional attributes start and end can be used to compute a slice of the input tensor's shape. If start axis is omitted, the slice starts from axis 0. Args: x(~nnabla.Variable): N-D array. start(int): If start axis is omitted, the slice starts from axis 0. [default= `0` ] end(int): The end axis, if specified, is exclusive (the returned value will not include it). [default= `0` ] Returns: ~nnabla.Variable: 1-D array """ if start is None: start = 0 if end is None: end = 0 return F.Shape(ctx, start, end)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def matrix_diag(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array where the last two dimensions consist of the diagonal matrix. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N`). Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N \times M_N`). """ return F.MatrixDiag(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def matrix_diag_part(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array in which the values of the last dimension consist of the diagonal elements of the last two dimensions of an input array. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N \times M_N`). Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N`). """ return F.MatrixDiagPart(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
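A minimal round-trip sketch for `matrix_diag` and `matrix_diag_part` (illustrative):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
d = F.matrix_diag(x)       # (2, 3) -> (2, 3, 3) diagonal matrices
r = F.matrix_diag_part(d)  # (2, 3, 3) -> (2, 3), recovers the diagonals
print(d.shape)  # (2, 3, 3)
assert np.allclose(r.d, x.d)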
@function_api def trilu(ctx, x, k=0, upper=True, n_outputs=-1, outputs=None): r""" Returns an array in which the values of the last dimension consist of the triangular matrix of the last two dimensions of an input array. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N`). k(int): The number of diagonals above or below the main diagonal to exclude or include. [default= `0` ] upper(bool): Determine whether upper or lower part of matrix is retained. [default= `True` ] Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N`). """ return F.Trilu(ctx, k, upper)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def meshgrid(ctx, *x, **kw): r""" Return coordinate matrices from coordinate vectors. Given N 1-D arrays, this function returns N-D coordinate arrays for vectorized evaluations on an N-D grid. Example: >>> x,y = F.meshgrid(F.arange(0,3), F.arange(0,2)) >>> x.d array([[0., 1., 2.], [0., 1., 2.]], dtype=float32) >>> y.d array([[0., 0., 0.], [1., 1., 1.]], dtype=float32) >>> i,j = F.meshgrid(F.arange(0,3), F.arange(0,2), ij_indexing=True) >>> i.d array([[0., 0.], [1., 1.], [2., 2.]], dtype=float32) >>> j.d array([[0., 1.], [0., 1.], [0., 1.]], dtype=float32) Args: *x(~nnabla.Variable): N-D arrays. [variadic] ij_indexing(bool): If set true (Matrix ('ij') indexing ), the broadcasting dimensions are swapped. Default is False (Cartesian ('xy') indexing ). [default= `False` ] Returns: ~nnabla.Variable: N-D arrays [variadic] """ assert len(x) >= 1, "meshgrid must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) ij_indexing = kw.pop('ij_indexing', False) return F.Meshgrid(ctx, ij_indexing)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_det(ctx, x, n_outputs=-1, outputs=None): r""" Batch-wise determinant function. .. math:: Y_b = \det(X_b), where :math:`X_b` and :math:`Y_b` are the :math:`b`-th input and output, respectively. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of determinant """ return F.BatchDet(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_inv(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array of inverted matrices. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of inverted matrices """ return F.BatchInv(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_logdet(ctx, x, n_outputs=-1, outputs=None): r""" Batch-wise log absolute determinant function. .. math:: Y_b = \log(|\det(X_b)|), where :math:`X_b` and :math:`Y_b` are the :math:`b`-th input and output, respectively. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of log absolute determinant """ return F.BatchLogdet(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
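A minimal sketch checking `batch_det`, `batch_inv` and `batch_logdet` against NumPy (illustrative; the batch is a set of well-conditioned 3x3 matrices):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
rng = np.random.RandomState(0)
a = rng.rand(4, 3, 3) + np.eye(3)  # batch of 4 matrices, each 3x3
x = nn.Variable.from_numpy_array(a)
assert np.allclose(F.batch_det(x).d, np.linalg.det(a), atol=1e-4)
assert np.allclose(F.batch_inv(x).d, np.linalg.inv(a), atol=1e-4)
assert np.allclose(F.batch_logdet(x).d, np.log(np.abs(np.linalg.det(a))), atol=1e-4)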
[docs]@function_api def batch_cholesky(ctx, x, upper=False, n_outputs=-1, outputs=None): r""" Batch-wise Cholesky decomposition of symmetric positive definite matrix. The gradient of this function will be a symmetric matrix. This function does not check whether the given matrix is symmetric positive definite or not. Args: x(~nnabla.Variable): batched N-D array upper(bool): If true, will return an upper triangular matrix. Otherwise will return a lower triangular matrix. [default= `False` ] Returns: ~nnabla.Variable: batched N-D array of lower/upper triangular matrix. """ return F.BatchCholesky(ctx, upper)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def assign(ctx, dst, src, n_outputs=-1, outputs=None): r""" Assign source array to destination array just like `tf.assign`. This is useful to synchronize or manually update parameters. .. code-block:: python dst = nn.Variable((2, 3, 4)) src = nn.Variable((2, 3, 4)) assign = F.assign(dst, src) assign.forward() assert np.allclose(dst.d, src.d) # dst and src have identical values. assert np.allclose(assign.d, dst.d) # returned Variable is also identical to dst. Unlike TensorFlow, the returned Variable has a backward path to `dst`: .. math:: g_{dst} = g_{y} Args: dst(~nnabla.Variable): A destination N-D array src(~nnabla.Variable): A source N-D array Returns: ~nnabla.Variable: An assigned array """ return F.Assign(ctx)(dst, src, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def gather(ctx, x, Indices, axis=None, batch_dims=None, n_outputs=-1, outputs=None): r"""Gather from the input data according to the index. Given the input data :math:`X` of :math:`(D_{0}, \ldots, D_{N-1})` shape and the indices :math:`IDX` of :math:`(I_{0}, \ldots, I_{M-1})` shape, in case of `batch_dims = 0`, the gather outputs .. math:: && Y[d_{0}, \ldots, d_{axis - 1}, i_{0}, \ldots, i_{M-1}, d_{axis + 1}, \ldots, d_{N-1}] = \\ && X[d_{0}, \ldots, d_{axis - 1}, IDX[i_{0}, \ldots, i_{M-1}], d_{axis + 1}, \ldots, d_{N-1}]. Generally, the gather outputs .. math:: && Y[d_{0}, \ldots, d_{axis - 1}, i_{B}, \ldots, i_{M-1}, d_{axis + 1}, \ldots, d_{N-1}] = \\ && X[d_{0}, \ldots, d_{axis - 1}, IDX[i_{0}, \ldots, i_{B - 1}, i_{B}, \ldots, i_{M-1}], d_{axis + 1}, \ldots, d_{N-1}]. where :math:`B` = `batch_dims`. `x.shape[:batch_dims]` must be equal to `indices.shape[:batch_dims]`. Output shape is `x.shape[:axis] + indices.shape[batch_dims:] + x.shape[axis + 1:]`. Args: x(~nnabla.Variable): Data from which to gather. Indices(~nnabla.Variable): Index with which to gather. axis(int): Axis in `x` to gather from. `axis` must be greater than or equal to `batch_dims`. [default= `0` ] batch_dims(int): The number of batch dimensions. [default= `0` ] Returns: ~nnabla.Variable: Gathered output. """ if axis is None: axis = 0 if batch_dims is None: batch_dims = 0 return F.Gather(ctx, axis, batch_dims)(x, Indices, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
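A minimal sketch of `gather` with `batch_dims=0` (illustrative); in this case the result is expected to match `np.take` along the given axis:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
idx = nn.Variable.from_numpy_array(np.array([2, 0]))
y = F.gather(x, idx, axis=1)  # pick rows 2 and 0 along axis 1
print(y.shape)  # (2, 2, 4)
assert np.allclose(y.d, np.take(x.d, [2, 0], axis=1))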
@function_api def gather_nd(ctx, x, indices, n_outputs=-1, outputs=None): r""" Gather elements or slices from `data` according to `indices`, which must be at least two-dimensional with the first dimension :math:`M` being less than or equal to the :math:`N` dimensions of `data`. Given `data` with shape :math:`(X_0, X_1, ..., X_{N-1})` and indices with shape :math:`(M, Y_0, ..., Y_{K-1})` output has shape :math:`(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`. If :math:`M == N`, output shape is simply :math:`(Y_0, ..., Y_{K-1})`. The forward of :func:`~nnabla.functions.gather_nd` is equivalent to: .. code-block:: python def gather_nd(data, index): import numpy as np tmp_index = index.reshape(index.shape[0], -1) tmp_index = (idx + (Ellipsis,) for idx in zip(*tmp_index)) out_shape = index.shape[1:] + data.shape[index.shape[0]:] return np.vstack([data[idx] for idx in tmp_index]).reshape(*out_shape) Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> data = F.arange(1, 11).reshape([2, 5]) >>> print(data.d) [[ 1. 2. 3. 4. 5.] [ 6. 7. 8. 9. 10.]] >>> F.gather_nd(data, [[1, 1, 0]]).shape (3, 5) >>> F.gather_nd(data, [[1, 1, 0], [0, 1, 0]]).shape (3,) >>> print(F.gather_nd(data, [[1, 1, 0], [0, 1, 0]]).d) [6. 7. 1.] >>> print(F.gather_nd(data, [[1, 1, 0]]).d) [[ 6. 7. 8. 9. 10.] [ 6. 7. 8. 9. 10.] [ 1. 2. 3. 4. 5.]] When `indices` is provided as a :obj:`~nnabla.Variable` it will be possible to change the actual index values after function creation. It is important to note that out-of-bound indices raise an error when running on CPU but are ignored when using an accelerated computation context. >>> indices = nn.Variable((2, 1)) >>> indices.d = [[0], [0]] >>> y = F.gather_nd(data, indices) >>> print(y.d) [1.] >>> indices.d = [[1], [4]] >>> y.forward() >>> print(y.d) [10.] Args: x(~nnabla.Variable): N-D array input data indices(~nnabla.Variable): N-D array indices Returns: ~nnabla.Variable: N-D array """ return F.GatherNd(ctx)(x, indices, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_gather(ctx, input, mask, n_outputs=-1, outputs=None): r"""Gather from the input data according to the mask. Given an input of :math:`(B_1, \ldots, B_N, D_1, \ldots, D_M)` shape and mask of :math:`(B_1, \ldots, B_N)` shape, the function returns an output of :math:`(nnz, D_1, \ldots, D_M)` shape and :math:`nnz` is the number of non-zero elements in mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input = nn.Variable.from_numpy_array([[1, 2], [3, 4], [5, 6]]) mask = nn.Variable.from_numpy_array([1, 0, 1]) output = F.bool_gather(input, mask) print(output.d) # [[1, 2], [5, 6]] Note that this function is normally used with the dynamic graph since this function outputs a variable-length output. If used with the static graph, a network has to be constructed all time in iteration. Args: input(~nnabla.Variable): Data from which to gather. mask(~nnabla.Variable): Mask with which to gather. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. Returns: ~nnabla.Variable: Gathered output. """ return F.BoolGather(ctx)(input, mask, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def scatter_nd(ctx, data, indices, out=None, shape=None, add=False, n_outputs=-1, outputs=None): r""" Scatter `data` into a new array of given `shape` according to `indices`. This operation is the inverse of :func:`~nnabla.functions.gather_nd`. The forward of :func:`~nnabla.functions.scatter_nd` is equivalent to: .. code-block:: python def scatter_nd(data, indices, shape): import numpy as np if isinstance(indices, np.ndarray): indices = indices.tolist() result = np.zeros(shape, dtype=data.dtype) result[tuple(indices)] = data return result Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> data = nn.Variable.from_numpy_array(np.array([9, 10, 11, 12])) >>> indices = nn.Variable.from_numpy_array(np.array([[4, 3, 1, 7]])) >>> scattered = F.scatter_nd(data, indices, shape=(8,)) >>> print(scattered.d) [ 0. 11. 0. 10. 9. 0. 0. 12.] >>> print(F.gather_nd(scattered, indices).d) [ 9. 10. 11. 12.] Args: data(~nnabla.Variable): N-D array input data. indices(~nnabla.Variable): N-D array scatter indices. out(~nnabla.Variable): existing output array [optional] shape(repeated int64): Shape of output variable. [default= `None` ] add(bool): Add the input data to the same destination specified by the indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array of given `shape`. """ inputs = [data, indices] if out is not None: inputs += [out] return F.ScatterNd(ctx, shape, add)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def scatter_add(ctx, x0, indices, x1, axis=None, n_outputs=-1, outputs=None): r""" Add all values from `x1` into `x0` according to the index specified by `indices`. This function adds `x1` into the copy of `x0` and outputs the copy. The original `x0` will not be changed. `x0`, `indices` and `x1` must have the same number of dimensions. The forward of :func:`~nnabla.functions.scatter_add` is equivalent to: .. code-block:: python def scatter_add(x0, indices, x1, axis): # Assuming each input is 3 dimensional import numpy as np output = np.copy(x0) for i in range(indices.shape[0]): for j in range(indices.shape[1]): for k in range(indices.shape[2]): if axis == 0: output[indices[i][j][k]][j][k] += x1[i][j][k] elif axis == 1: output[i][indices[i][j][k]][k] += x1[i][j][k] elif axis == 2: output[i][j][indices[i][j][k]] += x1[i][j][k] return output Args: x0(~nnabla.Variable): N-D array which the data is added to its copy. indices(~nnabla.Variable): N-D array scatter indices. The size of each dimension must be equal or smaller than that of x0 except for the specified axis. The value of indices must be smaller than the size of specified axis' dimension of x0. The size of each dimension must be equal or smaller than that of x1. Indices must not be negative. x1(~nnabla.Variable): N-D array which is scattered and added to x0. axis(int): Axis along which to index. The axis must not exceed the inputs' dimension. [default= `0` ] Returns: ~nnabla.Variable: N-D array which contains the result of scatter addition. The shape is same as x0. """ if axis is None: axis = 0 return F.ScatterAdd(ctx, axis)(x0, indices, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_scatter(ctx, input, mask, output=None, n_outputs=-1, outputs=None): r"""Scatter the `input` according to the `mask`. Given an input of :math:`(nnz, D_1, \ldots, D_M)` shape and mask of :math:`(B_1, \ldots, B_N)` shape, the function returns an output of :math:`(B_1, \ldots, B_N, D_1, \ldots, D_M)` shape, where :math:`nnz` is the number of non-zero elements in the mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input0 = nn.Variable.from_numpy_array([[1, 2], [3, 4], [5, 6]]) mask = nn.Variable.from_numpy_array([1, 0, 1]) output0 = F.bool_gather(input0, mask) input1 = output0 + 10 output1 = F.bool_scatter(input1, mask) print(output1.d) # [[11, 12], [0, 0], [15, 16]] Note that the higher-order gradients of this function rely on F.gather; thus, they are normally used with the dynamic graph. Args: input(~nnabla.Variable): Data to be scattered. mask(~nnabla.Variable): Mask with which to scatter. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. output(~nnabla.Variable): Destination of output. If specified, data are inplaced. [optional] Returns: ~nnabla.Variable: Scattered output. """ inputs = [input, mask] if output is not None: inputs += [output] return F.BoolScatter(ctx)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_fill(ctx, data, mask, value=0, n_outputs=-1, outputs=None): r"""Fill the data with the value to according to the mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input = nn.Variable.from_numpy_array([[np.inf, 2], [3, np.nan]]) mask = nn.Variable.from_numpy_array([[1, 0], [0, 1]]) output = F.bool_fill(input, mask, -1) print(output.d) # [[-1, 2], [3, -1]] Args: data(~nnabla.Variable): Data to be filled. mask(~nnabla.Variable): Mask with which to fill. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. value(float): Value to fill. [default= `0` ] Returns: ~nnabla.Variable: Filled output. """ return F.BoolFill(ctx, value)(data, mask, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pack_padded_sequence(ctx, padded_sequence, lengths, batch_first=False, n_outputs=-1, outputs=None): r"""Pack padded variable-length sequences. This method packs padded variable-length sequences. :math:`T_i` is the length of the :math:`i`-th Variable in the sequences. :math:`B` is the batch size equal to the length of the sequences. :math:`T` is the max of :math:`T_i` for all :math:`i`. :math:`*` is the remaining dimensions including none. .. note:: This function assumes the length-sorted padded sequence in the decreasing order and must be used by :func:`~nnabla.utils.rnn.pack_padded_sequence` in the dynamic computation mode. Args: padded_sequence(~nnabla.Variable): Padded sequence of (:math:`T \times B \times *`) or (:math:`B \times T \times *`) shape. lengths(~nnabla.Variable): Sequence length for each batch and always resides in CPU. batch_first(bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False, otherwise (:math:`B`, :math:`T`, :math:`*`). [default= `False` ] Returns: ~nnabla.Variable: Packed sequence of (:math:`N`, :math:`*`) shape. ~nnabla.Variable: Batch size for each time and always resides in CPU. """ return F.PackPaddedSequence(ctx, batch_first)(padded_sequence, lengths, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pad_packed_sequence(ctx, packed_sequence, batch_sizes, batch_first=False, padding_value=None, total_length=None, n_outputs=-1, outputs=None): r"""Pad packed sequence. This method unpacks the packed sequence and pads it; this is the inverse operation of :func:`pack_padded_sequence`. :math:`T_i` is the length of the :math:`i`-th Variable in the sequences. :math:`B` is the batch size equal to the length of the sequences. :math:`T` is the max of :math:`T_i` for all :math:`i`. :math:`*` is the remaining dimensions including none. .. note:: This function assumes the output of the length-sorted padded sequence in the decreasing order and must be used by :func:`~nnabla.utils.rnn.pad_packed_sequence` in the dynamic computation mode. Args: packed_sequence(~nnabla.Variable): Packed sequence of (:math:`N`, :math:`*`) shape. batch_sizes(~nnabla.Variable): Batch size for each time and always resides in CPU. batch_first(bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False, otherwise (:math:`B`, :math:`T`, :math:`*`). [default= `False` ] padding_value(float): Padding value. [default= `0.0` ] total_length(int): If not None, the outputs are padded up to the `total_length`. If the `total_length` is less than the max length in the `sequences`, an error is thrown. [default= `-1` ] Returns: ~nnabla.Variable: Padded sequence of (:math:`T \times B \times *`) or (:math:`B \times T \times *`) shape. ~nnabla.Variable: Sequence length for each batch and always resides in CPU. """ if padding_value is None: padding_value = 0.0 if total_length is None: total_length = -1 return F.PadPackedSequence(ctx, batch_first, padding_value, total_length)(packed_sequence, batch_sizes, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def nonzero(ctx, x, n_outputs=-1, outputs=None): r""" Find indices of non-zero elements. NonZero behaves similar to NonZero Operator in ONNX. Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> x = F.arange(1, 10).reshape([3, 3]) >>> x.d[0, 1] = x.d[1, 2] = x.d[2, 2] = 0 >>> print(x.d) [[1. 0. 3.], [4. 5. 0.], [7. 8. 0.]] >>> y = F.nonzero(x) >>> print(y.shape) (2, 6) >>> print(y.d) [[0 0 1 1 2 2], [0 2 0 1 0 1]] Note that this function is normally used with the dynamic graph since this function outputs a variable-length output. If used with the static graph, a network has to be constructed all time in iteration. Args: x(~nnabla.Variable): N-D arrays. Returns: ~nnabla.Variable: N-D array indices. """ return F.NonZero(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def interpolate(ctx, x, output_size, mode, align_corners=True, half_pixel=False, half_pixel_for_nn=False, channel_last=False, n_outputs=-1, outputs=None): r""" Resize an ND array with interpolation. The last ``len(output_size)`` dimensions of the input ``x`` are considered as the spatial dimensions to be resized. Args: x(~nnabla.Variable): N-D array. output_size(repeated int64): Output size. mode(string): Interpolation mode chosen from ('nearest'|'linear'). align_corners(bool): If true, the corner pixels of input and output arrays are aligned, such that the output corner pixels have the same values with the input corner pixels. The default is ``None``, and it becomes `True` if mode is 'linear', otherwise `False`. [default= `True` ] half_pixel(bool): If true, in the coordinate transformation, 0.5 is added to the output coordinate and 0.5 is subtracted from the input coordinate after scaling. [default= `False` ] half_pixel_for_nn(bool): This is a special argument to support the backward-compatibility of the nearest neighbor interpolation. Default is `False`. When in ``True``, the implementation of nearest neighbor interpolation is the old one. [default= `False` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] Returns: ~nnabla.Variable: N-D array. """ return F.Interpolate(ctx, output_size, mode, align_corners, half_pixel, half_pixel_for_nn, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def onnx_resize(ctx, x, roi=(), scales=(), sizes=(), mode='nearest', coordinate_transformation_mode='half_pixel', cubic_coeff_a=None, exclude_outside=None, extrapolation_value=None, nearest_mode='round_prefer_floor', n_outputs=-1, outputs=None): r""" Resize an ND array with interpolation. This function provides a compatible interface to ONNX Resize. References: * `ONNX Operators documentation. <https://github.com/onnx/onnx/blob/main/docs/Operators.md>` Args: x(~nnabla.Variable): N-D array. roi(repeated float): RoIs for tf_crop_and_resize. [default= `()` ] scales(repeated float): Scale factors along axes. [default= `()` ] sizes(repeated int64): Output size. [default= `()` ] mode(string): Interpolation mode chosen from ('nearest'|'linear'|'cubic'). [default= `'nearest'` ] coordinate_transformation_mode(string): How to transform the coordinate in the resized tensor to the coordinate in the original tensor. This mode is chosen from ('half_pixel'|'pytorch_half_pixel'|'align_corners'|'asymmetric'|'tf_crop_and_resize'). [default= `'half_pixel'` ] cubic_coeff_a(float): The coefficient used in cubic interpolation. 
[default= `-0.75` ] exclude_outside(int): Whether to set coefficients to zero when sampling locations is outside the input tensor. [default= `0` ] extrapolation_value(float): An extrapolation value used when a sampling location is outside the input tensor at tf_crop_and_resize mode. [default= `0.0` ] nearest_mode(string): Rounding mode for nearest-neighbor interpolation. [default= `'round_prefer_floor'` ] Returns: ~nnabla.Variable: N-D array. """ if cubic_coeff_a is None: cubic_coeff_a = -0.75 if exclude_outside is None: exclude_outside = 0 if extrapolation_value is None: extrapolation_value = 0.0 return F.ONNXResize(ctx, roi, scales, sizes, mode, coordinate_transformation_mode, cubic_coeff_a, exclude_outside, extrapolation_value, nearest_mode)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def fft(ctx, x, signal_ndim, normalized=False, n_outputs=-1, outputs=None): r""" Complex-to-complex Discrete Fourier Transform, .. math:: X_{k_1, \ldots, k_d} = \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x_{n_1, \ldots, n_d} \exp\left(-2 \pi j \left( \sum_{i=0}^{d} \frac{k_i n_i}{N_i} \right) \right), where .. math:: k_i = 0, \ldots, N_i - 1. This function now supports 1-D, 2-D, and 3-D DFT with or without the leading batch dimension(s). The input is expected to be complex-valued with at least signal_ndim + 1 dimensions. The last dimension has a shape of two where x[..., 0] is the real part and x[..., 1] the imaginary part. Example: .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F from nnabla.ext_utils import get_extension_context ctx = get_extension_context("cudnn") nn.set_default_context(ctx) # Example for a batched 2D-FFT and 2D-IFFT (batch-size: 2, data-size: 4x3) x_data = np.random.rand(2, 4, 3) + 1j * np.random.rand(2, 4, 3) x = nn.Variable.from_numpy_array(np.stack([np.real(x_data), np.imag(x_data)], axis=3)) y = F.fft(x, signal_ndim=2, normalized=True) z = F.ifft(y, signal_ndim=2, normalized=True) z.forward() np.allclose(z.d[..., 0] + 1j*z.d[...,1], x_data) Args: x(~nnabla.Variable): Input. signal_ndim(int): The number of dimensions for each signal. It must be 1, 2, or 3. normalized(bool): Use unitary normalization. If `True`, the normalization constant :math:`\sqrt{\frac{1}{\prod_{i=1}^{d} N_i}}` is multiplied. [default= `False` ] Returns: ~nnabla.Variable: FFT transformed signal. """ return F.FFT(ctx, signal_ndim, normalized)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def ifft(ctx, x, signal_ndim, normalized=False, n_outputs=-1, outputs=None): r""" Complex-to-complex inverse Discrete Fourier Transform, .. math:: X_{k_1, \ldots, k_d} = \frac{1}{\prod_{i=1}^{d} N_i} \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x_{n_1, \ldots, n_d} \exp\left(2 \pi j \left( \sum_{i=0}^{d} \frac{k_i n_i}{N_i} \right) \right), where .. math:: k_i = 0, \ldots, N_i - 1. This function now supports 1-D, 2-D, and 3-D DFT with or without the leading batch dimension(s). The input is expected to be complex-valued with at least signal_ndim + 1 dimensions. The last dimension has a shape of two where x[..., 0] is the real part and x[..., 1] the imaginary part. Args: x(~nnabla.Variable): Input. signal_ndim(int): The number of dimensions for each signal. It must be 1, 2, or 3. normalized(bool): Use unitary normalization. If `True`, the normalization constant :math:`\frac{1}{\prod_{i=1}^{d} N_i}` becomes :math:`\sqrt{\frac{1}{\prod_{i=1}^{d} N_i}}`. [default= `False` ] Returns: ~nnabla.Variable: IFFT transformed signal. """ return F.IFFT(ctx, signal_ndim, normalized)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def stft(ctx, x, window_size, stride, fft_size, window_type='hanning', center=True, pad_mode='reflect', as_istft_backward=False, n_outputs=-1, outputs=None): r""" Short-time Fourier transform. Args: x(~nnabla.Variable): Time domain sequence of size `batch_size x sample_size`. window_size(int): Size of STFT analysis window. stride(int): Number of samples that we shift the window, also called `hop size`. fft_size(int): Size of the FFT, the output will have `fft_size // 2+ 1` frequency bins. window_type(string): Analysis window, can be either `hanning`, `hamming` or `rectangular`. [default= `'hanning'` ] center(bool): If `True`, then the signal `x` is padded by half the FFT size using reflection padding. [default= `True` ] pad_mode(string): Padding mode, which can be `'constant'` or `'reflect'`. `'constant'` pads with `0`. [default= `'reflect'` ] as_istft_backward(bool): If `True`, then forward execution behaves as backward execution of ISTFT, treating input `x` as output gradient of ISTFT and outputs `y_r` and `y_i` as inputs gradient of ISTFT. This option is only used in nn.grad operator. [default= `False` ] Returns: ~nnabla.Variable: Real part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. ~nnabla.Variable: Imaginary part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. """ return F.STFT(ctx, window_size, stride, fft_size, window_type, center, pad_mode, as_istft_backward)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def istft(ctx, y_r, y_i, window_size, stride, fft_size, window_type='hanning', center=True, pad_mode='reflect', as_stft_backward=False, n_outputs=-1, outputs=None): r""" Inverse short-time Fourier transform. .. note:: We use a constant square inverse window for the reconstruction of the time-domain signal, therefore, the first and last `window_size - stride` are not perfectly reconstructed. Args: y_r(~nnabla.Variable): Real part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. y_i(~nnabla.Variable): Imaginary part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. window_size(int): Size of STFT analysis window. stride(int): Number of samples that we shift the window, also called `hop size`. fft_size(int): Size of the FFT, the output will have `fft_size // 2+ 1` frequency bins. window_type(string): Analysis window, can be either `hanning`, `hamming` or `rectangular`. [default= `'hanning'` ] center(bool): If `True`, then the signal `x` is padded by half the FFT size using reflection padding. [default= `True` ] pad_mode(string): Padding mode corresponding to STFT `pad_mode`, which can be `'constant'` or `'reflect'`. `'constant'` pads with `0`. This option is ignored for the normal use of ISTFT. You need to set the same `pad_mode` only when `as_stft_backward == True`. [default= `'reflect'` ] as_stft_backward(bool): If `True`, then forward execution behaves as backward execution of STFT, treating inputs `y_r` and `y_i` as outputs gradient of STFT and output `x` as input gradient of STFT. This option is only used in nn.grad operator. [default= `False` ] Returns: ~nnabla.Variable: Time domain sequence of size `batch_size x sample_size`. """ return F.ISTFT(ctx, window_size, stride, fft_size, window_type, center, pad_mode, as_stft_backward)(y_r, y_i, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def dropout(ctx, x, p=0.5, seed=-1, n_outputs=-1, outputs=None): r""" Dropout. Samples a number :math:`u` from a uniform distribution in :math:`[0, 1]` , and ignores the input if :math:`u \leq p`. .. math:: y = \left\{ \begin{array}{ll} \frac{x}{1 - p} & (u > p) \\ 0 & ({\rm otherwise}) \end{array} \right. Note: Usually dropout is only applied during training as below (except `MC dropout`_). If you want to use dropout as an MC dropout, remove 'if train:'. .. code-block:: python h = PF.affine(x, num_hidden) if train: h = F.dropout(h, 0.5) .. _MC dropout: https://arxiv.org/abs/1506.02142 Args: x(~nnabla.Variable): N-D array p(float): :math:`p` in definition. [default= `0.5` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Dropout(ctx, p, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def top_k_data(ctx, x, k, abs=False, reduce=True, base_axis=1, largest=True, with_index=False, n_outputs=-1, outputs=None): r""" Select the `k` largest values from each sample in `x` to propagate unmodified and set all other values to 0. If `abs` is True, the `k` largest values are selected by magnitude. If `reduce` is True (the default), all feature dimensions are reduced to a single dimension of size `k` that propagates only the `k` largest values. Otherwise, if `reduce` is False, input and output dimensions are identical. Dimensions before `base_axis` are treated as number of sample dimensions and `k` values get selected from all elements of a sample (dimensions from `base_axis`) regardless of shape. >>> import nnabla as nn, nnabla.functions as F >>> x = nn.Variable((4, 5, 6)) >>> F.top_k_data(x, 3, reduce=False).shape (4, 5, 6) >>> F.top_k_data(x, 3, reduce=True).shape (4, 3) >>> F.top_k_data(x, 3, reduce=True, base_axis=2).shape (4, 5, 3) Args: x(~nnabla.Variable): N-D array k(int): Number of largest data values to propagate. abs(bool): Determine largest data values by magnitude. [default= `False` ] reduce(bool): Reduce feature size to one dimension of size `k`. [default= `True` ] base_axis(int): First dimension of the sample shape. [default= `1` ] largest(bool): Whether to select the `k` largest or smallest values. [default= `True` ] with_index(bool): Return top-k values and indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array. ~nnabla.Variable: N-D array of top-k indices. """ return F.TopKData(ctx, k, abs, reduce, base_axis, largest, with_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def top_k_grad(ctx, x, k, abs=False, base_axis=1, n_outputs=-1, outputs=None): r""" Select the `k` largest gradients for each sample in `x` to back-propagate unmodified and set all other gradients to 0. If `abs` is True, the `k` largest gradients are selected by magnitude. Dimensions before `base_axis` are treated as number of sample dimensions and `k` gradients get selected from all gradients of a sample (dimensions from `base_axis`) regardless of shape. Args: x(~nnabla.Variable): N-D array k(int): Number of largest gradients to propagate. abs(bool): Determine largest gradients by magnitude. [default= `False` ] base_axis(int): First dimension of the sample shape. [default= `1` ] Returns: ~nnabla.Variable: N-D array with same shape and data as `x`. """ return F.TopKGrad(ctx, k, abs, base_axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand(ctx, low=0, high=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a uniform distribution :math:`x \sim U(low, high)` given lowest value :math:`low`, upper bound :math:`high`, and shape of the returned Variable. Args: low(float): :math:`low` in definition. [default= `0` ] high(float): :math:`high` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.Rand(ctx, low, high, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def randint(ctx, low=0, high=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples integer numbers from a uniform distribution :math:`x \sim U(low, high)` given lowest value :math:`low`, upper bound :math:`high`, and the shape of the returned Variable. The lowest value :math:`low` is included in the range, while the upper bound :math:`high` is excluded, corresponding to the half-open interval :math:`[low, high)`. Args: low(int): :math:`low` in definition. [default= `0` ] high(int): :math:`high` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. The dtype is int32. """ return F.Randint(ctx, low, high, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
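A minimal sketch of `randint` (illustrative); sampled values fall in the half-open interval [low, high):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
y = F.randint(low=0, high=10, shape=(2, 3), seed=313)
print(y.shape)  # (2, 3)
assert np.all((y.d >= 0) & (y.d < 10))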
[docs]@function_api def randn(ctx, mu=0, sigma=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a normal distribution :math:`x \sim N(\mu, \sigma)` given mean :math:`\mu`, standard deviation :math:`\sigma`, and shape of the returned Variable. Args: mu(float): :math:`\mu` in definition. [default= `0` ] sigma(float): :math:`\sigma` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.Randn(ctx, mu, sigma, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_binomial(ctx, n=1, p=0.5, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a binomial distribution :math:`x \sim B(n, p)` given the number of trials :math:`n`, probability :math:`p`, and shape of the returned Variable. When :math:`n = 1`, this behaves like the Bernoulli distribution. Args: n(int): :math:`n` in definition, the number of trials. [default= `1` ] p(float): :math:`p` in definition, probability of success. [default= `0.5` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandBinomial(ctx, n, p, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_beta(ctx, alpha=0.5, beta=0.5, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a beta distribution :math:`x \sim \beta(\alpha, \beta)`. Args: alpha(float): :math:`\alpha`, scale parameter. [default= `0.5` ] beta(float): :math:`\beta`, scale parameter. [default= `0.5` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandBeta(ctx, alpha, beta, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_gamma(ctx, k=0.5, theta=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a gamma distribution :math:`x \sim \frac {\gamma(k, \frac {x}{\theta})}{\Gamma(k)}`. Args: k(float): k, scale parameter. [default= `0.5` ] theta(float): :math:`\theta`, scale parameter. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandGamma(ctx, k, theta, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_choice(ctx, x, w, shape=[], replace=True, seed=-1, n_outputs=-1, outputs=None): r""" Generate random samples from population `x` with selection probabilities determined by the relative weights `w`. The number of samples to draw is given by the product of `shape`\s dimensions, and the samples are returned with the given `shape`. By default, samples are drawn with replacement, i.e. selection of a specific population member is solely determined by its associated weight. Sampling without replacement, where any population member may be drawn only once, is used if `replace` is set to False. For both `x` and `w` the innermost dimension corresponds to the individual populations and their weights from which samples are returned with the requested `shape` following all outermost dimensions of the input. .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np nn.set_auto_forward(True) # x holds two populations x = nn.Variable.from_numpy_array(np.array([[11, 22, 33], [110, 220, 330]])) # w holds the weights for each population w = nn.Variable.from_numpy_array(np.array([[10, 20, 70], [70, 20, 10]])) # draw one sample from each population y = F.random_choice(x, w) # y.shape => (2, 1) # draw 12 samples with shape (3, 4) from each population y = F.random_choice(x, w, shape=(3, 4)) # y.shape => (2, 3, 4) Note that weights must not be less than zero and for each population the sum of weights must be greater than zero. Additionally, sampling without replacement requires that the number of non-zero weights is not less than the number of samples to be drawn. These conditions are verified in "cpu" computation context but not when using "cuda" or "cudnn" acceleration (this would require additional device synchronization steps penalizing performance). Random sampling from an implicit array of index values (like categorical or multinomial) can be realized with input `x` constructed as indices. .. code-block:: python w = nn.Variable.from_numpy_array(np.array([1, 2, 3, 2, 1])) y = F.random_choice(F.arange(0, 5), w) Args: x(~nnabla.Variable): N-D array from which a random sample is generated. w(~nnabla.Variable): N-D array of associated weights of elements in `x`. shape(:obj:`tuple` of :obj:`int`): Number and shape of generated samples. [default= `[]` ] replace(bool): Whether sampling is with or without replacement. [default= `True` ] seed(int): Random seed. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ return F.RandomChoice(ctx, shape, replace, seed)(x, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_crop(ctx, x, shape=None, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" RandomCrop randomly extracts a portion of an array. Args: x(~nnabla.Variable): N-D array shape(:obj:`tuple` of :obj:`int`): The data size to extract. For example, to randomly extract a portion of the image (3,48,48) from a 3,64,64 image, specify (3,48,48). [default= `x.shape` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ if shape is None: shape = x.shape return F.RandomCrop(ctx, shape, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_flip(ctx, x, axes=None, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" Reverses the order of elements of the specified dimension of an array at 50% probability. Args: x(~nnabla.Variable): N-D array axes(repeated int64): The index of the axis to reverse the order of the elements. Axis indices take on values 0, 1, 2, and so on from the left. For example, to flip a 32 (W) by 24 (H) 100 RGB images (100, 3,24,32) vertically and horizontally at random, specify (2,3). [default= `[len(x.shape) - 1]` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = [len(x.shape) - 1] return F.RandomFlip(ctx, axes, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_shift(ctx, x, shifts=None, border_mode='nearest', constant_value=0, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" Randomly shifts the array elements within the specified range. Args: x(~nnabla.Variable): N-D array. shifts(repeated int64): Max absolute amount to shift elements. For example, to shift image data horizontally by :math:`\pm 2` pixels and vertically by :math:`\pm 3` pixels, specify (3,2). [default= `(0,) * len(x.shape)` ] border_mode(string): Specify how to process the ends of arrays whose values will be undetermined as a result of shifting. nearest: The data at the ends of the original array is copied and used. reflect: Original data reflected at the ends of the original array is used. constant: Constant value is used. [default= `'nearest'` ] constant_value(float): Value used for outside of the original array if border_mode='constant'. [default= `0` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array. """ if shifts is None: shifts = (0,) * len(x.shape) return F.RandomShift(ctx, shifts, border_mode, constant_value, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_erase(ctx, x, prob=0.5, area_ratios=(0.02, 0.4), aspect_ratios=(0.3, 3.3333), replacements=(0.0, 255.0), n=None, share=True, inplace=False, base_axis=1, seed=-1, channel_last=False, ste_fine_grained=True, n_outputs=-1, outputs=None): r"""Randomly erase patches of the inputs and replace with random values. Erasing is applied for each sample and for each `n` with the given probability, the randomly selected area ratio and aspect ratio if `share` is `True`; otherwise (`share`=`False`), for each feature additionally. Random patch are selected by random coordinates as the following, .. math:: S_e &&= Uniform(s_l, s_h) \times S \\ r_e &&= Uniform(r_l, r_h) \\ H_e &&= \sqrt{S_e \times r_e} \\ W_e &&= \sqrt{S_e / r_e} \\ y_e &&= Uniform(0, H - H_e) \\ x_e &&= Uniform(0, W - W_e), where :math:`S` is the area, :math:`s_l` and :math:`s_h` are the low and high values of the area ratio range, :math:`r_l` and :math:`r_h` are the low and high values of the aspect ratio range, :math:`H_e` and :math:`W_e` are height and width of a patch, and :math:`y_e` and :math:`x_e` are the start coordinates of a patch. If a pixel of the inputs falls in this patch, the value of that pixel is replaced with a random value in `replacements` range. Backward is implemented as passing gradients if `ste_fine_grained` is False; otherwise, the backward only occurs in regions not erased. References: * `Zhun Zhong, Liang Zheng, Guoliang Kang, Shaozi Li, Yi Yang, Random Erasing Data Augmentation, <https://arxiv.org/abs/1708.04896>`_ Args: x(~nnabla.Variable): N-D array. prob(float): Probability to erase. [default= `0.5` ] area_ratios(repeated float): Low and high of the area ratio range. [default= `(0.02, 0.4)` ] aspect_ratios(repeated float): Low and high of the aspect ratios range. [default= `(0.3, 3.3333)` ] replacements(repeated float): Low and high of the replacement value range. [default= `(0.0, 255.0)` ] n(int): Max number of patches to be erased. [default= `1` ] share(bool): Use a same bounding box randomly picked over the feature dimension when being True. Default is True. [default= `True` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. Default is True. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ if n is None: n = 1 return F.RandomErase(ctx, prob, area_ratios, aspect_ratios, replacements, n, share, inplace, base_axis, seed, channel_last, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def image_augmentation(ctx, x, shape=None, pad=(0, 0), min_scale=1.0, max_scale=1.0, angle=0.0, aspect_ratio=1.0, distortion=0.0, flip_lr=False, flip_ud=False, brightness=0.0, brightness_each=False, contrast=1.0, contrast_center=0.0, contrast_each=False, noise=0.0, seed=-1, n_outputs=-1, outputs=None): r""" ImageAugmentation randomly alters the input image. Args: x(~nnabla.Variable): N-D array. shape(:obj:`tuple` of :obj:`int`): The output image data size. [default= `x.shape` ] pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0, 0)` ] min_scale(float): The minimum scale ratio when randomly scaling the image. For example, to scale down to 0.8 times the size of the original image, specify "0.8". To not apply random scaling, set both min_scale and max_scale to "1.0". [default= `1.0` ] max_scale(float): The maximum scale ratio when randomly scaling the image. For example, to scale down to 2 times the size of the original image, specify "2.0". [default= `1.0` ] angle(float): The rotation angle range in radians when randomly rotating the image. The image is randomly rotated in the -Angle to +Angle range. For example, to rotate in a +-15 degree range, specify "0.26" (15 degrees/360 degrees * 2PI). To not apply random rotation, specify "0.0". [default= `0.0` ] aspect_ratio(float): The aspect ratio range when randomly deforming the image. For example, to deform aspect ratio of image from 1:1.3 to 1.3:1, specify "1.3". To not apply random deforming, specify "1.0". [default= `1.0` ] distortion(float): The distortion range when randomly distorting the image. To not apply distortion, specify "0.0". [default= `0.0` ] flip_lr(bool): Whether to randomly flip the image horizontally at 50% probability. [default= `False` ] flip_ud(bool): Whether to randomly flip the image vertically at 50% probability. [default= `False` ] brightness(float): The absolute range of values to randomly add to the brightness. A random value in the -Brightness to +Brightness range is added to the brightness. For example, to vary the brightness in the -0.05 to +0.05 range, specify "0.05". To not apply random addition to brightness, specify "0.0". [default= `0.0` ] brightness_each(bool): Whether to apply the random addition to brightness (as specified by brightness) to each color channel. True: brightness is added based on a different random number for each channel. False: brightness is added based on a random number common to all channels. [default= `False` ] contrast(float): The range in which to randomly vary the image contrast. The contrast is varied in the 1/Contrast times to Contrast times range. The output brightness is equal to (input - contrast_center) * contrast + contrast_center. For example, to vary the contrast in the 0.91 times to 1.1 times range, specify "1.1". To not apply random contrast variation, specify "1.0". [default= `1.0` ] contrast_center(float): Intensity center used for applying contrast. [default= `0.0` ] contrast_each(bool): Whether to apply the random contrast variation (as specified by contrast) to each color channel. True: contrast is varied based on a different random number for each channel. False: contrast is varied based on a random number common to all channels. [default= `False` ] noise(float): Sigma of normal random number to be added. [default= `0.0` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. 
[default= `-1` ] Returns: ~nnabla.Variable: N-D array. """ if shape is None: shape = x.shape return F.ImageAugmentation(ctx, shape, pad, min_scale, max_scale, angle, aspect_ratio, distortion, flip_lr, flip_ud, brightness, brightness_each, contrast, contrast_center, contrast_each, noise, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sigmoid_cross_entropy(ctx, x, target, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target variables, passed to a sigmoid function. .. math:: y_i = - \left(x^{(1)}_i \ln \left(\sigma \left(x^{(0)}_i \right)\right) + \ \left(1 - x^{(1)}_i\right) \ln \left(1 - \sigma \left(x^{(0)}_i \ \right)\right)\right) where :math:`\sigma(s)=\frac{1}{1+\exp(-s)}`. Note: SigmoidCrossEntropy is equivalent to Sigmoid+BinaryCrossEntropy, but computing them at once has the effect of reducing computational error. Args: x(~nnabla.Variable): N-D array. Typically indicates a score. The value lies in :math:`[-\infty, \infty]` [parameter] target(~nnabla.Variable): N-D array of labels. Only 0 or 1 value is allowed. [parameter] Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.SigmoidCrossEntropy(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
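As a small usage sketch of sigmoid_cross_entropy (the logits and labels below are made-up values), the element-wise losses are typically reduced with F.mean to obtain a scalar training loss:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    # Made-up logits (scores) and binary labels for 4 samples.
    logits = nn.Variable.from_numpy_array(
        np.array([[2.0], [-1.0], [0.5], [-3.0]], dtype=np.float32))
    labels = nn.Variable.from_numpy_array(
        np.array([[1], [0], [1], [0]], dtype=np.float32))

    loss = F.mean(F.sigmoid_cross_entropy(logits, labels))
    print(loss.d)  # scalar mean loss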
[docs]@function_api def binary_cross_entropy(ctx, x, target, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target variables. .. math:: y_i = - \left(x^{(1)}_i * \ln \left(x^{(0)}_i\right) + \left(1 - \ x^{(1)}_i\right) * \ln \left(1 - x^{(0)}_i\right)\right). Args: x(~nnabla.Variable): Probabilities N-D array. :math:`-\infty` to :math:`\infty`. target(~nnabla.Variable): N-D array of labels. Usually set as 0 or 1, but, unlike SigmoidCrossEntropy, it allows probability (0 to 1) as inputs and backpropagation can be done. Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.BinaryCrossEntropy(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def softmax_cross_entropy(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between the variables and the variables of a label given by a category index, with Softmax normalization. .. math:: y_{j} = -\ln \left(\frac{\exp(x_{j,t_j})}{\sum_{i'} \exp(x_{j,i'})}\right) along the dimension specified by axis (:math:`i` is the axis on which the normalization is performed). Note: SoftmaxCrossEntropy is equivalent to Softmax+CategoricalCrossEntropy, but computing them at once has the effect of reducing computational error. Args: x(~nnabla.Variable): N-D array. Typically indicates a score. :math:`(D_1 \times ... \times D_i \times ... \times D_N)` [parameter] target(~nnabla.Variable): N-D array of labels. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` , each label should be a class index from 0 to the number of classes minus 1, or -1 if the sample does not belong to any class. [parameter] axis(int): Axis along which the normalization is taken. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` """ if axis is None: axis = len(x.shape) - 1 return F.SoftmaxCrossEntropy(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def categorical_cross_entropy(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target `t` where targets are given by a category index. .. math:: y_{j} = -\ln \left( x_{j, t_j} \right) along the dimension specified by axis (:math:`i` is the axis on which the normalization is performed). Args: x(~nnabla.Variable): N-D array. Typically indicates a score. :math:`(D_1 \times ... \times D_i \times ... \times D_N)` [parameter] target(~nnabla.Variable): N-D array of labels. :math:`(D_1 \times ... \times 1 \times ... \times D_N)`, each label should be a class index from 0 to the number of classes minus 1, or -1 if the sample does not belong to any class. [parameter] axis(int): Axis along which the normalization is taken. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` """ if axis is None: axis = len(x.shape) - 1 return F.CategoricalCrossEntropy(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
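A minimal sketch of softmax_cross_entropy with made-up scores; note that the label array has size 1 along the normalization axis and holds class indices:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    # Scores for 3 samples over 5 classes; labels have shape (3, 1).
    scores = nn.Variable.from_numpy_array(
        np.random.randn(3, 5).astype(np.float32))
    labels = nn.Variable.from_numpy_array(np.array([[0], [3], [4]]))

    loss = F.mean(F.softmax_cross_entropy(scores, labels))  # axis defaults to 1
    print(loss.d)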
[docs]@function_api def squared_error(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise squared error .. math:: y_i = \left(x^{(0)}_i - x^{(1)}_i\right)^2. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array. """ return F.SquaredError(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def absolute_error(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise absolute error .. math:: y_i = | x^{(0)}_i - x^{(1)}_i |. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array. """ return F.AbsoluteError(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def huber_loss(ctx, x0, x1, delta=1.0, n_outputs=-1, outputs=None): r""" Element-wise Huber loss .. math:: y_i= \left\{ \begin{array}{ll} d^2 & (|d| < \delta)\\ \delta (2 |d| - \delta) & ({\rm otherwise}) \end{array} \right. where :math:`d = x^{(0)}_i - x^{(1)}_i` Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. delta(float): Delta [default= `1.0` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.HuberLoss(ctx, delta)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
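To illustrate how the three element-wise regression losses above differ, here is a small sketch on made-up values; the Huber values follow the formula above with ``delta=1.0``:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x0 = nn.Variable.from_numpy_array(np.array([0.0, 1.0, 4.0], dtype=np.float32))
    x1 = nn.Variable.from_numpy_array(np.array([0.5, 1.0, 1.0], dtype=np.float32))

    print(F.squared_error(x0, x1).d)          # [0.25 0.   9.  ]
    print(F.absolute_error(x0, x1).d)         # [0.5  0.   3.  ]
    print(F.huber_loss(x0, x1, delta=1.0).d)  # [0.25 0.   5.  ]: quadratic below delta, linear above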
[docs]@function_api def epsilon_insensitive_loss(ctx, x0, x1, epsilon, n_outputs=-1, outputs=None): r""" Element-wise Epsilon Insensitive Loss .. math:: y_i= \left\{ \begin{array}{ll} | x^{(0)}_i - x^{(1)}_i | - \epsilon & if \ \ | x^{(0)}_i - x^{(1)}_i | > \epsilon \\ 0 & otherwise \end{array} \right. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. epsilon(float): Insensitive parameter. Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.EpsilonInsensitiveLoss(ctx, epsilon)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def kl_multinomial(ctx, p, q, base_axis=1, n_outputs=-1, outputs=None): r""" The Kullback Leibler Divergence for multinomial distributions. .. math:: D = \sum_i p_i \log \left( \frac{p_i}{q_i} \right) Args: p(~nnabla.Variable): N-D array of the source categorical probabilities q(~nnabla.Variable): N-D array of the target categorical probabilities base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] Returns: ~nnabla.Variable: Kullback Leibler divergence :math:`KL(p \parallel q)`. """ return F.KLMultinomial(ctx, base_axis)(p, q, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def affine_grid(ctx, theta, size, align_corners=False, n_outputs=-1, outputs=None): r"""Generate the source grid based on the normalized target grid with `size`. The target grid is first normalized in [-1, 1], then transformed by the affine transformation :math:`\theta` to generate the source grid. 2D and 3D grids are currently supported. This function is normally used with the `warp_by_grid` function for constructing the spatial transformer. Args: theta(~nnabla.Variable): N-D array with the shape (:math:`B \times 2 \times 3`), the sample-wise affine transformation matrix. size(repeated int64): The grid size of (:math:`H \times W`) for 2D and (:math:`D \times H \times W`) for 3D. align_corners(bool): If `True`, the top-left and bottom-right pixels correspond to (-1, -1) and (1, 1) respectively since a pixel is located on the corner of a grid, and the target grid is normalized in [-1, 1]. If `False`, the normalized target grid in [-1, 1] is scaled by `(size - 1) / size` according to the respective spatial size (e.g., :math:`H` and :math:`W`) before the transformation since a pixel is located on the center of a cell in a grid. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the shape (:math:`B \times H \times W \times 2`) for 2D and (:math:`B \times D \times H \times W \times 3`) for 3D. The last dimension of 2 is for (x, y) and of 3 for (x, y, z). The `grid` is used as the source grid for the warping. """ return F.AffineGrid(ctx, size, align_corners)(theta, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def warp_by_grid(ctx, x, grid, mode='linear', padding_mode='zero', align_corners=False, channel_last=False, n_outputs=-1, outputs=None): r"""Warp the input data by the grid. This function is normally used with the generated normalized grid by the `affine_grid` function for constructing the spatial transformer. Args: x(~nnabla.Variable): Input data to be warped with the shape (:math:`B \times C \times H_{in} \times W_{in}`) for 2D and (:math:`B \times C \times D_{in} \times H_{in} \times W_{in}`) for 3D. grid(~nnabla.Variable): Grid warping the input data with the shape (:math:`B \times H_{out} \times W_{out} \times 2`) for 2D and (:math:`B \times D_{out} \times H_{out} \times W_{out} \times 3`) for 3D. The last dimension of 2 is for (x, y) or 3 for (x, y, z). mode(string): Interpolation mode, linear or nearest. [default= `'linear'` ] padding_mode(string): Padding mode when the grid value is outside [-1, 1]. If this is "zero", 0 is used for padding. "reflect" uses the values reflected at the ends of the original input data like the mirror. "repeat" uses the values at the ends of the original input data. [default= `'zero'` ] align_corners(bool): The target grid normalized in [-1, 1] is scaled by `(size - 1) / size` according to the respective spatial size (e.g., :math:`H` and :math:`W`) before the transformation if this is `False`. If this is `True`, the top-left and bottom-right pixels correspond to (-1, -1) and (1, 1) respectively. [default= `False` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] Returns: ~nnabla.Variable: Output data warped by the grid. """ return F.WarpByGrid(ctx, mode, padding_mode, align_corners, channel_last)(x, grid, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
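A minimal spatial-transformer sketch combining affine_grid and warp_by_grid; the 2x3 identity affine matrix below is an illustrative choice and, under these assumptions, should warp the input back onto itself (up to floating-point precision):

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    N, C, H, W = 1, 1, 4, 4
    x = nn.Variable.from_numpy_array(
        np.arange(N * C * H * W, dtype=np.float32).reshape(N, C, H, W))

    # Sample-wise 2x3 identity affine matrix, shape (N, 2, 3).
    theta = nn.Variable.from_numpy_array(
        np.array([[[1, 0, 0], [0, 1, 0]]], dtype=np.float32))

    grid = F.affine_grid(theta, size=(H, W), align_corners=True)    # (1, 4, 4, 2)
    y = F.warp_by_grid(x, grid, mode='linear', align_corners=True)  # (1, 1, 4, 4)
    print(np.allclose(y.d, x.d))  # identity transform reproduces the input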
[docs]@function_api def warp_by_flow(ctx, data, flow, n_outputs=-1, outputs=None): r""" Transform the image(s) *data* by *flow* field(s) of offset vectors such that each output pixel corresponds to the input image pixel at the relative offset location given by horizontal and vertical flow values (in other words, the flow field describes the coordinate displacements for each output pixel to the corresponding input pixel). Both *data* and *flow* are 4-D variables (in "NCHW" layout) with identical shape except the *flow* channel dimension (which is always 2). .. math:: output_{n,c,y,x} = data_{n,c,y',x'}, where .. math:: y' &=& y + flow_{n,1,y,x}, \\ x' &=& x + flow_{n,0,y,x}. The output pixel values at :math:`y'` and :math:`x'` locations are obtained by bilinear interpolating between the 4 closest pixels of the input image. Pixel values outside of the input image are implicitly padded with the value of the closest boundary pixel. Args: data(~nnabla.Variable): Input image data with shape `(N, Channels, Height, Width)`. flow(~nnabla.Variable): Flow field vectors with shape `(N, 2, Height, Width)`. Returns: ~nnabla.Variable: Transformed image data with shape `(N, Channels, Height, Width)`. """ return F.WarpByFlow(ctx)(data, flow, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_sigmoid(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise binary sigmoid function. In the forward pass, it computes .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ 0 & ({\rm otherwise})\end{cases}, but in the backward pass, a straight-through approximation of the gradient is used, i.e., .. math:: \frac{\partial f(x)}{\partial x} = \begin{cases} 0 & (|x| \geq 1) \\ \frac{1}{2} & ({\rm otherwise}) \end{cases}. References: * `Courbariaux, Matthieu, and Yoshua Bengio. Binarynet: Training deep neural networks with weights and activations constrained to+ 1 or-1. <https://arxiv.org/abs/1602.02830>`_ Args: x(~nnabla.Variable): Input . Returns: ~nnabla.Variable: Output. """ return F.BinarySigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_tanh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise binary tanh function. In the forward pass, it computes .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ -1 & ({\rm otherwise}) \end{cases}, but in the backward pass, a straight-through approximation of the gradient is used, i.e., .. math:: \frac{\partial f(x)}{\partial x} = \begin{cases} 0 & (|x| \geq 1) \\ 1 & ({\rm otherwise}) \end{cases}. References: * `Courbariaux, Matthieu, and Yoshua Bengio. Binarynet: Training deep neural networks with weights and activations constrained to+ 1 or-1. <https://arxiv.org/abs/1602.02830>`_ Args: x(~nnabla.Variable): Input . Returns: ~nnabla.Variable: Output. """ return F.BinaryTanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_connect_affine(ctx, x, weight, binary_weight, bias=None, base_axis=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a BinaryConnect affine layer. It computes in the forward pass .. math:: y_j = \sum_{i} sign(w_{j,i}) x_i, i.e., the weights :math:`w_{j,i}` are binarized to :math:`sign(w_{j,i})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations do not require any multiplications anymore as they turn into additions/subtractions. This function should be used together with :meth:`~nnabla.functions.batch_normalization`. .. note:: 1) If you would like to share the binary weights between other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. References: * `M. Courbariaux, Y. Bengio, and J.-P. David. BinaryConnect: Training Deep Neural Networks with binary weights during propagations. <https://arxiv.org/abs/1511.00363>`_ Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight . [parameter] binary_weight(~nnabla.Variable): Binarized weight . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, binary_weight] if bias is not None: inputs += [bias] return F.BinaryConnectAffine(ctx, base_axis, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_connect_convolution(ctx, x, weight, binary_weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a BinaryConnect convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}, i.e., the weights :math:`w_{n, m, i, j}` are binarized to :math:`sign(w_{n, m, i, j})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations do not require any multiplications anymore as they turn into additions/subtractions. This function should be used together with :meth:`~nnabla.functions.batch_normalization`. Reference * `M. Courbariaux, Y. Bengio, and J.-P. David. BinaryConnect: Training Deep Neural Networks with binary weights during propagations. <https://arxiv.org/abs/1511.00363>`_ .. note:: 1) If you would like to share the binary weights between other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, binary_weight] if bias is not None: inputs += [bias] return F.BinaryConnectConvolution(ctx, base_axis, pad, stride, dilation, group, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_weight_affine(ctx, x, weight, binary_weight, alpha, bias=None, base_axis=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a Binary Weight Network affine layer. It computes in the forward pass .. math:: y_j = \frac{1}{\|\mathbf{w}_j\|_{\ell_1}} \sum_{i} sign(w_{j,i}) x_i i.e., the weights :math:`w_{j,i}` are binarized to :math:`sign(w_{j,i})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations turn into additions/subtractions which are followed by multiplication with the scaling factor :math:`\alpha_j = \frac{1}{\|\mathbf{w}_j\|_{\ell_1}}`. Reference * `Rastegari, Mohammad, et al. XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. <https://arxiv.org/abs/1603.05279>`_ .. note:: 1) If you would like to share the binary weights with other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] alpha(~nnabla.Variable): Alpha. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, binary_weight, alpha] if bias is not None: inputs += [bias] return F.BinaryWeightAffine(ctx, base_axis, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_weight_convolution(ctx, x, weight, binary_weight, alpha, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a Binary Weight Network convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \frac{1}{\|\mathbf{w}_n\|_{\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}. i.e., the weights :math:`w_{n, m, i, j}` are binarized to :math:`sign(w_{n, m, i, j})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations turn into additions/subtractions which are followed by multiplication with the scaling factor :math:`\alpha_n = \frac{1}{\|\mathbf{w}_n\|_{\ell_1}}`. Reference * `Rastegari, Mohammad, et al. XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. <https://arxiv.org/abs/1603.05279>`_ .. note:: 1) If you would like to share the binary weights between other standard layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] alpha(~nnabla.Variable): Alpha. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, binary_weight, alpha] if bias is not None: inputs += [bias] return F.BinaryWeightConvolution(ctx, base_axis, pad, stride, dilation, group, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def inq_affine(ctx, x, weight, indicator_fixedweights, bias=None, base_axis=1, num_bits=4, inq_iterations=(), selection_algorithm='largest_abs', seed=-1, n_outputs=-1, outputs=None): r""" This function provides a INQ affine layer. It computes in the forward pass .. math:: y_j = \sum_{i} w_{j,i} x_i, where the weights :math:`w_{j,i}` are quantized sequentially during training to power-of-two numbers. In the backward pass, only the non-fixed (i.e., learnable) weights are updated. References: * `Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization: Towards lossless CNNs with low-precision weights. <https://arxiv.org/abs/1702.03044>`_ Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight . [parameter] indicator_fixedweights(~nnabla.Variable): Indicates which weights are already fixed (0 = not fixed, 1 = fixed) . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] num_bits(int): Number of bits per weight. Needs to be >= 2 as two bits are used to code `zero` and sign of weight. [default= `4` ] inq_iterations(repeated int64): List which specifies after how many forward passes we fix 50% of the learnable weights. If we have done as many iterations as specified in the last element of `inq_iterations`, then all weights are fixed. [default= `()` ] selection_algorithm(string): Chooses algorithm that we use for selecting the weights to fix ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly) [default= `'largest_abs'` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, indicator_fixedweights] if bias is not None: inputs += [bias] return F.INQAffine(ctx, base_axis, num_bits, inq_iterations, selection_algorithm, seed)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def inq_convolution(ctx, x, weight, indicator_fixedweights, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, num_bits=4, inq_iterations=(), selection_algorithm='largest_abs', seed=-1, n_outputs=-1, outputs=None): r""" This function provides a INQ convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} w_{n, m, i, j} x_{m, a + i, b + j}, where the weights :math:`w_{j,i}` are quantized sequentially during training to power-of-two numbers. In the backward pass, only the non-fixed (i.e., learnable) weights are updated. Reference * `Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization: Towards lossless CNNs with low-precision weights. <https://arxiv.org/abs/1702.03044>`_ Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] indicator_fixedweights(~nnabla.Variable): Indicates which weights are already fixed (0 = not fixed, 1 = fixed) . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] num_bits(int): Number of bits per weight. Needs to be >= 2 as two bits are used to code `zero` and sign of weight. [default= `4` ] inq_iterations(repeated int64): List which specifies after how many forward passes we fix 50% of the learnable weights. If we have done as many iterations as specified in the last element of `inq_iterations`, then all weights are fixed. [default= `()` ] selection_algorithm(string): Chooses algorithm that we use for selecting the weights to fix ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly) [default= `'largest_abs'` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, indicator_fixedweights] if bias is not None: inputs += [bias] return F.INQConvolution(ctx, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def fixed_point_quantize(ctx, x, sign=True, n=8, delta=0.0625, ste_fine_grained=True, n_outputs=-1, outputs=None): r"""This function simulates to uniformly quantize values in fixed-point number representation. In the forward pass, .. math:: q_i= \left\{ \begin{array}{ll} max & if \ \ \ x_i > max \\ sign(x_i) \times floor(|x_i| \delta^{-1} + 2^{-1}) \times \delta & if \ \ min \le x_i \le max \\ min & if \ \ x_i < min \\ \end{array} \right., where :math:`\delta` is the step size, :math:`(min, max) :=(- (2^{n-1} - 1)\delta, (2^{n-1} - 1)\delta)` if :math:`sign` is true, :math:`(min, max) := (0, (2^n - 1) \delta)` otherwise, and :math:`n` is the total bit-width used. In the backward pass when using `ste_fine_grained` as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using `ste_fine_grained` as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \ x_i > max \\ 1 & if \ \ min \le x_i \le max \\ 0 & if \ \ x_i < min \\ \end{array} \right.. .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array sign(bool): Indicate the signed number or the unsigned number. Default is true. [default= `True` ] n(int): Bit width used. Note that `sign` consumes one bit. :math:`n-1` is used for number representation in `signed` case. [default= `8` ] delta(float): Step size. [default= `0.0625` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.FixedPointQuantize(ctx, sign, n, delta, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def min_max_quantize(ctx, x, qr_min, qr_max, ql_min, ql_max, decay=0.999, x_min_max=False, ema=False, ste_fine_grained=True, eps=0.01, n_outputs=-1, outputs=None): r"""This function simulates to uniformly quantize values in the range of min and max quantization levels. Min-max quantization is defined as the following equation .. math:: y = round \left(\frac{\min(\max(x, m), M) - m}{scale} \right) \times scale + m, where the :math:`scale` is defined as .. math:: scale = \frac{M - m}{M_q - m_q}, and .. math:: m_q = ql_{min}, \\ M_q = ql_{max}, \\ m = qr_{min}, \\ M = qr_{max}. In the backward pass when using `ste_fine_grained` as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using `ste_fine_grained` as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \ x_i > M \\ 1 & if \ \ m \le x_i \le M \\ 0 & if \ \ x_i < m \\ \end{array} \right.. :math:`qr_{min}` and :math:`qr_{max}` are treaded as follows. * `x_min_max` is `True` and `ema` is `True`: Exponential moving average are computed for each :math:`min(x)` and :math:`max(x)` then stored in :math:`qr_{min}` and :math:`qr_{max}`. * `x_min_max` is `True` and `ema` is `False`: :math:`min(x)` and :math:`max(x)` are computed then stored in :math:`qr_{min}` and :math:`qr_{max}`. * `x_min_max` is `False` and `ema` is `True`: Exponential moving average stored in :math:`qr_{min}` and :math:`qr_{max}` are used. * `x_min_max` is `False` and `ema` is `False` Gradients of :math:`qr_{min}` and :math:`qr_{max}` are computed in the backward pass. More precisely, in inference of the min-max quantization, one has to consider *zero-point (zp)* which corresponds to the real value 0, and its data type is an integer. *zero-point* is defined as .. 
math:: && zp_f = ql_{min} -\frac{qr_{min}}{scale}, \\ && zp = \left\{ \begin{array}{ll} ql_{max} & if \ \ \ zp_f >= ql_{max} \\ round(zp_f) & if \ \ otherwise \\ ql_{min} & if \ \ zp_f <= ql_{min} \\ \end{array} \right.. Accordingly, in order to simulate quantization effect of *zero-point*, during both forward and backward pass, :math:`qr_{min}` and :math:`qr_{max}` are adjusted as follows, .. math:: qr_{min}^{adj} = ql_{min} - zp * scale, \\ qr_{max}^{adj} = ql_{max} - zp * scale. These operations are often called *nudge*. Finally, in the formulas of the min-max quantization, :math:`m` and :math:`M` are replaced by :math:`qr_{min}^{adj}` and :math:`qr_{max}^{adj}` respectively. .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array innput. qr_min(~nnabla.Variable): Minimum value for the quantization range, modified during forward execution when x_min_max is True. qr_max(~nnabla.Variable): Maximum value for the quantization range, modified during forward execution when x_min_max is True. ql_min(~nnabla.Variable): Minimum value for the quantization level, typically 0. ql_max(~nnabla.Variable): Maximum value for the quantization level, typically 255. decay(float): Decay rate for the exponential moving average. [default= `0.999` ] x_min_max(bool): Use the min and max of x to compute quantization ranges. [default= `False` ] ema(bool): Use the exponential moving average for the min and max quantization ranges. [default= `False` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] eps(float): Epsilon, or small value to ensure :math:`qr_{max} - qr_{min}` must be greater than the epsilon. [default= `0.01` ] Returns: ~nnabla.Variable: N-D array. """ return F.MinMaxQuantize(ctx, decay, x_min_max, ema, ste_fine_grained, eps)(x, qr_min, qr_max, ql_min, ql_max, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def pow2_quantize(ctx, x, sign=True, with_zero=True, n=8, m=1, ste_fine_grained=True, n_outputs=-1, outputs=None): r""" This function simulates to quantize values in the power of 2 number representation, in other words, it is linear (uniform) quantization in :math:`log_2` domain. In the forward pass of `signed` case, .. math:: q_i= \left\{ \begin{array}{ll} max_{+} & if \ \ \overline{q_i} > max_{+} \\ \overline{q_i} & if \ \ min_{+} \le \overline{q_i} \le max_{+} \\ min_{+} & if \ \ 0 \le \overline{q_i} < min_{+} \\ min_{-} & if \ \ min_{-} < \overline{q_i} < 0 \\ \overline{q_i} & if \ \ max_{-} \le \overline{q_i} \le min_{-}\\ max_{-} & if \ \ \overline{q_i} < max_{-} \\ \end{array} \right., where .. math:: && max_{+} = 2^{m}, min_{+} = 2^{m - (2^{n-1} - 1)},\\ && max_{-} = -2^{m}, min_{-} = -2^{m - (2^{n-1} - 1)},\\ && \overline{q_i} = sign(x_i) \times 2^{round(\log_2 |x_i|)}. This quantization uses the geometric mean between two power-of-two numbers as quantization threshold. In the forward pass of `unsigned` case, .. math:: q_i= \left\{ \begin{array}{ll} max & if \ \ \overline{q_i} > max \\ \overline{q_i} & if \ \ min \le \overline{q_i} \le max \\ min & if \ \ 0 < \overline{q_i} < min \\ \end{array} \right., where .. math:: && max = 2^{m}, min = 2^{m - (2^{n} - 1)},\\ && \overline{q_i} = 2^{int(\log_2 |x_i|)}. When using `with_zero` as true, a pruning threshold is used to round an input to 0 or :math:`min`. The pruning threshold is defined in this function as the following, .. 
math:: pruning\ threshold = min \times 2^{-\frac{1}{2}}. If an absolute value of the input is lesser than this value, the input is rounded to 0, otherwise :math:`min`. In the backward pass when using ste_fine_grained as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using ste_fine_grained as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \overline{q_i} > max_{+} \\ 1 & if \ \ otherwise \\ 0 & if \ \ \overline{q_i} < max_{-} \\ \end{array} \right.. There are some literatures using pow2 quantization in their proposed methods. References: * `Miyashita Daisuke, Lee H. Edward, Murmann Boris. Convolutional Neural Networks using Logarithmic Data Representation. <https://arxiv.org/abs/1603.01025>`_ * `Aojun Zhou, Anbang Yao, Yiwen Guo, Lin Xu, Yurong Chen. Incremental Network Quantization: Towards Lossless CNNs with Low-precision Weights. <https://arxiv.org/abs/1702.03044>`_ .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array sign(bool): Indicate the signed number or the unsigned number. Default is true. [default= `True` ] with_zero(bool): Indicate using zero as a quantized value. Default is true. Note that `zero` consumes one bit. [default= `True` ] n(int): Bit width used, Note that `sign` consumes one bit. :math:`n-1` is used for number representation in `signed` case. Default is 8. [default= `8` ] m(int): :math:`2^m` is the upper bound of the dynamic range and :math:`-2^m` is the lower bound, :math:`m \in \mathcal{Z}`. Default is 1. [default= `1` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.Pow2Quantize(ctx, sign, with_zero, n, m, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
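As a small sketch of the fixed-point quantization above (the input values are made up), an 8-bit signed quantization with step size 0.0625 snaps values to multiples of the step and clips to :math:`\pm(2^{7}-1)\times 0.0625 = \pm 7.9375`:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(
        np.array([-2.0, -0.3, 0.03, 0.4, 10.0], dtype=np.float32))

    # Values snap to multiples of delta=0.0625 and clip to +-7.9375.
    q = F.fixed_point_quantize(x, sign=True, n=8, delta=0.0625)
    print(q.d)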
[docs]@function_api def prune(ctx, x, rate=0.9, n_outputs=-1, outputs=None): r""" Prune the input as the following equation, .. math:: q_i = \left \{ \begin{array}{ll} 0 & abs(x_i) < threshold \\ x_i & otherwise \end{array} \right. where :math:`threshold` is determined by `threshold = np.sort(np.abs(x))[int((x.size - 1) * rate)]`. Args: x(~nnabla.Variable): N-D array rate(float): Sparse rate, or pruning rate. [default= `0.9` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Prune(ctx, rate)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
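Following the threshold formula above, pruning ten values with ``rate=0.5`` zeroes roughly the smallest half by magnitude; a minimal sketch:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(np.arange(10, dtype=np.float32))  # 0..9
    y = F.prune(x, rate=0.5)
    # threshold = sort(|x|)[int(9 * 0.5)] = 4, so entries with |x| < 4 become 0
    print(y.d)  # [0. 0. 0. 0. 4. 5. 6. 7. 8. 9.]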
@function_api def quantize_linear(ctx, x, scale, zero_point, round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, dtype=1, n_outputs=-1, outputs=None): r"""Linearly quantize the inputs with the scale and zero point. .. math:: y = saturate(round(x / scale) + zero_point). The :math:`saturate` range is determined by `dtype` and the :math:`round` mode is selected by `round_mode`. :math:`zero_point` is constrained by the `dtype` range and its values are rounded by `round_mode`. This function aligns with ONNX. Args: x(~nnabla.Variable): Input N-D array. scale(~nnabla.Variable): Scale N-D array. The values must be positive numbers. zero_point(~nnabla.Variable): Zero point N-D array. round_mode(string): Rounding mode. HALF_AWAY_FROM_ZERO or HALF_TO_EVEN. [default= `'HALF_AWAY_FROM_ZERO'` ] narrow_range(bool): If true, this function does not use the minimum quantized value. For example, if `dtype` is int8 (the range is [-128, 127]), the output range is corrected to [-127, 127]. [default= `False` ] dtype(int): Data type for the output. The int value is compatible with the enum type for the data type defined in `the numpy <https://github.com/numpy/numpy/blob/master/numpy/core/include/numpy/ndarraytypes.h>`_. [default= `1` ] Returns: ~nnabla.Variable: Output N-D array. """ return F.QuantizeLinear(ctx, round_mode, narrow_range, dtype)(x, scale, zero_point, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def dequantize_linear(ctx, x, scale, zero_point, n_outputs=-1, outputs=None): r"""Linearly dequantize the inputs with the scale and zero point. .. math:: y = (x - zero_point) * scale. :math:`zero_point` is constrained by the `dtype` range. This function aligns with ONNX. Args: x(~nnabla.Variable): Input N-D array. scale(~nnabla.Variable): Scale N-D array. The values must be positive numbers. This should be the same as the one used in QuantizeLinear. zero_point(~nnabla.Variable): Zero point N-D array. This should be the same as the one used in QuantizeLinear. Returns: ~nnabla.Variable: Output N-D array. """ return F.DequantizeLinear(ctx)(x, scale, zero_point, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
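A minimal sketch of a quantize/dequantize round trip, assuming a made-up per-tensor scale and zero point that broadcast against the input (the default ``dtype=1`` corresponds to int8):

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(np.array([-1.0, 0.0, 0.5, 2.0], dtype=np.float32))
    scale = nn.Variable.from_numpy_array(np.array([0.5], dtype=np.float32))
    zero_point = nn.Variable.from_numpy_array(np.array([0.0], dtype=np.float32))

    q = F.quantize_linear(x, scale, zero_point)     # quantized to -2, 0, 1, 4
    xr = F.dequantize_linear(q, scale, zero_point)  # back to -1.0, 0.0, 0.5, 2.0
    print(q.d)
    print(xr.d)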
[docs]@function_api def top_n_error(ctx, x, target, axis=None, n=1, n_outputs=-1, outputs=None): r""" Top N error along the dimension specified by the axis, the element of outputs is .. math:: y_i = \left \{ \begin{array}{l} 1 \ (x_i \ is \ not \ within \ N-th \ place) \\ 0 \ (x_i \ is \ within \ N-th \ place) \end{array} \right. Args: x(~nnabla.Variable): Probabilities N-D array. :math:`D_1 \times ... \times D_i \times ... \times D_N` target(~nnabla.Variable): N-D array of labels. :math:`D_1 \times ... \times 1 \times ... \times D_N` axis(int): Axis on which the top N error is calculated. [default= `len(x.shape) - 1` ] n(int): top N [default= `1` ] Returns: ~nnabla.Variable: Element-wise error N-D array. (:math:`D_1 \times ... \times 1 \times ... \times D_N`) """ if axis is None: axis = len(x.shape) - 1 return F.TopNError(ctx, axis, n)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
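A small sketch with made-up scores: top_n_error marks, per sample, whether the correct class is outside the top-n scores, so averaging the output gives the error rate:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    scores = nn.Variable.from_numpy_array(np.array(
        [[0.1, 0.8, 0.1],
         [0.7, 0.2, 0.1],
         [0.3, 0.3, 0.4],
         [0.5, 0.4, 0.1]], dtype=np.float32))
    labels = nn.Variable.from_numpy_array(np.array([[1], [2], [2], [0]]))

    err = F.top_n_error(scores, labels, n=1)
    print(err.d.mean())  # 0.25: only the second sample is mis-ranked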
[docs]@function_api def binary_error(ctx, x, target, n_outputs=-1, outputs=None): r""" Elementwise binary error. .. math:: y_i = \left \{ \begin{array}{l} 0 ((x^{(0)} \geq 0.5) = (x^{(1)} \geq 0.5)) \\ 1 ((x^{(0)} \geq 0.5) \neq (x^{(1)} \geq 0.5)) \end{array} \right. Args: x(~nnabla.Variable): Probabilities N-D array. :math:`-\infty` to :math:`\infty`. target(~nnabla.Variable): Labels N-D array. Usually set as 0 or 1, but, it allows probability (0 to 1) as inputs. Returns: ~nnabla.Variable: Element-wise errors N-D array. """ return F.BinaryError(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def confusion_matrix(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Confusion matrix. The return value is already summed over samples. Args: x(~nnabla.Variable): Probabilities N-D array. (:math:`D_1 \times ... \times D_i \times ... \times D_N`) target(~nnabla.Variable): Labels N-D array. (:math:`D_1 \times ... \times 1 \times ... \times D_N`) axis(int): Axis on which the confusion matrix is calculated. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: Confusion matrix 2-D array. Col index is estimated class. Row index is label class. """ if axis is None: axis = len(x.shape) - 1 return F.ConfusionMatrix(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def vat_noise(ctx, x, w, base_axis=1, eps=1.0, n_outputs=-1, outputs=None): r""" Noise for virtual adversarial training. This layer is a special layer for GUI network designing, specialized for getting the noise of virtual adversarial training. In the backward process, the weight parameter will be replaced with the gradient. Forward .. math:: y_i = \frac{\epsilon x_i}{\sqrt{\sum_k x_k^2 + c}} Backward .. math:: \delta x_i = 0 .. math:: w_i = \epsilon \delta y_i Note: This layer is a special layer for GUI network designing. References: * `Miyato et.al, Distributional Smoothing with Virtual Adversarial Training. <https://arxiv.org/abs/1507.00677>`_ Args: x(~nnabla.Variable): N-D array of noise input. Noise is standard Gaussian noise initially, but the next step, fed back gradient variable. w(~nnabla.Variable): N-D array for keep gradient values. base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] eps(float): Noise norm (l2) factor. [default= `1.0` ] Returns: ~nnabla.Variable: N-D array """ return F.VATNoise(ctx, base_axis, eps)(x, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sink(ctx, *x, **kw): r""" Creates a dummy variable used to call the forward or backward functions of multiple variables in one place. This takes any number of input variables with any shape, and creates a single 0-shape output. The forward pass does nothing. The backward pass sets ones to the input grads if one_input_grad is set to true. Note: ``sink`` can only be called at the very end of the graph, and the ``grad`` of the input variables are cleared when ``y.backward(clear_buffer=True)`` is called. Args: *x(~nnabla.Variable): Any number of inputs with any shape. [variadic] one_input_grad(bool): Set grads of inputs as one during backward. It is useful to set false if you want to set external gradients to the input variables. [default= `True` ] Returns: ~nnabla.Variable: Dummy variable. """ assert len(x) >= 1, "sink must take at least 1 input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) one_input_grad = kw.pop('one_input_grad', True) return F.Sink(ctx, one_input_grad)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
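A minimal sketch of sink used to drive forward and backward over two otherwise independent branches at once; the sin/cos branches and the all-ones inputs are arbitrary choices for illustration:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x0 = nn.Variable.from_numpy_array(np.ones((2, 2), dtype=np.float32), need_grad=True)
    x1 = nn.Variable.from_numpy_array(np.ones((2, 2), dtype=np.float32), need_grad=True)
    y0, y1 = F.sin(x0), F.cos(x1)

    # One dummy 0-shape output lets both branches be executed together.
    y = F.sink(y0, y1)
    x0.grad.zero()
    x1.grad.zero()
    y.forward()
    y.backward()
    print(x0.g)  # cos(1) everywhere, since the grads of y0 and y1 are set to ones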
[docs]@function_api def nms_detection2d(ctx, x, thresh=None, nms=None, nms_per_class=None, n_outputs=-1, outputs=None): r""" Non-Maximum Suppression (NMS) applied to a 2D object detector output. The input is a 3-dimensional tensor with shape of ``(B, N, 5 + C)`` where ``B`` denotes batch size, ``N`` denotes the number of detection box candidates, and ``C`` denotes the number of classes of object detection. ``5 + C`` consists of the box coordinates ``x, y, w, h`` in normalized coordinates (the size of each of x and y is 1.0), objectness (learned to predict the IoU value to the ground truth box), and the class probabilities of ``C`` classes. It outputs a tensor with the same dimensions as the input, where all values are copied from the input to the output, except the class probabilities are multiplied by objectness, and possibly suppressed to 0 by NMS. During NMS, all combinations of pairs of bounding boxes are compared. For each pair, the bounding box with a lower detection score (described below) is suppressed if the overlap ratio (the IoU) is greater than the value of ``nms``. There are two suppression modes for NMS. 1. Suppress by class probability (``nms_per_class`` is ``True``): For each bounding box, the detection score is calculated by ``objectness * probability[class_id]`` for each class. The suppression is done for each class independently. 2. Suppress by objectness (``nms_per_class`` is ``False``): The suppression is done for each bounding box using ``objectness`` as a detection score. All class probabilities become 0 for every suppressed box. References: * `Joseph Redmon, Ali Farhadi, YOLO9000: Better, Faster, Stronger. <https://arxiv.org/abs/1612.08242>`_ Args: x(~nnabla.Variable): A 3-dimensional array. thresh(float): Detection score threshold. [default= `0.5` ] nms(float): IoU threshold for Non-maximum suppression (NMS). [default= `0.45` ] nms_per_class(bool): If true, NMS is applied for each class. [default= `True` ] Returns: ~nnabla.Variable: A 3-dimensional array with the same dimensions as the input. """ if thresh is None: thresh = 0.5 if nms is None: nms = 0.45 if nms_per_class is None: nms_per_class = True return F.NmsDetection2d(ctx, thresh, nms, nms_per_class)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def onnx_non_max_suppression(ctx, boxes, scores, center_point_box=None, max_output_boxes_per_class=None, iou_threshold=None, score_threshold=None, n_outputs=-1, outputs=None): r""" Non-Maximum Suppression (NMS) to 2D Object detector output. This function provides a ONNX-compatible interface of Non-Maximum Suppression. The first input is a 3-dimensional bounding box tensor with shape of ``(B, N, 4)`` where ``B`` denotes batch size and ``N`` denotes the number of detection box candidates. ``4`` consists of the box coordinates ``y1, x1, y2, x2`` in normalized coordinates (size of each x and y are 1.0). The second input is a 3-dimensional score tensor with shape of ``(B, C, N)`` where ``C`` denotes the number of classes of object detection. It outputs the indices of the selected boxes as a tensor with shape of ``(M, 3)`` where ``M`` denotes the number of the selected boxes. ``3`` consists of 3-dimensional indices ``batch_index, class_index, box_index``. References: * `Joseph Redmon, Ali Farhadi, YOLO9000: Better, Faster, Stronger. <https://arxiv.org/abs/1612.08242>`_ * `ONNX Operators documentation. <https://github.com/onnx/onnx/blob/main/docs/Operators.md>` Args: boxes(~nnabla.Variable): A 3-dimensional array. scores(~nnabla.Variable): A 3-dimensional array. center_point_box(int): Bounding box format (0 or 1). [default= `0` ] max_output_boxes_per_class(int): The maximum number of boxes selected per batch per class. [default= `0` ] iou_threshold(float): IoU threshold for Non-maximum suppression (NMS). [default= `0.0` ] score_threshold(float): Detection score threshold. [default= `0.0` ] Returns: ~nnabla.Variable: A 2-dimensional array. """ if center_point_box is None: center_point_box = 0 if max_output_boxes_per_class is None: max_output_boxes_per_class = 0 if iou_threshold is None: iou_threshold = 0.0 if score_threshold is None: score_threshold = 0.0 return F.ONNXNonMaxSuppression(ctx, center_point_box, max_output_boxes_per_class, iou_threshold, score_threshold)(boxes, scores, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def max_pooling_backward(ctx, dy, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None): r""" Max pooling backward. This aims to support the n-th order gradients of the max pooling. The document of this function must not be shown, and the function must not be called in the end-user side. Args: dy(~nnabla.Variable): Input variable. x(~nnabla.Variable): Input variable. kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis. stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ] ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ] pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a. NHWC order. 
[default= `False` ] Returns: ~nnabla.Variable: Output """ if stride is None: stride = kernel if pad is None: pad = (0,) * len(kernel) return F.MaxPoolingBackward(ctx, kernel, stride, ignore_border, pad, channel_last)(dy, x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def patch_correlation(ctx, x1, x2, patch=(1, 1), shift=(0, 0), patch_step=(1, 1), shift_step=(1, 1), padding=(0, 0, 0, 0), n_outputs=-1, outputs=None): r""" Multiplicative patch-wise comparison between inputs `x1` and `x2`, which must both be 4-dimensional NCHW (with `channel_last=False`) or NHWC (with `channel_last=True`) arrays (where *N* is the number of samples, *H* and *W* are the sample height and width and *C* is the number of channels). The function returns a 5-D array with shape :math:`(N, C_y, C_x, H_o, W_o)` where :math:`H_o, W_o` are determined by the possible patch locations within the, optionally padded, input image size and :math:`C_y, C_x` are determined by the optionally shifted patch positions. Mathematically, the patch correlation is formulated as .. math:: O(s_y, s_x, h_0, w_0) = \sum_{c} \sum_{k_h} \sum_{k_w} I_1(c, h + k_h, w + k_w) \times I_2(c, h + k_h + s_h, w + k_w + s_w), where :math:`I_1(c, h, w)` and :math:`I_2(c, h, w)` are the inputs at :math:`c`-th channel, :math:`h`-th height, and :math:`w`-th width, :math:`k_h, k_w` indices for the patch size and :math:`s_h, s_w` indices for the shifts. A single correlation value (per sample) is produced if the patch extends to the image dimensions and all other parameters use the default values. >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> N, C, H, W = (1, 2, 3, 4) >>> x = nn.Variable.from_numpy_array(np.ones([N, C, H, W])) >>> F.patch_correlation(x, x, patch=(H, W)).d array([[[[[24.]]]]], dtype=float32) A patch that is smaller than the image size moves horizontally and vertically, producing a value per position. The `patch_step` argument may be used to control the position increments. >>> F.patch_correlation(x, x, patch=(H-1, W-1)).d array([[[[[12., 12.], [12., 12.]]]]], dtype=float32) >>> F.patch_correlation(x, x, patch=(H-1, W-1), patch_step=(2, 1)).d array([[[[[12., 12.]]]]], dtype=float32) Multiple correlations may be performed at each position between the patch from `x1` and patches from `x2` at relative offsets striding the maximum vertical and horizontal distance given by the `shift` values at increments of `shift_step`. The shifted correlation values can be obtained from the second and third output dimensions for the vertical and horizontal shifts. >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1)).shape (1, 1, 3, 1, 4) >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1)).d array([[[[[0., 6., 6., 6.]], [[6., 6., 6., 6.]], [[6., 6., 6., 0.]]]]], dtype=float32) >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1), shift_step=(1, 2)).d array([[[[[0., 6., 6., 6.]], [[6., 6., 6., 0.]]]]], dtype=float32) Padding with zero values may be applied individually to the top, bottom, left and right sides of the input image. >>> F.patch_correlation(x, x, patch=(H, W), padding=(0, 1, W, W)).d array([[[[[ 0., 6., 12., 18., 24., 18., 12., 6., 0.], [ 0., 4., 8., 12., 16., 12., 8., 4., 0.]]]]], dtype=float32) This function may be used to implement the FlowNetC correlation layer. 

    >>> N, C, H, W = (1, 256, 44, 60)
    >>> x1, x2 = nn.Variable((N, C, H, W)), nn.Variable((N, C, H, W))
    >>> F.patch_correlation(x1, x2, shift=20, shift_step=2).shape
    (1, 21, 21, 44, 60)

    References:
        * `Fischer et al., FlowNet: Learning Optical Flow with Convolutional Networks. <https://arxiv.org/abs/1504.06852>`_

    Args:
        x1(~nnabla.Variable): Input N-D array with shape :math:`(N, H, W, C)`.
        x2(~nnabla.Variable): Input N-D array with shape :math:`(N, H, W, C)`.
        patch(:obj:`tuple` of :obj:`int`): A tuple with height and width of the correlation patch. A single integer expands to identical height and width. [default= `(1, 1)` ]
        shift(:obj:`tuple` of :obj:`int`): A tuple of maximum vertical and horizontal displacement of patches from `x2` that are correlated with a single patch from `x1`. A single integer expands to identical vertical and horizontal displacement. [default= `(0, 0)` ]
        patch_step(:obj:`tuple` of :obj:`int`): A tuple of vertical and horizontal increments for advancing the position of the correlation patch within the input image shape. A single integer expands to identical vertical and horizontal increments. [default= `(1, 1)` ]
        shift_step(:obj:`tuple` of :obj:`int`): A tuple of vertical and horizontal increments for advancing the relative offset position within the shift range. A single integer expands to identical vertical and horizontal increments. [default= `(1, 1)` ]
        padding(:obj:`tuple` of :obj:`int`): A tuple of top, bottom, left and right padding extent. A tuple of two values yields identical top/bottom and left/right padding from the first and second tuple value. A single integer expands to identical padding extent for all sides. [default= `(0, 0, 0, 0)` ]

    Returns:
        ~nnabla.Variable: N-D array with shape :math:`(N, C_y, C_x, H_o, W_o)`.

        The spatial size of the output is calculated as

        .. math::

            H_o = \frac{H + (top\_pad + bottom\_pad) - patch_v}{patch\_step_v} + 1.

        The channel size of the output is calculated as

        .. math::

            C_y = \frac{2 \times shift_v}{shift\_step_v} + 1.

        :math:`W_o` and :math:`C_x` are calculated in the same way from the corresponding horizontal components.

    """
    return F.PatchCorrelation(ctx, patch, shift, patch_step, shift_step, padding)(x1, x2, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def unique(ctx, x, flatten=True, axis=None, sorted=True, with_index=False, with_inverse=False, with_counts=False, n_outputs=-1, outputs=None):
    r"""
    Find the unique elements of the input array.

    Args:
        x(~nnabla.Variable): A N-D array.
        flatten(bool): If True, unique values of the flattened input array are returned. [default= `True` ]
        axis(int): If flatten is False and axis is specified, unique slices along axis are returned. [default= `None` ]
        sorted(bool): If True, unique values/slices sorted in ascending order are returned. [default= `True` ]
        with_index(bool): If True, `indices` is returned. [default= `False` ]
        with_inverse(bool): If True, `inverse_indices` is returned. [default= `False` ]
        with_counts(bool): If True, `counts` is returned. [default= `False` ]

    Returns:
        ~nnabla.Variable: A N-D array.
        ~nnabla.Variable: A 1-D array. Indices of the first occurrence of each element of `y` in `x`. If `flatten` is True, it contains indices into the flattened input array `x`. If `flatten` is False and `axis` is specified, it contains indices into input array `x` on `axis`.
        ~nnabla.Variable: A 1-D array. Indices of the elements of `x` corresponding to `y`. If `flatten` is True, it contains indices into the output array `y`. If `flatten` is False and `axis` is specified, it contains indices into the output array `y` on `axis`.
        ~nnabla.Variable: A 1-D array. The count of each element of `y` in the input array `x`.

    """
    return F.Unique(ctx, flatten, axis, sorted, with_index, with_inverse, with_counts)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
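
# Illustrative usage sketch (not part of the generated source): assuming unique()
# above is exposed as nnabla.functions.unique and auto-forward mode is enabled.
# With the default arguments only the sorted unique values are returned; setting
# with_index / with_inverse / with_counts appends the corresponding index and
# count arrays to the outputs, which are then returned as a tuple of Variables.
#
#   >>> import numpy as np, nnabla as nn, nnabla.functions as F
#   >>> nn.set_auto_forward(True)
#   >>> x = nn.Variable.from_numpy_array(np.array([5, 3, 5, 2, 3]))
#   >>> F.unique(x).d        # sorted unique values: [2, 3, 5]
#   >>> y, indices, inverse, counts = F.unique(
#   ...     x, with_index=True, with_inverse=True, with_counts=True)
#   >>> counts.d             # occurrences of 2, 3, 5 in x: [1, 2, 2]
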
@function_api
def eye_like(ctx, x, k=0, n_outputs=-1, outputs=None):
    r"""
    Generate a 2-D array with ones on the diagonal, specified by `k`, and zeros elsewhere.
    The shape of the output array is the same as the input array.

    Args:
        x(~nnabla.Variable): A 2-D array.
        k(int): Index of the diagonal. The default value 0 means the main diagonal, a positive value means an upper diagonal, and a negative value means a lower diagonal. [default= `0` ]

    Returns:
        ~nnabla.Variable: A 2-D array.

    """
    return F.EyeLike(ctx, k)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def mod2(ctx, x0, x1, fmod=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise remainder function.
    The behavior of this operator is determined by x0's dtype and the `fmod` argument:

    .. math::

        y_i = \left\{
            \begin{array}{ll}
                \text{numpy.fmod}(x_{0,i}, x_{1,i}) & (x_{0} \text{ has a floating-point type, or fmod is True}) \\
                \text{numpy.mod}(x_{0,i}, x_{1,i}) & (\text{otherwise})
            \end{array}
        \right..

    Args:
        x0(~nnabla.Variable): A N-D array.
        x1(~nnabla.Variable): A N-D array.
        fmod(bool): If True, this operator behaves like numpy.fmod, otherwise it behaves like numpy.mod. [default= `False` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    return F.Mod2(ctx, fmod)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def bit_shift(ctx, x, shift, direction='LEFT', n_outputs=-1, outputs=None):
    r"""
    Element-wise bit shift function.

    Args:
        x(~nnabla.Variable): A N-D array. Its dtype must be one of the unsigned integer types.
        shift(~nnabla.Variable): A N-D array. Its dtype is cast to x's dtype at run-time.
        direction(string): Direction of bit shift. [default= `'LEFT'` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    return F.BitShift(ctx, direction)(x, shift, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def einsum(ctx, *x, **kw):
    r"""
    Evaluates the Einstein summation convention on the inputs.

    See the numpy.einsum documentation for more information about the equation format.

    Args:
        *x(~nnabla.Variable): List of N-D arrays. [variadic]
        equation(string): A string that follows the Einstein summation convention. [default= `` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    assert len(x) >= 1, "einsum must take at least 1 input"
    n_outputs = kw.pop('n_outputs', -1)
    outputs = kw.pop('outputs', None)
    equation = kw.pop('equation')
    return F.Einsum(ctx, equation)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
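
# Illustrative usage sketch (not part of the generated source): assuming einsum()
# above is exposed as nnabla.functions.einsum and auto-forward mode is enabled.
# The equation string follows the numpy.einsum convention; "ij,jk->ik" below
# requests an ordinary matrix product of the two inputs.
#
#   >>> import numpy as np, nnabla as nn, nnabla.functions as F
#   >>> nn.set_auto_forward(True)
#   >>> a = nn.Variable.from_numpy_array(np.random.rand(2, 3).astype(np.float32))
#   >>> b = nn.Variable.from_numpy_array(np.random.rand(3, 4).astype(np.float32))
#   >>> c = F.einsum(a, b, equation="ij,jk->ik")
#   >>> c.shape              # (2, 4), matching np.einsum("ij,jk->ik", a.d, b.d).shape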