Source code for nnabla.function_bases

# Copyright 2018,2019,2020,2021 Sony Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# *WARNING*
# THIS FILE IS AUTO-GENERATED BY CODE GENERATOR.
# PLEASE DO NOT EDIT THIS FILE BY HAND!
# If you want to modify this file, edit the following files.
# - python/src/nnabla/function_bases.py.tmpl
# - build-tools/code_generator/generate.py

from __future__ import absolute_import

from .context import get_current_context
from . import function as F
from .auto_forward import get_auto_forward

# Templates for function_api source building.
FUNCTION_API_HEADER = "def {name}{signature}:"
FUNCTION_API_BODY = '''ctx = get_current_context()
return _func_(ctx, {shortsignature})'''

def function_api(func):
    """
    Decorator that wraps a function so that it is called with the current context.
    A few tricks are applied here so that the wrapped function keeps the signature and docstring of the original.
    """
    from .utils.signature_utils import SignatureEx

    name = func.__name__
    doc = func.__doc__
    if doc is None:
        doc = "No docstring."

    # Parsing argspecs
    sig = SignatureEx.from_callable(func)
    sig = sig.drop_arg('ctx', raise_if_not_found=True)
    # Creating signature
    # e.g. (x, weights, biases=None, n_outputs=None)
    signature = '(' + sig.format_argument_signature() + ')' + sig.format_return_annotation()
    # Creating signature without params and defaults
    # e.g. x, weights, biases, n_outputs
    shortsignature = sig.format_caller_argument_signature()

    # Create code by string
    src = (FUNCTION_API_HEADER + '\n' + '\n'.join(map(lambda x: '    ' +
                                                      x, FUNCTION_API_BODY.splitlines()))).format(**locals())

    # Evaluate source code from string
    code = compile(src, "<{name}>".format(**locals()), 'single')
    execdict = dict(_func_=func, get_current_context=get_current_context)
    exec(code, execdict)

    # Get created function.
    newfunc = execdict[name]
    # DOC newfunc.__doc__ = FUNCTION_API_DOC.format(**locals())
    doc += '''

    Note:
        All nnabla functions in :obj:`nnabla.functions` are decorated with the :obj:`nnabla.function_bases.function_api` decorator,
        which queries the current context and passes it into the first argument of the
        original function. The original function always takes a context as the first argument.

    '''
    newfunc.__doc__ = doc
    newfunc.__source__ = src
    newfunc.__function_api_base__ = func
    newfunc.__module__ = __name__
    return newfunc
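
# Illustrative sketch (not part of the generated code): how the decorator above is
# meant to be used. The function name `my_identity` is hypothetical; the generated
# wrapper shown for `__source__` follows the two templates defined above and may
# differ in minor formatting details.
#
# >>> @function_api
# ... def my_identity(ctx, x, n_outputs=-1, outputs=None):
# ...     """Identity."""
# ...     return F.Identity(ctx)(x, n_outputs=n_outputs,
# ...                            auto_forward=get_auto_forward(), outputs=outputs)
# >>> print(my_identity.__source__)
# def my_identity(x, n_outputs=-1, outputs=None):
#     ctx = get_current_context()
#     return _func_(ctx, x, n_outputs, outputs)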





@function_api
def affine(ctx, x, weight, bias=None, base_axis=1, n_outputs=-1, outputs=None):
    r"""
    Affine layer, also known as the fully connected layer. It calculates:

    .. math::
        {\mathbf y} = {\mathbf A} {\mathbf x} + {\mathbf b}.

    where :math:`{\mathbf x}` is the input and :math:`{\mathbf y}` is the output.

    Args:
        x(~nnabla.Variable): Input N-D array with shape (:math:`M_0 \times ... \times M_{B-1} \times D_B \times ... \times D_N`). Dimensions before and after base_axis are flattened as if it were a matrix.
        weight(~nnabla.Variable): Weight matrix with shape (:math:`(D_B \times ... \times D_N) \times L_{0} \times \ldots \times L_{I}`) [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L_{0} \times \ldots \times L_{I}`) [optional][parameter]
        base_axis(int): Base axis of the Affine operation. Dimensions up to base_axis are treated as sample dimensions. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1)`-D array. (:math:`M_0 \times ... \times M_{B-1} \times L_{0} \times \ldots \times L_{I}`)

    """
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Affine(ctx, base_axis)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
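
# Illustrative usage sketch (not part of the generated code); shapes are assumptions
# chosen for the example only. Dimensions after base_axis are flattened (3*28*28 = 2352).
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable.from_numpy_array(np.random.randn(8, 3, 28, 28))
# >>> w = nn.Variable.from_numpy_array(np.random.randn(3 * 28 * 28, 10))
# >>> b = nn.Variable.from_numpy_array(np.zeros(10))
# >>> y = F.affine(x, w, b, base_axis=1)   # ctx is supplied by @function_api
# >>> y.shape
# (8, 10)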
@function_api
def rnn(ctx, x, h, weight_l0, weight=None, bias=None, num_layers=1, nonlinearity='tanh', dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    The RNN function implements an Elman RNN with a nonlinearity applied to the input sequence.
    It is defined as follows:

    .. math::
        {\mathbf h_t} = {\mathbf \tanh}( {\mathbf w_{ih}} *{\mathbf x_t} + {\mathbf b_{ih}} + {\mathbf w_{hh}}* {\mathbf h_{(t-1)}} + {\mathbf b_{hh}}).

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `Jeffrey Elman, Finding Structure in Time.
          <https://crl.ucsd.edu/~elman/Papers/fsit.pdf>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Input N-D array with shape :math:`(D, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Input N-D array with shape :math:`(L-1, D, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Input N-D array with shape :math:`(L, D, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        nonlinearity(string): Type of nonlinearity applied to the input sequence. Must be either tanh or relu. Default is tanh. [default= `'tanh'` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.RNN(ctx, num_layers, nonlinearity, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
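
# Illustrative shape sketch (not part of the generated code): a single-layer,
# unidirectional RNN (L=1, D=1) with assumed sizes T=5, B=2, I=4, H=3.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> T, B, I, L, D, H = 5, 2, 4, 1, 1, 3
# >>> x = nn.Variable((T, B, I))
# >>> h = nn.Variable((L, D, B, H))
# >>> w0 = nn.Variable((D, H, I + H))        # weight_l0
# >>> b = nn.Variable((L, D, H))             # bias
# >>> y, hn = F.rnn(x, h, w0, bias=b, num_layers=L, nonlinearity='tanh')
# >>> y.shape, hn.shape
# ((5, 2, 3), (1, 1, 2, 3))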
@function_api
def lstm(ctx, x, h, c, weight_l0, weight=None, bias=None, num_layers=1, dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    N-Step LSTM layer.

    .. math::
        {\mathbf f_t} &=& {\mathbf \sigma}( {\mathbf W_f} *{\mathbf x_t} + {\mathbf U_f}* {\mathbf h_{(t-1)}} + {\mathbf b_f})\\
        {\mathbf i_t} &=& {\mathbf \sigma}( {\mathbf W_i} *{\mathbf x_t} + {\mathbf U_i}* {\mathbf h_{(t-1)}} + {\mathbf b_i})\\
        {\mathbf o_t} &=& {\mathbf \sigma}( {\mathbf W_o} *{\mathbf x_t} + {\mathbf U_o}* {\mathbf h_{(t-1)}} + {\mathbf b_o})\\
        {\mathbf c_t} &=& {\mathbf f_t}\odot {\mathbf c_{(t-1)}} + {\mathbf i_t}\odot {\mathbf \tanh}({\mathbf W_c}*{\mathbf x_t} + {\mathbf U_c} *{\mathbf h_{(t-1)}} + {\mathbf b_c})\\
        {\mathbf h_t} &=& {\mathbf o_t} \odot {\mathbf \tanh}({\mathbf c_t}).

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `S. Hochreiter and J. Schmidhuber, Long Short-Term Memory.
          <https://www.bioinf.jku.at/publications/older/2604.pdf>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        c(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Weight parameters for the first layer. Shape is :math:`(D, 4, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Weight parameters for the second layer and above. Shape is :math:`(L-1, D, 4, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L`). Shape is :math:`(L, D, 4, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`. Its memory layout can be reshaped as :math:`(T, B, D, H)`.
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`
        ~nnabla.Variable: Output :math:`c_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, c, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.LSTM(ctx, num_layers, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def gru(ctx, x, h, weight_l0, weight=None, bias=None, num_layers=1, dropout=None, bidirectional=False, training=True, n_outputs=-1, outputs=None):
    r"""
    N-Step GRU layer.

    .. math::
        {\mathbf r_t} &=& {\mathbf \sigma}( {\mathbf W_r} *{\mathbf x_t} + {\mathbf U_r}* {\mathbf h_{(t-1)}} + {\mathbf b_r})\\
        {\mathbf z_t} &=& {\mathbf \sigma}( {\mathbf W_z} *{\mathbf x_t} + {\mathbf U_z}* {\mathbf h_{(t-1)}} + {\mathbf b_z})\\
        {\mathbf n_t} &=& {\mathbf \tanh}( {\mathbf W_n}{\mathbf x_t}+ {\mathbf b_{in}}+ {\mathbf r_t}\odot( {\mathbf U_n}{\mathbf h_{t-1}}+ {\mathbf b_{hn}})) \\
        {\mathbf h_t} &=& (1- {\mathbf z_t})\odot {\mathbf n_t} + {\mathbf z_t}\odot {\mathbf h_{t-1}}.

    We use the following notations to describe the inputs and outputs below.
    :math:`T`: sequence length, :math:`B`: batch size, :math:`I`: input size, :math:`L`: number of layers, :math:`D`: number of directions, can be either 1 or 2, :math:`H`: hidden size.

    References:

        * `K. Cho et al., Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation.
          <https://www.aclweb.org/anthology/D14-1179>`_

    Args:
        x(~nnabla.Variable): Input N-D array with shape :math:`(T, B, I)`.
        h(~nnabla.Variable): Input N-D array with shape :math:`(L, D, B, H)`.
        weight_l0(~nnabla.Variable): Weight parameters for the first layer. Shape is :math:`(D, 3, H, I + H)`. [parameter]
        weight(~nnabla.Variable): Weight parameters for the second layer and above. Shape is :math:`(L-1, D, 3, H, D * H + H)`. [optional][parameter]
        bias(~nnabla.Variable): Bias vector (:math:`L`). Shape is :math:`(L, D, 4, H)`. [optional][parameter]
        num_layers(int): Number of layers in the network. If set to 1, only the weights for the first layer will be invoked. Default is 1. [default= `1` ]
        dropout(float): Dropout ratio applied to parameters. Default is 0.0. [default= `0.0` ]
        bidirectional(bool): If True, bidirectional computation will be performed in each layer. Default is False. [default= `False` ]
        training(bool): Backpropagation will be performed only when it is True. Default is True. [default= `True` ]

    Returns:
        ~nnabla.Variable: Output :math:`y` with shape :math:`(T, B, D * H)`. Its memory layout can be reshaped as :math:`(T, B, D, H)`.
        ~nnabla.Variable: Output :math:`h_n` with shape :math:`(L, D, B, H)`

    """
    if dropout is None:
        dropout = 0.0
    inputs = [x, h, weight_l0]
    if weight is not None:
        inputs += [weight]
    if bias is not None:
        inputs += [bias]
    return F.GRU(ctx, num_layers, dropout, bidirectional, training)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def convolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    N-D Convolution with bias.

    See references for dilated convolution (a.k.a. atrous convolution).

    References:

        * `Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.
          <https://arxiv.org/abs/1606.00915>`_

        * `Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions.
          <https://arxiv.org/abs/1511.07122>`_

    Note:

        Convolution is a computationally intensive operation that should preferably be run with the `cudnn` backend. NNabla then uses CuDNN library functions to determine and cache the fastest algorithm for the given set of convolution parameters, which results in additional memory consumption which may pose a problem for GPUs with insufficient memory size. In that case, the `NNABLA_CUDNN_WORKSPACE_LIMIT` environment variable can be used to restrict the choice of algorithms to those that fit the given workspace memory limit, expressed in bytes. In some cases it may also be desired to restrict the automatic search to algorithms that produce deterministic (reproducible) results. This can be requested by setting the environment variable `NNABLA_CUDNN_DETERMINISTIC` to a non-zero value.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C' \times C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Convolution(ctx, base_axis, pad, stride, dilation, group, channel_last)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
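
# Illustrative usage sketch (not part of the generated code): a 2-D convolution and
# the spatial output size it produces according to the formula above. Shapes are
# assumptions chosen for the example only.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable((1, 3, 32, 32))            # (B, C, H, W)
# >>> w = nn.Variable((16, 3, 3, 3))             # (C', C, K_1, K_2)
# >>> y = F.convolution(x, w, pad=(1, 1), stride=(2, 2), dilation=(1, 1))
# >>> y.shape                                    # L' = (32 + 2*1 - 1*(3-1) - 1)//2 + 1 = 16
# (1, 16, 16, 16)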
@function_api
def fused_convolution(ctx, x, weight, bias=None, beta=None, gamma=None, mean=None, variance=None, z=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, decay_rate=0.9, eps=1e-05, batch_stat=True, nonlinearity='relu', nonlinearity_args=list(), pad_mode='constant', constant_value=0, n_outputs=-1, outputs=None):
    r"""
    Fused operation of Pad, Convolution, Batch Normalization, Add2 and Activation.

    This is an equivalent operation to the following, but may be more computationally efficient depending on the backend implementation (currently we don't provide an efficient implementation on any backend).

    .. code-block:: python

        h = F.pad(x, *pad_opts)
        h = F.convolution(h, weight, bias, pad=(0, ...), *conv_opts)
        h = F.batch_normalization(h, beta, gamma, mean, variance, *bn_opts)
        y = F.relu(h + z)

    You can optionally disable either of pad, batch normalization, residual addition and activation.

    Args:
        x(~nnabla.Variable): N-D array of input.
        weight(~nnabla.Variable): `weight` in :meth:`~nnabla.functions.convolution`. [parameter]
        bias(~nnabla.Variable): `bias` in :meth:`~nnabla.functions.convolution`. [optional][parameter]
        beta(~nnabla.Variable): `beta` in :meth:`~nnabla.functions.batch_normalization`. [optional][parameter]
        gamma(~nnabla.Variable): `gamma` in :meth:`~nnabla.functions.batch_normalization`. [optional][parameter]
        mean(~nnabla.Variable): `mean` in :meth:`~nnabla.functions.batch_normalization`. [optional]
        variance(~nnabla.Variable): `variance` in :meth:`~nnabla.functions.batch_normalization`. [optional]
        z(~nnabla.Variable): N-D array of a residual input. By specifying None, the activation function will follow immediately after the BN operation. [optional]
        base_axis(int): `base_axis` in :meth:`~nnabla.functions.convolution`. Note that the batch normalization `axes` is determined by this and the `channel_last` option. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): `pad_width` in :meth:`~nnabla.functions.pad`. If `len(pad) == (len(x.shape) - (base_axis+1))`, considered as `pad` in :meth:`~nnabla.functions.convolution`. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): `stride` in :meth:`~nnabla.functions.convolution`. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): `dilation` in :meth:`~nnabla.functions.convolution`. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): `group` in :meth:`~nnabla.functions.convolution`. [default= `1` ]
        channel_last(bool): `channel_last` in :meth:`~nnabla.functions.convolution`. [default= `False` ]
        decay_rate(float): `decay_rate` in :meth:`~nnabla.functions.batch_normalization`. [default= `0.9` ]
        eps(float): `eps` in :meth:`~nnabla.functions.batch_normalization`. [default= `1e-05` ]
        batch_stat(bool): `batch_stat` in :meth:`~nnabla.functions.batch_normalization`. [default= `True` ]
        nonlinearity(string): Activation type as string. The following is a list of available activation types and optional parameters specified as a vector of float by `nonlinearity_args`.

            =============== ===============================
            Activation type Arguments (`nonlinearity_args`)
            =============== ===============================
            identity        No argument
            relu            No argument
            sigmoid         No argument
            tanh            No argument
            leaky_relu      [alpha] (see LeakyReLU doc)
            elu             [alpha] (see ELU doc)
            relu6           No argument
            =============== ===============================

            [default= `'relu'` ]
        nonlinearity_args(repeated float): Optional arguments of nonlinearity as a vector of float. See the description of the `nonlinearity` argument. [default= `list()` ]
        pad_mode(string): `mode` in :meth:`~nnabla.functions.pad`. [default= `'constant'` ]
        constant_value(float): `constant_value` in :meth:`~nnabla.functions.pad`. [default= `0` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    if mean is not None:
        inputs += [mean]
    if variance is not None:
        inputs += [variance]
    if z is not None:
        inputs += [z]
    return F.FusedConvolution(ctx, base_axis, pad, stride, dilation, group, channel_last, decay_rate, eps, batch_stat, nonlinearity, nonlinearity_args, pad_mode, constant_value)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def depthwise_convolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, multiplier=1, n_outputs=-1, outputs=None):
    r"""
    N-D Depthwise Convolution with bias.

    References:

        * `F. Chollet. Xception: Deep Learning with Depthwise Separable Convolutions.
          <https://arxiv.org/abs/1610.02357>`_

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(1 + N)`-D array (:math:`C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        multiplier(int): Number of output feature maps per input feature map. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        The output map size :math:`C'` is :math:`C` multiplied by :math:`m`

        .. math::
            C' = m \times C,

        where :math:`m` is the multiplier.

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.DepthwiseConvolution(ctx, base_axis, pad, stride, dilation, multiplier)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def deconvolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, channel_last=False, output_padding=None, n_outputs=-1, outputs=None):
    r"""
    N-D deconvolution, also known as transposed convolution, with bias operates backward convolution (derivative of the output w.r.t. the input) plus channel-wise learned bias.

    The weights are specified in the same manner as :meth:`~nnabla.functions.convolution`, as if it was an ordinary convolution function. The forward operation of :meth:`~nnabla.functions.deconvolution` will then be operationally equivalent to the backward pass of :meth:`~nnabla.functions.convolution`. Therefore, the number of input channels (can be seen as output channels of forward convolution) is specified in the first dimension, and the number of the output channels divided by the number of groups is specified in the second dimension.

    For `stride > 1`, a parameter-wise identical deconvolution on the output of a convolution may not produce the same output shape as the input to the convolution if, due to striding, the convolution did not fully cover the input spatial dimension. The `output_padding` parameter can then be used to appropriately increase the calculated output shape. Note that this is used to find the output shape for the deconvolution operation, but not to add zero-padding to the output.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C \times C' \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]
        output_padding(:obj:`tuple` of :obj:`int`): Additional size added to the output shape. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = s_i (L_i - 1) - 2 p_i + d_i (k_i - 1) + 1,

        where :math:`s_i` is the stride, :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, and :math:`k_i` is the kernel size for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    if output_padding is None:
        output_padding = (0,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.Deconvolution(ctx, base_axis, pad, stride, dilation, group, channel_last, output_padding)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def depthwise_deconvolution(ctx, x, weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, divisor=1, n_outputs=-1, outputs=None):
    r"""
    Depthwise deconvolution computes the transposed depthwise convolution with bias for one-dimensional and two-dimensional input data.

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(1 + N)`-D array (:math:`C \times K_1 \times ... \times K_N`). [parameter]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        divisor(int): Number of input feature maps per output feature map. [default= `1` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        The output map size :math:`C'` is :math:`C` divided by :math:`d`

        .. math::
            C' = \frac{C}{d},

        where :math:`d` is the divisor.

        A spatial size of the output is calculated as

        .. math::
            L'_i = s_i (L_i - 1) - 2 p_i + d_i (k_i - 1) + 1,

        where :math:`s_i` is the stride, :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, and :math:`k_i` is the kernel size for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight]
    if bias is not None:
        inputs += [bias]
    return F.DepthwiseDeconvolution(ctx, base_axis, pad, stride, dilation, divisor)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def deformable_convolution(ctx, x, weight, offset, mask=None, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, deformable_group=1, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    2-D Deformable Convolution with bias. Another convolution with fixed output channels must be passed externally to calculate the offsets and mask. Mask should be normalized to :math:`[0,1]` interval.

    .. math::
        \begin{eqnarray}
        y(p) = \sum_{k=1}^{K} w_k \cdot x(p + p_k + \Delta p_k) \cdot \Delta m_k,
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output, :math:`w_k` is the weight, :math:`p` is the pixel location of interest, :math:`p_k` is the fixed displacement e.g., :math:`p_k \in \{(-1, -1), (-1, 0), \ldots (1, 1)\}` for the 2D 3x3 receptive field, :math:`\Delta p_k` is the learnable displacement, and :math:`\Delta m_k` is the learnable scale normalized in :math:`[0, 1]` by a function like the sigmoid. Note that :math:`\Delta p_k` and :math:`\Delta m_k` are sample-dependent, location-dependent, and feature-independent.

    References:

        * `Dai et al., Deformable Convolutional Networks.
          <https://arxiv.org/abs/1703.06211>`_

        * `Zhu et al., Deformable ConvNets v2: More Deformable, Better Results.
          <https://arxiv.org/abs/1811.11168>`_

    Args:
        x(~nnabla.Variable): :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C \times L_1 \times ... \times L_N`).
        weight(~nnabla.Variable): :math:`(2 + N)`-D array (:math:`C' \times C \times K_1 \times ... \times K_N`). [parameter]
        offset(~nnabla.Variable): Offsets for deformable convolutions. Shape is fixed to :math:`(N, deformable{\_}group \times 2 \times Kh \times Kw, H, W)`. Offsets must be calculated externally through a separate convolution layer.
        mask(~nnabla.Variable): Normalized mask for deformable convolutions v2. Shape is fixed to :math:`(N, deformable{\_}group \times Kh \times Kw, H, W)`. Masks must be calculated externally together with the offsets through a separate convolution layer. [optional]
        bias(~nnabla.Variable): Bias vector (:math:`C'`). [optional][parameter]
        base_axis(int): base axis :math:`B`. [default= `1` ]
        pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ]
        stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ]
        group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ]
        deformable_group(int): Number of deformable groups of channels. [default= `1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: :math:`(B + 1 + N)`-D array (:math:`M_1 \times ... \times M_B \times C' \times L'_1 \times ... \times L'_N`).

        A spatial size of the output is calculated as

        .. math::
            L'_i = \frac{L_i + 2 p_i - d_i (k_i - 1) - 1}{s_i} + 1,

        where :math:`L_i` is the spatial size, :math:`p_i` is the padding, :math:`d_i` is the dilation, :math:`k_i` is the kernel size, and :math:`s_i` is the stride for :math:`i`-th spatial dimension. The same calculation can also be applied to the other spatial dimensions.

    """
    if pad is None:
        pad = (0,) * (len(x.shape) - (base_axis+1))
    if stride is None:
        stride = (1,) * (len(x.shape) - (base_axis+1))
    if dilation is None:
        dilation = (1,) * (len(x.shape) - (base_axis+1))
    inputs = [x, weight, offset]
    if mask is not None:
        inputs += [mask]
    if bias is not None:
        inputs += [bias]
    return F.DeformableConvolution(ctx, base_axis, pad, stride, dilation, group, deformable_group, channel_last)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def adaptive_separable_convolution(ctx, x, vertical_kernel, horizontal_kernel, n_outputs=-1, outputs=None):
    r"""
    2-D Adaptive Separable Convolution for NCHW (the channel-first tensor). Sample and pixel dependent vertical and horizontal kernels are dynamically generated ones, which are used for approximating a feature-independent 2-D kernel in this function. Thus, the kernel used in this function is dependent on samples and pixels but independent of features.

    If padding is needed, apply the pad function to the input :math:`x` before this function.

    Adaptive separable convolution is formulated as

    .. math::
        \tilde{I}(c, h, w) = \sum_{j, i} K_v(j, h, w) \times K_h(i, h, w) \times I(c, h + j, w + i),

    where :math:`I(c, h, w)` and :math:`\tilde{I}(c, h, w)` are the input and output images at :math:`c`-th channel, :math:`h`-th height, :math:`w`-th width. :math:`K_v(:, h, w)` and :math:`K_h(:, h, w)` are vertical and horizontal 1-D kernels at :math:`h`-th height and :math:`w`-th width.

    References:

        * `Simon Niklaus, Long Mai, Feng Liu, Video Frame Interpolation via Adaptive Separable Convolution.
          <https://arxiv.org/abs/1708.01692>`_

        * `Mart Kartasev, Carlo Rapisarda, Dominik Fay, Implementing Adaptive Separable Convolution for Video Frame Interpolation.
          <https://arxiv.org/abs/1809.07759>`_

    Args:
        x(~nnabla.Variable): :math:`4-D` array (:math:`B \times C \times H \times W`)
        vertical_kernel(~nnabla.Variable): :math:`4-D` array (:math:`B \times K_v \times H \times W`)
        horizontal_kernel(~nnabla.Variable): :math:`4-D` array (:math:`B \times K_h \times H \times W`)

    Returns:
        ~nnabla.Variable: :math:`4-D` array (:math:`B \times C \times H - K_v + 1 \times W - K_h + 1`)

    """
    return F.AdaptiveSeparableConvolution(ctx)(x, vertical_kernel, horizontal_kernel, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def max_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Max pooling. It pools the maximum values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \max_{k_1, k_2 \in K} (x_{i_1 + k_1, i_2 + k_2})

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Maximum values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.MaxPooling(ctx, kernel, stride, ignore_border, pad, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
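
# Illustrative usage sketch (not part of the generated code): 2x2 max pooling with
# the default stride (equal to the kernel) halves each spatial dimension.
#
# >>> import nnabla as nn, nnabla.functions as F
# >>> x = nn.Variable((1, 3, 32, 32))
# >>> F.max_pooling(x, kernel=(2, 2)).shape
# (1, 3, 16, 16)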
@function_api
def average_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, including_pad=True, n_outputs=-1, outputs=None):
    r"""
    Average pooling. It pools the averaged values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \frac{1}{K_1 K_2} \sum_{k1} \sum_{k2} x_{i_1 + k_1, i_2 + k_2}

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]
        including_pad(bool): If true, border padding values are considered for the output. [default= `True` ]

    Returns:
        ~nnabla.Variable: Average values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.AveragePooling(ctx, kernel, stride, ignore_border, pad, channel_last, including_pad)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def global_average_pooling(ctx, x, n_outputs=-1, outputs=None):
    r"""
    .. WARNING::
        This function is experimental support, so please do not actively use it.

    Global average pooling. It pools an averaged value from the whole image.

    Args:
        x(~nnabla.Variable): Input variable.

    Returns:
        ~nnabla.Variable: Average values variable

    """
    return F.GlobalAveragePooling(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sum_pooling(ctx, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Sum pooling. It pools the summed values inside the scanning kernel:

    .. math::
        y_{i_1, i_2} = \sum_{k1} \sum_{k2} x_{i_1 + k_1, i_2 + k_2}

    where :math:`x_{i_1 + k_1, i_2 + k_2}` is the input and :math:`y_{i_1, i_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ]
        ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ]
        pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Summed values variable

    """
    if stride is None:
        stride = kernel
    if pad is None:
        pad = (0,) * len(kernel)
    return F.SumPooling(ctx, kernel, stride, ignore_border, pad, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def unpooling(ctx, x, kernel, channel_last=False, n_outputs=-1, outputs=None):
    r"""
    Inverse operation of pooling. It spreads the input values:

    .. math::
        y_{k_1 i_1 + j_1, k_2 i_2 + j_2} = x_{i_1, i_2}

    where :math:`x_{i_1, i_2}` is the input and :math:`y_{k_1 i_1 + j_1, k_2 i_2 + j_2}` is the output.

    Args:
        x(~nnabla.Variable): Input variable.
        kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis.
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: Spread values variable

    """
    return F.Unpooling(ctx, kernel, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def embed(ctx, x0, w, n_outputs=-1, outputs=None):
    r"""
    Embed slices of a matrix/tensor with indexing array/tensor.

    Args:
        x0(~nnabla.Variable): Indices with shape :math:`(I_0, ..., I_N)`
        w(~nnabla.Variable): Weights with shape :math:`(W_0, ..., W_M)` [parameter]

    Returns:
        ~nnabla.Variable: Output with shape :math:`(I_0, ..., I_N, W_1, ..., W_M)`

    """
    return F.Embed(ctx)(x0, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
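
# Illustrative usage sketch (not part of the generated code): indices of shape (2, 2)
# gather rows from a weight matrix of shape (5, 8), giving output shape (2, 2, 8).
# The vocabulary size 5 and embedding dimension 8 are assumptions for the example.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> idx = nn.Variable.from_numpy_array(np.array([[0, 2], [1, 1]]))
# >>> w = nn.Variable((5, 8))
# >>> F.embed(idx, w).shape
# (2, 2, 8)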
@function_api
def roi_align(ctx, input, boxes, output_size, spatial_scale=(1.0, 1.0), sampling_ratio=None, channel_last=None, n_outputs=-1, outputs=None):
    r"""
    Map Regions of Interest (RoI) defined by bounding `boxes` to features of `output_size` height and width using bilinear interpolation with `sampling_ratio` points in the interpolation grid.

    >>> import numpy as np, nnabla as nn, nnabla.functions as F
    >>> nn.set_auto_forward(True)
    >>> input = F.pad(F.constant(1, (1, 1, 2, 2)) * 2, (1, 1, 1, 1), "constant", 1)
    >>> print(input.d)
    [[[[1. 1. 1. 1.]
       [1. 2. 2. 1.]
       [1. 2. 2. 1.]
       [1. 1. 1. 1.]]]]
    >>> boxes = nn.Variable.from_numpy_array([[0, 0, 0, 4, 4], [0, 1, 1, 3, 3]])
    >>> output = F.roi_align(input, boxes, (2, 2))
    >>> print(output.d[0])
    [[[1.25 1.25]
      [1.25 1.25]]]
    >>> print(output.d[1])
    [[[2. 2.]
      [2. 2.]]]

    The `spatial_scale` argument tuple may be used to appropriately scale the box coordinates, for example, to scale normalized box coordinate to the input height and width dimensions.

    >>> input = F.reshape(F.arange(1, 13), (1, 1, 3, 4))
    >>> print(input.d)
    [[[[ 1.  2.  3.  4.]
       [ 5.  6.  7.  8.]
       [ 9. 10. 11. 12.]]]]
    >>> boxes = nn.Variable.from_numpy_array([[0, 1/4, 1/3, 3/4, 2/30]])
    >>> output = F.roi_align(input, boxes, (1, 2), spatial_scale=(3, 4))
    >>> print(output.d)
    [[[[6. 7.]]]]

    References:

        * `He et al., Mask R-CNN. <https://arxiv.org/abs/1703.06870v3>`_

    Args:
        input(~nnabla.Variable): N-D array with shape :math:`(N, H, W, C)` or :math:`(N, C, H, W)`.
        boxes(~nnabla.Variable): N-D array with shape :math:`(K, 5)` containing box coordinates in (b, x1, y1, x2, y2) format where b is the batch index. Note that an invalid (out-of-range) batch index will generate an error only when running on CPU; when using a GPU context the batch index values are clipped to the range of input samples.
        output_size(:obj:`tuple` of :obj:`int`): the height and width of the output feature maps.
        spatial_scale(repeated float): Scaling factor from box to input coordinates, as (x, y). [default= `(1.0, 1.0)` ]
        sampling_ratio(int): The number of sampling points used for interpolation. Computed as `ceil((y2 - y1) / output_size[0])` for height and likewise for width if `sampling_ratio <= 0`. [default= `-1` ]
        channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with shape :math:`(K, C, output\_size[0], output\_size[1])` or :math:`(K, output\_size[0], output\_size[1], C)`.

    """
    if sampling_ratio is None:
        sampling_ratio = -1
    if channel_last is None:
        channel_last = False
    return F.RoiAlign(ctx, output_size, spatial_scale, sampling_ratio, channel_last)(input, boxes, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise sigmoid function.

    .. math::
        f(x) = \frac{1}{1 + \exp(-x)},

    Args:
        x(~nnabla.Variable): Input

    Returns:
        ~nnabla.Variable: Output

    """
    return F.Sigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def swish(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise swish function, by Ramachandran et al. (2017).

    .. math::
        y_i = \frac{x_i}{1 + \exp(-x_i)},

    References:

        * `Prajit Ramachandran, Barret Zoph, and Quoc V. Le, Swish: a Self-Gated Activation Function, arXiv:1710.05941 [cs.NE]
          <https://arxiv.org/abs/1710.05941>`_

    Args:
        x(~nnabla.Variable): Input

    Returns:
        ~nnabla.Variable: Output

    """
    return F.Swish(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def tanh(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise hyperbolic tangent (tanh) function.

    .. math::
        y_i = \tanh (x_i)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Tanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def relu(ctx, x, inplace=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise Rectified Linear Unit (ReLU) function.

    .. math::
        y_i = \max (0, x_i)

    Args:
        x(~nnabla.Variable): N-D array
        inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ReLU(ctx, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def leaky_relu(ctx, x, alpha=0.1, inplace=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise Leaky Rectified Linear Unit (ReLU) function.

    It is defined as:

    .. math::
        y_i = \alpha * \min(0, x_i) + \max (0, x_i)

    Args:
        x(~nnabla.Variable): N-D array
        alpha(float): The slope value multiplied to negative numbers. :math:`\alpha` in the definition. [default= `0.1` ]
        inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.LeakyReLU(ctx, alpha, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softmax(ctx, x, axis=None, n_outputs=-1, outputs=None):
    r"""
    Softmax normalization. Calculates

    .. math::
        y_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}

    along the dimension specified by `axis`, where :math:`x_i` is the input and :math:`y_i` is the output.

    Args:
        x(~nnabla.Variable): N-D array. Typically indicates a score.
        axis(int): Axis normalization is taken. [default= `len(x.shape) - 1` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    if axis is None:
        axis = len(x.shape) - 1
    return F.Softmax(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
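
# Illustrative usage sketch (not part of the generated code): softmax over the
# default (last) axis produces rows that sum to one.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> nn.set_auto_forward(True)
# >>> x = nn.Variable.from_numpy_array(np.array([[1.0, 2.0, 3.0]]))
# >>> y = F.softmax(x)
# >>> np.allclose(y.d.sum(axis=-1), 1.0)
# True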
@function_api
def log_softmax(ctx, x, axis=None, n_outputs=-1, outputs=None):
    r"""
    Fused operation of Softmax normalization followed by log, which is defined as

    .. math::
        y_i = \log \frac{\exp(x_i)}{\sum_j \exp(x_j)},

    where :math:`x_i` is the input and :math:`y_i` is the output at the i-th channel.
    An advantage of this fusion is reducing the numerical instability due to the log application.
    The original definition can be rewritten as

    .. math::
        y_i = x_i - \max_j(x_j) - \log\left(\sum_j \exp(x_j - \max_k(x_k))\right).

    It is more stable as a log is always applied to a value :math:`\ge 1`, while a log can be evaluated for 0 in the non-fused operation.

    Also, backward gradient computation is more stable than the original one as it doesn't perform division by x due to a gradient of log. The definition is as following.

    .. math::
        dx_i = dy_i - y_i * \sum_j dy_j

    where :math:`dx_i` and :math:`dy_i` denote gradients of loss wrt :math:`x_i` and :math:`y_i` respectively.

    Args:
        x(~nnabla.Variable): N-D array. Typically indicates a score.
        axis(int): Axis normalization is taken. [default= `len(x.shape) - 1` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    if axis is None:
        axis = len(x.shape) - 1
    return F.LogSoftmax(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
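
# Illustrative numerical sketch (not part of the generated code) of the stability
# argument above: composing log with softmax can underflow to -inf for extreme inputs,
# while the fused log_softmax stays finite. Assumes the non-fused softmax underflows
# for these values.
#
# >>> import numpy as np, nnabla as nn, nnabla.functions as F
# >>> nn.set_auto_forward(True)
# >>> x = nn.Variable.from_numpy_array(np.array([[-1000.0, -2000.0]]))
# >>> np.isinf(F.log(F.softmax(x)).d).any()      # exp(-1000) rounds to 0, log(0) = -inf
# True
# >>> np.isinf(F.log_softmax(x).d).any()         # fused form yields [0, -1000]
# False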
@function_api
def elu(ctx, x, alpha=1.0, n_outputs=-1, outputs=None):
    r"""
    Element-wise Exponential Linear Unit (ELU) function.

    .. math::
        y_i= \left\{
            \begin{array}{ll}
            x_i & (x > 0)\\
            \alpha (\exp(x_i) - 1) & (x \leq 0)
            \end{array} \right..

    References:

        * `Clevert et al., Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).
          <http://arxiv.org/abs/1511.07289>`_

    Args:
        x(~nnabla.Variable): N-D array
        alpha(float): Coefficient for negative outputs. :math:`\alpha` in definition [default= `1.0` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ELU(ctx, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def selu(ctx, x, scale=1.05070098735548, alpha=1.673263242354377, n_outputs=-1, outputs=None):
    r"""
    Element-wise Scaled Exponential Linear Unit (SELU) function by Klambauer et al. (2017).

    .. math::
        y_i= \lambda \left\{
            \begin{array}{ll}
            x_i & (x > 0)\\
            \alpha (\exp(x_i) - 1) & (x \leq 0)
            \end{array} \right..

    The coefficients :math:`\lambda` and :math:`\alpha` default to the following values :math:`\lambda_{01}` and :math:`\alpha_{01}`, respectively, provided by Klambauer et al. (2017):

    .. math::
        \begin{array}{lll}
            \lambda_{01} &=& \left( 1 - \operatorname{erfc}\left( \frac{1}{\sqrt{2}} \right) \sqrt{e} \right) \sqrt{2 \pi} \\
            && \left( 2 \operatorname{erfc} \left( \sqrt{2} \right) e^2 + \pi \operatorname{erfc}\left( \frac{1}{\sqrt{2}} \right)^2 e \right. \\
            && \left. - 2(2 + \pi) \operatorname{erfc} \left( \frac{1}{\sqrt{2}} \right) \sqrt{e} + \pi + 2 \right)^{-1/2} \\
            &\approx& 1.0507 \\
            \alpha_{01} &=& - \frac {\sqrt {\frac {2}{\pi}}} {\operatorname{erfc} \left( \frac{1}{\sqrt{2}} \right) \exp \left(\frac {1} {2} \right) - 1} \\
            &\approx& 1.67326
        \end{array}

    References:

        * `Klambauer, G., Unterthiner, T., Mayr, A., & Hochreiter, S. (2017). Self-Normalizing Neural Networks. In Advances in Neural Information Processing Systems (NIPS).
          <https://arxiv.org/abs/1706.02515>`_

    Args:
        x(~nnabla.Variable): N-D array
        scale(float): The coefficient :math:`\lambda` in the definition. [default= `1.05070098735548` ]
        alpha(float): The coefficient :math:`\alpha` in the definition. [default= `1.673263242354377` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SELU(ctx, scale, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
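
# Illustrative check (not part of the generated code): the default `alpha` above can be
# reproduced from the closed-form expression for alpha_01 using only the standard library.
#
# >>> import math
# >>> alpha_01 = -math.sqrt(2 / math.pi) / (math.erfc(1 / math.sqrt(2)) * math.exp(0.5) - 1)
# >>> round(alpha_01, 5)
# 1.67326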
@function_api
def crelu(ctx, x, axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Concatenated Rectified Linear Unit (CReLU) function.
    This function calculates the ReLU of :math:`x` and :math:`-x` , then concatenates the results together at a specified axis, and returns the resulting array.

    References:

        * `Wenling Shang, Kihyuk Sohn, Diogo Almeida, Honglak Lee. Understanding and Improving Convolutional Neural Networks via Concatenated Rectified Linear Units.
          <https://arxiv.org/abs/1603.05201>`_

    Args:
        x(~nnabla.Variable): N-D array.
        axis(int): The ReLU activations of positive inputs and negative inputs are concatenated at axis. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array where axis dimension is doubled by concatenating.

    """
    return F.CReLU(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def celu(ctx, x, alpha=1.0, axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Concatenated Exponential Linear Unit (CELU) function.
    Concatenates ELU outputs of positive and negative inputs together at specified axis.

    Args:
        x(~nnabla.Variable): N-D array.
        alpha(float): Coefficient for negative outputs. :math:`\alpha` in definition. [default= `1.0` ]
        axis(int): The ELU activations of positive inputs and negative inputs are concatenated at axis. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array where axis dimension is doubled by concatenating.

    """
    return F.CELU(ctx, alpha, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def prelu(ctx, x0, x1, base_axis=1, n_outputs=-1, outputs=None):
    r"""
    Element-wise Parametrized Rectified Linear Unit function. Calculates:

    .. math::
        y_i = \max(0, x_i) + w_i \min(0, x_i)

    where negative slope :math:`w` is learned and can vary across channels (an axis specified with `base_axis`).

    Args:
        x0(~nnabla.Variable): (N-D array) Input
        x1(~nnabla.Variable): (N-D array) Weights
        base_axis(int): Dimensions up to base_axis are treated as sample dimensions. [default= `1` ]

    Returns:
        ~nnabla.Variable: N-D array.

    """
    return F.PReLU(ctx, base_axis)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def gelu(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Gaussian Error Linear Unit (GELU) function.

    .. math::
        GELU(x) = xP(X \leq x) = x \Phi (x)

    which is approximated by

    .. math::
        GELU(x) = 0.5x (1 + \tanh ( \sqrt{2/\pi}(x + 0.044715x^3) ))

    References:

        * `Dan Hendrycks and Kevin Gimpel. Gaussian Error Linear Units (GELUs).
          <https://arxiv.org/abs/1606.08415>`_

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.GELU(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def mish(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Mish activation function.

    .. math::
        Mish(x) = x \tanh(\log(1+\exp(x_i)))

    References:

        * `Diganta Misra. Mish: A Self Regularized Non-Monotonic Neural Activation Function.
          <https://arxiv.org/abs/1908.08681>`_

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Mish(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def relu6(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise ReLU6 function.
    Capping ReLU activation to 6 is often observed to learn sparse features earlier.

    .. math::
        ReLU6(x) = \min(\max(0,x),6)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.ReLU6(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def hard_sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Segment-wise linear approximation of sigmoid.
    Preferable when speed of computation is more important than precision.
    Returns :math:`0` if :math:`x < -2.5`.
    Returns :math:`1` if :math:`x > 2.5`.
    Returns :math:`0.2x + 0.5` if :math:`-2.5 <= x <= 2.5`.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.HardSigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def hard_tanh(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise HardTanh function.
    Computationally cheaper than Tanh function.
    Returns :math:`1` if :math:`x > 1`.
    Returns :math:`-1` if :math:`x < -1`.
    Returns :math:`x` otherwise.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.HardTanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def log_sigmoid(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise LogSigmoid function.

    .. math::
        LogSigmoid(x) = \log(1/(1+\exp(-x_i)))

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.LogSigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softplus(ctx, x, beta=1.0, n_outputs=-1, outputs=None):
    r"""
    Element-wise SoftPlus function.
    Unlike Sigmoid and Tanh that have upper and lower bounds, SoftPlus is only lower-bounded by 0.

    .. math::
        SoftPlus(x) = \frac{1}{\beta} * \log(1+\exp(\beta * x_i))

    Args:
        x(~nnabla.Variable): N-D array
        beta(float): the `beta` value for SoftPlus formulation [default= `1.0` ]

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SoftPlus(ctx, beta)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def softsign(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise SoftSign.
    Can be used in place of Tanh function.
    While Tanh converges exponentially, SoftSign converges polynomially.

    .. math::
        SoftSign(x) = x/(1+|x|)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.SoftSign(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def tanh_shrink(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise TanhShrink function.

    .. math::
        TanhShrink(x) = x - \tanh(x)

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.TanhShrink(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def sinc(ctx, x, n_outputs=-1, outputs=None):
    r"""
    Element-wise Sinc function.
    Unlike other popular activation functions, it has rises and falls.
    It returns :math:`1` if :math:`x = 0`, and :math:`\sin(x)/x` otherwise.

    Args:
        x(~nnabla.Variable): N-D array

    Returns:
        ~nnabla.Variable: N-D array with the same shape as x

    """
    return F.Sinc(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api
def fused_batch_normalization(ctx, x, beta, gamma, mean, variance, z=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, nonlinearity='relu', n_outputs=-1, outputs=None):
    r"""
    Batch normalization fused with add2 (adding a residual input) and activation.

    This is an equivalent operation to the following, but is more computationally efficient:

    .. code-block:: python

        h = F.batch_normalization(x, beta, gamma, mean, variance, *opts)
        y = F.relu(h + z)

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned.
        gamma(~nnabla.Variable): N-D array of gamma which is learned.
        mean(~nnabla.Variable): N-D array of running mean (modified during forward execution).
        variance(~nnabla.Variable): N-D array of running variance (modified during forward execution).
        z(~nnabla.Variable): N-D array of a residual input. By specifying None, the activation function will follow immediately after the BN operation. [optional]
        axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ]
        decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ]
        nonlinearity(string): Activation chosen from ('relu'). [default= `'relu'` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x, beta, gamma, mean, variance]
    if z is not None:
        inputs += [z]
    return F.FusedBatchNormalization(ctx, axes, decay_rate, eps, batch_stat, nonlinearity)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def batch_normalization(ctx, x, beta=None, gamma=None, mean=None, variance=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Batch normalization.

    .. math::
        \begin{eqnarray}
          \mu &=& \frac{1}{M} \sum x_i \\
          \sigma^2 &=& \frac{1}{M} \sum \left(x_i - \mu\right)^2 \\
          \hat{x}_i &=& \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\
          y_i &=& \hat{x}_i \gamma + \beta.
        \end{eqnarray}

    At testing time, the mean and variance values used are those that were computed during training by moving average.

    References:

        * `Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
          <https://arxiv.org/abs/1502.03167>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        mean(~nnabla.Variable): N-D array of running mean (modified during forward execution). [optional]
        variance(~nnabla.Variable): N-D array of running variance (modified during forward execution). [optional]
        axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ]
        decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    if mean is not None:
        inputs += [mean]
    if variance is not None:
        inputs += [variance]
    return F.BatchNormalization(ctx, axes, decay_rate, eps, batch_stat, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def group_normalization(ctx, x, beta=None, gamma=None, num_groups=None, channel_axis=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Group Normalization over an input tensor, which is defined as:

    .. math::
        \begin{eqnarray}
          \mu^g &=& \frac{1}{H} \sum_{i=1}^{H} x_i^g \\
          \sigma^g &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^g - \mu^g\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^g}{\sigma^g} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^g` and :math:`\sigma^g` are the mean and std of each group which contains `num_channels / num_groups` channels, and :math:`\gamma` and :math:`\beta` are adaptive gains and biases.

    The input channels, specified by :attr:`channel_axis`, are separated into :attr:`num_groups` groups, and the mean and std are calculated over each group. For example, if the input shape is [B, C, H, W] (= channel_axis=1, batch_axis=0), an input variable is once reshaped to [B, num_groups, C / num_groups, H, W] and standardized by its mean and std whose shapes are [B, num_groups, 1, 1, 1]. Finally, an output variable is reshaped again to the original input shape (= [B, C, H, W] in the case above).

    References:

        * `Yuxin Wu, Kaiming He, Group Normalization.
          <https://arxiv.org/abs/1803.08494>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        num_groups(int): A number of groups. The channel dim of 'x' must be integer multiple of `num_groups`. [default= `1` ]
        channel_axis(int): Channel axis. [default= `1` ]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if num_groups is None:
        num_groups = 1
    if channel_axis is None:
        channel_axis = 1
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.GroupNormalization(ctx, num_groups, channel_axis, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def instance_normalization(ctx, x, beta=None, gamma=None, channel_axis=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Instance Normalization over an input tensor, which is defined as

    .. math::
        \begin{eqnarray}
          \mu^i &=& \frac{1}{H} \sum_{i=1}^{H} x_i^i \\
          \sigma^i &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^i - \mu^i\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^i}{\sigma^i} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^i` and :math:`\sigma^i` are the mean and std of each instance which is separately calculated for each batch and channel, and :math:`\gamma` and :math:`\beta` are adaptive gains and biases.

    If the input shape is [B, C, H, W] (= channel_axis=1, batch_axis=0), the shape of calculated mean and std are [B, C, 1, 1].

    References:

        * `Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky, Instance Normalization: The Missing Ingredient for Fast Stylization.
          <https://arxiv.org/abs/1607.08022>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        channel_axis(int): Channel axis. [default= `1` ]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    if channel_axis is None:
        channel_axis = 1
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.InstanceNormalization(ctx, channel_axis, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def layer_normalization(ctx, x, beta=None, gamma=None, batch_axis=(0,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None):
    r"""
    Applies Layer Normalization over an input tensor, which is defined as

    .. math::
        \begin{eqnarray}
          \mu^l &=& \frac{1}{H} \sum_{i=1}^{H} x_i^l \\
          \sigma^l &=& \sqrt{\frac{1}{H} \sum_{i=1}^{H} \left(x_i^l - \mu^l\right)^2 + \epsilon} \\
          y &=& \frac{x - \mu^l}{\sigma^l} \gamma + \beta
        \end{eqnarray}

    where :math:`x` and :math:`y` are input and output variable, :math:`\mu^l` and :math:`\sigma^l` are the mean and std of each layer which is separately calculated for each batch, and :math:`\beta` and :math:`\gamma` are adaptive biases and gains.

    If the input shape is [B, C, H, W] (= batch_axis=0), the shape of calculated mean and std are [B, 1, 1, 1].

    References:

        * `Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton, Layer Normalization.
          <https://arxiv.org/abs/1607.06450>`_

    Args:
        x(~nnabla.Variable): N-D array of input.
        beta(~nnabla.Variable): N-D array of beta which is learned. [optional]
        gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional]
        batch_axis(repeated int64): Axes mean and variance are taken. [default= `(0,)` ]
        eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ]
        no_scale(bool): If `True`, the scale term is omitted. [default= `False` ]
        no_bias(bool): If `True`, the bias term is omitted. [default= `False` ]

    Returns:
        ~nnabla.Variable: N-D array

    """
    inputs = [x]
    if beta is not None:
        inputs += [beta]
    if gamma is not None:
        inputs += [gamma]
    return F.LayerNormalization(ctx, batch_axis, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def norm_normalization(ctx, x, p=None, axes=None, eps=1e-12, n_outputs=-1, outputs=None):
    r"""
    Norm normalization.

    .. math::
        y = \frac{x_i}{\|x\|_p}

    Args:
        x(~nnabla.Variable): N-D array.
        p(float): Order of the norm. [default= `2` ]
        axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced. [default= `range(x.ndim)` ]
        eps(float): Epsilon for the normalization. This `eps` is added before taking the p-th root in the norm computation.
[default= `1e-12` ] Returns: ~nnabla.Variable: N-D array """ if p is None: p = 2 if axes is None: axes = range(x.ndim) return F.NormNormalization(ctx, p, axes, eps)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def sync_batch_normalization(ctx, x, beta, gamma, mean, variance, comm, group=None, axes=(1,), decay_rate=0.9, eps=1e-05, batch_stat=True, n_outputs=-1, outputs=None): r""" Synchronized Batch Normalization: For some tasks (e.g., semantic segmentation), the batch size can be too small and the BatchNormalization layer might not work well. The SyncBatchNormalization layer solves this problem by synchronizing the batch statistics (mean and variance) across multiple processes. .. math:: \begin{eqnarray} \mu &=& \frac{1}{M} \sum x_i \\ \sigma^2 &=& \frac{1}{M} \left(\sum x_i - \mu\right)^2 \\ \hat{x}_i &=& \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ y_i &=& \hat{x}_i \gamma + \beta. \end{eqnarray} References: * Implementing Synchronized Multi-GPU Batch Normalization https://hangzhang.org/PyTorch-Encoding/notes/syncbn.html Note: Since v1.32.0, the gradients of beta and gamma are not synchronized after backward computation (they had been synchronized previously). Users are responsible for synchronizing the gradients of beta and gamma by performing all-reduce, which is naturally done by performing all-reduce for the gradients of all the parameters as is usually done in data-parallel distributed training. Args: x(~nnabla.Variable): N-D array of input. beta(~nnabla.Variable): N-D array of beta which is learned. gamma(~nnabla.Variable): N-D array of gamma which is learned. mean(~nnabla.Variable): N-D array of running mean (modified during forward execution). variance(~nnabla.Variable): N-D array of running variance (modified during forward execution). comm(Communicator): The communicator. group(string): The name of the communicator group. [default= `world` ] axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ] decay_rate(float): Decay rate of running mean and variance. [default= `0.9` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] batch_stat(bool): Use mini-batch statistics rather than running ones. [default= `True` ] Returns: ~nnabla.Variable: N-D array """ if group is None: group = 'world' return F.SyncBatchNormalization(ctx, comm, group, axes, decay_rate, eps, batch_stat)(x, beta, gamma, mean, variance, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def tensor_normalization(ctx, x, beta=None, gamma=None, axes=(1,), eps=1e-05, no_scale=False, no_bias=False, n_outputs=-1, outputs=None): r""" General tensor normalization. Input variable `x` is normalized by mean and std calculated by `x` itself. Mean and variance are calculated along `axes`. For example, if the input shape is (B, C, H, W) and axes is [0, 1], the shapes of the calculated mean and std are (B, C, 1, 1). Args: x(~nnabla.Variable): N-D array of input. beta(~nnabla.Variable): N-D array of beta which is learned. [optional] gamma(~nnabla.Variable): N-D array of gamma which is learned. [optional] axes(repeated int64): Axes mean and variance are taken. [default= `(1,)` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] no_scale(bool): If `True`, the scale term is omitted. [default= `False` ] no_bias(bool): If `True`, the bias term is omitted.
[default= `False` ] Returns: ~nnabla.Variable: N-D array """ inputs = [x] if beta is not None: inputs += [beta] if gamma is not None: inputs += [gamma] return F.TensorNormalization(ctx, axes, eps, no_scale, no_bias)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
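The normalization functions above share a common pattern: affine parameters and running statistics are passed as Variables whose shapes match the normalized axes. The sketch below shows ``batch_normalization`` as a representative example; the parameter shape ``(1, C, 1, 1)`` for ``axes=(1,)`` follows the usual convention and should be treated as an assumption.

.. code-block:: python

    # Hedged usage sketch for batch_normalization on an NCHW input.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    B, C, H, W = 8, 3, 16, 16
    x = nn.Variable.from_numpy_array(np.random.randn(B, C, H, W).astype(np.float32))
    # Assumed parameter/statistics shape: size C along the normalized axis, 1 elsewhere.
    beta = nn.Variable.from_numpy_array(np.zeros((1, C, 1, 1), dtype=np.float32))
    gamma = nn.Variable.from_numpy_array(np.ones((1, C, 1, 1), dtype=np.float32))
    mean = nn.Variable.from_numpy_array(np.zeros((1, C, 1, 1), dtype=np.float32))
    variance = nn.Variable.from_numpy_array(np.ones((1, C, 1, 1), dtype=np.float32))

    y = F.batch_normalization(x, beta, gamma, mean, variance, axes=(1,), batch_stat=True)
    y.forward()
    # Per-channel mean of the output should be close to 0 and std close to 1.
    print(y.d.mean(axis=(0, 2, 3)), y.d.std(axis=(0, 2, 3)))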
[docs]@function_api def weight_normalization(ctx, w, g, dim=0, eps=1e-12, n_outputs=-1, outputs=None): r""" Weight normalization. .. math:: \mathbf{w}_{WN} = g \dfrac{\mathbf{w}}{\|\mathbf{w}\|} where :math:`\mathbf{w}` is the input weights to be normalized. and :math:`g` is learnable multiplication factors each of which is applied to each data at `dim`. References: * `Tim Salimans, Diederik P. Kingma, Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks. <https://arxiv.org/abs/1602.07868>`_ Args: w(~nnabla.Variable): N-D array of learnable weights. g(~nnabla.Variable): 1-D array of learnable scales. dim(int): Output dimension. For the other dimensions, the norms are computed. [default= `0` ] eps(float): Epsilon for the normalization. This `eps` is added before taking the sqrt in the norm computation. [default= `1e-12` ] Returns: ~nnabla.Variable: N-D array """ return F.WeightNormalization(ctx, dim, eps)(w, g, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
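The following sketch applies ``weight_normalization`` to a 2-D weight and checks the row norms implied by the formula above (the interpretation that the norm is taken over all dimensions other than ``dim`` follows the argument description and is otherwise an assumption):

.. code-block:: python

    # Hedged sketch: normalize a (out, in) weight along dim=0 with a scale of 2.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    w = nn.Variable.from_numpy_array(np.random.randn(4, 6).astype(np.float32))
    g = nn.Variable.from_numpy_array(np.full((4,), 2.0, dtype=np.float32))

    w_wn = F.weight_normalization(w, g, dim=0)
    w_wn.forward()
    # Each output row is g_i * w_i / ||w_i||, so its L2 norm should be close to 2.
    print(np.linalg.norm(w_wn.d, axis=1))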
@function_api def weight_standardization(ctx, w, channel_axis=None, eps=1e-05, n_outputs=-1, outputs=None): r""" Applies Weight Standardization over an input weight, which is defined as .. math:: \begin{eqnarray} \mu_{W_i} &=& \frac{1}{I} \sum_{j=1}^{I} W_{ij} \\ \sigma_{W_i} &=& \sqrt{\frac{1}{I} \sum_{i=1}^{I} \left(W_{ij} - \mu_{W_{i}}\right)^2 + \epsilon} \\ \hat{W_{ij}} &=& \frac{W_{ij} - \mu_{W_i}}{\sigma_{W_i}} \\ y &=& \hat{W} \ast x \end{eqnarray} References: * `Siyuan Qiao, Huiyu Wang, Chenxi Liu, Wei Shen, Alan Yuille, Weight Standardization <https://arxiv.org/pdf/1903.10520v1.pdf>`_ Args: w(~nnabla.Variable): N-D array of learnable weights. channel_axis(int): An axis for output channel. Default value is 0 which assumes the weights of convolution. [default= `0` ] eps(float): Tiny value to avoid zero division by std. [default= `1e-05` ] Returns: ~nnabla.Variable: N-D array """ if channel_axis is None: channel_axis = 0 return F.WeightStandardization(ctx, channel_axis, eps)(w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def spectral_norm(ctx, w, u, dim=0, itr=1, eps=1e-12, test=False, output_u=False, n_outputs=-1, outputs=None): r""" Spectral Normalization. .. math:: W_{sn} = \frac{W}{\sigma(W)} where :math:`W` is the input matrix, and the :math:`\sigma(W)` is the spectral norm of :math:`W`. The spectral norm is approximately computed by the power iteration. References: Takeru Miyato, Toshiki Kataoka, Masanori Koyama, Yuichi Yoshida, "Spectral Normalization for Generative Adversarial Networks", International Conference on Learning Representations. 2018. Args: w(~nnabla.Variable): N-D array of learnable weights. This is normally network parameter. u(~nnabla.Variable): 1-D array of singular vector. When `test == False`, the data region of `u` will be updated during forward calculation. dim(int): Output dimension. Default is 0. If the dimension is not 0, then the specified dimension becomes the most-left dimension by transposing. [default= `0` ] itr(int): Number of power iterations. Default is 1. [default= `1` ] eps(float): Epsilon for the normalization. This `eps` is added before taking the sqrt in the norm computation. [default= `1e-12` ] test(bool): When in `True`, `u` will not be updated. Default is `False`. [default= `False` ] output_u(bool): Output original `u` or not. `u` is updated when `test == False` but you can get original `u` as output with this option. Default is `False`. [default= `False` ] Returns: ~nnabla.Variable: Spectrally normalized :math:`W_{sn}` with the same shape as :math:`W`. """ return F.SpectralNorm(ctx, dim, itr, eps, test, output_u)(w, u, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def mean_subtraction(ctx, x, rmean, t, base_axis=1, update_running_mean=True, n_outputs=-1, outputs=None): r""" It subtracts the mean of the elements of the input array, and normalizes it to :math:`0`. Preprocessing arrays with this function has the effect of improving accuracy in various tasks such as image classification. At training time, this function is defined as .. math:: \begin{eqnarray} \mu &=& \frac{1}{M} \sum x_i \\ y_i &=& x_i - \mu \end{eqnarray} At testing time, the mean values used are those that were computed during training by moving average. Note: The backward performs an approximated differentiation that takes into account only the latest mini-batch. Args: x(~nnabla.Variable): N-D array of input. 
rmean(~nnabla.Variable): N-D array of running mean (modified during forward execution). t(~nnabla.Variable): Scalar of num of iteration of running mean (modified during forward execution). base_axis(int): Base axis of Mean Subtraction operation. Dimensions up to base_axis is treated as sample dimension. [default= `1` ] update_running_mean(bool): Update running mean during forward execution. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.MeanSubtraction(ctx, base_axis, update_running_mean)(x, rmean, t, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
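``spectral_norm`` above states that :math:`\sigma(W)` is approximated by the power iteration. The plain NumPy sketch below illustrates that approximation only; it is not the library's internal implementation, and the helper name is made up for illustration.

.. code-block:: python

    # Illustrative power iteration for the largest singular value of W.
    import numpy as np

    def estimate_spectral_norm(W, itr=1, eps=1e-12):
        u = np.random.randn(W.shape[0])
        for _ in range(itr):
            v = W.T @ u
            v /= (np.linalg.norm(v) + eps)
            u = W @ v
            u /= (np.linalg.norm(u) + eps)
        # u^T W v approximates the largest singular value sigma(W).
        return u @ W @ v

    W = np.random.randn(8, 5)
    print(estimate_spectral_norm(W, itr=30))
    print(np.linalg.svd(W, compute_uv=False)[0])  # reference value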
[docs]@function_api def clip_grad_by_value(ctx, x, min, max, n_outputs=-1, outputs=None): r"""In forward pass, the function behaves as the identity. In backward pass, .. math:: g_x = \begin{cases} max & (g_y > max) \\ g_y & (otherwise) \\ min & (g_y < min) \end{cases}. A typical case for use is to prevent the gradient explosion through a whole computational graph. For example, if you want to clip gradient values for each feature map, .. code-block:: python x = nn.Variable([16, 3, 32, 32]) min = F.broadcast(nn.Variable.from_numpy_array(np.asarray([-1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) max = F.broadcast(nn.Variable.from_numpy_array(np.asarray([1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) c = F.clip_grad_by_value(x, min=min, max=max) h = PF.convolution(c, 64, (3, 3), pad=(1, 1)) Args: x(~nnabla.Variable): N-D array of input. min(~nnabla.Variable): N-D array of minimum input value by which the gradients of the `y` are clipped. Note that the shape of `min` must be the same as `x`'s and the backward to `min` is not performed. max(~nnabla.Variable): N-D array of maximum input value by which the gradients of the `y` are clipped. Note that the shape of `max` must be the same as `x`'s and the backward to `max` is not performed. Returns: ~nnabla.Variable: N-D array. """ return F.ClipGradByValue(ctx)(x, min, max, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def clip_grad_by_norm(ctx, x, clip_norm=None, axes=None, n_outputs=-1, outputs=None): r""" In the forward pass, the function behaves like the identity. In the backward pass, .. math:: g_x = N \times \frac{g_y}{\|g_y\|_2}. where :math:`g_x` is the gradient w.r.t. the input, :math:`g_y` is the gradient w.r.t. the output, and :math:`N` is `clip_norm`, the value to which the norm of :math:`g_y` is scaled; this is the case when `axes` is not set. When `axes` is set, the norm is computed over `axes`. A typical use case is to prevent gradient explosion through a whole computational graph. For example, if you want to normalize gradient values over the feature axis, .. code-block:: python x = nn.Variable([16, 3, 32, 32]) min = F.broadcast(nn.Variable.from_numpy_array(np.asarray([-1.0]).reshape((1, 1, 1, 1))), (16, 3, 32, 32)) c = F.clip_grad_by_norm(x, axes=(1, )) h = PF.convolution(c, 64, (3, 3), pad=(1, 1)) Args: x(~nnabla.Variable): N-D array of input. clip_norm(float): Value to which the norm of the gradient is scaled in the backward pass. [default= `1.0` ] axes(repeated int64): Axes to be reduced. If an empty list is given, all dimensions are reduced to a scalar. This is used in the backward pass where the norm is computed. [default= `range(x.ndim)` ] Returns: ~nnabla.Variable: N-D array. """ if clip_norm is None: clip_norm = 1.0 if axes is None: axes = range(x.ndim) return F.ClipGradByNorm(ctx, clip_norm, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def sum(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduces a matrix along a specified axis with the sum function. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced to scalar. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Sum(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cumsum(ctx, x, axis=None, exclusive=False, reverse=False, n_outputs=-1, outputs=None): r""" Cumulative sum along a given axis. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which the cumulative sum is to be calculated [default= `0` ] exclusive(bool): If True, perform exclusive cumsum [default= `False` ] reverse(bool): If True, perform cumsum in reverse direction [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axis is None: axis = 0 return F.CumSum(ctx, axis, exclusive, reverse)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
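A small sketch of the ``exclusive`` and ``reverse`` options (the values in the comments follow directly from the definitions above):

.. code-block:: python

    # Cumulative sum variants on a 1-D input.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=np.float32))

    y_inc = F.cumsum(x, axis=0)                   # [1, 3, 6, 10]
    y_exc = F.cumsum(x, axis=0, exclusive=True)   # [0, 1, 3, 6]
    y_rev = F.cumsum(x, axis=0, reverse=True)     # [10, 9, 7, 4]
    for y in (y_inc, y_exc, y_rev):
        y.forward()
        print(y.d)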
@function_api def mean(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduces a matrix along a specified axis with the mean function. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Mean(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def max(ctx, x, axes=None, keep_dims=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with max operation. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] with_index(bool): Return values and indices. [default= `False` ] only_index(bool): Return only indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Max(ctx, axes, keep_dims, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def min(ctx, x, axes=None, keep_dims=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with min operation. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] with_index(bool): Return values and indices. [default= `False` ] only_index(bool): Return only indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Min(ctx, axes, keep_dims, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def norm(ctx, x, p=None, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with norm operation. .. math:: y = \|x\|_p = \left( \sum_i |x_i|^p \right)^{\frac{1}{p}} Args: x(~nnabla.Variable): N-D array. p(float): Order of the norm. [default= `2` ] axes(repeated int64): Axes to be reduced. If empty list is given, all dimensions are reduced to scalar. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if p is None: p = 2 if axes is None: axes = range(x.ndim) return F.Norm(ctx, p, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def prod(ctx, x, axes=None, keep_dims=False, n_outputs=-1, outputs=None): r""" Reduction along axis or axes with product operation. Note: Backward computation is not accurate in a zero value input. Args: x(~nnabla.Variable): N-D array. axes(repeated int64): Axes to be reduced. [default= `range(x.ndim)` ] keep_dims(bool): Flag whether the reduced axis is kept. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = range(x.ndim) return F.Prod(ctx, axes, keep_dims)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
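The reductions above (``sum``, ``mean``, ``max``, ``min``, ``norm``, ``prod``) all accept the axes to reduce and a flag to keep the reduced dimensions. The sketch below passes these arguments positionally to sidestep small keyword-name differences between this low-level module and the convenience wrappers in :obj:`nnabla.functions`:

.. code-block:: python

    # Reduction sketch: axes to reduce, optionally keeping the reduced dims.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(np.arange(24, dtype=np.float32).reshape(2, 3, 4))

    s = F.sum(x, (1,))           # reduce axis 1                  -> shape (2, 4)
    m = F.mean(x, (1, 2), True)  # reduce axes 1 and 2, keep dims -> shape (2, 1, 1)
    for y in (s, m):
        y.forward()
    print(s.d.shape, m.d.shape)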
[docs]@function_api def cumprod(ctx, x, axis=None, exclusive=False, reverse=False, n_outputs=-1, outputs=None): r""" Cumulative product along a given axis. Note: Backward computation is not accurate in a zero value input. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which cumulative product is to be calculated [default= `0` ] exclusive(bool): If True, perform exclusive cumprod [default= `False` ] reverse(bool): If True, perform cumprod in reverse direction [default= `False` ] Returns: ~nnabla.Variable: N-D array """ if axis is None: axis = 0 return F.CumProd(ctx, axis, exclusive, reverse)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reduce_sum(ctx, x, n_outputs=-1, outputs=None): r""" Reduction along an axis with sum operation. Note: This is deprecated. Use ``sum`` instead. Args: x(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array """ return F.ReduceSum(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reduce_mean(ctx, x, n_outputs=-1, outputs=None): r""" Reduction by mean along an axis. Note: This is deprecated. Use ``mean`` instead. Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array """ return F.ReduceMean(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise addition. .. math:: y_i = x^{(0)}_i + x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Add2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add_n(ctx, *x, **kw): r""" Element-wise addition. .. math:: y_i = x^{(0)}_i + \ldots + x^{(n-1)}_i Args: *x(~nnabla.Variable): N-D arrays [variadic] Returns: ~nnabla.Variable: N-D array """ assert len(x) >= 1, "add_n must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) return F.AddN(ctx)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def bc_add2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Note: This shouldn't be called by users. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.BcAdd2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sub2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise subtraction. .. math:: y_i = x^{(0)}_i - x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Sub2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise multiplication. .. math:: y_i = x^{(0)}_i x^{(1)}_i Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Mul2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul_n(ctx, *x, **kw): r""" Element-wise multiplication. .. math:: y_i = x^{(0)}_i \cdots x^{(n-1)}_i Args: *x(~nnabla.Variable): N-D arrays [variadic] Returns: ~nnabla.Variable: N-D array """ assert len(x) >= 1, "mul_n must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) return F.MulN(ctx)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def div2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise division. .. math:: y_i = \frac{x^{(0)}_i} {x^{(1)}_i} Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Div2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pow2(ctx, x0, x1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise power function. .. math:: y_i = {(x^{(0)}_i)} ^ {x^{(1)}_i} Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array """ return F.Pow2(ctx, inplace)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def add_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar addition. .. math:: y_i = x_i + v Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.AddScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def mul_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar multiplication. .. math:: y_i = v x_i Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MulScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pow_scalar(ctx, x, val=1, inplace=False, n_outputs=-1, outputs=None): r""" Element-wise scalar power function. .. math:: y_i = (x_i) ^ v Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.PowScalar(ctx, val, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_sub_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar subtraction. .. math:: y_i = v - x_i Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RSubScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_div_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar division. .. math:: y_i = \frac{v}{x_i} Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RDivScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def r_pow_scalar(ctx, x, val=1, n_outputs=-1, outputs=None): r""" Element-wise scalar power function. .. math:: y_i = v ^ {x_i} Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.RPowScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sign(ctx, x, alpha=1.0, n_outputs=-1, outputs=None): r""" Element-wise sign function. In the forward pass, it is defined as .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ -1 & (x < 0) \\ \alpha & (x = 0) \end{cases}. In the backward pass, it is defined as .. math:: \frac{\partial f(x)}{\partial x} = 1, or in other words, it behaves as the identity function for the gradient in the backward pass. Args: x(~nnabla.Variable): Input alpha(float): Value in case of :math:`x = 0`. [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sign(ctx, alpha)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
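The straight-through behavior described above can be observed directly: the forward pass produces the sign while the backward pass propagates the gradient unchanged. A minimal sketch, assuming the standard Variable/gradient API:

.. code-block:: python

    # Forward: sign of x (with alpha at x=0). Backward: identity gradient (STE).
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x = nn.Variable.from_numpy_array(
        np.array([-2.0, 0.0, 3.0], dtype=np.float32), need_grad=True)
    y = F.sign(x, alpha=0.0)
    y.forward()
    print(y.d)   # [-1., 0., 1.]

    x.grad.zero()
    y.backward()
    print(x.g)   # [1., 1., 1.] -- the gradient passes straight through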
[docs]@function_api def minimum2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise minimum. .. math:: y_i = \min(x^{(0)}_i, x^{(1)}_i) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array of min value """ return F.Minimum2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def maximum2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise maximum. .. math:: y_i = \max(x^{(0)}_i, x^{(1)}_i) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array of max value """ return F.Maximum2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def minimum_scalar(ctx, x, val=1.0, n_outputs=-1, outputs=None): r""" Element-wise scalar minimum. .. math:: y_i = \min(x_i, v) Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MinimumScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def maximum_scalar(ctx, x, val=1.0, n_outputs=-1, outputs=None): r""" Element-wise scalar maximum. .. math:: y_i = \max (x_i, v) Args: x(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1.0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.MaximumScalar(ctx, val)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_and(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical AND. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \neq 0 \;\&\; x^{(1)}_i \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalAnd(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_or(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical OR. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 0 & (x^{(0)}_i = 0 \;\&\; x^{(1)}_i = 0) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalOr(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_xor(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Elementwise logical XOR. .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i = 0 \;\&\; x^{(1)}_i = 0) \\ 1 & (x^{(0)}_i \neq 0 \;\&\; x^{(1)}_i \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LogicalXor(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise 'equal' .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i = x^{(1)}_i) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Equal(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def not_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise 'not equal' .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 0 & (x^{(0)}_i = x^{(1)}_i) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.NotEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \geq x^{(1)}_i) \\ 0 & (x^{(0)}_i < x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.GreaterEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i > x^{(1)}_i) \\ 0 & (x^{(0)}_i \leq x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Greater(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_equal(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i \leq x^{(1)}_i) \\ 0 & (x^{(0)}_i > x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.LessEqual(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element wise comparison. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,x^{(1)}_i) = \begin{cases} 1 & (x^{(0)}_i < x^{(1)}_i) \\ 0 & (x^{(0)}_i \geq x^{(1)}_i) \end{cases}. Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: No Description """ return F.Less(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def searchsorted(ctx, sorted_sequence, values, right=None, n_outputs=-1, outputs=None): r""" Finds the indices in the innermost dimension of a sorted sequence at which values must be inserted to keep the sequence sorted. Args: sorted_sequence(~nnabla.Variable): N-D array of the sorted sequence where the search is to be performed. Note that this must be a sorted array. values(~nnabla.Variable): N-D array of search values. right(bool): If True, given a value v, the function returns the index i such that sorted_sequence[i-1] <= v < sorted_sequence[i] (index of the closest upper bound of v). By default, this is False, so the function returns the index i such that sorted_sequence[i-1] < v <= sorted_sequence[i] (index of the closest lower bound of v). [default= `False` ] Returns: ~nnabla.Variable: N-D array containing the required indices """ if right is None: right = False return F.SearchSorted(ctx, right)(sorted_sequence, values, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
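With the default ``right=False``, the returned indices are expected to match NumPy's ``searchsorted`` with ``side='left'``; the comparison below is a sketch of that correspondence rather than a guarantee:

.. code-block:: python

    # Compare F.searchsorted (right=False) with np.searchsorted(..., side='left').
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    ss = nn.Variable.from_numpy_array(np.array([1., 3., 5., 7.], dtype=np.float32))
    v = nn.Variable.from_numpy_array(np.array([0., 3., 6., 8.], dtype=np.float32))

    idx = F.searchsorted(ss, v)
    idx.forward()
    print(idx.d)                                     # expected: [0, 1, 3, 4]
    print(np.searchsorted(ss.d, v.d, side='left'))   # [0, 1, 3, 4]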
[docs]@function_api def logical_and_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical AND with scalar. .. math:: f(x_i,v) = \begin{cases} 1 & (x_i \neq 0 \;\&\; v \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalAndScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_or_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical OR with scalar. .. math:: f(x_i,v) = \begin{cases} 0 & (x_i = 0 \;\&\; v = 0) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalOrScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_xor_scalar(ctx, x0, val, n_outputs=-1, outputs=None): r""" Elementwise logical XOR with scalar. .. math:: f(x_i,v) = \begin{cases} 1 & (x_i = 0 \;\&\; v = 0) \\ 1 & (x_i \neq 0 \;\&\; v \neq 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(bool): No Description Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalXorScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise 'equal' with a scalar .. math:: f(x_i,v) = \begin{cases} 1 & (x_i = v) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.EqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def not_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise 'not equal' with a scalar .. math:: f(x_i,v) = \begin{cases} 0 & (x_i = v) \\ 1 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.NotEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i \geq v) \\ 0 & (x^{(0)}_i < v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.GreaterEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def greater_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i > v) \\ 0 & (x^{(0)}_i \leq v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.GreaterScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_equal_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i \leq v) \\ 0 & (x^{(0)}_i > v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LessEqualScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def less_scalar(ctx, x0, val=1, n_outputs=-1, outputs=None): r""" Element wise comparison with a scalar. The :math:`i^{th}` element of the output is: .. math:: f(x^{(0)}_i,v) = \begin{cases} 1 & (x^{(0)}_i < v) \\ 0 & (x^{(0)}_i \geq v) \end{cases}. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LessScalar(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def logical_not(ctx, x0, n_outputs=-1, outputs=None): r""" Element-wise logical NOT operation .. math:: f(x_i) = \begin{cases} 1 & (x_i = 0) \\ 0 & otherwise \end{cases}. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.LogicalNot(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def isnan(ctx, x0, n_outputs=-1, outputs=None): r""" Test element-wise for NaN and return a ``0/1`` array. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.IsNaN(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def isinf(ctx, x0, n_outputs=-1, outputs=None): r""" Test element-wise for ``inf/-inf`` and return a ``0/1`` array. Args: x0(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.IsInf(ctx)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reset_nan(ctx, x0, val=0, n_outputs=-1, outputs=None): r""" Replace NaNs with a scalar value specified by ``val``. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ResetNaN(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def reset_inf(ctx, x0, val=0, n_outputs=-1, outputs=None): r""" Replace ``-inf/inf`` with a scalar value specified by ``val``. Args: x0(~nnabla.Variable): Input variable val(float): Value of the scalar [default= `0` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ResetInf(ctx, val)(x0, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def where(ctx, condition, x_true, x_false, n_outputs=-1, outputs=None): r""" Return elements, either from ``x_true`` or ``x_false``, depending on ``condition``. If rank of ``condition`` is higher than those of ``x_true`` and ``x_false``, the first dimensions of ``x_true`` and ``x_false`` must match the dimensions of ``condition``. Example: .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F a = nn.Variable.from_numpy_array(np.random.rand(2, 3)) x = nn.Variable.from_numpy_array(np.random.rand(2, 3, 4)) y = nn.Variable.from_numpy_array(np.random.rand(2, 3, 4)) z = F.where(F.greater_scalar(a, 0.5), x, y) z.forward() # Numpy equivalent z_numpy = np.where(a.d > 0.5, x.d, y.d) assert np.allclose(z_numpy, z.d) Args: condition(~nnabla.Variable): N-d array. For all i, when ``condition[i] == true``, yield ``x_true[i]``, otherwise ``x_false[i]``. x_true(~nnabla.Variable): N-d array with higher or equal rank to ``condition``. x_false(~nnabla.Variable): N-d array with higher or equal rank to ``condition``. Returns: ~nnabla.Variable: N-D array with the same shape as condition """ return F.Where(ctx)(condition, x_true, x_false, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def constant(ctx, val=0, shape=[], n_outputs=-1, outputs=None): r""" Generate a constant-valued array. Args: val(float): Constant value. [default= `0` ] shape(:obj:`tuple` of :obj:`int`): Shape of the output array. [default= `[]` ] Returns: ~nnabla.Variable: N-D array where all values are the specified constant. """ return F.Constant(ctx, val, shape)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def arange(ctx, start, stop, step=1, n_outputs=-1, outputs=None): r""" Generate a range of values within the half-open interval ``[start, stop)`` (the interval including start but excluding stop) with `step` increments. Args: start(float): Start value. stop(float): End value. step(float): Step value. [default= `1` ] Returns: ~nnabla.Variable: 1-D array with the generated values. """ return F.Arange(ctx, start, stop, step)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def linspace(ctx, start, stop, num, n_outputs=-1, outputs=None): r""" Generate a one-dimensional vector/tensor of size `num` whose values are evenly spaced from `start` to `stop`, inclusive. Args: start(float): Start value. stop(float): End value. num(int): Size of the constructed vector/tensor. Returns: ~nnabla.Variable: 1-D array with the generated values. """ return F.Linspace(ctx, start, stop, num)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
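``constant``, ``arange`` and ``linspace`` generate new arrays without any input Variable; a short sketch:

.. code-block:: python

    # Array generation: a filled constant, a half-open range, and evenly spaced values.
    import nnabla as nn
    import nnabla.functions as F

    c = F.constant(0.5, (2, 3))    # 2x3 array filled with 0.5
    a = F.arange(0, 5)             # [0, 1, 2, 3, 4] (stop is excluded)
    lin = F.linspace(0, 1, 5)      # 5 evenly spaced values from 0 to 1
    for y in (c, a, lin):
        y.forward()
        print(y.d)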
[docs]@function_api def abs(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise absolute value function. .. math:: y_i = |x_i| Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise absolute variable """ return F.Abs(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def exp(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise natural exponential function. .. math:: y_i = \exp(x_i). Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise exp variable """ return F.Exp(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def log(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise natural logarithm function. .. math:: y_i = \ln(x_i). Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: Element-wise log variable """ return F.Log(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def identity(ctx, x, n_outputs=-1, outputs=None): r""" Identity function. .. math:: y = x Args: x(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array """ return F.Identity(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_matmul(ctx, a, b, transpose_a=False, transpose_b=False, n_outputs=-1, outputs=None): r""" Batch matrix multiplication. Two batches of matrices are multiplied, one pair for each sample in the batch. A batch of matrices is composed as [..., P, Q] where the last two dimensions compose matrix dimensions, and the first dimensions up to the third last dimension are considered as batch samples. These batch dimensions are internally broadcast when the size of a dimension is 1. Example: .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np nn.set_auto_forward(True) # Same batch size a = nn.Variable.from_numpy_array(np.random.rand(2, 2, 3, 4)) b = nn.Variable.from_numpy_array(np.random.rand(2, 2, 4, 3)) c = F.batch_matmul(a, b) # Different batch size with the broadcast a = nn.Variable.from_numpy_array(np.random.rand(2, 1, 3, 4)) b = nn.Variable.from_numpy_array(np.random.rand(1, 3, 4, 3)) c = F.batch_matmul(a, b) .. WARNING:: Since version 1.13, the behavior of the batch dimensions has changed: they are now broadcast internally when the size of a dimension is 1. Accordingly, this function no longer supports different batch dimensions between the two inputs even if the total sample size of each input is the same. Args: a(~nnabla.Variable): N-D array with >= 2-dim. The last two dimensions will be treated as a matrix. b(~nnabla.Variable): N-D array with >= 2-dim. The last two dimensions will be treated as a matrix. The product of the size of the 0-th dimension through the size of the third last dimension must be the same as that of the input ``a``. transpose_a(bool): Transpose the last two axes of ``a`` in matrix multiplication. [default= `False` ] transpose_b(bool): Transpose the last two axes of ``b`` in matrix multiplication. [default= `False` ] Returns: ~nnabla.Variable: Output of sample-wise matrix multiplication in a batch. When ``a`` has a shape of [N, P, Q], ``b`` has a shape of [N, Q, R], and the transpose options are all False, the output will have a shape of [N, P, R]. """ return F.BatchMatmul(ctx, transpose_a, transpose_b)(a, b, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def round(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise round function. In the forward pass, this function simply computes `round` to the nearest integer value. .. math:: y_i = round(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Round(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def ceil(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise ceil function. In the forward pass, this function simply returns the smallest integer which is not less than the input. .. math:: y_i = ceil(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Ceil(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def floor(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise floor function. In the forward pass, this function simply returns the largest integer which is not greater than the input. .. math:: y_i = floor(x_i). In the backward pass, the simple Straight-Through Estimator (STE) is applied, .. math:: \frac{\partial y_i}{\partial x_i} = 1. Args: x(~nnabla.Variable): Input variable Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Floor(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sin(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise sine (sin) function. .. math:: y_i = \sin (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sin(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cos(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise cosine (cos) function. .. math:: y_i = \cos (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Cos(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def tan(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise tangent (tan) function. .. math:: y_i = \tan (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Tan(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sinh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic sine (sinh) function. .. math:: y_i = \sinh (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Sinh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def cosh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic cosine (cosh) function. .. math:: y_i = \cosh (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Cosh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def asin(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arcsine (asin) function. .. math:: y_i = \arcsin (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ASin(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def acos(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arccosine (acos) function. .. math:: y_i = \arccos (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ACos(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atan(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise arctangent (atan) function. .. math:: y_i = \arctan (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ATan(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atan2(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise arctangent (atan) function with 2 input variables. .. math:: y_i = \arctan2 (x_{i1}, x_{i2}) Args: x0(~nnabla.Variable): N-D array x1(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as input variables """ return F.ATan2(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
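Following the usual ``atan2(y, x)`` argument convention (an assumption here, since the formula above only indexes the two inputs), the result should agree with NumPy's ``arctan2``:

.. code-block:: python

    # Element-wise two-argument arctangent compared against NumPy.
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    y = nn.Variable.from_numpy_array(np.array([1.0, -1.0, 0.0], dtype=np.float32))
    x = nn.Variable.from_numpy_array(np.array([1.0, 1.0, -2.0], dtype=np.float32))

    z = F.atan2(y, x)
    z.forward()
    print(z.d)
    print(np.arctan2(y.d, x.d))  # expected to agree element-wise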
[docs]@function_api def asinh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arcsine (asinh) function. .. math:: y_i = \text{arcsinh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ASinh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def acosh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arccosine (acosh) function. .. math:: y_i = \text{arccosh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ACosh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def atanh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise hyperbolic arctangent (atanh) function. .. math:: y_i = \text{arctanh} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.ATanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def erf(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise Error function. .. math:: y_i = \text{erf} (x_i) Args: x(~nnabla.Variable): N-D array Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Erf(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def concatenate(ctx, *x, **kw): r""" Concatenate a variable number of input arrays along the specified axis. Args: *x(~nnabla.Variable): N-D arrays. [variadic] axis(int): Axis [default= `len(x[0].shape) - 1` ] Returns: ~nnabla.Variable: Concatenated variable """ assert len(x) >= 1, "concatenate must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) axis = kw.pop('axis', len(x[0].shape) - 1) return F.Concatenate(ctx, axis)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
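A minimal usage sketch for `concatenate` (illustrative only, assuming the public `nnabla.functions` interface):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
a = nn.Variable.from_numpy_array(np.ones((2, 3)))
b = nn.Variable.from_numpy_array(np.zeros((2, 5)))
y = F.concatenate(a, b, axis=1)  # join along the second axis
print(y.shape)  # (2, 8)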
@function_api def split(ctx, x, axis=0, n_outputs=-1, outputs=None): r""" Split arrays at the specified axis. Note: This function should not be called directly when constructing models. Instead, use :meth:`nnabla.functions.split` which automatically sets `n_outputs` from the input's shape and axis. Args: x(~nnabla.Variable): N-D array axis(int): Axis [default= `0` ] Returns: ~nnabla.Variable: list of N-D arrays [variadic][parameter] """ return F.Split(ctx, axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def stack(ctx, *x, **kw): r""" Joins two or more arrays on a new axis. Note: Unlike :meth:`nnabla.functions.concatenate` , which joins arrays on an existing axis, Stack joins arrays on a new axis. Args: *x(~nnabla.Variable): N-D arrays. The sizes of all the arrays to be stacked must be the same. [variadic] axis(int): The axis on which to stack arrays. Axis indices take on values 0, 1, 2, and so on from the left. For example, to stack four (3,28,28) inputs on the second axis, specify 1. In this case, the output size will be (3,4,28,28). [default= `0` ] Returns: ~nnabla.Variable: Output """ assert len(x) >= 1, "stack must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) axis = kw.pop('axis', 0) return F.Stack(ctx, axis)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
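A short sketch contrasting `stack` with `concatenate` (illustrative, assuming the public `nnabla.functions` interface):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
a = nn.Variable.from_numpy_array(np.ones((3, 4)))
b = nn.Variable.from_numpy_array(np.zeros((3, 4)))
print(F.stack(a, b, axis=0).shape)        # (2, 3, 4) -- new leading axis
print(F.concatenate(a, b, axis=0).shape)  # (6, 4)    -- existing axis grows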
[docs]@function_api def slice(ctx, x, start=None, stop=None, step=None, n_outputs=-1, outputs=None): r""" Slice arrays along specified axis. Args: x(~nnabla.Variable): N-D array start(repeated int64): Start indices for each axis [default= `(0,) * len(x.shape)` ] stop(repeated int64): Stop indices for each axis [default= `tuple(x.shape)` ] step(repeated int64): Step indices for each axis [default= `(1,) * len(x.shape)` ] Returns: ~nnabla.Variable: Sliced N-D array """ if start is None: start = (0,) * len(x.shape) if stop is None: stop = tuple(x.shape) if step is None: step = (1,) * len(x.shape) return F.Slice(ctx, start, stop, step)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
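A minimal sketch of `slice` (illustrative; `start`, `stop` and `step` cover every axis of the input):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.slice(x, start=(0, 1, 0), stop=(2, 3, 4), step=(1, 1, 2))
print(y.shape)  # (2, 2, 2)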
[docs]@function_api def pad(ctx, x, pad_width, mode='constant', constant_value=0, n_outputs=-1, outputs=None): r""" Pad the input N-D array `x` over the number of dimensions given by half the length of the `pad_width` iterable, where every two values in `pad_width` determine the before and after pad size of an axis. The `pad_width` iterable must hold an even number of non-negative values which may cover all or fewer dimensions of the input variable `x`. If `pad_width` covers fewer dimensions, then it applies to the innermost dimensions of `x`. .. code-block:: python x = nn.Variable.from_numpy_array(np.ones((2, 3, 4))) assert F.pad(x, (1, 1, 2, 2)).shape == (2, 5, 8) Padding is performed according to the requested `mode`: constant Pads with a value given by the keyword argument `constant_value`. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'constant', constant_value = -1) y.forward() assert np.all(y.d == np.array([-1, -1, -1, 1, 2, 3, 4, -1, -1, -1])) reflect Pads with the reflection of the vector mirrored on the first and last values of the vector along each axis. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'reflect') y.forward() assert np.all(y.d == np.array([4, 3, 2, 1, 2, 3, 4, 3, 2, 1])) repeat Pads with the edge value of the vector along each axis. .. code-block:: python x = nn.Variable.from_numpy_array(np.array([1, 2, 3, 4], dtype=int)) y = F.pad(x, (3, 3), 'repeat') y.forward() assert np.all(y.d == np.array([1, 1, 1, 1, 2, 3, 4, 4, 4, 4])) Args: x(~nnabla.Variable): N-D array pad_width(repeated int64): Iterable of *before* and *after* pad values. mode(string): Padding mode string. [default= `'constant'` ] constant_value(float): Fill value if mode is `constant`. [default= `0` ] Returns: ~nnabla.Variable: Padded N-D array with the same number of dimensions as the input. .. code-block:: python x = nn.Variable((3, 3, 4, 2)) # a shape like (B, C, H, W) # 1-D padding: last dim by 1 left and 2 on the right side assert F.pad(x, (1, 2)).shape == (3, 3, 4, 5) # 2-D padding: last dim by (1, 1) and 2nd to last by (2, 2) assert F.pad(x, (2, 2, 1, 1)).shape == (3, 3, 8, 4) # 3-D padding: dims C by (0, 1), H by (2, 1), and W by (3, 3) assert F.pad(x, (0, 1, 2, 1, 3, 3)).shape == (3, 4, 7, 8) """ return F.Pad(ctx, pad_width, mode, constant_value)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def transpose(ctx, x, axes, n_outputs=-1, outputs=None): r""" Transposes tensor dimensions. Args: x(~nnabla.Variable): N-D array axes(repeated int64): Source axis indices for each axis. Returns: ~nnabla.Variable: Transposed N-D array. """ return F.Transpose(ctx, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
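A minimal sketch of `transpose` (illustrative), permuting the axes of a (2, 3, 4) array:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.transpose(x, (0, 2, 1))  # swap the last two axes
print(y.shape)                 # (2, 4, 3)
assert np.allclose(y.d, np.transpose(x.d, (0, 2, 1)))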
[docs]@function_api def broadcast(ctx, x, shape, n_outputs=-1, outputs=None): r""" Broadcasting ND-array to the specified shape. Args: x(~nnabla.Variable): N-D array shape(:obj:`tuple` of :obj:`int`): Shape broadcasted to. The size must be the same in axis where ``x``'s shape is not 1. Returns: ~nnabla.Variable: Broadcasted N-D array """ return F.Broadcast(ctx, shape)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
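A minimal sketch of `broadcast` (illustrative); size-1 axes are expanded to the requested shape:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(3).reshape(3, 1))
y = F.broadcast(x, (3, 4))  # replicate along the size-1 axis
print(y.shape)  # (3, 4)
assert np.allclose(y.d, np.broadcast_to(x.d, (3, 4)))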
[docs]@function_api def broadcast_to(ctx, x, y, axis=None, n_outputs=-1, outputs=None): r""".. WARNING:: This function is experimental support, so please do not actively use it. Broadcasting ND-array to the specified buffer. Args: x(~nnabla.Variable): N-D array y(~nnabla.Variable): N-D array axis(int): Target axis to start broadcasting. If this is not set, broadcast will try to fit y to x starting from the last dimension [default= `-1` ] Returns: ~nnabla.Variable: Broadcasted N-D array """ if axis is None: axis = -1 return F.BroadcastTo(ctx, axis)(x, y, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def tile(ctx, x, reps, n_outputs=-1, outputs=None): r""" Forward input `x` repeated the number of times given by `reps`. If `reps` is a sequence, the output has dimension of ``d = max(len(reps), x.ndim)`` and either `x` is promoted to be d-dimensional by prepending new axes or `reps` is promoted to x.ndim by prepending 1's. Args: x(~nnabla.Variable): N-D array reps(repeated int64): The number of repetitions of `x` along each axis. Returns: ~nnabla.Variable: N-D array """ return F.Tile(ctx, reps)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
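A minimal sketch of `tile` (illustrative), matching the NumPy `np.tile` semantics described above:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[1, 2, 3], [4, 5, 6]]))
y = F.tile(x, (2, 1))  # repeat twice along the first axis
print(y.shape)  # (4, 3)
assert np.allclose(y.d, np.tile(x.d, (2, 1)))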
[docs]@function_api def one_hot(ctx, x, shape, n_outputs=-1, outputs=None): r""" This function creates one-hot vector based on input indices. The range [-shape[i], -1] of input indices are regarded as [0, shape[i]-1], and an input index outside [-shape[i], shape[i]-1] generates a vector filled with zero. Example: .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np labels = nn.Variable.from_numpy_array(np.array([[9], [4], [5], [-9], [10]])) print(labels.shape) # (5, 1) num_class = 10 y_train = F.one_hot(labels, shape=(num_class, )) y_train.forward() print(y_train.shape) # (5, 10) print(y_train.d) # [[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.] # [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] # [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] # Can also be used for ndarray. labels = nn.Variable.from_numpy_array(np.array([[1, 7], [4, 7], [8, 6], [5, 0], [2, 6]])) print(labels.shape) # (5, 2) num_class_1, num_class_2 = 10, 8 y_train = F.one_hot(labels, shape=(num_class_1, num_class_2)) y_train.forward() print(y_train.shape) # (5, 10, 8) print(y_train.d) # [[[0. 0. 0. 0. 0. 0. 0. 0.] [[0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 1. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] ... [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0.] # [0. 0. 0. 0. 0. 0. 0. 0.]], [0. 0. 0. 0. 0. 0. 0. 0.]]] Args: x(~nnabla.Variable): N-D array representing label's indice. shape(:obj:`tuple` of :obj:`int`): Number of classes. When nd-labels are given, dimensions must match. See the example above. Returns: ~nnabla.Variable: N-D array one-hot vector/tensor. """ return F.OneHot(ctx, shape)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def flip(ctx, x, axes=None, n_outputs=-1, outputs=None): r""" Reverses the order of elements of the specified dimension of an array. Args: x(~nnabla.Variable): N-D array axes(repeated int64): The index of the dimension to reverse the order of the elements. Axis indices take on values 0, 1, 2, and so on from the left. For example, to flip 100 RGB images of 32 (W) by 24 (H), i.e. shape (100,3,24,32), vertically and horizontally, specify (2,3). [default= `[len(x.shape) - 1]` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = [len(x.shape) - 1] return F.Flip(ctx, axes)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def shift(ctx, x, shifts=None, border_mode='nearest', n_outputs=-1, outputs=None): r""" Shifts the array elements by the specified amount. Args: x(~nnabla.Variable): N-D array. shifts(repeated int64): The amount to shift elements. For example, to shift image data to the right by 2 pixels and up 3 pixels, specify (-3,2). [default= `(0,) * len(x.shape)` ] border_mode(string): Specify how to process the ends of arrays whose values will be undetermined as a result of shifting. nearest: The data at the ends of the original array is copied and used. reflect: Original data reflected at the ends of the original array is used. [default= `'nearest'` ] Returns: ~nnabla.Variable: N-D array. """ if shifts is None: shifts = (0,) * len(x.shape) return F.Shift(ctx, shifts, border_mode)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def sort(ctx, x, axis=-1, reverse=False, with_index=False, only_index=False, n_outputs=-1, outputs=None): r""" Sorts the elements of `x` along a given `axis` in ascending order by value. A negative `axis` counts from the last dimension of `x`, so the default of -1 sorts along the last dimension. If `reverse` is True, then the elements are sorted in descending order. If `with_index` is True, result is a tuple ``(sorted, indices)`` or only ``indices`` if `only_index` is True. Setting `only_index` to True implies that `with_index` is also True. Args: x(~nnabla.Variable): N-D array. axis(int): Axis along which to sort. [default= `-1` ] reverse(bool): Sort in descending order. [default= `False` ] with_index(bool): Return sorted values and index. [default= `False` ] only_index(bool): Return only the sort index. [default= `False` ] Returns: ~nnabla.Variable: list of N-D arrays [variadic][parameter] """ return F.Sort(ctx, axis, reverse, with_index, only_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
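A minimal sketch of `sort` (illustrative, via the user-facing wrapper that returns values and indices when `with_index=True`, as the docstring above describes):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[3.0, 1.0, 2.0], [0.0, 5.0, 4.0]]))
sorted_x, indices = F.sort(x, axis=1, with_index=True)
print(sorted_x.d)     # [[1. 2. 3.] [0. 4. 5.]]
print(indices.shape)  # (2, 3) -- original positions of the sorted values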
[docs]@function_api def reshape(ctx, x, shape, inplace=True, n_outputs=-1, outputs=None): r""" Reshapes the input variable in-place. It does not create a copy of the variable. The output variable (y) has a new shape but points to the same data as the input variable (x). This means that if the data in the output variable (y) is modified, the data in the input variable (x) also gets modified since the reshape was done in-place. Note: This function has the same behavior as the :meth:`nnabla.Variable.reshape` method. Args: x(~nnabla.Variable): N-D array. shape(:obj:`tuple` of :obj:`int`): Dimensions for each axis. ``-1`` can be specified only in one shape dimension. The value is calculated from the size of the array and remaining dimensions. inplace(bool): The output array is shared with the input array if True. [default= `True` ] Returns: ~nnabla.Variable: Reshaped N-D array """ return F.Reshape(ctx, shape, inplace)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
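A minimal sketch of `reshape` (illustrative), using `-1` to let one dimension be inferred:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
y = F.reshape(x, (2, -1))  # the -1 dimension is inferred as 12
print(y.shape)  # (2, 12)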
[docs]@function_api def shape(ctx, x, start=None, end=None, n_outputs=-1, outputs=None): r""" Get the shape of a tensor. Optional attributes start and end can be used to compute a slice of the input tensor's shape. If start axis is omitted, the slice starts from axis 0. Args: x(~nnabla.Variable): N-D array. start(int): If start axis is omitted, the slice starts from axis 0. [default= `0` ] end(int): The end axis, if specified, is exclusive (the returned value will not include it). [default= `0` ] Returns: ~nnabla.Variable: 1-D array """ if start is None: start = 0 if end is None: end = 0 return F.Shape(ctx, start, end)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def matrix_diag(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array where the last two dimensions consist of the diagonal matrix. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N`). Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N \times M_N`). """ return F.MatrixDiag(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def matrix_diag_part(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array in which the values of the last dimension consist of the diagonal elements of the last two dimensions of an input array. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N \times M_N`). Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N`). """ return F.MatrixDiagPart(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
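A minimal round-trip sketch for `matrix_diag` and `matrix_diag_part` (illustrative):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
d = F.matrix_diag(x)       # (2, 3) -> (2, 3, 3) diagonal matrices
r = F.matrix_diag_part(d)  # (2, 3, 3) -> (2, 3), recovers the diagonals
print(d.shape)  # (2, 3, 3)
assert np.allclose(r.d, x.d)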
@function_api def trilu(ctx, x, k=0, upper=True, n_outputs=-1, outputs=None): r""" Returns an array in which the values of the last dimension consist of the triangular matrix of the last two dimensions of an input array. Args: x(~nnabla.Variable): N-D array with shape (:math:`M_0 \times \ldots \times M_N`). k(int): The number of diagonals above or below the main diagonal to exclude or include. [default= `0` ] upper(bool): Determine whether upper or lower part of matrix is retained. [default= `True` ] Returns: ~nnabla.Variable: N-D array with shape (:math:`M_0 \times \ldots \times M_N`). """ return F.Trilu(ctx, k, upper)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def meshgrid(ctx, *x, **kw): r""" Return coordinate matrices from coordinate vectors. Given N 1-D arrays, this function returns N-D coordinate arrays for vectorized evaluations on an N-D grid. Example: >>> x,y = F.meshgrid(F.arange(0,3), F.arange(0,2)) >>> x.d array([[0., 1., 2.], [0., 1., 2.]], dtype=float32) >>> y.d array([[0., 0., 0.], [1., 1., 1.]], dtype=float32) >>> i,j = F.meshgrid(F.arange(0,3), F.arange(0,2), ij_indexing=True) >>> i.d array([[0., 0.], [1., 1.], [2., 2.]], dtype=float32) >>> j.d array([[0., 1.], [0., 1.], [0., 1.]], dtype=float32) Args: *x(~nnabla.Variable): N-D arrays. [variadic] ij_indexing(bool): If set true (Matrix ('ij') indexing ), the broadcasting dimensions are swapped. Default is False (Cartesian ('xy') indexing ). [default= `False` ] Returns: ~nnabla.Variable: N-D arrays [variadic] """ assert len(x) >= 1, "meshgrid must take at least one input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) ij_indexing = kw.pop('ij_indexing', False) return F.Meshgrid(ctx, ij_indexing)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_det(ctx, x, n_outputs=-1, outputs=None): r""" Batch-wise determinant function. .. math:: Y_b = \det(X_b), where :math:`X_b` and :math:`Y_b` are the :math:`b`-th input and output, respectively. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of determinant """ return F.BatchDet(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_inv(ctx, x, n_outputs=-1, outputs=None): r""" Returns an array of inverted matrices. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of inverted matrices """ return F.BatchInv(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def batch_logdet(ctx, x, n_outputs=-1, outputs=None): r""" Batch-wise log absolute determinant function. .. math:: Y_b = \log(|\det(X_b)|), where :math:`X_b` and :math:`Y_b` are the :math:`b`-th input and output, respectively. Args: x(~nnabla.Variable): batched N-D array Returns: ~nnabla.Variable: batched N-D array of log absolute determinant """ return F.BatchLogdet(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
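A minimal sketch checking `batch_det`, `batch_inv` and `batch_logdet` against NumPy (illustrative; the batch is a set of well-conditioned 3x3 matrices):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
rng = np.random.RandomState(0)
a = rng.rand(4, 3, 3) + np.eye(3)  # batch of 4 matrices, each 3x3
x = nn.Variable.from_numpy_array(a)
assert np.allclose(F.batch_det(x).d, np.linalg.det(a), atol=1e-4)
assert np.allclose(F.batch_inv(x).d, np.linalg.inv(a), atol=1e-4)
assert np.allclose(F.batch_logdet(x).d, np.log(np.abs(np.linalg.det(a))), atol=1e-4)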
[docs]@function_api def batch_cholesky(ctx, x, upper=False, n_outputs=-1, outputs=None): r""" Batch-wise Cholesky decomposition of symmetric positive definite matrix. The gradient of this function will be a symmetric matrix. This function does not check whether the given matrix is symmetric positive definite or not. Args: x(~nnabla.Variable): batched N-D array upper(bool): If true, will return an upper triangular matrix. Otherwise will return a lower triangular matrix. [default= `False` ] Returns: ~nnabla.Variable: batched N-D array of lower/upper triangular matrix. """ return F.BatchCholesky(ctx, upper)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def assign(ctx, dst, src, n_outputs=-1, outputs=None): r""" Assign source array to destination array just like `tf.assign`. This is useful to synchronize or manually update parameters. .. code-block:: python dst = nn.Variable((2, 3, 4)) src = nn.Variable((2, 3, 4)) assign = F.assign(dst, src) assign.forward() assert np.allclose(dst.d, src.d) # dst and src have identical values. assert np.allclose(assign.d, dst.d) # returned Variable is also identical to dst. Unlike TensorFlow, the returned Variable has a backward path to `dst`: .. math:: g_{dst} = g_{y} Args: dst(~nnabla.Variable): A destination N-D array src(~nnabla.Variable): A source N-D array Returns: ~nnabla.Variable: An assigned array """ return F.Assign(ctx)(dst, src, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def gather(ctx, x, Indices, axis=None, batch_dims=None, n_outputs=-1, outputs=None): r"""Gather from the input data according to the index. Given the input data :math:`X` of :math:`(D_{0}, \ldots, D_{N-1})` shape and the indices :math:`IDX` of :math:`(I_{0}, \ldots, I_{M-1})` shape, in case of `batch_dims = 0`, the gather outputs .. math:: && Y[d_{0}, \ldots, d_{axis - 1}, i_{0}, \ldots, i_{M-1}, d_{axis + 1}, \ldots, d_{N-1}] = \\ && X[d_{0}, \ldots, d_{axis - 1}, IDX[i_{0}, \ldots, i_{M-1}], d_{axis + 1}, \ldots, d_{N-1}]. Generally, the gather outputs .. math:: && Y[d_{0}, \ldots, d_{axis - 1}, i_{B}, \ldots, i_{M-1}, d_{axis + 1}, \ldots, d_{N-1}] = \\ && X[d_{0}, \ldots, d_{axis - 1}, IDX[i_{0}, \ldots, i_{B - 1}, i_{B}, \ldots, i_{M-1}], d_{axis + 1}, \ldots, d_{N-1}]. where :math:`B` = `batch_dims`. `x.shape[:batch_dims]` must be equal to `indices.shape[:batch_dims]`. Output shape is `x.shape[:axis] + indices.shape[batch_dims:] + x.shape[axis + 1:]`. Args: x(~nnabla.Variable): Data from which to gather. Indices(~nnabla.Variable): Index with which to gather. axis(int): Axis in `x` to gather from. `axis` must be greater than or equal to `batch_dims`. [default= `0` ] batch_dims(int): The number of batch dimensions. [default= `0` ] Returns: ~nnabla.Variable: Gathered output. """ if axis is None: axis = 0 if batch_dims is None: batch_dims = 0 return F.Gather(ctx, axis, batch_dims)(x, Indices, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
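A minimal sketch of `gather` with `batch_dims=0` (illustrative); in this case the result is expected to match `np.take` along the given axis:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.arange(24).reshape(2, 3, 4))
idx = nn.Variable.from_numpy_array(np.array([2, 0]))
y = F.gather(x, idx, axis=1)  # pick rows 2 and 0 along axis 1
print(y.shape)  # (2, 2, 4)
assert np.allclose(y.d, np.take(x.d, [2, 0], axis=1))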
@function_api def gather_nd(ctx, x, indices, n_outputs=-1, outputs=None): r""" Gather elements or slices from `data` according to `indices`, which must be at least two-dimensional with the first dimension :math:`M` being less than or equal to the :math:`N` dimensions of `data`. Given `data` with shape :math:`(X_0, X_1, ..., X_{N-1})` and indices with shape :math:`(M, Y_0, ..., Y_{K-1})` output has shape :math:`(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`. If :math:`M == N`, output shape is simply :math:`(Y_0, ..., Y_{K-1})`. The forward of :func:`~nnabla.functions.gather_nd` is equivalent to: .. code-block:: python def gather_nd(data, index): import numpy as np tmp_index = index.reshape(index.shape[0], -1) tmp_index = (idx + (Ellipsis,) for idx in zip(*tmp_index)) out_shape = index.shape[1:] + data.shape[index.shape[0]:] return np.vstack([data[idx] for idx in tmp_index]).reshape(*out_shape) Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> data = F.arange(1, 11).reshape([2, 5]) >>> print(data.d) [[ 1. 2. 3. 4. 5.] [ 6. 7. 8. 9. 10.]] >>> F.gather_nd(data, [[1, 1, 0]]).shape (3, 5) >>> F.gather_nd(data, [[1, 1, 0], [0, 1, 0]]).shape (3,) >>> print(F.gather_nd(data, [[1, 1, 0], [0, 1, 0]]).d) [6. 7. 1.] >>> print(F.gather_nd(data, [[1, 1, 0]]).d) [[ 6. 7. 8. 9. 10.] [ 6. 7. 8. 9. 10.] [ 1. 2. 3. 4. 5.]] When `indices` is provided as a :obj:`~nnabla.Variable` it will be possible to change the actual index values after function creation. It is important to note that out-of-bound indices raise an error when running on CPU but are ignored when using an accelerated computation context. >>> indices = nn.Variable((2, 1)) >>> indices.d = [[0], [0]] >>> y = F.gather_nd(data, indices) >>> print(y.d) [1.] >>> indices.d = [[1], [4]] >>> y.forward() >>> print(y.d) [10.] Args: x(~nnabla.Variable): N-D array input data indices(~nnabla.Variable): N-D array indices Returns: ~nnabla.Variable: N-D array """ return F.GatherNd(ctx)(x, indices, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_gather(ctx, input, mask, n_outputs=-1, outputs=None): r"""Gather from the input data according to the mask. Given an input of :math:`(B_1, \ldots, B_N, D_1, \ldots, D_M)` shape and mask of :math:`(B_1, \ldots, B_N)` shape, the function returns an output of :math:`(nnz, D_1, \ldots, D_M)` shape and :math:`nnz` is the number of non-zero elements in mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input = nn.Variable.from_numpy_array([[1, 2], [3, 4], [5, 6]]) mask = nn.Variable.from_numpy_array([1, 0, 1]) output = F.bool_gather(input, mask) print(output.d) # [[1, 2], [5, 6]] Note that this function is normally used with the dynamic graph since this function outputs a variable-length output. If used with the static graph, a network has to be constructed all time in iteration. Args: input(~nnabla.Variable): Data from which to gather. mask(~nnabla.Variable): Mask with which to gather. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. Returns: ~nnabla.Variable: Gathered output. """ return F.BoolGather(ctx)(input, mask, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def scatter_nd(ctx, data, indices, out=None, shape=None, add=False, n_outputs=-1, outputs=None): r""" Scatter `data` into a new array of given `shape` according to `indices`. This operation is the inverse of :func:`~nnabla.functions.gather_nd`. The forward of :func:`~nnabla.functions.scatter_nd` is equivalent to: .. code-block:: python def scatter_nd(data, indices, shape): import numpy as np if isinstance(indices, np.ndarray): indices = indices.tolist() result = np.zeros(shape, dtype=data.dtype) result[tuple(indices)] = data return result Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> data = nn.Variable.from_numpy_array(np.array([9, 10, 11, 12])) >>> indices = nn.Variable.from_numpy_array(np.array([[4, 3, 1, 7]])) >>> scattered = F.scatter_nd(data, indices, shape=(8,)) >>> print(scattered.d) [ 0. 11. 0. 10. 9. 0. 0. 12.] >>> print(F.gather_nd(scattered, indices).d) [ 9. 10. 11. 12.] Args: data(~nnabla.Variable): N-D array input data. indices(~nnabla.Variable): N-D array scatter indices. out(~nnabla.Variable): existing output array [optional] shape(repeated int64): Shape of output variable. [default= `None` ] add(bool): Add the input data to the same destination specified by the indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array of given `shape`. """ inputs = [data, indices] if out is not None: inputs += [out] return F.ScatterNd(ctx, shape, add)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def scatter_add(ctx, x0, indices, x1, axis=None, n_outputs=-1, outputs=None): r""" Add all values from `x1` into `x0` according to the index specified by `indices`. This function adds `x1` into the copy of `x0` and outputs the copy. The original `x0` will not be changed. `x0`, `indices` and `x1` must have the same number of dimensions. The forward of :func:`~nnabla.functions.scatter_add` is equivalent to: .. code-block:: python def scatter_add(x0, indices, x1, axis): # Assuming each input is 3 dimensional import numpy as np output = np.copy(x0) for i in range(indices.shape[0]): for j in range(indices.shape[1]): for k in range(indices.shape[2]): if axis == 0: output[indices[i][j][k]][j][k] += x1[i][j][k] elif axis == 1: output[i][indices[i][j][k]][k] += x1[i][j][k] elif axis == 2: output[i][j][indices[i][j][k]] += x1[i][j][k] return output Args: x0(~nnabla.Variable): N-D array which the data is added to its copy. indices(~nnabla.Variable): N-D array scatter indices. The size of each dimension must be equal or smaller than that of x0 except for the specified axis. The value of indices must be smaller than the size of specified axis' dimension of x0. The size of each dimension must be equal or smaller than that of x1. Indices must not be negative. x1(~nnabla.Variable): N-D array which is scattered and added to x0. axis(int): Axis along which to index. The axis must not exceed the inputs' dimension. [default= `0` ] Returns: ~nnabla.Variable: N-D array which contains the result of scatter addition. The shape is same as x0. """ if axis is None: axis = 0 return F.ScatterAdd(ctx, axis)(x0, indices, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_scatter(ctx, input, mask, output=None, n_outputs=-1, outputs=None): r"""Scatter the `input` according to the `mask`. Given an input of :math:`(nnz, D_1, \ldots, D_M)` shape and mask of :math:`(B_1, \ldots, B_N)` shape, the function returns an output of :math:`(B_1, \ldots, B_N, D_1, \ldots, D_M)` shape, where :math:`nnz` is the number of non-zero elements in the mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input0 = nn.Variable.from_numpy_array([[1, 2], [3, 4], [5, 6]]) mask = nn.Variable.from_numpy_array([1, 0, 1]) output0 = F.bool_gather(input0, mask) input1 = output0 + 10 output1 = F.bool_scatter(input1, mask) print(output1.d) # [[11, 12], [0, 0], [15, 16]] Note that the higher-order gradients of this function rely on F.gather; thus, they are normally used with the dynamic graph. Args: input(~nnabla.Variable): Data to be scattered. mask(~nnabla.Variable): Mask with which to scatter. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. output(~nnabla.Variable): Destination of output. If specified, data are inplaced. [optional] Returns: ~nnabla.Variable: Scattered output. """ inputs = [input, mask] if output is not None: inputs += [output] return F.BoolScatter(ctx)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def bool_fill(ctx, data, mask, value=0, n_outputs=-1, outputs=None): r"""Fill the data with the value to according to the mask. .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F nn.set_auto_forward(True) input = nn.Variable.from_numpy_array([[np.inf, 2], [3, np.nan]]) mask = nn.Variable.from_numpy_array([[1, 0], [0, 1]]) output = F.bool_fill(input, mask, -1) print(output.d) # [[-1, 2], [3, -1]] Args: data(~nnabla.Variable): Data to be filled. mask(~nnabla.Variable): Mask with which to fill. Non-zero/zero elements are supposed to be a binary mask as 1/0. No gradients are computed with respect to mask. value(float): Value to fill. [default= `0` ] Returns: ~nnabla.Variable: Filled output. """ return F.BoolFill(ctx, value)(data, mask, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pack_padded_sequence(ctx, padded_sequence, lengths, batch_first=False, n_outputs=-1, outputs=None): r"""Pack padded variable-length sequences. This method packs padded variable-length sequences. :math:`T_i` is the length of the :math:`i`-th Variable in the sequences. :math:`B` is the batch size equal to the length of the sequences. :math:`T` is the max of :math:`T_i` for all :math:`i`. :math:`*` is the remaining dimensions including none. .. note:: This function assumes the length-sorted padded sequence in the decreasing order and must be used by :func:`~nnabla.utils.rnn.pack_padded_sequence` in the dynamic computation mode. Args: padded_sequence(~nnabla.Variable): Padded sequence of (:math:`T \times B \times *`) or (:math:`B \times T \times *`) shape. lengths(~nnabla.Variable): Sequence length for each batch and always resides in CPU. batch_first(bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False, otherwise (:math:`B`, :math:`T`, :math:`*`). [default= `False` ] Returns: ~nnabla.Variable: Packed sequence of (:math:`N`, :math:`*`) shape. ~nnabla.Variable: Batch size for each time and always resides in CPU. """ return F.PackPaddedSequence(ctx, batch_first)(padded_sequence, lengths, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def pad_packed_sequence(ctx, packed_sequence, batch_sizes, batch_first=False, padding_value=None, total_length=None, n_outputs=-1, outputs=None): r"""Pad packed sequence. This method unpacks the packed sequence and pads it; this is the inverse operation of :func:`pack_padded_sequence`. :math:`T_i` is the length of the :math:`i`-th Variable in the sequences. :math:`B` is the batch size equal to the length of the sequences. :math:`T` is the max of :math:`T_i` for all :math:`i`. :math:`*` is the remaining dimensions including none. .. note:: This function assumes the output of the length-sorted padded sequence in the decreasing order and must be used by :func:`~nnabla.utils.rnn.pad_packed_sequence` in the dynamic computation mode. Args: packed_sequence(~nnabla.Variable): Packed sequence of (:math:`N`, :math:`*`) shape. batch_sizes(~nnabla.Variable): Batch size for each time and always resides in CPU. batch_first(bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False, otherwise (:math:`B`, :math:`T`, :math:`*`). [default= `False` ] padding_value(float): Padding value. [default= `0.0` ] total_length(int): If not None, the outputs are padded up to the `total_length`. If the `total_length` is less than the max length in the `sequences`, an error is thrown. [default= `-1` ] Returns: ~nnabla.Variable: Padded sequence of (:math:`T \times B \times *`) or (:math:`B \times T \times *`) shape. ~nnabla.Variable: Sequence length for each batch and always resides in CPU. """ if padding_value is None: padding_value = 0.0 if total_length is None: total_length = -1 return F.PadPackedSequence(ctx, batch_first, padding_value, total_length)(packed_sequence, batch_sizes, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def nonzero(ctx, x, n_outputs=-1, outputs=None): r""" Find indices of non-zero elements. NonZero behaves similar to NonZero Operator in ONNX. Examples: >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> x = F.arange(1, 10).reshape([3, 3]) >>> x.d[0, 1] = x.d[1, 2] = x.d[2, 2] = 0 >>> print(x.d) [[1. 0. 3.], [4. 5. 0.], [7. 8. 0.]] >>> y = F.nonzero(x) >>> print(y.shape) (2, 6) >>> print(y.d) [[0 0 1 1 2 2], [0 2 0 1 0 1]] Note that this function is normally used with the dynamic graph since this function outputs a variable-length output. If used with the static graph, a network has to be constructed all time in iteration. Args: x(~nnabla.Variable): N-D arrays. Returns: ~nnabla.Variable: N-D array indices. """ return F.NonZero(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def interpolate(ctx, x, output_size, mode, align_corners=True, half_pixel=False, half_pixel_for_nn=False, channel_last=False, n_outputs=-1, outputs=None): r""" Resize an ND array with interpolation. The last ``len(output_size)`` dimensions of the input ``x`` are considered as the spatial dimensions to be resized. Args: x(~nnabla.Variable): N-D array. output_size(repeated int64): Output size. mode(string): Interpolation mode chosen from ('nearest'|'linear'). align_corners(bool): If true, the corner pixels of input and output arrays are aligned, such that the output corner pixels have the same values with the input corner pixels. The default is ``None``, and it becomes `True` if mode is 'linear', otherwise `False`. [default= `True` ] half_pixel(bool): If true, in the coordinate transformation, 0.5 is added to the output coordinate and 0.5 is subtracted from the input coordinate after scaling. [default= `False` ] half_pixel_for_nn(bool): This is a special argument to support the backward-compatibility of the nearest neighbor interpolation. Default is `False`. When in ``True``, the implementation of nearest neighbor interpolation is the old one. [default= `False` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] Returns: ~nnabla.Variable: N-D array. """ return F.Interpolate(ctx, output_size, mode, align_corners, half_pixel, half_pixel_for_nn, channel_last)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def onnx_resize(ctx, x, roi=(), scales=(), sizes=(), mode='nearest', coordinate_transformation_mode='half_pixel', cubic_coeff_a=None, exclude_outside=None, extrapolation_value=None, nearest_mode='round_prefer_floor', n_outputs=-1, outputs=None): r""" Resize an ND array with interpolation. This function provides a compatible interface to ONNX Resize. References: * `ONNX Operators documentation. <https://github.com/onnx/onnx/blob/main/docs/Operators.md>` Args: x(~nnabla.Variable): N-D array. roi(repeated float): RoIs for tf_crop_and_resize. [default= `()` ] scales(repeated float): Scale factors along axes. [default= `()` ] sizes(repeated int64): Output size. [default= `()` ] mode(string): Interpolation mode chosen from ('nearest'|'linear'|'cubic'). [default= `'nearest'` ] coordinate_transformation_mode(string): How to transform the coordinate in the resized tensor to the coordinate in the original tensor. This mode is chosen from ('half_pixel'|'pytorch_half_pixel'|'align_corners'|'asymmetric'|'tf_crop_and_resize'). [default= `'half_pixel'` ] cubic_coeff_a(float): The coefficient used in cubic interpolation. 
[default= `-0.75` ] exclude_outside(int): Whether to set coefficients to zero when sampling locations is outside the input tensor. [default= `0` ] extrapolation_value(float): An extrapolation value used when a sampling location is outside the input tensor at tf_crop_and_resize mode. [default= `0.0` ] nearest_mode(string): Rounding mode for nearest-neighbor interpolation. [default= `'round_prefer_floor'` ] Returns: ~nnabla.Variable: N-D array. """ if cubic_coeff_a is None: cubic_coeff_a = -0.75 if exclude_outside is None: exclude_outside = 0 if extrapolation_value is None: extrapolation_value = 0.0 return F.ONNXResize(ctx, roi, scales, sizes, mode, coordinate_transformation_mode, cubic_coeff_a, exclude_outside, extrapolation_value, nearest_mode)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def fft(ctx, x, signal_ndim, normalized=False, n_outputs=-1, outputs=None): r""" Complex-to-complex Discrete Fourier Transform, .. math:: X_{k_1, \ldots, k_d} = \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x_{n_1, \ldots, n_d} \exp\left(-2 \pi j \left( \sum_{i=0}^{d} \frac{k_i n_i}{N_i} \right) \right), where .. math:: k_i = 0, \ldots, N_i - 1. This function now supports 1-D, 2-D, and 3-D DFT with or without the leading batch dimension(s). The input is expected to be complex-valued with at least signal_ndim + 1 dimensions. The last dimension has a shape of two where x[..., 0] is the real part and x[..., 1] the imaginary part. Example: .. code-block:: python import numpy as np import nnabla as nn import nnabla.functions as F from nnabla.ext_utils import get_extension_context ctx = get_extension_context("cudnn") nn.set_default_context(ctx) # Example for a batched 2D-FFT and 2D-IFFT (batch-size: 2, data-size: 4x3) x_data = np.random.rand(2, 4, 3) + 1j * np.random.rand(2, 4, 3) x = nn.Variable.from_numpy_array(np.stack([np.real(x_data), np.imag(x_data)], axis=3)) y = F.fft(x, signal_ndim=2, normalized=True) z = F.ifft(y, signal_ndim=2, normalized=True) z.forward() np.allclose(z.d[..., 0] + 1j*z.d[...,1], x_data) Args: x(~nnabla.Variable): Input. signal_ndim(int): The number of dimensions for each signal. It must be 1, 2, or 3. normalized(bool): Use unitary normalization. If `True`, the normalization constant :math:`\sqrt{\frac{1}{\prod_{i=1}^{d} N_i}}` is multiplied. [default= `False` ] Returns: ~nnabla.Variable: FFT transformed signal. """ return F.FFT(ctx, signal_ndim, normalized)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def ifft(ctx, x, signal_ndim, normalized=False, n_outputs=-1, outputs=None): r""" Complex-to-complex inverse Discrete Fourier Transform, .. math:: X_{k_1, \ldots, k_d} = \frac{1}{\prod_{i=1}^{d} N_i} \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x_{n_1, \ldots, n_d} \exp\left(2 \pi j \left( \sum_{i=0}^{d} \frac{k_i n_i}{N_i} \right) \right), where .. math:: k_i = 0, \ldots, N_i - 1. This function now supports 1-D, 2-D, and 3-D DFT with or without the leading batch dimension(s). The input is expected to be complex-valued with at least signal_ndim + 1 dimensions. The last dimension has a shape of two where x[..., 0] is the real part and x[..., 1] the imaginary part. Args: x(~nnabla.Variable): Input. signal_ndim(int): The number of dimensions for each signal. It must be 1, 2, or 3. normalized(bool): Use unitary normalization. If `True`, the normalization constant :math:`\frac{1}{\prod_{i=1}^{d} N_i}` becomes :math:`\sqrt{\frac{1}{\prod_{i=1}^{d} N_i}}`. [default= `False` ] Returns: ~nnabla.Variable: IFFT transformed signal. """ return F.IFFT(ctx, signal_ndim, normalized)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def stft(ctx, x, window_size, stride, fft_size, window_type='hanning', center=True, pad_mode='reflect', as_istft_backward=False, n_outputs=-1, outputs=None): r""" Short-time Fourier transform. Args: x(~nnabla.Variable): Time domain sequence of size `batch_size x sample_size`. window_size(int): Size of STFT analysis window. stride(int): Number of samples that we shift the window, also called `hop size`. fft_size(int): Size of the FFT, the output will have `fft_size // 2+ 1` frequency bins. window_type(string): Analysis window, can be either `hanning`, `hamming` or `rectangular`. [default= `'hanning'` ] center(bool): If `True`, then the signal `x` is padded by half the FFT size using reflection padding. [default= `True` ] pad_mode(string): Padding mode, which can be `'constant'` or `'reflect'`. `'constant'` pads with `0`. [default= `'reflect'` ] as_istft_backward(bool): If `True`, then forward execution behaves as backward execution of ISTFT, treating input `x` as output gradient of ISTFT and outputs `y_r` and `y_i` as inputs gradient of ISTFT. This option is only used in nn.grad operator. [default= `False` ] Returns: ~nnabla.Variable: Real part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. ~nnabla.Variable: Imaginary part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. """ return F.STFT(ctx, window_size, stride, fft_size, window_type, center, pad_mode, as_istft_backward)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def istft(ctx, y_r, y_i, window_size, stride, fft_size, window_type='hanning', center=True, pad_mode='reflect', as_stft_backward=False, n_outputs=-1, outputs=None): r""" Inverse short-time Fourier transform. .. note:: We use a constant square inverse window for the reconstruction of the time-domain signal, therefore, the first and last `window_size - stride` are not perfectly reconstructed. Args: y_r(~nnabla.Variable): Real part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. y_i(~nnabla.Variable): Imaginary part of STFT of size `batch_size x fft_size//2 + 1 x frame_size`. window_size(int): Size of STFT analysis window. stride(int): Number of samples that we shift the window, also called `hop size`. fft_size(int): Size of the FFT, the output will have `fft_size // 2+ 1` frequency bins. window_type(string): Analysis window, can be either `hanning`, `hamming` or `rectangular`. [default= `'hanning'` ] center(bool): If `True`, then the signal `x` is padded by half the FFT size using reflection padding. [default= `True` ] pad_mode(string): Padding mode corresponding to STFT `pad_mode`, which can be `'constant'` or `'reflect'`. `'constant'` pads with `0`. This option is ignored for the normal use of ISTFT. You need to set the same `pad_mode` only when `as_stft_backward == True`. [default= `'reflect'` ] as_stft_backward(bool): If `True`, then forward execution behaves as backward execution of STFT, treating inputs `y_r` and `y_i` as outputs gradient of STFT and output `x` as input gradient of STFT. This option is only used in nn.grad operator. [default= `False` ] Returns: ~nnabla.Variable: Time domain sequence of size `batch_size x sample_size`. """ return F.ISTFT(ctx, window_size, stride, fft_size, window_type, center, pad_mode, as_stft_backward)(y_r, y_i, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def dropout(ctx, x, p=0.5, seed=-1, n_outputs=-1, outputs=None): r""" Dropout. Samples a number :math:`u` from a uniform distribution in :math:`[0, 1]` , and ignores the input if :math:`u \leq p`. .. math:: y = \left\{ \begin{array}{ll} \frac{x}{1 - p} & (u > p) \\ 0 & ({\rm otherwise}) \end{array} \right. Note: Usually dropout is only applied during training as below (except `MC dropout`_). If you want to use dropout as an MC dropout, remove 'if train:'. .. code-block:: python h = PF.affine(x, num_hidden) if train: h = F.dropout(h, 0.5) .. _MC dropout: https://arxiv.org/abs/1506.02142 Args: x(~nnabla.Variable): N-D array p(float): :math:`p` in definition. [default= `0.5` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Dropout(ctx, p, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def top_k_data(ctx, x, k, abs=False, reduce=True, base_axis=1, largest=True, with_index=False, n_outputs=-1, outputs=None): r""" Select the `k` largest values from each sample in `x` to propagate unmodified and set all other values to 0. If `abs` is True, the `k` largest values are selected by magnitude. If `reduce` is True (the default), all feature dimensions are reduced to a single dimension of size `k` that propagates only the `k` largest values. Otherwise, if `reduce` is False, input and output dimensions are identical. Dimensions before `base_axis` are treated as number of sample dimensions and `k` values get selected from all elements of a sample (dimensions from `base_axis`) regardless of shape. >>> import nnabla as nn, nnabla.functions as F >>> x = nn.Variable((4, 5, 6)) >>> F.top_k_data(x, 3, reduce=False).shape (4, 5, 6) >>> F.top_k_data(x, 3, reduce=True).shape (4, 3) >>> F.top_k_data(x, 3, reduce=True, base_axis=2).shape (4, 5, 3) Args: x(~nnabla.Variable): N-D array k(int): Number of largest data values to propagate. abs(bool): Determine largest data values by magnitude. [default= `False` ] reduce(bool): Reduce feature size to one dimension of size `k`. [default= `True` ] base_axis(int): First dimension of the sample shape. [default= `1` ] largest(bool): Whether to select the `k` largest or smallest values. [default= `True` ] with_index(bool): Return top-k values and indices. [default= `False` ] Returns: ~nnabla.Variable: N-D array. ~nnabla.Variable: N-D array of top-k indices. """ return F.TopKData(ctx, k, abs, reduce, base_axis, largest, with_index)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def top_k_grad(ctx, x, k, abs=False, base_axis=1, n_outputs=-1, outputs=None): r""" Select the `k` largest gradients for each sample in `x` to back-propagate unmodified and set all other gradients to 0. If `abs` is True, the `k` largest gradients are selected by magnitude. Dimensions before `base_axis` are treated as number of sample dimensions and `k` gradients get selected from all gradients of a sample (dimensions from `base_axis`) regardless of shape. Args: x(~nnabla.Variable): N-D array k(int): Number of largest gradients to propagate. abs(bool): Determine largest gradients by magnitude. [default= `False` ] base_axis(int): First dimension of the sample shape. [default= `1` ] Returns: ~nnabla.Variable: N-D array with same shape and data as `x`. """ return F.TopKGrad(ctx, k, abs, base_axis)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand(ctx, low=0, high=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a uniform distribution :math:`x \sim U(low, high)` given lowest value :math:`low`, upper bound :math:`high`, and shape of the returned Variable. Args: low(float): :math:`low` in definition. [default= `0` ] high(float): :math:`high` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.Rand(ctx, low, high, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def randint(ctx, low=0, high=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples integer numbers from a uniform distribution :math:`x \sim U(low, high)` given lowest value :math:`low`, upper bound :math:`high`, and the shape of the returned Variable. The lowest value :math:`low` is included in the range, while the upper bound :math:`high` is excluded, corresponding to the half-open interval :math:`[low, high)`. Args: low(int): :math:`low` in definition. [default= `0` ] high(int): :math:`high` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. The dtype is int32. """ return F.Randint(ctx, low, high, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
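A minimal sketch of `randint` (illustrative); sampled values fall in the half-open interval [low, high):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
y = F.randint(low=0, high=10, shape=(2, 3), seed=313)
print(y.shape)  # (2, 3)
assert np.all((y.d >= 0) & (y.d < 10))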
[docs]@function_api def randn(ctx, mu=0, sigma=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a normal distribution :math:`x \sim N(\mu, \sigma)` given mean :math:`\mu`, standard deviation :math:`\sigma`, and shape of the returned Variable. Args: mu(float): :math:`\mu` in definition. [default= `0` ] sigma(float): :math:`\sigma` in definition. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.Randn(ctx, mu, sigma, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_binomial(ctx, n=1, p=0.5, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a binomial distribution :math:`x \sim B(n, p)` given the number of trials :math:`n`, probability :math:`p`, and shape of the returned Variable. When :math:`n = 1`, this behaves like the Bernoulli distribution. Args: n(int): :math:`n` in definition, the number of trials. [default= `1` ] p(float): :math:`p` in definition, probability of success. [default= `0.5` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandBinomial(ctx, n, p, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_beta(ctx, alpha=0.5, beta=0.5, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a beta distribution :math:`x \sim \beta(\alpha, \beta)`. Args: alpha(float): :math:`\alpha`, scale parameter. [default= `0.5` ] beta(float): :math:`\beta`, scale parameter. [default= `0.5` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandBeta(ctx, alpha, beta, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def rand_gamma(ctx, k=0.5, theta=1, shape=[], seed=-1, n_outputs=-1, outputs=None): r""" Samples numbers from a gamma distribution :math:`x \sim \frac {\gamma(k, \frac {x}{\theta})}{\Gamma(k)}`. Args: k(float): k, scale parameter. [default= `0.5` ] theta(float): :math:`\theta`, scale parameter. [default= `1` ] shape(:obj:`tuple` of :obj:`int`): Shape of returned variable. [default= `[]` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Variable with the shape specified in the argument. """ return F.RandGamma(ctx, k, theta, shape, seed)(n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_choice(ctx, x, w, shape=[], replace=True, seed=-1, n_outputs=-1, outputs=None): r""" Generate random samples from population `x` with selection probabilities determined by the relative weights `w`. The number of samples to draw is given by the product of `shape`\s dimensions, and the samples are returned with the given `shape`. By default, samples are drawn with replacement, i.e. selection of a specific population member is solely determined by its associated weight. Sampling without replacement, where any population member may be drawn only once, is used if `replace` is set to False. For both `x` and `w` the innermost dimension corresponds to the individual populations and their weights from which samples are returned with the requested `shape` following all outermost dimensions of the input. .. code-block:: python import nnabla as nn import nnabla.functions as F import numpy as np nn.set_auto_forward(True) # x holds two populations x = nn.Variable.from_numpy_array(np.array([[11, 22, 33], [110, 220, 330]])) # w holds the weights for each population w = nn.Variable.from_numpy_array(np.array([[10, 20, 70], [70, 20, 10]])) # draw one sample from each population y = F.random_choice(x, w) # y.shape => (2, 1) # draw 12 samples with shape (3, 4) from each population y = F.random_choice(x, w, shape=(3, 4)) # y.shape => (2, 3, 4) Note that weights must not be less than zero and for each population the sum of weights must be greater than zero. Additionally, sampling without replacement requires that the number of non-zero weights is not less than the number of samples to be drawn. These conditions are verified in "cpu" computation context but not when using "cuda" or "cudnn" acceleration (this would require additional device synchronization steps penalizing performance). Random sampling from an implicit array of index values (like categorical or multinomial) can be realized with input `x` constructed as indices. .. code-block:: python w = nn.Variable.from_numpy_array(np.array([1, 2, 3, 2, 1])) y = F.random_choice(F.arange(0, 5), w) Args: x(~nnabla.Variable): N-D array from which a random sample is generated. w(~nnabla.Variable): N-D array of associated weights of elements in `x`. shape(:obj:`tuple` of :obj:`int`): Number and shape of generated samples. [default= `[]` ] replace(bool): Whether sampling is with or without replacement. [default= `True` ] seed(int): Random seed. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ return F.RandomChoice(ctx, shape, replace, seed)(x, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_crop(ctx, x, shape=None, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" RandomCrop randomly extracts a portion of an array. Args: x(~nnabla.Variable): N-D array shape(:obj:`tuple` of :obj:`int`): The data size to extract. For example, to randomly extract a portion of the image (3,48,48) from a 3,64,64 image, specify (3,48,48). [default= `x.shape` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ if shape is None: shape = x.shape return F.RandomCrop(ctx, shape, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_flip(ctx, x, axes=None, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" Reverses the order of elements of the specified dimension of an array at 50% probability. Args: x(~nnabla.Variable): N-D array axes(repeated int64): The index of the axis to reverse the order of the elements. Axis indices take on values 0, 1, 2, and so on from the left. For example, to flip a 32 (W) by 24 (H) 100 RGB images (100, 3,24,32) vertically and horizontally at random, specify (2,3). [default= `[len(x.shape) - 1]` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array """ if axes is None: axes = [len(x.shape) - 1] return F.RandomFlip(ctx, axes, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_shift(ctx, x, shifts=None, border_mode='nearest', constant_value=0, base_axis=1, seed=-1, n_outputs=-1, outputs=None): r""" Randomly shifts the array elements within the specified range. Args: x(~nnabla.Variable): N-D array. shifts(repeated int64): Max absolute amount to shift elements. For example, to shift image data horizontally by :math:`\pm 2` pixels and vertically by :math:`\pm 3` pixels, specify (3,2). [default= `(0,) * len(x.shape)` ] border_mode(string): Specify how to process the ends of arrays whose values will be undetermined as a result of shifting. nearest: The data at the ends of the original array is copied and used. reflect: Original data reflected at the ends of the original array is used. constant: Constant value is used. [default= `'nearest'` ] constant_value(float): Value used for outside of the original array if border_mode='constant'. [default= `0` ] base_axis(int): No Description [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: N-D array. """ if shifts is None: shifts = (0,) * len(x.shape) return F.RandomShift(ctx, shifts, border_mode, constant_value, base_axis, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def random_erase(ctx, x, prob=0.5, area_ratios=(0.02, 0.4), aspect_ratios=(0.3, 3.3333), replacements=(0.0, 255.0), n=None, share=True, inplace=False, base_axis=1, seed=-1, channel_last=False, ste_fine_grained=True, n_outputs=-1, outputs=None): r"""Randomly erase patches of the inputs and replace with random values. Erasing is applied for each sample and for each `n` with the given probability, the randomly selected area ratio and aspect ratio if `share` is `True`; otherwise (`share`=`False`), for each feature additionally. Random patch are selected by random coordinates as the following, .. math:: S_e &&= Uniform(s_l, s_h) \times S \\ r_e &&= Uniform(r_l, r_h) \\ H_e &&= \sqrt{S_e \times r_e} \\ W_e &&= \sqrt{S_e / r_e} \\ y_e &&= Uniform(0, H - H_e) \\ x_e &&= Uniform(0, W - W_e), where :math:`S` is the area, :math:`s_l` and :math:`s_h` are the low and high values of the area ratio range, :math:`r_l` and :math:`r_h` are the low and high values of the aspect ratio range, :math:`H_e` and :math:`W_e` are height and width of a patch, and :math:`y_e` and :math:`x_e` are the start coordinates of a patch. If a pixel of the inputs falls in this patch, the value of that pixel is replaced with a random value in `replacements` range. Backward is implemented as passing gradients if `ste_fine_grained` is False; otherwise, the backward only occurs in regions not erased. References: * `Zhun Zhong, Liang Zheng, Guoliang Kang, Shaozi Li, Yi Yang, Random Erasing Data Augmentation, <https://arxiv.org/abs/1708.04896>`_ Args: x(~nnabla.Variable): N-D array. prob(float): Probability to erase. [default= `0.5` ] area_ratios(repeated float): Low and high of the area ratio range. [default= `(0.02, 0.4)` ] aspect_ratios(repeated float): Low and high of the aspect ratios range. [default= `(0.3, 3.3333)` ] replacements(repeated float): Low and high of the replacement value range. [default= `(0.0, 255.0)` ] n(int): Max number of patches to be erased. [default= `1` ] share(bool): Use a same bounding box randomly picked over the feature dimension when being True. Default is True. [default= `True` ] inplace(bool): This option is obsolete and ignored. Output is never in-placed with input. [default= `False` ] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. Default is True. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ if n is None: n = 1 return F.RandomErase(ctx, prob, area_ratios, aspect_ratios, replacements, n, share, inplace, base_axis, seed, channel_last, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def image_augmentation(ctx, x, shape=None, pad=(0, 0), min_scale=1.0, max_scale=1.0, angle=0.0, aspect_ratio=1.0, distortion=0.0, flip_lr=False, flip_ud=False, brightness=0.0, brightness_each=False, contrast=1.0, contrast_center=0.0, contrast_each=False, noise=0.0, seed=-1, n_outputs=-1, outputs=None): r""" ImageAugmentation randomly alters the input image. Args: x(~nnabla.Variable): N-D array. shape(:obj:`tuple` of :obj:`int`): The output image data size. [default= `x.shape` ] pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0, 0)` ] min_scale(float): The minimum scale ratio when randomly scaling the image. For example, to scale down to 0.8 times the size of the original image, specify "0.8". To not apply random scaling, set both min_scale and max_scale to "1.0". [default= `1.0` ] max_scale(float): The maximum scale ratio when randomly scaling the image. For example, to scale down to 2 times the size of the original image, specify "2.0". [default= `1.0` ] angle(float): The rotation angle range in radians when randomly rotating the image. The image is randomly rotated in the -Angle to +Angle range. For example, to rotate in a +-15 degree range, specify "0.26" (15 degrees/360 degrees * 2PI). To not apply random rotation, specify "0.0". [default= `0.0` ] aspect_ratio(float): The aspect ratio range when randomly deforming the image. For example, to deform aspect ratio of image from 1:1.3 to 1.3:1, specify "1.3". To not apply random deforming, specify "1.0". [default= `1.0` ] distortion(float): The distortion range when randomly distorting the image. To not apply distortion, specify "0.0". [default= `0.0` ] flip_lr(bool): Whether to randomly flip the image horizontally at 50% probability. [default= `False` ] flip_ud(bool): Whether to randomly flip the image vertically at 50% probability. [default= `False` ] brightness(float): The absolute range of values to randomly add to the brightness. A random value in the -Brightness to +Brightness range is added to the brightness. For example, to vary the brightness in the -0.05 to +0.05 range, specify "0.05". To not apply random addition to brightness, specify "0.0". [default= `0.0` ] brightness_each(bool): Whether to apply the random addition to brightness (as specified by brightness) to each color channel. True: brightness is added based on a different random number for each channel. False: brightness is added based on a random number common to all channels. [default= `False` ] contrast(float): The range in which to randomly vary the image contrast. The contrast is varied in the 1/Contrast times to Contrast times range. The output brightness is equal to (input - contrast_center) * contrast + contrast_center. For example, to vary the contrast in the 0.91 times to 1.1 times range, specify "1.1". To not apply random contrast variation, specify "1.0". [default= `1.0` ] contrast_center(float): Intensity center used for applying contrast. [default= `0.0` ] contrast_each(bool): Whether to apply the random contrast variation (as specified by contrast) to each color channel. True: contrast is varied based on a different random number for each channel. False: contrast is varied based on a random number common to all channels. [default= `False` ] noise(float): Sigma of normal random number to be added. [default= `0.0` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. 
[default= `-1` ] Returns: ~nnabla.Variable: N-D array. """ if shape is None: shape = x.shape return F.ImageAugmentation(ctx, shape, pad, min_scale, max_scale, angle, aspect_ratio, distortion, flip_lr, flip_ud, brightness, brightness_each, contrast, contrast_center, contrast_each, noise, seed)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sigmoid_cross_entropy(ctx, x, target, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target variables, passed to a sigmoid function. .. math:: y_i = - \left(x^{(1)}_i \ln \left(\sigma \left(x^{(0)}_i \right)\right) + \ \left(1 - x^{(1)}_i\right) \ln \left(1 - \sigma \left(x^{(0)}_i \ \right)\right)\right) where :math:`\sigma(s)=\frac{1}{1+\exp(-s)}`. Note: SigmoidCrossEntropy is equivalent to Sigmoid+BinaryCrossEntropy, but computing them at once has the effect of reducing computational error. Args: x(~nnabla.Variable): N-D array. Typically indicates a score. The value lies in :math:`[-\infty, \infty]` [parameter] target(~nnabla.Variable): N-D array of labels. Only 0 or 1 value is allowed. [parameter] Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.SigmoidCrossEntropy(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
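As a small usage sketch of sigmoid_cross_entropy (the logits and labels below are made-up values), the element-wise losses are typically reduced with F.mean to obtain a scalar training loss:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    # Made-up logits (scores) and binary labels for 4 samples.
    logits = nn.Variable.from_numpy_array(
        np.array([[2.0], [-1.0], [0.5], [-3.0]], dtype=np.float32))
    labels = nn.Variable.from_numpy_array(
        np.array([[1], [0], [1], [0]], dtype=np.float32))

    loss = F.mean(F.sigmoid_cross_entropy(logits, labels))
    print(loss.d)  # scalar mean loss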
[docs]@function_api def binary_cross_entropy(ctx, x, target, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target variables. .. math:: y_i = - \left(x^{(1)}_i * \ln \left(x^{(0)}_i\right) + \left(1 - \ x^{(1)}_i\right) * \ln \left(1 - x^{(0)}_i\right)\right). Args: x(~nnabla.Variable): Probabilities N-D array. :math:`-\infty` to :math:`\infty`. target(~nnabla.Variable): N-D array of labels. Usually set as 0 or 1, but, unlike SigmoidCrossEntropy, it allows probability (0 to 1) as inputs and backpropagation can be done. Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.BinaryCrossEntropy(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def softmax_cross_entropy(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between the variables and the variables of a label given by a category index, with Softmax normalization. .. math:: y_{j} = -\ln \left(\frac{\exp(x_{j,t_j})}{\sum_{i'} \exp(x_{j,i'})}\right) along the dimension specified by axis (:math:`i` is the axis on which the normalization is performed). Note: SoftmaxCrossEntropy is equivalent to Softmax+CategoricalCrossEntropy, but computing them at once has the effect of reducing computational error. Args: x(~nnabla.Variable): N-D array. Typically indicates a score. :math:`(D_1 \times ... \times D_i \times ... \times D_N)` [parameter] target(~nnabla.Variable): N-D array of labels. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` , each label should be a class index from 0 to the number of classes minus 1, or -1 if the sample does not belong to any class. [parameter] axis(int): Axis along which the normalization is taken. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` """ if axis is None: axis = len(x.shape) - 1 return F.SoftmaxCrossEntropy(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def categorical_cross_entropy(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Element-wise cross entropy between `x` and the target `t` where targets are given by a category index. .. math:: y_{j} = -\ln \left( x_{j, t_j} \right) along the dimension specified by axis (:math:`i` is the axis on which the normalization is performed). Args: x(~nnabla.Variable): N-D array. Typically indicates a score. :math:`(D_1 \times ... \times D_i \times ... \times D_N)` [parameter] target(~nnabla.Variable): N-D array of labels. :math:`(D_1 \times ... \times 1 \times ... \times D_N)`, each label should be a class index from 0 to the number of classes minus 1, or -1 if the sample does not belong to any class. [parameter] axis(int): Axis along which the normalization is taken. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. :math:`(D_1 \times ... \times 1 \times ... \times D_N)` """ if axis is None: axis = len(x.shape) - 1 return F.CategoricalCrossEntropy(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
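A minimal sketch of softmax_cross_entropy with made-up scores; note that the label array has size 1 along the normalization axis and holds class indices:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    # Scores for 3 samples over 5 classes; labels have shape (3, 1).
    scores = nn.Variable.from_numpy_array(
        np.random.randn(3, 5).astype(np.float32))
    labels = nn.Variable.from_numpy_array(np.array([[0], [3], [4]]))

    loss = F.mean(F.softmax_cross_entropy(scores, labels))  # axis defaults to 1
    print(loss.d)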
[docs]@function_api def squared_error(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise squared error .. math:: y_i = \left(x^{(0)}_i - x^{(1)}_i\right)^2. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array. """ return F.SquaredError(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def absolute_error(ctx, x0, x1, n_outputs=-1, outputs=None): r""" Element-wise absolute error .. math:: y_i = | x^{(0)}_i - x^{(1)}_i |. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. Returns: ~nnabla.Variable: N-D array. """ return F.AbsoluteError(ctx)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def huber_loss(ctx, x0, x1, delta=1.0, n_outputs=-1, outputs=None): r""" Element-wise Huber loss .. math:: y_i= \left\{ \begin{array}{ll} d^2 & (|d| < \delta)\\ \delta (2 |d| - \delta) & ({\rm otherwise}) \end{array} \right. where :math:`d = x^{(0)}_i - x^{(1)}_i` Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. delta(float): Delta [default= `1.0` ] Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.HuberLoss(ctx, delta)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
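To illustrate how the three element-wise regression losses above differ, here is a small sketch on made-up values; the Huber values follow the formula above with ``delta=1.0``:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x0 = nn.Variable.from_numpy_array(np.array([0.0, 1.0, 4.0], dtype=np.float32))
    x1 = nn.Variable.from_numpy_array(np.array([0.5, 1.0, 1.0], dtype=np.float32))

    print(F.squared_error(x0, x1).d)          # [0.25 0.   9.  ]
    print(F.absolute_error(x0, x1).d)         # [0.5  0.   3.  ]
    print(F.huber_loss(x0, x1, delta=1.0).d)  # [0.25 0.   5.  ]: quadratic below delta, linear above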
[docs]@function_api def epsilon_insensitive_loss(ctx, x0, x1, epsilon, n_outputs=-1, outputs=None): r""" Element-wise Epsilon Insensitive Loss .. math:: y_i= \left\{ \begin{array}{ll} | x^{(0)}_i - x^{(1)}_i | - \epsilon & if \ \ | x^{(0)}_i - x^{(1)}_i | > \epsilon \\ 0 & otherwise \end{array} \right. Args: x0(~nnabla.Variable): N-D array. x1(~nnabla.Variable): N-D array. epsilon(float): Insensitive parameter. Returns: ~nnabla.Variable: N-D array of element-wise losses. """ return F.EpsilonInsensitiveLoss(ctx, epsilon)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def kl_multinomial(ctx, p, q, base_axis=1, n_outputs=-1, outputs=None): r""" The Kullback Leibler Divergence for multinomial distributions. .. math:: D = \sum_i p_i \log \left( \frac{p_i}{q_i} \right) Args: p(~nnabla.Variable): N-D array of the source categorical probabilities q(~nnabla.Variable): N-D array of the target categorical probabilities base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] Returns: ~nnabla.Variable: Kullback Leibler divergence :math:`KL(p \parallel q)`. """ return F.KLMultinomial(ctx, base_axis)(p, q, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def affine_grid(ctx, theta, size, align_corners=False, n_outputs=-1, outputs=None): r"""Generate the source grid based on the normalized target grid with `size`. The target grid is first normalized in [-1, 1], then transformed by the affine transformation :math:`\theta` to generate the source grid. 2D and 3D grids are currently supported. This function is normally used with the `warp_by_grid` function for constructing the spatial transformer. Args: theta(~nnabla.Variable): N-D array with the shape (:math:`B \times 2 \times 3`), the sample-wise affine transformation matrix. size(repeated int64): The grid size of (:math:`H \times W`) for 2D and (:math:`D \times H \times W`) for 3D. align_corners(bool): If `True`, the top-left and bottom-right pixels correspond to (-1, -1) and (1, 1) respectively since a pixel is located on the corner of a grid, and the target grid is normalized in [-1, 1]. If `False`, the normalized target grid in [-1, 1] is scaled by `(size - 1) / size` according to the respective spatial size (e.g., :math:`H` and :math:`W`) before the transformation since a pixel is located on the center of a cell in a grid. [default= `False` ] Returns: ~nnabla.Variable: N-D array with the shape (:math:`B \times H \times W \times 2`) for 2D and (:math:`B \times D \times H \times W \times 3`) for 3D. The last dimension of 2 is for (x, y) and of 3 for (x, y, z). The `grid` is used as the source grid for the warping. """ return F.AffineGrid(ctx, size, align_corners)(theta, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def warp_by_grid(ctx, x, grid, mode='linear', padding_mode='zero', align_corners=False, channel_last=False, n_outputs=-1, outputs=None): r"""Warp the input data by the grid. This function is normally used with the generated normalized grid by the `affine_grid` function for constructing the spatial transformer. Args: x(~nnabla.Variable): Input data to be warped with the shape (:math:`B \times C \times H_{in} \times W_{in}`) for 2D and (:math:`B \times C \times D_{in} \times H_{in} \times W_{in}`) for 3D. grid(~nnabla.Variable): Grid warping the input data with the shape (:math:`B \times H_{out} \times W_{out} \times 2`) for 2D and (:math:`B \times D_{out} \times H_{out} \times W_{out} \times 3`) for 3D. The last dimension of 2 is for (x, y) or 3 for (x, y, z). mode(string): Interpolation mode, linear or nearest. [default= `'linear'` ] padding_mode(string): Padding mode when the grid value is outside [-1, 1]. If this is "zero", 0 is used for padding. "reflect" uses the values reflected at the ends of the original input data like the mirror. "repeat" uses the values at the ends of the original input data. [default= `'zero'` ] align_corners(bool): The target grid normalized in [-1, 1] is scaled by `(size - 1) / size` according to the respective spatial size (e.g., :math:`H` and :math:`W`) before the transformation if this is `False`. If this is `True`, the top-left and bottom-right pixels correspond to (-1, -1) and (1, 1) respectively. [default= `False` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a NHWC order. [default= `False` ] Returns: ~nnabla.Variable: Output data warped by the grid. """ return F.WarpByGrid(ctx, mode, padding_mode, align_corners, channel_last)(x, grid, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
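A minimal spatial-transformer sketch combining affine_grid and warp_by_grid; the 2x3 identity affine matrix below is an illustrative choice and, under these assumptions, should warp the input back onto itself (up to floating-point precision):

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    N, C, H, W = 1, 1, 4, 4
    x = nn.Variable.from_numpy_array(
        np.arange(N * C * H * W, dtype=np.float32).reshape(N, C, H, W))

    # Sample-wise 2x3 identity affine matrix, shape (N, 2, 3).
    theta = nn.Variable.from_numpy_array(
        np.array([[[1, 0, 0], [0, 1, 0]]], dtype=np.float32))

    grid = F.affine_grid(theta, size=(H, W), align_corners=True)    # (1, 4, 4, 2)
    y = F.warp_by_grid(x, grid, mode='linear', align_corners=True)  # (1, 1, 4, 4)
    print(np.allclose(y.d, x.d))  # identity transform reproduces the input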
[docs]@function_api def warp_by_flow(ctx, data, flow, n_outputs=-1, outputs=None): r""" Transform the image(s) *data* by *flow* field(s) of offset vectors such that each output pixel corresponds to the input image pixel at the relative offset location given by horizontal and vertical flow values (in other words, the flow field describes the coordinate displacements for each output pixel to the corresponding input pixel). Both *data* and *flow* are 4-D variables (in "NCHW" layout) with identical shape except the *flow* channel dimension (which is always 2). .. math:: output_{n,c,y,x} = data_{n,c,y',x'}, where .. math:: y' &=& y + flow_{n,1,y,x}, \\ x' &=& x + flow_{n,0,y,x}. The output pixel values at :math:`y'` and :math:`x'` locations are obtained by bilinear interpolating between the 4 closest pixels of the input image. Pixel values outside of the input image are implicitly padded with the value of the closest boundary pixel. Args: data(~nnabla.Variable): Input image data with shape `(N, Channels, Height, Width)`. flow(~nnabla.Variable): Flow field vectors with shape `(N, 2, Height, Width)`. Returns: ~nnabla.Variable: Transformed image data with shape `(N, Channels, Height, Width)`. """ return F.WarpByFlow(ctx)(data, flow, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_sigmoid(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise binary sigmoid function. In the forward pass, it computes .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ 0 & ({\rm otherwise})\end{cases}, but in the backward pass, a straight-through approximation of the gradient is used, i.e., .. math:: \frac{\partial f(x)}{\partial x} = \begin{cases} 0 & (|x| \geq 1) \\ \frac{1}{2} & ({\rm otherwise}) \end{cases}. References: * `Courbariaux, Matthieu, and Yoshua Bengio. Binarynet: Training deep neural networks with weights and activations constrained to+ 1 or-1. <https://arxiv.org/abs/1602.02830>`_ Args: x(~nnabla.Variable): Input . Returns: ~nnabla.Variable: Output. """ return F.BinarySigmoid(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_tanh(ctx, x, n_outputs=-1, outputs=None): r""" Element-wise binary tanh function. In the forward pass, it computes .. math:: f(x) = \begin{cases} 1 & (x > 0) \\ -1 & ({\rm otherwise}) \end{cases}, but in the backward pass, a straight-through approximation of the gradient is used, i.e., .. math:: \frac{\partial f(x)}{\partial x} = \begin{cases} 0 & (|x| \geq 1) \\ 1 & ({\rm otherwise}) \end{cases}. References: * `Courbariaux, Matthieu, and Yoshua Bengio. Binarynet: Training deep neural networks with weights and activations constrained to+ 1 or-1. <https://arxiv.org/abs/1602.02830>`_ Args: x(~nnabla.Variable): Input . Returns: ~nnabla.Variable: Output. """ return F.BinaryTanh(ctx)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_connect_affine(ctx, x, weight, binary_weight, bias=None, base_axis=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a BinaryConnect affine layer. It computes in the forward pass .. math:: y_j = \sum_{i} sign(w_{j,i}) x_i, i.e., the weights :math:`w_{j,i}` are binarized to :math:`sign(w_{j,i})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations do not require any multiplications anymore as they turn into additions/subtractions. This function should be used together with :meth:`~nnabla.functions.batch_normalization`. .. note:: 1) If you would like to share the binary weights between other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. References: * `M. Courbariaux, Y. Bengio, and J.-P. David. BinaryConnect: Training Deep Neural Networks with binary weights during propagations. <https://arxiv.org/abs/1511.00363>`_ Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight . [parameter] binary_weight(~nnabla.Variable): Binarized weight . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, binary_weight] if bias is not None: inputs += [bias] return F.BinaryConnectAffine(ctx, base_axis, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_connect_convolution(ctx, x, weight, binary_weight, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a BinaryConnect convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}, i.e., the weights :math:`w_{n, m, i, j}` are binarized to :math:`sign(w_{n, m, i, j})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations do not require any multiplications anymore as they turn into additions/subtractions. This function should be used together with :meth:`~nnabla.functions.batch_normalization`. Reference * `M. Courbariaux, Y. Bengio, and J.-P. David. BinaryConnect: Training Deep Neural Networks with binary weights during propagations. <https://arxiv.org/abs/1511.00363>`_ .. note:: 1) If you would like to share the binary weights between other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, binary_weight] if bias is not None: inputs += [bias] return F.BinaryConnectConvolution(ctx, base_axis, pad, stride, dilation, group, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_weight_affine(ctx, x, weight, binary_weight, alpha, bias=None, base_axis=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a Binary Weight Network affine layer. It computes in the forward pass .. math:: y_j = \frac{1}{\|\mathbf{w}_j\|_{\ell_1}} \sum_{i} sign(w_{j,i}) x_i i.e., the weights :math:`w_{j,i}` are binarized to :math:`sign(w_{j,i})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations turn into additions/subtractions which are followed by multiplication with the scaling factor :math:`\alpha_j = \frac{1}{\|\mathbf{w}_j\|_{\ell_1}}`. Reference * `Rastegari, Mohammad, et al. XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. <https://arxiv.org/abs/1603.05279>`_ .. note:: 1) If you would like to share the binary weights with other layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] alpha(~nnabla.Variable): Alpha. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, binary_weight, alpha] if bias is not None: inputs += [bias] return F.BinaryWeightAffine(ctx, base_axis, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def binary_weight_convolution(ctx, x, weight, binary_weight, alpha, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, quantize_zero_to=1.0, n_outputs=-1, outputs=None): r""" This function provides a Binary Weight Network convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \frac{1}{\|\mathbf{w}_n\|_{\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}. i.e., the weights :math:`w_{n, m, i, j}` are binarized to :math:`sign(w_{n, m, i, j})` and, hence, each weight is in :math:`\{-1,\,1\}`. By this weight binarization, the inner product computations turn into additions/subtractions which are followed by multiplication with the scaling factor :math:`\alpha_n = \frac{1}{\|\mathbf{w}_n\|_{\ell_1}}`. Reference * `Rastegari, Mohammad, et al. XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. <https://arxiv.org/abs/1603.05279>`_ .. note:: 1) If you would like to share the binary weights between other standard layers, please use the standard, floating value weights (`weight`) and not the binary weights (`binary_weight`). 2) The weights and the binary weights become in sync only after a call to :meth:`~nnabla.Variable.forward`, and not after a call to :meth:`~nnabla.Variable.backward`. If you wish to store the parameters of the network, remember to call :meth:`~nnabla.Variable.forward`, once before doing so, otherwise the weights and the binary weights will not be in sync. 3) CPU and GPU implementations now use floating values for `binary_weight`, since this function is for simulation purposes. Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] binary_weight(~nnabla.Variable): Binarized weight. [parameter] alpha(~nnabla.Variable): Alpha. [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] quantize_zero_to(float): Input value at zero is quantized to this value. [default= `1.0` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, binary_weight, alpha] if bias is not None: inputs += [bias] return F.BinaryWeightConvolution(ctx, base_axis, pad, stride, dilation, group, quantize_zero_to)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def inq_affine(ctx, x, weight, indicator_fixedweights, bias=None, base_axis=1, num_bits=4, inq_iterations=(), selection_algorithm='largest_abs', seed=-1, n_outputs=-1, outputs=None): r""" This function provides a INQ affine layer. It computes in the forward pass .. math:: y_j = \sum_{i} w_{j,i} x_i, where the weights :math:`w_{j,i}` are quantized sequentially during training to power-of-two numbers. In the backward pass, only the non-fixed (i.e., learnable) weights are updated. References: * `Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization: Towards lossless CNNs with low-precision weights. <https://arxiv.org/abs/1702.03044>`_ Args: x(~nnabla.Variable): Input . weight(~nnabla.Variable): Weight . [parameter] indicator_fixedweights(~nnabla.Variable): Indicates which weights are already fixed (0 = not fixed, 1 = fixed) . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] num_bits(int): Number of bits per weight. Needs to be >= 2 as two bits are used to code `zero` and sign of weight. [default= `4` ] inq_iterations(repeated int64): List which specifies after how many forward passes we fix 50% of the learnable weights. If we have done as many iterations as specified in the last element of `inq_iterations`, then all weights are fixed. [default= `()` ] selection_algorithm(string): Chooses algorithm that we use for selecting the weights to fix ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly) [default= `'largest_abs'` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Output. """ inputs = [x, weight, indicator_fixedweights] if bias is not None: inputs += [bias] return F.INQAffine(ctx, base_axis, num_bits, inq_iterations, selection_algorithm, seed)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def inq_convolution(ctx, x, weight, indicator_fixedweights, bias=None, base_axis=1, pad=None, stride=None, dilation=None, group=1, num_bits=4, inq_iterations=(), selection_algorithm='largest_abs', seed=-1, n_outputs=-1, outputs=None): r""" This function provides a INQ convolution layer. It computes in the forward pass .. math:: y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} w_{n, m, i, j} x_{m, a + i, b + j}, where the weights :math:`w_{j,i}` are quantized sequentially during training to power-of-two numbers. In the backward pass, only the non-fixed (i.e., learnable) weights are updated. Reference * `Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization: Towards lossless CNNs with low-precision weights. <https://arxiv.org/abs/1702.03044>`_ Args: x(~nnabla.Variable): Input. weight(~nnabla.Variable): Weight. [parameter] indicator_fixedweights(~nnabla.Variable): Indicates which weights are already fixed (0 = not fixed, 1 = fixed) . [parameter] bias(~nnabla.Variable): Bias. [optional][parameter] base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] pad(:obj:`tuple` of :obj:`int`): Padding sizes for dimensions. [default= `(0,) * (len(x.shape) - (base_axis+1))` ] stride(:obj:`tuple` of :obj:`int`): Stride sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] dilation(:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions. [default= `(1,) * (len(x.shape) - (base_axis+1))` ] group(int): Number of groups of channels. This makes the connection across channels sparser, by grouping connections along the mapping direction. [default= `1` ] num_bits(int): Number of bits per weight. Needs to be >= 2 as two bits are used to code `zero` and sign of weight. [default= `4` ] inq_iterations(repeated int64): List which specifies after how many forward passes we fix 50% of the learnable weights. If we have done as many iterations as specified in the last element of `inq_iterations`, then all weights are fixed. [default= `()` ] selection_algorithm(string): Chooses algorithm that we use for selecting the weights to fix ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly) [default= `'largest_abs'` ] seed(int): Random seed. When -1, seed is sampled from global random number generator. [default= `-1` ] Returns: ~nnabla.Variable: Output """ if pad is None: pad = (0,) * (len(x.shape) - (base_axis+1)) if stride is None: stride = (1,) * (len(x.shape) - (base_axis+1)) if dilation is None: dilation = (1,) * (len(x.shape) - (base_axis+1)) inputs = [x, weight, indicator_fixedweights] if bias is not None: inputs += [bias] return F.INQConvolution(ctx, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)(*inputs, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def fixed_point_quantize(ctx, x, sign=True, n=8, delta=0.0625, ste_fine_grained=True, n_outputs=-1, outputs=None): r"""This function simulates to uniformly quantize values in fixed-point number representation. In the forward pass, .. math:: q_i= \left\{ \begin{array}{ll} max & if \ \ \ x_i > max \\ sign(x_i) \times floor(|x_i| \delta^{-1} + 2^{-1}) \times \delta & if \ \ min \le x_i \le max \\ min & if \ \ x_i < min \\ \end{array} \right., where :math:`\delta` is the step size, :math:`(min, max) :=(- (2^{n-1} - 1)\delta, (2^{n-1} - 1)\delta)` if :math:`sign` is true, :math:`(min, max) := (0, (2^n - 1) \delta)` otherwise, and :math:`n` is the total bit-width used. In the backward pass when using `ste_fine_grained` as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using `ste_fine_grained` as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \ x_i > max \\ 1 & if \ \ min \le x_i \le max \\ 0 & if \ \ x_i < min \\ \end{array} \right.. .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array sign(bool): Indicate the signed number or the unsigned number. Default is true. [default= `True` ] n(int): Bit width used. Note that `sign` consumes one bit. :math:`n-1` is used for number representation in `signed` case. [default= `8` ] delta(float): Step size. [default= `0.0625` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.FixedPointQuantize(ctx, sign, n, delta, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def min_max_quantize(ctx, x, qr_min, qr_max, ql_min, ql_max, decay=0.999, x_min_max=False, ema=False, ste_fine_grained=True, eps=0.01, n_outputs=-1, outputs=None): r"""This function simulates to uniformly quantize values in the range of min and max quantization levels. Min-max quantization is defined as the following equation .. math:: y = round \left(\frac{\min(\max(x, m), M) - m}{scale} \right) \times scale + m, where the :math:`scale` is defined as .. math:: scale = \frac{M - m}{M_q - m_q}, and .. math:: m_q = ql_{min}, \\ M_q = ql_{max}, \\ m = qr_{min}, \\ M = qr_{max}. In the backward pass when using `ste_fine_grained` as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using `ste_fine_grained` as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \ x_i > M \\ 1 & if \ \ m \le x_i \le M \\ 0 & if \ \ x_i < m \\ \end{array} \right.. :math:`qr_{min}` and :math:`qr_{max}` are treaded as follows. * `x_min_max` is `True` and `ema` is `True`: Exponential moving average are computed for each :math:`min(x)` and :math:`max(x)` then stored in :math:`qr_{min}` and :math:`qr_{max}`. * `x_min_max` is `True` and `ema` is `False`: :math:`min(x)` and :math:`max(x)` are computed then stored in :math:`qr_{min}` and :math:`qr_{max}`. * `x_min_max` is `False` and `ema` is `True`: Exponential moving average stored in :math:`qr_{min}` and :math:`qr_{max}` are used. * `x_min_max` is `False` and `ema` is `False` Gradients of :math:`qr_{min}` and :math:`qr_{max}` are computed in the backward pass. More precisely, in inference of the min-max quantization, one has to consider *zero-point (zp)* which corresponds to the real value 0, and its data type is an integer. *zero-point* is defined as .. 
math:: && zp_f = ql_{min} -\frac{qr_{min}}{scale}, \\ && zp = \left\{ \begin{array}{ll} ql_{max} & if \ \ \ zp_f >= ql_{max} \\ round(zp_f) & if \ \ otherwise \\ ql_{min} & if \ \ zp_f <= ql_{min} \\ \end{array} \right.. Accordingly, in order to simulate quantization effect of *zero-point*, during both forward and backward pass, :math:`qr_{min}` and :math:`qr_{max}` are adjusted as follows, .. math:: qr_{min}^{adj} = ql_{min} - zp * scale, \\ qr_{max}^{adj} = ql_{max} - zp * scale. These operations are often called *nudge*. Finally, in the formulas of the min-max quantization, :math:`m` and :math:`M` are replaced by :math:`qr_{min}^{adj}` and :math:`qr_{max}^{adj}` respectively. .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array innput. qr_min(~nnabla.Variable): Minimum value for the quantization range, modified during forward execution when x_min_max is True. qr_max(~nnabla.Variable): Maximum value for the quantization range, modified during forward execution when x_min_max is True. ql_min(~nnabla.Variable): Minimum value for the quantization level, typically 0. ql_max(~nnabla.Variable): Maximum value for the quantization level, typically 255. decay(float): Decay rate for the exponential moving average. [default= `0.999` ] x_min_max(bool): Use the min and max of x to compute quantization ranges. [default= `False` ] ema(bool): Use the exponential moving average for the min and max quantization ranges. [default= `False` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] eps(float): Epsilon, or small value to ensure :math:`qr_{max} - qr_{min}` must be greater than the epsilon. [default= `0.01` ] Returns: ~nnabla.Variable: N-D array. """ return F.MinMaxQuantize(ctx, decay, x_min_max, ema, ste_fine_grained, eps)(x, qr_min, qr_max, ql_min, ql_max, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def pow2_quantize(ctx, x, sign=True, with_zero=True, n=8, m=1, ste_fine_grained=True, n_outputs=-1, outputs=None): r""" This function simulates to quantize values in the power of 2 number representation, in other words, it is linear (uniform) quantization in :math:`log_2` domain. In the forward pass of `signed` case, .. math:: q_i= \left\{ \begin{array}{ll} max_{+} & if \ \ \overline{q_i} > max_{+} \\ \overline{q_i} & if \ \ min_{+} \le \overline{q_i} \le max_{+} \\ min_{+} & if \ \ 0 \le \overline{q_i} < min_{+} \\ min_{-} & if \ \ min_{-} < \overline{q_i} < 0 \\ \overline{q_i} & if \ \ max_{-} \le \overline{q_i} \le min_{-}\\ max_{-} & if \ \ \overline{q_i} < max_{-} \\ \end{array} \right., where .. math:: && max_{+} = 2^{m}, min_{+} = 2^{m - (2^{n-1} - 1)},\\ && max_{-} = -2^{m}, min_{-} = -2^{m - (2^{n-1} - 1)},\\ && \overline{q_i} = sign(x_i) \times 2^{round(\log_2 |x_i|)}. This quantization uses the geometric mean between two power-of-two numbers as quantization threshold. In the forward pass of `unsigned` case, .. math:: q_i= \left\{ \begin{array}{ll} max & if \ \ \overline{q_i} > max \\ \overline{q_i} & if \ \ min \le \overline{q_i} \le max \\ min & if \ \ 0 < \overline{q_i} < min \\ \end{array} \right., where .. math:: && max = 2^{m}, min = 2^{m - (2^{n} - 1)},\\ && \overline{q_i} = 2^{int(\log_2 |x_i|)}. When using `with_zero` as true, a pruning threshold is used to round an input to 0 or :math:`min`. The pruning threshold is defined in this function as the following, .. 
math:: pruning\ threshold = min \times 2^{-\frac{1}{2}}. If an absolute value of the input is lesser than this value, the input is rounded to 0, otherwise :math:`min`. In the backward pass when using ste_fine_grained as false, .. math:: \frac{\partial q_i}{\partial x_i} = 1. In the backward pass when using ste_fine_grained as true, .. math:: \frac{\partial q_i}{\partial x_i}= \left\{ \begin{array}{ll} 0 & if \ \ \overline{q_i} > max_{+} \\ 1 & if \ \ otherwise \\ 0 & if \ \ \overline{q_i} < max_{-} \\ \end{array} \right.. There are some literatures using pow2 quantization in their proposed methods. References: * `Miyashita Daisuke, Lee H. Edward, Murmann Boris. Convolutional Neural Networks using Logarithmic Data Representation. <https://arxiv.org/abs/1603.01025>`_ * `Aojun Zhou, Anbang Yao, Yiwen Guo, Lin Xu, Yurong Chen. Incremental Network Quantization: Towards Lossless CNNs with Low-precision Weights. <https://arxiv.org/abs/1702.03044>`_ .. note:: Quantized values are stored as floating point number, since this function is for simulation purposes. Args: x(~nnabla.Variable): N-D array sign(bool): Indicate the signed number or the unsigned number. Default is true. [default= `True` ] with_zero(bool): Indicate using zero as a quantized value. Default is true. Note that `zero` consumes one bit. [default= `True` ] n(int): Bit width used, Note that `sign` consumes one bit. :math:`n-1` is used for number representation in `signed` case. Default is 8. [default= `8` ] m(int): :math:`2^m` is the upper bound of the dynamic range and :math:`-2^m` is the lower bound, :math:`m \in \mathcal{Z}`. Default is 1. [default= `1` ] ste_fine_grained(bool): Straight Through Estimator is fine-grained or not. [default= `True` ] Returns: ~nnabla.Variable: N-D array. """ return F.Pow2Quantize(ctx, sign, with_zero, n, m, ste_fine_grained)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
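As a small sketch of the fixed-point quantization above (the input values are made up), an 8-bit signed quantization with step size 0.0625 snaps values to multiples of the step and clips to :math:`\pm(2^{7}-1)\times 0.0625 = \pm 7.9375`:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(
        np.array([-2.0, -0.3, 0.03, 0.4, 10.0], dtype=np.float32))

    # Values snap to multiples of delta=0.0625 and clip to +-7.9375.
    q = F.fixed_point_quantize(x, sign=True, n=8, delta=0.0625)
    print(q.d)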
[docs]@function_api def prune(ctx, x, rate=0.9, n_outputs=-1, outputs=None): r""" Prune the input as the following equation, .. math:: q_i = \left \{ \begin{array}{ll} 0 & abs(x_i) < threshold \\ x_i & otherwise \end{array} \right. where :math:`threshold` is determined by `threshold = np.sort(np.abs(x))[int((x.size - 1) * rate)]`. Args: x(~nnabla.Variable): N-D array rate(float): Sparse rate, or pruning rate. [default= `0.9` ] Returns: ~nnabla.Variable: N-D array with the same shape as x """ return F.Prune(ctx, rate)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
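Following the threshold formula above, pruning ten values with ``rate=0.5`` zeroes roughly the smallest half by magnitude; a minimal sketch:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(np.arange(10, dtype=np.float32))  # 0..9
    y = F.prune(x, rate=0.5)
    # threshold = sort(|x|)[int(9 * 0.5)] = 4, so entries with |x| < 4 become 0
    print(y.d)  # [0. 0. 0. 0. 4. 5. 6. 7. 8. 9.]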
@function_api def quantize_linear(ctx, x, scale, zero_point, round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, dtype=1, n_outputs=-1, outputs=None): r"""Linearly quantize the inputs with the scale and zero point. .. math:: y = saturate(round(x / scale) + zero_point). The :math:`saturate` range is determined by `dtype` and the :math:`round` mode is selected by `round_mode`. :math:`zero_point` is constrained by the `dtype` range and its values are rounded by `round_mode`. This function aligns with ONNX. Args: x(~nnabla.Variable): Input N-D array. scale(~nnabla.Variable): Scale N-D array. The values must be positive numbers. zero_point(~nnabla.Variable): Zero point N-D array. round_mode(string): Rounding mode. HALF_AWAY_FROM_ZERO or HALF_TO_EVEN. [default= `'HALF_AWAY_FROM_ZERO'` ] narrow_range(bool): If true, this function does not use the minimum quantized value. For example, if `dtype` is int8 (the range is [-128, 127]), the output range is corrected to [-127, 127]. [default= `False` ] dtype(int): Data type for the output. The int value is compatible with the enum type for the data type defined in `the numpy <https://github.com/numpy/numpy/blob/master/numpy/core/include/numpy/ndarraytypes.h>`_. [default= `1` ] Returns: ~nnabla.Variable: Output N-D array. """ return F.QuantizeLinear(ctx, round_mode, narrow_range, dtype)(x, scale, zero_point, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def dequantize_linear(ctx, x, scale, zero_point, n_outputs=-1, outputs=None): r"""Linearly dequantize the inputs with the scale and zero point. .. math:: y = (x - zero_point) * scale. :math:`zero_point` is constrained by the `dtype` range. This function aligns with ONNX. Args: x(~nnabla.Variable): Input N-D array. scale(~nnabla.Variable): Scale N-D array. The values must be positive numbers. This should be the same as the one used in QuantizeLinear. zero_point(~nnabla.Variable): Zero point N-D array. This should be the same as the one used in QuantizeLinear. Returns: ~nnabla.Variable: Output N-D array. """ return F.DequantizeLinear(ctx)(x, scale, zero_point, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
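A minimal sketch of a quantize/dequantize round trip, assuming a made-up per-tensor scale and zero point that broadcast against the input (the default ``dtype=1`` corresponds to int8):

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    x = nn.Variable.from_numpy_array(np.array([-1.0, 0.0, 0.5, 2.0], dtype=np.float32))
    scale = nn.Variable.from_numpy_array(np.array([0.5], dtype=np.float32))
    zero_point = nn.Variable.from_numpy_array(np.array([0.0], dtype=np.float32))

    q = F.quantize_linear(x, scale, zero_point)     # quantized to -2, 0, 1, 4
    xr = F.dequantize_linear(q, scale, zero_point)  # back to -1.0, 0.0, 0.5, 2.0
    print(q.d)
    print(xr.d)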
[docs]@function_api def top_n_error(ctx, x, target, axis=None, n=1, n_outputs=-1, outputs=None): r""" Top N error along the dimension specified by the axis, the element of outputs is .. math:: y_i = \left \{ \begin{array}{l} 1 \ (x_i \ is \ not \ within \ N-th \ place) \\ 0 \ (x_i \ is \ within \ N-th \ place) \end{array} \right. Args: x(~nnabla.Variable): Probabilities N-D array. :math:`D_1 \times ... \times D_i \times ... \times D_N` target(~nnabla.Variable): N-D array of labels. :math:`D_1 \times ... \times 1 \times ... \times D_N` axis(int): Axis on which the top N error is calculated. [default= `len(x.shape) - 1` ] n(int): top N [default= `1` ] Returns: ~nnabla.Variable: Element-wise error N-D array. (:math:`D_1 \times ... \times 1 \times ... \times D_N`) """ if axis is None: axis = len(x.shape) - 1 return F.TopNError(ctx, axis, n)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
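A small sketch with made-up scores: top_n_error marks, per sample, whether the correct class is outside the top-n scores, so averaging the output gives the error rate:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)

    scores = nn.Variable.from_numpy_array(np.array(
        [[0.1, 0.8, 0.1],
         [0.7, 0.2, 0.1],
         [0.3, 0.3, 0.4],
         [0.5, 0.4, 0.1]], dtype=np.float32))
    labels = nn.Variable.from_numpy_array(np.array([[1], [2], [2], [0]]))

    err = F.top_n_error(scores, labels, n=1)
    print(err.d.mean())  # 0.25: only the second sample is mis-ranked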
[docs]@function_api def binary_error(ctx, x, target, n_outputs=-1, outputs=None): r""" Elementwise binary error. .. math:: y_i = \left \{ \begin{array}{l} 0 ((x^{(0)} \geq 0.5) = (x^{(1)} \geq 0.5)) \\ 1 ((x^{(0)} \geq 0.5) \neq (x^{(1)} \geq 0.5)) \end{array} \right. Args: x(~nnabla.Variable): Probabilities N-D array. :math:`-\infty` to :math:`\infty`. target(~nnabla.Variable): Labels N-D array. Usually set as 0 or 1, but, it allows probability (0 to 1) as inputs. Returns: ~nnabla.Variable: Element-wise errors N-D array. """ return F.BinaryError(ctx)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def confusion_matrix(ctx, x, target, axis=None, n_outputs=-1, outputs=None): r""" Confusion matrix. The return value is already summed over samples. Args: x(~nnabla.Variable): Probabilities N-D array. (:math:`D_1 \times ... \times D_i \times ... \times D_N`) target(~nnabla.Variable): Labels N-D array. (:math:`D_1 \times ... \times 1 \times ... \times D_N`) axis(int): Axis on which the confusion matrix is calculated. [default= `len(x.shape) - 1` ] Returns: ~nnabla.Variable: Confusion matrix 2-D array. Col index is estimated class. Row index is label class. """ if axis is None: axis = len(x.shape) - 1 return F.ConfusionMatrix(ctx, axis)(x, target, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def vat_noise(ctx, x, w, base_axis=1, eps=1.0, n_outputs=-1, outputs=None): r""" Noise for virtual adversarial training. This layer is a special layer for GUI network designing, specialized for getting the noise of virtual adversarial training. In the backward process, the weight parameter will be replaced with the gradient. Forward .. math:: y_i = \frac{\epsilon x_i}{\sqrt{\sum_k x_k^2 + c}} Backward .. math:: \delta x_i = 0 .. math:: w_i = \epsilon \delta y_i Note: This layer is a special layer for GUI network designing. References: * `Miyato et.al, Distributional Smoothing with Virtual Adversarial Training. <https://arxiv.org/abs/1507.00677>`_ Args: x(~nnabla.Variable): N-D array of noise input. Noise is standard Gaussian noise initially, but the next step, fed back gradient variable. w(~nnabla.Variable): N-D array for keep gradient values. base_axis(int): Dimensions up to base_axis is treated as sample dimension. [default= `1` ] eps(float): Noise norm (l2) factor. [default= `1.0` ] Returns: ~nnabla.Variable: N-D array """ return F.VATNoise(ctx, base_axis, eps)(x, w, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
[docs]@function_api def sink(ctx, *x, **kw): r""" Creates a dummy variable used to call the forward or backward functions of multiple variables in one place. This takes any number of input variables with any shape, and creates a single 0-shape output. The forward pass does nothing. The backward pass sets ones to the input grads if one_input_grad is set to true. Note: ``sink`` can only be called at the very end of the graph, and the ``grad`` of the input variables are cleared when ``y.backward(clear_buffer=True)`` is called. Args: *x(~nnabla.Variable): Any number of inputs with any shape. [variadic] one_input_grad(bool): Set grads of inputs as one during backward. It is useful to set false if you want to set external gradients to the input variables. [default= `True` ] Returns: ~nnabla.Variable: Dummy variable. """ assert len(x) >= 1, "sink must take at least 1 input" n_outputs = kw.pop('n_outputs', -1) outputs = kw.pop('outputs', None) one_input_grad = kw.pop('one_input_grad', True) return F.Sink(ctx, one_input_grad)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
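A minimal sketch of sink used to drive forward and backward over two otherwise independent branches at once; the sin/cos branches and the all-ones inputs are arbitrary choices for illustration:

.. code-block:: python

    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    x0 = nn.Variable.from_numpy_array(np.ones((2, 2), dtype=np.float32), need_grad=True)
    x1 = nn.Variable.from_numpy_array(np.ones((2, 2), dtype=np.float32), need_grad=True)
    y0, y1 = F.sin(x0), F.cos(x1)

    # One dummy 0-shape output lets both branches be executed together.
    y = F.sink(y0, y1)
    x0.grad.zero()
    x1.grad.zero()
    y.forward()
    y.backward()
    print(x0.g)  # cos(1) everywhere, since the grads of y0 and y1 are set to ones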
[docs]@function_api def nms_detection2d(ctx, x, thresh=None, nms=None, nms_per_class=None, n_outputs=-1, outputs=None): r""" Non-Maximum Suppression (NMS) applied to a 2D object detector output. The input is a 3-dimensional tensor with shape of ``(B, N, 5 + C)`` where ``B`` denotes batch size, ``N`` denotes the number of detection box candidates, and ``C`` denotes the number of classes of object detection. ``5 + C`` consists of the box coordinates ``x, y, w, h`` in normalized coordinates (the size of each of x and y is 1.0), objectness (learned to predict the IoU value to the ground truth box), and the class probabilities of ``C`` classes. It outputs a tensor with the same dimensions as the input, where all values are copied from the input to the output, except the class probabilities are multiplied by objectness, and possibly suppressed to 0 by NMS. During NMS, all combinations of pairs of bounding boxes are compared. For each pair, the bounding box with a lower detection score (described below) is suppressed if the overlap ratio (the IoU) is greater than the value of ``nms``. There are two suppression modes for NMS. 1. Suppress by class probability (``nms_per_class`` is ``True``): For each bounding box, the detection score is calculated by ``objectness * probability[class_id]`` for each class. The suppression is done for each class independently. 2. Suppress by objectness (``nms_per_class`` is ``False``): The suppression is done for each bounding box using ``objectness`` as a detection score. All class probabilities become 0 for every suppressed box. References: * `Joseph Redmon, Ali Farhadi, YOLO9000: Better, Faster, Stronger. <https://arxiv.org/abs/1612.08242>`_ Args: x(~nnabla.Variable): A 3-dimensional array. thresh(float): Detection score threshold. [default= `0.5` ] nms(float): IoU threshold for Non-maximum suppression (NMS). [default= `0.45` ] nms_per_class(bool): If true, NMS is applied for each class. [default= `True` ] Returns: ~nnabla.Variable: A 3-dimensional array with the same dimensions as the input. """ if thresh is None: thresh = 0.5 if nms is None: nms = 0.45 if nms_per_class is None: nms_per_class = True return F.NmsDetection2d(ctx, thresh, nms, nms_per_class)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
@function_api def onnx_non_max_suppression(ctx, boxes, scores, center_point_box=None, max_output_boxes_per_class=None, iou_threshold=None, score_threshold=None, n_outputs=-1, outputs=None): r""" Non-Maximum Suppression (NMS) to 2D Object detector output. This function provides a ONNX-compatible interface of Non-Maximum Suppression. The first input is a 3-dimensional bounding box tensor with shape of ``(B, N, 4)`` where ``B`` denotes batch size and ``N`` denotes the number of detection box candidates. ``4`` consists of the box coordinates ``y1, x1, y2, x2`` in normalized coordinates (size of each x and y are 1.0). The second input is a 3-dimensional score tensor with shape of ``(B, C, N)`` where ``C`` denotes the number of classes of object detection. It outputs the indices of the selected boxes as a tensor with shape of ``(M, 3)`` where ``M`` denotes the number of the selected boxes. ``3`` consists of 3-dimensional indices ``batch_index, class_index, box_index``. References: * `Joseph Redmon, Ali Farhadi, YOLO9000: Better, Faster, Stronger. <https://arxiv.org/abs/1612.08242>`_ * `ONNX Operators documentation. <https://github.com/onnx/onnx/blob/main/docs/Operators.md>` Args: boxes(~nnabla.Variable): A 3-dimensional array. scores(~nnabla.Variable): A 3-dimensional array. center_point_box(int): Bounding box format (0 or 1). [default= `0` ] max_output_boxes_per_class(int): The maximum number of boxes selected per batch per class. [default= `0` ] iou_threshold(float): IoU threshold for Non-maximum suppression (NMS). [default= `0.0` ] score_threshold(float): Detection score threshold. [default= `0.0` ] Returns: ~nnabla.Variable: A 2-dimensional array. """ if center_point_box is None: center_point_box = 0 if max_output_boxes_per_class is None: max_output_boxes_per_class = 0 if iou_threshold is None: iou_threshold = 0.0 if score_threshold is None: score_threshold = 0.0 return F.ONNXNonMaxSuppression(ctx, center_point_box, max_output_boxes_per_class, iou_threshold, score_threshold)(boxes, scores, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def max_pooling_backward(ctx, dy, x, kernel, stride=None, ignore_border=True, pad=None, channel_last=False, n_outputs=-1, outputs=None): r""" Max pooling backward. This aims to support the n-th order gradients of the max pooling. The document of this function must not be shown, and the function must not be called in the end-user side. Args: dy(~nnabla.Variable): Input variable. x(~nnabla.Variable): Input variable. kernel(:obj:`tuple` of :obj:`int`): Kernel sizes for each spatial axis. stride(:obj:`tuple` of :obj:`int`): Subsampling factors for each spatial axis. [default= `kernel` ] ignore_border(bool): If false, kernels covering borders are also considered for the output. [default= `True` ] pad(:obj:`tuple` of :obj:`int`): Border padding values for each spatial axis. Padding will be added both sides of the dimension. [default= `(0,) * len(kernel)` ] channel_last(bool): If True, the last dimension is considered as channel dimension, a.k.a. NHWC order. 
[default= `False` ] Returns: ~nnabla.Variable: Output """ if stride is None: stride = kernel if pad is None: pad = (0,) * len(kernel) return F.MaxPoolingBackward(ctx, kernel, stride, ignore_border, pad, channel_last)(dy, x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs) @function_api def patch_correlation(ctx, x1, x2, patch=(1, 1), shift=(0, 0), patch_step=(1, 1), shift_step=(1, 1), padding=(0, 0, 0, 0), n_outputs=-1, outputs=None): r""" Multiplicative patch-wise comparison between inputs `x1` and `x2`, which must both be 4-dimensional NCHW (with `channel_last=False`) or NHWC (with `channel_last=True`) arrays (where *N* is the number of samples, *H* and *W* are the sample height and width and *C* is the number of channels). The function returns a 5-D array with shape :math:`(N, C_y, C_x, H_o, W_o)` where :math:`H_o, W_o` are determined by the possible patch locations within the, optionally padded, input image size and :math:`C_y, C_x` are determined by the optionally shifted patch positions. Mathematically, the patch correlation is formulated as .. math:: O(s_y, s_x, h_0, w_0) = \sum_{c} \sum_{k_h} \sum_{k_w} I_1(c, h + k_h, w + k_w) \times I_2(c, h + k_h + s_h, w + k_w + s_w), where :math:`I_1(c, h, w)` and :math:`I_2(c, h, w)` are the inputs at :math:`c`-th channel, :math:`h`-th height, and :math:`w`-th width, :math:`k_h, k_w` indices for the patch size and :math:`s_h, s_w` indices for the shifts. A single correlation value (per sample) is produced if the patch extends to the image dimensions and all other parameters use the default values. >>> import numpy as np, nnabla as nn, nnabla.functions as F >>> nn.set_auto_forward(True) >>> N, C, H, W = (1, 2, 3, 4) >>> x = nn.Variable.from_numpy_array(np.ones([N, C, H, W])) >>> F.patch_correlation(x, x, patch=(H, W)).d array([[[[[24.]]]]], dtype=float32) A patch that is smaller than the image size moves horizontally and vertically, producing a value per position. The `patch_step` argument may be used to control the position increments. >>> F.patch_correlation(x, x, patch=(H-1, W-1)).d array([[[[[12., 12.], [12., 12.]]]]], dtype=float32) >>> F.patch_correlation(x, x, patch=(H-1, W-1), patch_step=(2, 1)).d array([[[[[12., 12.]]]]], dtype=float32) Multiple correlations may be performed at each position between the patch from `x1` and patches from `x2` at relative offsets striding the maximum vertical and horizontal distance given by the `shift` values at increments of `shift_step`. The shifted correlation values can be obtained from the second and third output dimensions for the vertical and horizontal shifts. >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1)).shape (1, 1, 3, 1, 4) >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1)).d array([[[[[0., 6., 6., 6.]], [[6., 6., 6., 6.]], [[6., 6., 6., 0.]]]]], dtype=float32) >>> F.patch_correlation(x, x, (H, 1), shift=(0, 1), shift_step=(1, 2)).d array([[[[[0., 6., 6., 6.]], [[6., 6., 6., 0.]]]]], dtype=float32) Padding with zero values may be applied individually to the top, bottom, left and right sides of the input image. >>> F.patch_correlation(x, x, patch=(H, W), padding=(0, 1, W, W)).d array([[[[[ 0., 6., 12., 18., 24., 18., 12., 6., 0.], [ 0., 4., 8., 12., 16., 12., 8., 4., 0.]]]]], dtype=float32) This function may be used to implement the FlowNetC correlation layer. 

    >>> N, C, H, W = (1, 256, 44, 60)
    >>> x1, x2 = nn.Variable((N, C, H, W)), nn.Variable((N, C, H, W))
    >>> F.patch_correlation(x1, x2, shift=20, shift_step=2).shape
    (1, 21, 21, 44, 60)

    References:
        * `Fischer et al., FlowNet: Learning Optical Flow with Convolutional Networks. <https://arxiv.org/abs/1504.06852>`_

    Args:
        x1(~nnabla.Variable): Input N-D array with shape :math:`(N, H, W, C)`.
        x2(~nnabla.Variable): Input N-D array with shape :math:`(N, H, W, C)`.
        patch(:obj:`tuple` of :obj:`int`): A tuple with height and width of the correlation patch. A single integer expands to identical height and width. [default= `(1, 1)` ]
        shift(:obj:`tuple` of :obj:`int`): A tuple of maximum vertical and horizontal displacement of patches from `x2` that are correlated with a single patch from `x1`. A single integer expands to identical vertical and horizontal displacement. [default= `(0, 0)` ]
        patch_step(:obj:`tuple` of :obj:`int`): A tuple of vertical and horizontal increments for advancing the position of the correlation patch within the input image shape. A single integer expands to identical vertical and horizontal increments. [default= `(1, 1)` ]
        shift_step(:obj:`tuple` of :obj:`int`): A tuple of vertical and horizontal increments for advancing the relative offset position within the shift range. A single integer expands to identical vertical and horizontal increments. [default= `(1, 1)` ]
        padding(:obj:`tuple` of :obj:`int`): A tuple of top, bottom, left and right padding extent. A tuple of two values yields identical top/bottom and left/right padding from the first and second tuple value. A single integer expands to identical padding extent for all sides. [default= `(0, 0, 0, 0)` ]

    Returns:
        ~nnabla.Variable: N-D array with shape :math:`(N, C_y, C_x, H_o, W_o)`.

        The spatial size of the output is calculated as

        .. math::

            H_o = \frac{H + (top\_pad + bottom\_pad) - patch_v}{patch\_step_v} + 1.

        The channel size of the output is calculated as

        .. math::

            C_y = \frac{2 \times shift_v}{shift\_step_v} + 1.

        :math:`W_o` and :math:`C_x` are calculated in the same way from the corresponding horizontal components.

    """
    return F.PatchCorrelation(ctx, patch, shift, patch_step, shift_step, padding)(x1, x2, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def unique(ctx, x, flatten=True, axis=None, sorted=True, with_index=False, with_inverse=False, with_counts=False, n_outputs=-1, outputs=None):
    r"""
    Find the unique elements of the input array.

    Args:
        x(~nnabla.Variable): A N-D array.
        flatten(bool): If True, unique values of the flattened input array are returned. [default= `True` ]
        axis(int): If flatten is False and axis is specified, unique slices along axis are returned. [default= `None` ]
        sorted(bool): If True, unique values/slices sorted in ascending order are returned. [default= `True` ]
        with_index(bool): If True, `indices` is returned. [default= `False` ]
        with_inverse(bool): If True, `inverse_indices` is returned. [default= `False` ]
        with_counts(bool): If True, `counts` is returned. [default= `False` ]

    Returns:
        ~nnabla.Variable: A N-D array.
        ~nnabla.Variable: A 1-D array. Indices of the first occurrence of each element of `y` in `x`. If `flatten` is True, it contains indices into the flattened input array `x`. If `flatten` is False and `axis` is specified, it contains indices into input array `x` on `axis`.
        ~nnabla.Variable: A 1-D array. Indices of the elements of `x` corresponding to `y`. If `flatten` is True, it contains indices into the output array `y`. If `flatten` is False and `axis` is specified, it contains indices into the output array `y` on `axis`.
        ~nnabla.Variable: A 1-D array. The count of each element of `y` in the input array `x`.

    """
    return F.Unique(ctx, flatten, axis, sorted, with_index, with_inverse, with_counts)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
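
# Illustrative usage sketch (not part of the generated source): assuming unique()
# above is exposed as nnabla.functions.unique and auto-forward mode is enabled.
# With the default arguments only the sorted unique values are returned; setting
# with_index / with_inverse / with_counts appends the corresponding index and
# count arrays to the outputs, which are then returned as a tuple of Variables.
#
#   >>> import numpy as np, nnabla as nn, nnabla.functions as F
#   >>> nn.set_auto_forward(True)
#   >>> x = nn.Variable.from_numpy_array(np.array([5, 3, 5, 2, 3]))
#   >>> F.unique(x).d        # sorted unique values: [2, 3, 5]
#   >>> y, indices, inverse, counts = F.unique(
#   ...     x, with_index=True, with_inverse=True, with_counts=True)
#   >>> counts.d             # occurrences of 2, 3, 5 in x: [1, 2, 2]
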
@function_api
def eye_like(ctx, x, k=0, n_outputs=-1, outputs=None):
    r"""
    Generate a 2-D array with ones on the diagonal, specified by `k`, and zeros elsewhere.
    The shape of the output array is the same as the input array.

    Args:
        x(~nnabla.Variable): A 2-D array.
        k(int): Index of the diagonal. The default value 0 means the main diagonal, a positive value means an upper diagonal, and a negative value means a lower diagonal. [default= `0` ]

    Returns:
        ~nnabla.Variable: A 2-D array.

    """
    return F.EyeLike(ctx, k)(x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def mod2(ctx, x0, x1, fmod=False, n_outputs=-1, outputs=None):
    r"""
    Element-wise remainder function.
    The behavior of this operator is determined by x0's dtype and the `fmod` argument:

    .. math::

        y_i = \left\{
            \begin{array}{ll}
                \text{numpy.fmod}(x_{0,i}, x_{1,i}) & (x_{0} \text{ has a floating-point type, or fmod is True}) \\
                \text{numpy.mod}(x_{0,i}, x_{1,i}) & (\text{otherwise})
            \end{array}
        \right..

    Args:
        x0(~nnabla.Variable): A N-D array.
        x1(~nnabla.Variable): A N-D array.
        fmod(bool): If True, this operator behaves like numpy.fmod, otherwise it behaves like numpy.mod. [default= `False` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    return F.Mod2(ctx, fmod)(x0, x1, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def bit_shift(ctx, x, shift, direction='LEFT', n_outputs=-1, outputs=None):
    r"""
    Element-wise bit shift function.

    Args:
        x(~nnabla.Variable): A N-D array. Its dtype must be one of the unsigned integer types.
        shift(~nnabla.Variable): A N-D array. Its dtype is cast to x's dtype at run-time.
        direction(string): Direction of bit shift. [default= `'LEFT'` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    return F.BitShift(ctx, direction)(x, shift, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)


@function_api
def einsum(ctx, *x, **kw):
    r"""
    Evaluates the Einstein summation convention on the inputs.

    See the numpy.einsum documentation for more information about the equation format.

    Args:
        *x(~nnabla.Variable): List of N-D arrays. [variadic]
        equation(string): A string that follows the Einstein summation convention. [default= `` ]

    Returns:
        ~nnabla.Variable: A N-D array.

    """
    assert len(x) >= 1, "einsum must take at least 1 input"
    n_outputs = kw.pop('n_outputs', -1)
    outputs = kw.pop('outputs', None)
    equation = kw.pop('equation')
    return F.Einsum(ctx, equation)(*x, n_outputs=n_outputs, auto_forward=get_auto_forward(), outputs=outputs)
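
# Illustrative usage sketch (not part of the generated source): assuming einsum()
# above is exposed as nnabla.functions.einsum and auto-forward mode is enabled.
# The equation string follows the numpy.einsum convention; "ij,jk->ik" below
# requests an ordinary matrix product of the two inputs.
#
#   >>> import numpy as np, nnabla as nn, nnabla.functions as F
#   >>> nn.set_auto_forward(True)
#   >>> a = nn.Variable.from_numpy_array(np.random.rand(2, 3).astype(np.float32))
#   >>> b = nn.Variable.from_numpy_array(np.random.rand(3, 4).astype(np.float32))
#   >>> c = F.einsum(a, b, equation="ij,jk->ik")
#   >>> c.shape              # (2, 4), matching np.einsum("ij,jk->ik", a.d, b.d).shape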