Source code for nnabla.experimental.graph_converters.quantize

# Copyright (c) 2020 Sony Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict

import numpy as np

import nnabla as nn
import nnabla.functions as F

from .graph_converter import FunctionModifier
from .batch_normalization_folding import BatchNormalizationFoldingModifier
from .batch_normalization_self_folding import BatchNormalizationSelfFoldingModifier
from .remove_function import RemoveFunctionModifier


class QuantizeNonQNNToRecordingModifier(FunctionModifier):
    """
    Insert recorder functions into a non-QNN (float) graph so that
    activation and weight statistics can be collected during forward passes.
    """

    def __init__(self, functions_ranks, config=None, training=True):
        super(QuantizeNonQNNToRecordingModifier, self).__init__()
        self._config = config
        self._fct_bin_set = {
            'Add2': F.add2,
            'Sub2': F.sub2,
            'Mul2': F.mul2,
            'Div2': F.div2,
            'Pow2': F.pow2
        }
        self._training = training

        # Dict to record the rank of each function
        self.functions_ranks = functions_ranks

    def get_function_rank(self, f):
        return self.functions_ranks.get(f, -1)

    def check(self, f):
        def backward_traverse(f, l):
            # List functions between the graph inputs and f
            for inp in f.inputs:
                if inp.parent is not None:
                    l.append(inp.parent)
                    backward_traverse(inp.parent, l)

        def forward_traverse(f, l):
            # List functions between f and the graph outputs
            for fref in f.outputs[0].function_references:
                l.append(fref)
                forward_traverse(fref, l)

        def is_skip_layer(f):
            skip_inputs_layers = self._config.skip_inputs_layers
            skip_outputs_layers = self._config.skip_outputs_layers

            if not skip_inputs_layers and not skip_outputs_layers:
                return False

            fs = []
            if skip_outputs_layers:
                forward_traverse(f, fs)
            fs = list(set([func.info.type_name for func in fs]))
            is_output_layer = True if skip_outputs_layers else False
            for skl in skip_outputs_layers:
                if skl in fs:
                    is_output_layer = False
                    break

            fs = []
            if skip_inputs_layers:
                backward_traverse(f, fs)
            is_input_layer = True if skip_inputs_layers else False
            fs = list(set([func.info.type_name for func in fs]))
            for skl in skip_inputs_layers:
                if skl in fs:
                    is_input_layer = False
                    break

            for skl in skip_inputs_layers:
                if f.info.type_name == skl and is_input_layer:
                    return True
            for skl in skip_outputs_layers:
                if f.info.type_name == skl and is_output_layer:
                    return True

            return False

        fn = f.info.type_name
        cfg = self._config

        # Do not record Sink and BatchNormalization
        if fn == 'Sink' or fn == 'BatchNormalization':
            return False

        # Only add recorders to specific layers.
        # If record_layers is empty, add them to all layers.
        record_layers = cfg.record_layers
        if record_layers and (fn not in record_layers):
            return False

        if is_skip_layer(f):
            return False

        return True

    def share_recorder(self, f, inputs, new_inputs, cfg):
        # Share quantization parameters for Add2 and Concatenate
        fn = f.info.type_name
        recorder_activation = cfg.recorder_activation
        axes = [3] if cfg.channel_last else [1]

        if fn in ['Add2', 'Concatenate']:
            # Pick the input with the smallest rank as the parameter owner
            idx = 0
            min_rank = inputs[0].rank
            for i, input_var in enumerate(new_inputs[1:]):
                if input_var.rank < min_rank:
                    idx = i + 1
                    min_rank = input_var.rank
            shared_name = 'x0'
            scope = self.get_parameter_scope(new_inputs[idx].parent.inputs[1])
            for i, input_var in enumerate(new_inputs):
                if i == idx:
                    continue
                # Re-record the other inputs under the shared scope
                input_var = input_var.parent.inputs[0]
                with nn.parameter_scope(scope):
                    input_var = recorder_activation()(
                        input_var, axes=axes, training=self._training,
                        name=shared_name)
                new_inputs[i] = input_var

        return new_inputs

    def add_recorder(self, f, inputs, cfg):
        fn = f.info.type_name
        function_rank = self.get_function_rank(f)
        scope = '{}-{}'.format(fn, function_rank)
        axes = [3] if cfg.channel_last else [1]
        recorder_activation = cfg.recorder_activation
        recorder_weight = cfg.recorder_weight

        # Index where parameters (weight, bias) start in `inputs`
        params_idx = 1
        if fn in ['Concatenate', 'Stack']:
            params_idx = len(inputs)
        if fn in self._fct_bin_set:
            params_idx = 2

        new_inputs = []
        # Add a recorder for each variable (activation)
        for i, input_var in enumerate(inputs[:params_idx]):
            fref = input_var.function_references
            if fref and fref[0].info.type_name == recorder_activation().name():
                # A recorder is already attached; reuse its output
                input_var = fref[0].outputs[0]
            else:
                with nn.parameter_scope(scope):
                    parent = input_var.parent
                    if parent and parent.info.type_name == recorder_activation().name():
                        # Already the output of a recorder; keep it as is
                        pass
                    else:
                        input_var = recorder_activation()(
                            input_var, axes=axes, training=self._training,
                            name='x{}'.format(i))
            new_inputs.append(input_var)

        # Add a recorder for each parameter (weight and bias)
        for i, input_parameter in enumerate(inputs[params_idx:]):
            with nn.parameter_scope(scope):
                input_parameter = recorder_weight()(
                    input_parameter, axes=axes, training=self._training,
                    name='w{}'.format(i))
            new_inputs.append(input_parameter)

        return new_inputs

    def modify(self, f, inputs):
        if not self.check(f):
            return  # Skip modifying this function

        cfg = self._config
        axes = [3] if cfg.channel_last else [1]
        recorder_activation = cfg.recorder_activation

        # Add a recorder to each input
        new_inputs = self.add_recorder(f, inputs, cfg)
        new_inputs = self.share_recorder(f, inputs, new_inputs, cfg)

        h = self._modify_as_same(f, new_inputs)

        # Optionally add a recorder after the function as well
        next_func = f.outputs[0].function_references[0]
        next_func_rank = self.get_function_rank(next_func)
        scope = '{}-{}'.format(next_func.info.type_name, next_func_rank)
        with nn.parameter_scope(scope):
            if cfg.recorder_position == cfg.RecorderPosition.BOTH:
                h = recorder_activation()(
                    h, axes=axes, training=self._training, name='x0')

        return h
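
# --- Illustrative helper (not part of the original module) ------------------
# Both modifiers in this file take a `functions_ranks` dict mapping each
# function node in the graph to an integer rank; the rank is used to build
# per-function parameter scope names such as 'Convolution-3'. Below is a
# minimal sketch of how such a dict could be built, assuming ranks are
# assigned in topological visit order; the helper name is hypothetical.
def build_functions_ranks(pred):
    ranks = {}

    def visitor(f):
        # `Variable.visit` calls back once per function, in topological order
        ranks[f] = len(ranks)

    pred.visit(visitor)
    return ranks
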
class QuantizeRecordingToTrainingModifier(FunctionModifier):
    """
    Convert a recording graph into a quantization-aware training graph by
    replacing recorders with QuantizeLinear/DequantizeLinear pairs built from
    the recorded scale and zero-point parameters.
    """

    class SimulatedQNN(object):
        """
        Simulated quantization mode: computation stays in float, but inputs,
        parameters, and outputs pass through QuantizeLinear/DequantizeLinear
        pairs so that quantization effects are simulated during training.
        """

        def __init__(self, functions_ranks, modifier=None, config=None):
            self._config = config
            self._modifier = modifier

            # input: [tuple(scale, zeropoint)]
            self._map_input_scale_zeropoint = defaultdict(list)

            # Dict to record the rank of each function
            self.functions_ranks = functions_ranks

        def get_function_rank(self, f):
            return self.functions_ranks.get(f, -1)

        def _quantize_outputs(self, f, output, axes, cfg):
            # TODO: multi-outputs
            rm = cfg.round_mode
            nr = cfg.narrow_range
            dt = cfg.dtype
            pow2 = cfg.pow2.value

            h = output
            for next_func in f.outputs[0].function_references:
                next_fn = next_func.info.type_name
                next_func_rank = self.get_function_rank(next_func)
                if next_fn == 'Sink':
                    return h
                name = 'x0'
                if next_fn in self._modifier._fct_bin_set:
                    for i, elm in enumerate(next_func.inputs):
                        if f.outputs[0] == elm:
                            name = 'x{}'.format(i)
                scope = '{}-{}'.format(next_fn, next_func_rank)
                with nn.parameter_scope(scope):
                    sy, zpy = cfg.recorder_activation.get_scale_zeropoint(
                        h, axes=axes, narrow_range=nr, round_method=pow2,
                        name=name)
                    if sy is not None:
                        break

            # D->Q
            h = self.try_to_quantize(h, sy, zpy, rm, nr, dt)
            self._map_input_scale_zeropoint[h] = (sy, zpy)
            return h

        def try_to_quantize(self, x, scale, zero_point, round_mode,
                            narrow_range, dtype):
            # Quantize only when a recorded scale is available
            x = F.quantize_linear(x, scale, zero_point, round_mode,
                                  narrow_range, dtype) if scale is not None else x
            return x

        def try_to_dequantize(self, x, scale, zero_point):
            # Dequantize only when a recorded scale is available
            x = F.dequantize_linear(
                x, scale, zero_point) if scale is not None else x
            return x

        def get_quantization_params(self, variable):
            scale, zero_point = None, None
            if variable.parent is not None and \
                    variable.parent.info.type_name == 'QuantizeLinear':
                scale = variable.parent.inputs[1].d
                zero_point = variable.parent.inputs[2].d
            return scale, zero_point

        def requantize_bias(self, f, inputs, scope, rm, nr, dt, skip_bias=False):
            def with_bias():
                return len(inputs) == 3

            functions_with_bias = ['Affine', 'Convolution',
                                   'Deconvolution',
                                   'DepthwiseConvolution',
                                   'DepthwiseDeconvolution']

            fn = f.info.type_name

            # Handle bias: recalculate the scale and zero point of the bias
            # from those of the input (x) and the weight (w)
            if fn in functions_with_bias and with_bias():
                x, w, b = inputs
                # Get scale and zero_point of x
                sx, zpx = self.get_quantization_params(x)
                # Get scale of w
                sw, _ = self.get_quantization_params(w)
                if sx is not None and sw is not None:
                    sbd = np.reshape(sx.copy(), (1,)) * \
                        np.reshape(sw.copy(), (1,))
                    with nn.parameter_scope(scope):
                        sb = nn.parameter.get_parameter_or_create(
                            'scale-b', (1,), sbd, False)
                        zpbd = np.reshape(zpx.copy(), (1,))
                        zpb = nn.parameter.get_parameter_or_create(
                            'zeropoint-b', (1,), zpbd, False)
                    if b.parent is not None and \
                            b.parent.info.type_name == 'QuantizeLinear':
                        b = b.parent.inputs[0]
                    # Quantize the bias only when skip_bias is False
                    b = F.quantize_linear(
                        b, sb, zpb, rm, nr, dt) if not skip_bias else b
                    inputs[2] = b
            return inputs

        def quantize_inputs(self, f, inputs, scope, cfg, axes, rm, nr, dt, pow2):
            fn = f.info.type_name

            # Index where parameters (weight, bias) start in `inputs`
            params_idx = 1
            if fn in ['Concatenate', 'Stack']:
                params_idx = len(inputs)
            if fn in self._modifier._fct_bin_set:
                params_idx = 2

            # Quantize inputs (activations)
            inps = []
            for i, input_var in enumerate(inputs[:params_idx]):
                if input_var.rank == 0:
                    with nn.parameter_scope(scope):
                        sx, zpx = cfg.recorder_activation.get_scale_zeropoint(
                            input_var, axes=axes, narrow_range=nr,
                            round_method=pow2, name='x{}'.format(i))
                    input_var = self.try_to_quantize(
                        input_var, sx, zpx, rm, nr, dt)
                    self._map_input_scale_zeropoint[input_var] = (sx, zpx)
                inps.append(input_var)

            # Quantize parameters (weight and bias)
            for i, input_parameter in enumerate(inputs[params_idx:]):
                with nn.parameter_scope(scope):
                    sw, zpw = cfg.recorder_weight.get_scale_zeropoint(
                        input_parameter, axes=axes, narrow_range=nr,
                        round_method=pow2, name='w{}'.format(i))
                input_parameter = self.try_to_quantize(
                    input_parameter, sw, zpw, rm, nr, dt)
                inps.append(input_parameter)

            # Handle bias: recalculate the scale and zero point of the bias
            inps = self.requantize_bias(
                f, inps, scope, rm, nr, dt, cfg.skip_bias)

            return inps

        def dequantize_inputs(self, inps):
            # Dequantize inputs
            for i, var in enumerate(inps):
                s, zp = None, None
                frefs = var.function_references
                for rf in frefs:
                    if rf.info.type_name == 'DequantizeLinear':
                        # Already dequantized; reuse the existing output
                        var = rf.outputs[0]
                        break
                if var.parent and var.parent.info.type_name == 'QuantizeLinear':
                    s, zp = var.parent.inputs[1:3]
                var = self.try_to_dequantize(var, s, zp)
                inps[i] = var
            return inps

        def shared_quantization(self, f, inps, cfg):
            # Share quantization parameters for Add2 and Concatenate
            fn = f.info.type_name
            if fn in ['Add2', 'Concatenate']:
                # Pick the dequantized input with the smallest rank as the
                # parameter owner
                idx = 0
                min_rank = inps[0].rank
                s, zp = None, None
                for i, x in enumerate(inps[1:]):
                    if x.parent is not None and \
                            x.parent.info.type_name == 'DequantizeLinear':
                        if x.rank < min_rank:
                            idx = i + 1
                            min_rank = x.rank
                if inps[idx].parent is not None and \
                        inps[idx].parent.info.type_name == 'DequantizeLinear':
                    s, zp = inps[idx].parent.inputs[1:3]
                for i, x in enumerate(inps):
                    if i == idx:
                        continue
                    if inps[i].parent is not None and \
                            inps[i].parent.info.type_name == 'DequantizeLinear':
                        # Re-quantize with the shared scale and zero point
                        inps[i] = inps[i].parent.inputs[0].parent.inputs[0]
                        inps[i] = self.try_to_quantize(
                            inps[i], s, zp, cfg.round_mode,
                            cfg.narrow_range, cfg.dtype)
                        inps[i] = self.try_to_dequantize(inps[i], s, zp)
            return inps

        def modify(self, f, inputs):
            fn = f.info.type_name
            cfg = self._config
            function_rank = self.get_function_rank(f)
            scope = '{}-{}'.format(fn, function_rank)
            rm = cfg.round_mode
            nr = cfg.narrow_range
            dt = cfg.dtype
            pow2 = cfg.pow2.value
            axes = [3] if cfg.channel_last else [1]

            # inputs -> Q -> DQ -> F
            inps = self.quantize_inputs(
                f, inputs, scope, cfg, axes, rm, nr, dt, pow2)
            inps = self.dequantize_inputs(inps)
            inps = self.shared_quantization(f, inps, cfg)

            h = self._modifier._modify_as_same(f, inps)
            if fn == 'Sink':
                return h

            # F -> output -> Q -> DQ
            h = self._quantize_outputs(f, h, axes, cfg)
            return h

        def __finish__(self):
            # input: [tuple(scale, zeropoint)]
            self._map_input_scale_zeropoint = defaultdict(list)

    def __init__(self, functions_ranks, config=None):
        super(QuantizeRecordingToTrainingModifier, self).__init__()
        self._fct_bin_set = {
            'Add2': F.add2,
            'Sub2': F.sub2,
            'Mul2': F.mul2,
            'Div2': F.div2,
            'Pow2': F.pow2
        }
        from nnabla.utils.qnn import PrecisionMode
        self._precision_mode_set = {
            PrecisionMode.QNN: None,
            PrecisionMode.SIM_QNN: self.SimulatedQNN,
            PrecisionMode.MIXED_QNN: None
        }
        self._mode = self._precision_mode_set[config.precision_mode](
            functions_ranks, self, config)

    def modify(self, f, inputs):
        return self._mode.modify(f, inputs)

    def __finish__(self):
        self._mode.__finish__()
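
# --- Illustrative usage (not part of the original module) -------------------
# A minimal sketch of the intended two-stage flow, assuming a QAT config
# object such as nnabla.utils.qnn.QATConfig that provides the attributes the
# modifiers read (recorder_activation, recorder_weight, round_mode, dtype,
# skip_bias, channel_last, ...). `GraphConverter` is the driver from this
# package; the two helper names below are hypothetical.
def to_recording_graph(pred, functions_ranks, config):
    from .graph_converter import GraphConverter
    # Stage 1: insert recorders that collect activation/weight statistics
    # while calibration data is forwarded through the graph.
    modifier = QuantizeNonQNNToRecordingModifier(
        functions_ranks, config=config, training=True)
    return GraphConverter([modifier]).convert(pred)


def to_training_graph(pred, functions_ranks, config):
    from .graph_converter import GraphConverter
    # Stage 2: replace recorders with QuantizeLinear/DequantizeLinear pairs
    # built from the recorded scale and zero-point parameters.
    modifier = QuantizeRecordingToTrainingModifier(
        functions_ranks, config=config)
    return GraphConverter([modifier]).convert(pred)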