Source code for nnabla.utils.inspection.profile

# Copyright 2020,2021 Sony Corporation.
# Copyright 2021 Sony Group Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import csv
import os
import time
from collections import OrderedDict
from contextlib import contextmanager
from importlib import import_module

import numpy as np
from nnabla.ext_utils import import_extension_module

from .base import FunctionHookCallbackBase


[docs]class TimeProfiler(FunctionHookCallbackBase):
    """
    An utility API to create function_hook callbacks to profile the execution time of each function.
    Passing ``ext_name`` and ``device_id``, you can define which device time you want to profile.
    If `ext_name` = "cuda" or "cudnn", then cudaEvent will be used to measure the execution time.
    For more information about cudaEvent, see the CUDA `document <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html>`_.
    If `ext_name`="cpu" , then wall-clock-time on host will be used.

    Example:

    .. code-block:: python

        ext_name = "cpu"
        device_id = "0"

        from nnabla.ext_utils import get_extension_context
        ctx = get_extension_context(ext_name, device_id=device_id)
        nn.set_default_context(ctx)

        y = model(...)

        from nnabla.utils.inspection import TimeProfiler
        tp = TimeProfiler(ext_name=ext_name, device_id=device_id)

        for i in range(max_iter):
            # All results of executions under "forward" scope are registered as "forward" execution.
            with tp.scope("forward"):
                y.forward(function_pre_hook=tp.pre_hook, function_post_hook=tp.post_hook)

            # All results of executions under "backward" scope are registered as "backward" execution.
            with tp.scope("backward") as tp:
                y.backward(function_pre_hook=tp.pre_hook, function_post_hook=tp.post_hook)

            # All results are evaluated by passing scopes to .calc_elapsed_time().
            # Be sure to call calc_elapsed_time at each iteration, otherwise nothing is measured.
            tp.calc_elapsed_time(["forward", "backward", "summary"])

        # To output results on stdout, call instance as a function.
        tp()

        # To write out as csv file, call .to_csv().
        tp.to_csv(output_file_name)
    """

    def __init__(self, ext_name, device_id):
        """
        Args:
             ext_name (str): backend extension name (e.g. cpu, cuda, or cudnn)
             device_id (str): device id
        """
        if ext_name == "cpu":
            self.profiler = import_module(
                "nnabla.utils.inspection").CpuTimerCallback

        elif ext_name in ["cuda", "cudnn"]:
            self.profiler = import_extension_module(
                "cuda.utils.inspection").CudaEventTimerCallback

        else:
            # Unsupported extension.
            raise NotImplementedError(
                "Profiler for the extension '{}' is not implemented.".format(ext_name))

        self.ext_name = ext_name
        self.device_id = device_id
        self._scope_name = ""
        self.profilers = {}
        self.create_new_profiler("summary")

    def create_new_profiler(self, scope_name):
        self.profilers[scope_name] = self.profiler(
            ext_name=self.ext_name, device_id=self.device_id)

[docs]    @contextmanager
    def scope(self, scope_name):
        """
        Change a scope to aggregate results.
        This function is used as context (`with` statement),
         and all results under the context are labeled by ``scope_name``.
        In addition to the execution time of each function,
        the elapsed times between entering and exiting the each context are also recorded
         and they are aggregated as "summary" scope.

        Args:
            scope_name (str): Scope name.
        """
        prev_scope = self._scope_name

        try:
            # create new profiler for the scope if not exist
            if scope_name not in self.profilers:
                self.create_new_profiler(scope_name)

            self.call_pre_hook("summary", scope_name)

            self._scope_name = scope_name

            yield self

        finally:
            self._scope_name = prev_scope
            self.call_post_hook("summary", scope_name)

    @property
    def pre_hook(self):
        """
        Get a callback for function_pre_hook.
        This function can be used like the example below:

        .. code-block:: python

            tp = TimeProfiler(..)
            with tp.scope("forward"):
                v.forward(function_pre_hook=tp.pre_hook())

            with tp.scope("backward"):
                v.backward(function_pre_hook=tp.pre_hook())

        """
        profiler = self.profilers[self._scope_name]

        def callback(key):
            profiler.pre_hook(key)

        return callback

    @property
    def post_hook(self):
        """
        Get a callback for function_post_hook.
        This function can be used like the example below:

        .. code-block:: python

            tp = TimeProfiler(..)
            with tp.scope("forward"):
                v.forward(function_post_hook=tp.post_hook())

            with tp.scope("backward"):
                v.backward(function_post_hook=tp.post_hook())

        """
        profiler = self.profilers[self._scope_name]

        def callback(key):
            profiler.post_hook(key)

        return callback

    def call_pre_hook(self, scope_name, key):
        if scope_name not in self.profilers:
            self.profilers[scope_name] = self.profiler(
                ext_name=self.ext_name, device_id=self.device_id)

        self.profilers[scope_name].pre_hook(key)

    def call_post_hook(self, scope_name, key):
        if scope_name not in self.profilers:
            raise ValueError(
                "profiler instance for '{0}' scope is not found. ".format(scope_name))

        self.profilers[scope_name].post_hook(key)

[docs]    def calc_elapsed_time(self, names=None):
        """
        Evaluate all elapsed times.
        Note that elapsed time is not recorded until calc_elapsed_time is called.

        Args:
            names (str or list of str): Scope name(s) to evaluate elapsed time.
        """
        if isinstance(names, str):
            names = [names]

        if names is not None:
            # get specified profilers` elapsed time
            for name in list(names):
                self.profilers[name].calc_elapsed_time()
        else:
            # get all profilers` elapsed time
            for profiler in self.profilers.values():
                profiler.calc_elapsed_time()

    def average(self, start_index=0):
        ret = {}
        for name, profiler in self.profilers.items():
            ret[name] = profiler.average(start_index)

        return ret

    def get_all_results(self):
        ret = {}
        for name, profiler in self.profilers.items():
            if len(profiler.results) == 0:
                continue

            ret[name] = profiler.results

        return ret

[docs]    def to_csv(self, out_dir="./", ignore_init=True):
        """
        Writes out to csv file. Output directory can be specified by `out_dir`.
        As default, the elapsed times of first iteration will be omitted.
        If you evaluate the first iteration as well, pass True to `ignore_init`.

        Args:
             out_dir (str): Output directory.
             ignore_init (bool): Ignore the result of the first iteration or not.
        """
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        res_all = self.average(start_index=1 if ignore_init else 0)

        summary_out = [dict(res, scope=scope)
                       for scope, res in res_all["summary"].items()]

        with open(os.path.join(out_dir, "profile_summary.csv"), "w") as f:
            writer = csv.DictWriter(f, ["scope", "time", "scale", "num"])
            writer.writeheader()
            writer.writerows(summary_out)

        # below is basically for nnabla functions
        # key is identically distinguished by its id. (not str expression)
        keystr_to_count = {}  # count number based on id
        key_to_unique = {}  # id of key to unique str expression
        out_dict = {}

        for scope, scope_res in res_all.items():
            if scope == "summary":
                continue

            for key, key_res in scope_res.items():
                insert = {"{}_{}".format(
                    k, scope): v for k, v in key_res.items()}

                # same key has already existed. -> update
                if key in key_to_unique:
                    out_dict[key_to_unique[key]].update(insert)
                    continue

                # new key -> create
                count = keystr_to_count.get(str(key), 0)
                keystr_to_count[str(key)] = count + 1
                key_to_unique[key] = "{}_{}".format(str(key), count)

                out_dict[key_to_unique[key]] = insert

        # preparation for writing to csv
        detail_out = []
        header = set()
        unique_to_key = {v: k for k, v in key_to_unique.items()}
        for scope, res in out_dict.items():
            header.update([*res.keys()])
            detail_out.append(
                dict(res, scope=scope, original_key=unique_to_key[scope]))

        with open(os.path.join(out_dir, "profile_detail.csv"), "w") as f:
            writer = csv.DictWriter(
                f, ["scope", "original_key"] + list(header))
            writer.writeheader()
            writer.writerows(detail_out)

    def __call__(self, *args, **kwargs):
        """
        Outputs results on stdout.
        The results of the first iteration will be omitted.
        """
        ret = self.average(start_index=1)

        for name, results in ret.items():
            if len(results) == 0:
                continue

            print("### {} ###".format(name))
            for key, result in results.items():
                print("{}: {} [{}]".format(
                    key, result["time"], result["scale"]))
            print()


class CpuTimerCallback(FunctionHookCallbackBase):
    def __init__(self, ext_name, device_id):
        super(CpuTimerCallback, self).__init__()

        self.results = OrderedDict()
        self.device_id = str(device_id)
        self.ext_module = import_extension_module(ext_name)
        self.key_to_times = OrderedDict()

    @property
    def pre_hook(self):
        def callback(key):
            if key not in self.key_to_times:
                self.key_to_times[key] = [0, 0]

            self.key_to_times[key][0] = self.timer()

        return callback

    @property
    def post_hook(self):
        def callback(key):
            if key not in self.key_to_times:
                raise ValueError()

            self.ext_module.synchronize(device_id=self.device_id)
            self.key_to_times[key][1] = self.timer()

        return callback

    @staticmethod
    def timer():
        return time.time()

    def calc_elapsed_time(self):
        for key, (start_time, end_time) in self.key_to_times.items():
            elapsed = end_time - start_time

            if key not in self.results:
                self.results[key] = []

            self.results[key].append(elapsed)

    def average(self, start_index=0):
        ret = {}
        for key, times in self.results.items():
            ret[key] = {
                "time": np.mean(times[start_index:]) * (10**3),
                "num": len(times) - start_index,
                "scale": "ms"
            }

        return ret