Source code for nnabla.models.object_detection.utils

# Copyright 2019,2020,2021 Sony Corporation.
# Copyright 2021 Sony Group Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from nnabla.models.object_detection.draw_utils import DrawBoundingBoxes
from nnabla.utils.image_utils import imresize


def draw_bounding_boxes(img, bboxes, names, colors=None, thresh=0.5):
    '''
    The transformed coordinates are used to draw bounding boxes for the
    detected objects.

    Args:
        img (numpy.ndarray): Input image
        bboxes (numpy.ndarray): Transformed bounding box coordinates from the model.
        names (list of str): Names of the categories in the dataset
        colors (list of tuple of 3 ints): Colors for the bounding boxes
        thresh (float): Detection score threshold for drawing a bounding box.

    '''
    if colors is None:
        # Generate a fixed, reproducible color per category.
        rng = np.random.RandomState(1223)
        colors = rng.randint(0, 256, (len(names), 3)).astype(np.uint8)
        colors = [tuple(c.tolist()) for c in colors]
    im_h, im_w = img.shape[:2]
    draw = DrawBoundingBoxes(img, colors)
    for bb in bboxes:
        x, y, w, h = bb[:4]
        dw = w / 2.
        dh = h / 2.
        # Convert center/size to corner coordinates, clipped to the image.
        x0 = int(np.clip(x - dw, 0, im_w))
        y0 = int(np.clip(y - dh, 0, im_h))
        x1 = int(np.clip(x + dw, 0, im_w))
        y1 = int(np.clip(y + dh, 0, im_h))
        # Keep only classes whose score exceeds the threshold.
        det_ind = np.where(bb[5:] > thresh)[0]
        if len(det_ind) == 0:
            continue
        prob = bb[5 + det_ind]
        label = ', '.join("{}: {:.2f}%".format(
            names[det_ind[j]], prob[j] * 100) for j in range(len(det_ind)))
        print("[INFO] {}".format(label))
        draw.draw((x0, y0, x1, y1), det_ind[0], label)
    return draw.get()
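

# A minimal usage sketch (not part of the original module). It illustrates the
# expected row layout of `bboxes`: [x, y, w, h, objectness, class_prob_0, ...]
# in pixel coordinates of `img`. The category names and box values below are
# hypothetical placeholders.
def _example_draw_bounding_boxes():
    names = ['dog', 'person']
    img = np.zeros((416, 416, 3), np.uint8)
    # One detection centered at (200, 150), 80x60 pixels, scoring 90% 'dog'.
    bboxes = np.array([[200., 150., 80., 60., 0.9, 0.9, 0.05]])
    return draw_bounding_boxes(img, bboxes, names)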


def apply_inverse_letterbox_coordinate_transform(bboxes, im_w, im_h, letterbox_w, letterbox_h):
    '''
    The bounding box coordinates predicted by the model refer to the
    pre-processed (letterboxed) image, not the original image. This function
    maps the coordinates back to the original image by applying the inverse
    letterbox coordinate transform.

    Args:
        bboxes: The bounding box coordinates predicted from the model.
        im_w : Width of the original input image.
        im_h : Height of the original input image.
        letterbox_w : Fraction of the letterbox width covered by the resized image.
        letterbox_h : Fraction of the letterbox height covered by the resized image.

    '''
    bboxes = bboxes.copy()
    for bb in bboxes:
        x, y, w, h = bb[:4]
        x1 = (x - (1 - letterbox_w) / 2.) / letterbox_w * im_w
        y1 = (y - (1 - letterbox_h) / 2.) / letterbox_h * im_h
        w1 = w * im_w / letterbox_w
        h1 = h * im_h / letterbox_h
        bb[:4] = x1, y1, w1, h1
    return bboxes


def letterbox(img_orig, h, w):
    '''
    The input image is pre-processed before being passed to the network in
    YoloV2. This function applies that letterbox pre-processing to the input
    image.

    Args:
        img_orig: Input image
        w : Desired width of the output image after pre-processing. Should be a multiple of 32.
        h : Desired height of the output image after pre-processing. Should be a multiple of 32.

    '''
    assert img_orig.dtype == np.uint8
    im_h, im_w, _ = img_orig.shape
    # Scale the image to fit inside the letterbox while preserving aspect ratio.
    if (w * 1.0 / im_w) < (h * 1. / im_h):
        new_w = w
        new_h = int((im_h * w) / im_w)
    else:
        new_h = h
        new_w = int((im_w * h) / im_h)
    # resize
    patch = imresize(img_orig, (new_w, new_h))
    # Fill the remaining letterbox area with gray (127) and paste the resized
    # image at the center.
    img = np.ones((h, w, 3), np.uint8) * 127
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch
    return img, new_w, new_h
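

# A minimal sketch (not part of the original module) of how `letterbox` and
# `apply_inverse_letterbox_coordinate_transform` fit together: the image is
# letterboxed for the network, and normalized letterbox-frame coordinates are
# mapped back to pixels of the original image. The input size and box values
# are hypothetical placeholders.
def _example_letterbox_roundtrip():
    img_orig = np.zeros((300, 500, 3), np.uint8)  # dummy image, H=300, W=500
    img, new_w, new_h = letterbox(img_orig, 416, 416)  # 416x416 network input
    # One normalized (0-1) box in the letterbox frame: center (0.5, 0.5),
    # width 0.2, height 0.3, followed by objectness and one class score.
    bboxes = np.array([[0.5, 0.5, 0.2, 0.3, 0.9, 0.9]])
    return apply_inverse_letterbox_coordinate_transform(
        bboxes, im_w=500, im_h=300,
        letterbox_w=new_w * 1.0 / 416, letterbox_h=new_h * 1.0 / 416)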


class LetterBoxTransform(object):
    '''Create an object holding a new letterboxed image as its `image` attribute.

    Letterboxing is defined as scaling the input image to fit inside the
    desired output image frame (letterbox) while preserving the aspect ratio
    of the original image. The pixels that are not filled with the original
    image pixels are set to 127.

    The created object also provides the functionality to convert bounding box
    coordinates back to the original image frame.

    Args:
        image (numpy.ndarray): A uint8 3-channel image
        height (int): Letterbox height
        width (int): Letterbox width

    '''

    def __init__(self, image, height, width):
        self.height, self.width = height, width
        self.im_h, self.im_w = image.shape[:2]
        self.image, self.new_w, self.new_h = letterbox(image, height, width)

    def inverse_coordinate_transform(self, coords):
        '''Convert the bounding boxes back to the original image frame.

        Args:
            coords (numpy.ndarray): `N` x `M` array where `M >= 4` and the
                first 4 elements of `M` are `x`, `y` (center coordinates of
                the bounding box), `w` and `h` (bounding box width and height).

        '''
        return apply_inverse_letterbox_coordinate_transform(
            coords, self.im_w, self.im_h,
            self.new_w * 1.0 / self.width, self.new_h * 1.0 / self.height)
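

# End-to-end sketch (not part of the original module) of the intended flow
# around `LetterBoxTransform`: letterbox the input, run a detector on
# `lb.image`, map the predicted boxes back to the original frame with
# `inverse_coordinate_transform`, then draw them on the original image.
# `run_yolov2` and `class_names` are hypothetical placeholders for a model
# forward pass and its category names.
def _example_detection_pipeline(img_orig, run_yolov2, class_names):
    lb = LetterBoxTransform(img_orig, 416, 416)
    raw_bboxes = run_yolov2(lb.image)  # N x M array in the letterbox frame
    bboxes = lb.inverse_coordinate_transform(raw_bboxes)
    return draw_bounding_boxes(img_orig, bboxes, class_names)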