Files
inaturalist_pytorch_model/data_aug/data_aug.py
2021-07-01 15:22:44 -04:00

857 lines
24 KiB
Python

import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
import sys
import os
from bbox_util import *
lib_path = os.path.join(os.path.realpath("."), "data_aug")
sys.path.append(lib_path)
class RandomHorizontalFlip(object):
    """Mirror an image and its boxes left-to-right with probability *p*.

    Parameters
    ----------
    p: float
        Probability that a given call performs the flip. A uniform draw from
        ``random.random()`` is compared against it.

    Returns
    -------
    numpy.ndarray
        The image, with axis 1 reversed when the flip is applied. The code
        indexes the image with three axes.
    numpy.ndarray
        The box array that was passed in. On a flip its columns 0 and 2
        (``x1`` and ``x2``) are rewritten **in place**; other columns are
        untouched.

    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        # (W/2, H/2, W/2, H/2): the image centre, once per box corner.
        half_extent = np.array(img.shape[:2])[::-1] / 2
        centre = np.hstack((half_extent, half_extent))

        # Guard clause: leave everything untouched when the draw misses.
        if not random.random() < self.p:
            return img, bboxes

        x_cols = [0, 2]
        mirrored = img[:, ::-1, :]

        # Reflect both x co-ordinates about the vertical centre line ...
        bboxes[:, x_cols] += 2 * (centre[x_cols] - bboxes[:, x_cols])

        # ... which leaves x1 > x2, so move each one by the box width to
        # restore the x1 <= x2 ordering.
        width = abs(bboxes[:, 0] - bboxes[:, 2])
        bboxes[:, 0] -= width
        bboxes[:, 2] += width

        return mirrored, bboxes
class HorizontalFlip(object):
    """Mirror an image and its bounding boxes left-to-right on every call.

    This is the deterministic flip: it takes no parameters and always flips.

    Returns
    -------
    numpy.ndarray
        The image with axis 1 reversed. The code indexes the image with
        three axes.
    numpy.ndarray
        The box array that was passed in, with columns 0 and 2 (``x1`` and
        ``x2``) rewritten **in place**; other columns are untouched.

    """

    def __init__(self):
        pass

    def __call__(self, img, bboxes):
        # Image centre laid out as (W/2, H/2, W/2, H/2).
        centre = np.tile(np.array(img.shape[:2])[::-1] / 2, 2)
        x_cols = [0, 2]

        flipped = img[:, ::-1, :]

        # Mirror x1 and x2 about the vertical centre line of the image.
        bboxes[:, x_cols] += 2 * (centre[x_cols] - bboxes[:, x_cols])

        # Mirroring swaps the roles of x1 and x2; shifting each by the box
        # width puts the smaller value back in column 0.
        span = abs(bboxes[:, 0] - bboxes[:, 2])
        bboxes[:, 0] -= span
        bboxes[:, 2] += span

        return flipped, bboxes
class RandomScale(object):
    """Randomly scale an image while keeping its original resolution.

    The image is resized by a random factor and pasted onto a black canvas of
    the original shape, anchored at the top-left corner. The boxes are scaled
    by the same factors and then passed to ``clip_box`` (from ``bbox_util``,
    not visible here) with the image extent and a threshold of ``0.25``;
    presumably boxes keeping less than 25% of their area are dropped
    (confirm against ``bbox_util``).

    Parameters
    ----------
    scale: float or tuple(float)
        If **float**, the resize factor is drawn uniformly from
        ``(1 - scale, 1 + scale)``, with the lower offset clamped at ``-1``.
        If **tuple**, it must hold two offsets greater than ``-1``; an offset
        is drawn uniformly between them and added to 1.
    diff: bool
        If true, the x and y factors are drawn independently; otherwise the
        same factor is used for both axes.

    Returns
    -------
    numpy.ndarray
        Scaled image, same shape as the input, dtype ``uint8``
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``. The input
        array's first four columns are scaled in place beforehand.

    """

    def __init__(self, scale=0.2, diff=False):
        self.scale = scale

        if isinstance(self.scale, tuple):
            assert len(self.scale) == 2, "Invalid range"
            assert self.scale[0] > -1, "Scale factor can't be less than -1"
            assert self.scale[1] > -1, "Scale factor can't be less than -1"
        else:
            assert self.scale > 0, "Please input a positive float"
            self.scale = (max(-1, -self.scale), self.scale)

        self.diff = diff

    def __call__(self, img, bboxes):
        img_shape = img.shape

        # One draw shared by both axes, or a second draw for y when `diff`.
        scale_x = random.uniform(*self.scale)
        scale_y = random.uniform(*self.scale) if self.diff else scale_x

        resize_scale_x = 1 + scale_x
        resize_scale_y = 1 + scale_y

        img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)

        bboxes[:, :4] *= [resize_scale_x, resize_scale_y,
                          resize_scale_x, resize_scale_y]

        # Paste the part of the resized image that fits onto a black canvas
        # of the original shape (top-left anchored).
        canvas = np.zeros(img_shape, dtype=np.uint8)
        y_lim = int(min(resize_scale_y, 1) * img_shape[0])
        x_lim = int(min(resize_scale_x, 1) * img_shape[1])
        canvas[:y_lim, :x_lim, :] = img[:y_lim, :x_lim, :]
        img = canvas

        # Clip to the actual image extent. The previous right-hand bound of
        # `1 + width` let boxes extend one pixel past the canvas.
        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)

        return img, bboxes
class Scale(object):
    """Scale an image by fixed factors while keeping its original resolution.

    The image is resized by ``1 + scale_x`` horizontally and ``1 + scale_y``
    vertically and pasted onto a black canvas of the original shape, anchored
    at the top-left corner. The boxes are scaled by the same factors and then
    passed to ``clip_box`` (from ``bbox_util``, not visible here) with the
    image extent and a threshold of ``0.25``; presumably boxes keeping less
    than 25% of their area are dropped (confirm against ``bbox_util``).

    Parameters
    ----------
    scale_x: float
        Offset added to 1 to obtain the horizontal resize factor
    scale_y: float
        Offset added to 1 to obtain the vertical resize factor

    Returns
    -------
    numpy.ndarray
        Scaled image, same shape as the input, dtype ``uint8``
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``. The input
        array's first four columns are scaled in place beforehand.

    """

    def __init__(self, scale_x=0.2, scale_y=0.2):
        self.scale_x = scale_x
        self.scale_y = scale_y

    def __call__(self, img, bboxes):
        img_shape = img.shape

        resize_scale_x = 1 + self.scale_x
        resize_scale_y = 1 + self.scale_y

        img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)

        bboxes[:, :4] *= [resize_scale_x, resize_scale_y,
                          resize_scale_x, resize_scale_y]

        # Paste the part of the resized image that fits onto a black canvas
        # of the original shape (top-left anchored).
        canvas = np.zeros(img_shape, dtype=np.uint8)
        y_lim = int(min(resize_scale_y, 1) * img_shape[0])
        x_lim = int(min(resize_scale_x, 1) * img_shape[1])
        canvas[:y_lim, :x_lim, :] = img[:y_lim, :x_lim, :]
        img = canvas

        # Clip to the actual image extent. The previous right-hand bound of
        # `1 + width` let boxes extend one pixel past the canvas.
        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)

        return img, bboxes
class RandomTranslate(object):
    """Randomly translate an image while keeping its original resolution.

    The image is shifted by a random fraction of its width and height, the
    uncovered area is filled with black, and the boxes are shifted by the
    same pixel offsets. The boxes are then passed to ``clip_box`` (from
    ``bbox_util``, not visible here) with the image extent and a threshold
    of ``0.25``; presumably boxes keeping less than 25% of their area are
    dropped (confirm against ``bbox_util``).

    Parameters
    ----------
    translate: float or tuple(float)
        If **float** (``0 < translate < 1``), the shift, as a fraction of the
        image dimension, is drawn uniformly from ``(-translate, translate)``.
        If **tuple**, it must hold two fractions strictly between -1 and 1
        and the shift is drawn uniformly between them.
    diff: bool
        If true, the x and y fractions are drawn independently; otherwise
        the x fraction is reused for y.

    Returns
    -------
    numpy.ndarray
        Translated image, same shape as the input, dtype ``uint8``
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``. The input
        array's first four columns are shifted in place beforehand.

    """

    def __init__(self, translate=0.2, diff=False):
        self.translate = translate

        if isinstance(self.translate, tuple):
            assert len(self.translate) == 2, "Invalid range"
            # The original check used `&`, which binds tighter than the
            # comparisons and raised TypeError for floats. Negative bounds
            # are accepted because the float form already produces them.
            assert -1 < self.translate[0] < 1
            assert -1 < self.translate[1] < 1
        else:
            assert self.translate > 0 and self.translate < 1
            self.translate = (-self.translate, self.translate)

        self.diff = diff

    def __call__(self, img, bboxes):
        img_shape = img.shape

        # Fractions of the image dimensions to shift by. Both draws are
        # always made; the y draw is discarded when `diff` is false.
        translate_factor_x = random.uniform(*self.translate)
        translate_factor_y = random.uniform(*self.translate)
        if not self.diff:
            translate_factor_y = translate_factor_x

        canvas = np.zeros(img_shape).astype(np.uint8)

        # Pixel position of the shifted image's top-left corner.
        corner_x = int(translate_factor_x * img.shape[1])
        corner_y = int(translate_factor_y * img.shape[0])

        # Destination window on the canvas as (y1, x1, y2, x2).
        orig_box_cords = [
            max(0, corner_y),
            max(corner_x, 0),
            min(img_shape[0], corner_y + img.shape[0]),
            min(img_shape[1], corner_x + img.shape[1]),
        ]

        # Matching source window: the part of the image still visible.
        mask = img[
            max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]),
            max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),
            :,
        ]
        canvas[orig_box_cords[0]:orig_box_cords[2],
               orig_box_cords[1]:orig_box_cords[3], :] = mask
        img = canvas

        bboxes[:, :4] += [corner_x, corner_y, corner_x, corner_y]
        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)

        return img, bboxes
class Translate(object):
    """Translate an image by fixed fractions of its width and height.

    The image is shifted, the uncovered area is filled with black, and the
    boxes are shifted by the same pixel offsets. The boxes are then passed
    to ``clip_box`` (from ``bbox_util``, not visible here) with the image
    extent and a threshold of ``0.25``; presumably boxes keeping less than
    25% of their area are dropped (confirm against ``bbox_util``).

    Parameters
    ----------
    translate_x: float
        Horizontal shift as a fraction of the image width, strictly between
        -1 and 1. Negative values shift left.
    translate_y: float
        Vertical shift as a fraction of the image height, strictly between
        -1 and 1. Negative values shift up.
    diff: bool
        Unused; kept so existing callers that pass it continue to work.

    Returns
    -------
    numpy.ndarray
        Translated image, same shape as the input, dtype ``uint8``
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``. The input
        array's first four columns are shifted in place beforehand.

    """

    def __init__(self, translate_x=0.2, translate_y=0.2, diff=False):
        self.translate_x = translate_x
        self.translate_y = translate_y

        # The windowing in __call__ handles negative corners, so negative
        # and zero shifts are accepted as well as positive ones.
        assert -1 < self.translate_x < 1
        assert -1 < self.translate_y < 1

    def __call__(self, img, bboxes):
        img_shape = img.shape

        canvas = np.zeros(img_shape).astype(np.uint8)

        # Pixel position of the shifted image's top-left corner.
        corner_x = int(self.translate_x * img.shape[1])
        corner_y = int(self.translate_y * img.shape[0])

        # Destination window on the canvas as (y1, x1, y2, x2).
        orig_box_cords = [
            max(0, corner_y),
            max(corner_x, 0),
            min(img_shape[0], corner_y + img.shape[0]),
            min(img_shape[1], corner_x + img.shape[1]),
        ]

        # Matching source window: the part of the image still visible.
        mask = img[
            max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]),
            max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),
            :,
        ]
        canvas[orig_box_cords[0]:orig_box_cords[2],
               orig_box_cords[1]:orig_box_cords[3], :] = mask
        img = canvas

        bboxes[:, :4] += [corner_x, corner_y, corner_x, corner_y]
        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)

        return img, bboxes
class RandomRotate(object):
    """Rotate an image and its boxes by a randomly drawn angle.

    The rotation itself is delegated to ``rotate_im`` / ``rotate_box`` and
    the box bookkeeping to ``get_corners`` / ``get_enclosing_box`` (all from
    ``bbox_util``, not visible here). The rotated image is resized back to
    the input width and height, and the boxes are rescaled to match before
    being passed to ``clip_box`` with a threshold of ``0.25``; presumably
    boxes keeping less than 25% of their area are dropped (confirm against
    ``bbox_util``).

    Parameters
    ----------
    angle: float or tuple(float)
        If **float**, the angle is drawn uniformly from ``(-angle, angle)``.
        If **tuple**, it must have two entries and the angle is drawn
        uniformly between them.

    Returns
    -------
    numpy.ndarray
        Rotated image, resized to the input width and height
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``

    """

    def __init__(self, angle=10):
        if type(angle) is tuple:
            assert len(angle) == 2, "Invalid range"
            self.angle = angle
        else:
            # A single number means a symmetric range around zero.
            self.angle = (-angle, angle)

    def __call__(self, img, bboxes):
        theta = random.uniform(*self.angle)

        height, width = img.shape[0], img.shape[1]
        centre_x = width // 2
        centre_y = height // 2

        img = rotate_im(img, theta)

        # Corner co-ordinates in the first 8 columns, followed by whatever
        # extra columns the boxes carry beyond the first four.
        corner_pts = np.hstack((get_corners(bboxes), bboxes[:, 4:]))
        corner_pts[:, :8] = rotate_box(
            corner_pts[:, :8], theta, centre_x, centre_y, height, width
        )
        enclosing = get_enclosing_box(corner_pts)

        # The rotated image may differ in size from the input; measure the
        # ratio before resizing back so the boxes can be rescaled to match.
        ratio_x = img.shape[1] / width
        ratio_y = img.shape[0] / height
        img = cv2.resize(img, (width, height))
        enclosing[:, :4] /= [ratio_x, ratio_y, ratio_x, ratio_y]

        return img, clip_box(enclosing, [0, 0, width, height], 0.25)
class Rotate(object):
    """Rotate an image and its boxes by a fixed angle.

    The rotation itself is delegated to ``rotate_im`` / ``rotate_box`` and
    the box bookkeeping to ``get_corners`` / ``get_enclosing_box`` (all from
    ``bbox_util``, not visible here). The rotated image is resized back to
    the input width and height, and the boxes are rescaled to match before
    being passed to ``clip_box`` with a threshold of ``0.25``; presumably
    boxes keeping less than 25% of their area are dropped (confirm against
    ``bbox_util``).

    Parameters
    ----------
    angle: float
        The angle by which the image is to be rotated; it is passed
        unchanged to ``rotate_im`` and ``rotate_box``.

    Returns
    -------
    numpy.ndarray
        Rotated image, resized to the input width and height
    numpy.ndarray
        Transformed bounding boxes as returned by ``clip_box``

    """

    def __init__(self, angle):
        self.angle = angle

    def __call__(self, img, bboxes):
        """Rotate ``img`` and ``bboxes`` by ``self.angle``.

        Args:
            img (numpy.ndarray): Image to be rotated.
            bboxes (numpy.ndarray): Boxes whose first four columns are the
                co-ordinates; any further columns are carried along.

        Returns:
            tuple: The rotated image and the transformed boxes.
        """
        # NOTE: a leftover debug `print(self.angle)` used to run here on
        # every call; it has been removed.
        angle = self.angle

        w, h = img.shape[1], img.shape[0]
        cx, cy = w // 2, h // 2

        # Corner co-ordinates in the first 8 columns, followed by whatever
        # extra columns the boxes carry beyond the first four.
        corners = get_corners(bboxes)
        corners = np.hstack((corners, bboxes[:, 4:]))

        img = rotate_im(img, angle)

        corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)
        new_bbox = get_enclosing_box(corners)

        # The rotated image may differ in size from the input; measure the
        # ratio before resizing back so the boxes can be rescaled to match.
        scale_factor_x = img.shape[1] / w
        scale_factor_y = img.shape[0] / h
        img = cv2.resize(img, (w, h))
        new_bbox[:, :4] /= [scale_factor_x, scale_factor_y,
                            scale_factor_x, scale_factor_y]

        bboxes = clip_box(new_bbox, [0, 0, w, h], 0.25)

        return img, bboxes
class RandomShear(object):
    """Randomly shear an image in the horizontal direction.

    The image is warped with a horizontal shear, which widens it, and is
    then resized back to the input width and height. Box x co-ordinates are
    sheared and rescaled to match. Negative factors are handled by flipping
    horizontally before and after a positive shear. No clipping or box
    filtering is performed by this transform.

    Parameters
    ----------
    shear_factor: float or tuple(float)
        If **float**, the factor is drawn uniformly from
        ``(-shear_factor, shear_factor)``. If **tuple**, it must have two
        entries and the factor is drawn uniformly between them.

    Returns
    -------
    numpy.ndarray
        Sheared image, resized to the input width and height
    numpy.ndarray
        The box array that was passed in, modified **in place**

    """

    def __init__(self, shear_factor=0.2):
        self.shear_factor = shear_factor

        if isinstance(self.shear_factor, tuple):
            assert len(self.shear_factor) == 2, "Invalid range for scaling factor"
        else:
            self.shear_factor = (-self.shear_factor, self.shear_factor)
        # The constructor used to make a throw-away random.uniform() draw
        # here; it only perturbed the global RNG state and was removed.

    def __call__(self, img, bboxes):
        shear_factor = random.uniform(*self.shear_factor)

        w, h = img.shape[1], img.shape[0]

        # A negative shear is done as flip -> positive shear -> flip.
        if shear_factor < 0:
            img, bboxes = HorizontalFlip()(img, bboxes)

        M = np.array([[1, abs(shear_factor), 0], [0, 1, 0]])

        # Width needed to hold the sheared image.
        nW = img.shape[1] + abs(shear_factor * img.shape[0])

        # Each x moves right in proportion to the y of the same corner.
        bboxes[:, [0, 2]] += (bboxes[:, [1, 3]] * abs(shear_factor)).astype(int)

        img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))

        if shear_factor < 0:
            img, bboxes = HorizontalFlip()(img, bboxes)

        # Back to the input size; only x changes scale.
        img = cv2.resize(img, (w, h))
        scale_factor_x = nW / w
        bboxes[:, :4] /= [scale_factor_x, 1, scale_factor_x, 1]

        return img, bboxes
class Shear(object):
    """Shear an image in the horizontal direction by a fixed factor.

    The image is warped with a horizontal shear and comes out wider than the
    input; it is not resized back. Box x co-ordinates are shifted to follow
    the shear. Negative factors are handled by flipping horizontally before
    and after a positive shear. No clipping or box filtering is performed by
    this transform.

    Parameters
    ----------
    shear_factor: float
        Factor by which the image is sheared in the x-direction

    Returns
    -------
    numpy.ndarray
        Sheared image; its width grows by ``abs(shear_factor) * height``,
        truncated to an int
    numpy.ndarray
        The box array that was passed in, modified **in place**

    """

    def __init__(self, shear_factor=0.2):
        self.shear_factor = shear_factor

    def __call__(self, img, bboxes):
        factor = self.shear_factor
        mirror = factor < 0
        magnitude = abs(factor)

        # A negative shear is done as flip -> positive shear -> flip.
        if mirror:
            img, bboxes = HorizontalFlip()(img, bboxes)

        affine = np.array([[1, magnitude, 0], [0, 1, 0]])

        # Width needed to hold the sheared image.
        sheared_width = img.shape[1] + abs(factor * img.shape[0])

        # Each x moves right in proportion to the y of the same corner.
        x_shift = (bboxes[:, [1, 3]] * magnitude).astype(int)
        bboxes[:, [0, 2]] += x_shift

        img = cv2.warpAffine(img, affine, (int(sheared_width), img.shape[0]))

        if mirror:
            img, bboxes = HorizontalFlip()(img, bboxes)

        return img, bboxes
class Resize(object):
    """Letterbox-resize an image and map its boxes into the new frame.

    The image is handed to ``letterbox_image`` (from ``bbox_util``, not
    visible here). The boxes are scaled by the smaller of the two
    ``inp_dim / side`` ratios, so the aspect ratio is kept, and are then
    offset by half of the leftover space on each axis.
    **This should be the last transform**

    Parameters
    ----------
    inp_dim : int
        Target side length. The code divides it by the image height and
        width, so it must be a single number.
        NOTE(review): the previous docstring called this a tuple; confirm
        what ``letterbox_image`` expects.

    Returns
    -------
    numpy.ndarray
        The letterboxed image cast to ``uint8``
    numpy.ndarray
        The box array that was passed in, with its first four columns
        scaled and offset **in place**

    """

    def __init__(self, inp_dim):
        self.inp_dim = inp_dim

    def __call__(self, img, bboxes):
        src_h, src_w = img.shape[0], img.shape[1]
        target = self.inp_dim

        img = letterbox_image(img, target)

        # The same ratio on both axes keeps the aspect ratio.
        ratio = min(target / src_h, target / src_w)
        bboxes[:, :4] *= ratio

        # Half of the unused space on each axis, truncated to whole pixels.
        pad_h = (target - ratio * src_h) / 2
        pad_w = (target - ratio * src_w) / 2
        offsets = np.array([[pad_w, pad_h, pad_w, pad_h]]).astype(int)
        bboxes[:, :4] += offsets

        return img.astype(np.uint8), bboxes
class RandomHSV(object):
    """Randomly offset the three channels of an image.

    A random integer is drawn for each channel and added to it. All channels
    are then clipped to 0-255 and channel 0 is additionally clipped to
    0-179, the hue range in OpenCV's 8-bit HSV representation.
    NOTE(review): no colour-space conversion happens here, so the input is
    presumably expected to already be in HSV order; confirm with callers.

    Parameters
    ----------
    hue : None or int or tuple (int)
        If None, channel 0 is left unchanged. If int, a random int is
        uniformly sampled from (-hue, hue) and added to channel 0. If tuple,
        the int is sampled from the range specified by the tuple.
    saturation : None or int or tuple(int)
        Same as `hue`, applied to channel 1.
    brightness : None or int or tuple(int)
        Same as `hue`, applied to channel 2.

    Returns
    -------
    numpy.ndarray
        Transformed image of dtype ``uint8`` and the same shape as the input
    numpy.ndarray
        The bounding boxes, returned untouched

    """

    def __init__(self, hue=None, saturation=None, brightness=None):
        # All three settings go through the same normalisation. Previously
        # the brightness check looked at the raw argument rather than the
        # defaulted attribute, unlike hue and saturation.
        self.hue = self._as_range(hue)
        self.saturation = self._as_range(saturation)
        self.brightness = self._as_range(brightness)

    @staticmethod
    def _as_range(value):
        """Normalise a jitter setting to a ``(low, high)`` tuple."""
        if not value:
            value = 0
        if isinstance(value, tuple):
            return value
        return (-value, value)

    def __call__(self, img, bboxes):
        hue = random.randint(*self.hue)
        saturation = random.randint(*self.saturation)
        brightness = random.randint(*self.brightness)

        # Work in a wide integer type so the addition cannot wrap around.
        img = img.astype(int)
        offsets = np.array([hue, saturation, brightness]).astype(int)
        img += np.reshape(offsets, (1, 1, 3))

        img = np.clip(img, 0, 255)
        img[:, :, 0] = np.clip(img[:, :, 0], 0, 179)

        img = img.astype(np.uint8)

        return img, bboxes
class Sequence(object):
    """Apply a sequence of transformations to the images/boxes.

    Parameters
    ----------
    augmentations : list
        List containing transformation objects in the order they are to be
        applied. Each one is called as ``augmentation(images, bboxes)`` and
        must return the new ``(images, bboxes)`` pair.
    probs : float or list or tuple
        If a single number, the probability with which each transformation
        is applied. If a **list** or **tuple**, it is indexed in step with
        *augmentations*, so it needs at least as many entries; each element
        is the probability for the corresponding transformation.

    Returns
    -------
    Sequence
        Sequence Object

    """

    def __init__(self, augmentations, probs=1):
        self.augmentations = augmentations
        self.probs = probs

    def __call__(self, images, bboxes):
        # Tuples are accepted alongside lists; previously a tuple fell
        # through to `random.random() < tuple` and raised TypeError.
        per_step = isinstance(self.probs, (list, tuple))

        for i, augmentation in enumerate(self.augmentations):
            prob = self.probs[i] if per_step else self.probs

            # One uniform draw per step decides whether it is applied.
            if random.random() < prob:
                images, bboxes = augmentation(images, bboxes)

        return images, bboxes