2021-07-01 15:22:44 -04:00
parent b0a8d52855
commit 83195da92c
30 changed files with 3253 additions and 283 deletions

.gitignore (vendored): 1 changed line

@@ -1 +1,2 @@
 *.pth
+example

.vscode/settings.json (vendored, new file, 3 lines):
{
    "python.formatting.provider": "black"
}

6 binary files changed (contents not shown).

data.py: 98 changed lines

@@ -1,18 +1,19 @@
 # %%
+import os
+import numpy as np
+import torch
+from PIL import Image
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-from collections import defaultdict as ddict
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 import json
 import torch
-from torchvision import datasets, transforms as T
-import cv2
+from torchvision import transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
 from references.detection import utils, engine
@@ -28,9 +29,6 @@ def get_transform(train):
     transforms.append(T.RandomHorizontalFlip(0.5))
     return T.Compose(transforms)
-IMAGE_MEAN = [0.485, 0.456, 0.406]
-IMAGE_STD = [0.229, 0.224, 0.225]
 PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
@@ -43,12 +41,12 @@ def create_map(list_in, from_key, to_key):
 class iNaturalistDataset(torch.utils.data.Dataset):
-    def __init__(self, validation=False, train=False):
+    def __init__(self, validation=False, train=False, transforms = None):
         self.validation = validation
         self.train = train
-        self.transforms = T.Compose([T.Resize(600, max_size=1024), T.ToTensor()])
+        self.transforms = transforms
         if validation:
             json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
@@ -65,7 +63,7 @@ class iNaturalistDataset(torch.utils.data.Dataset):
         for category in f["categories"]:
             if category["supercategory"] == "Aves":
-                if category['name'] in ['Archilochus colubris','Icterus galbula']:
+                if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
                     print(category['name'])
                     categories.append(category)
@@ -101,44 +99,35 @@ class iNaturalistDataset(torch.utils.data.Dataset):
         self.idx_to_id = [x for x in self.images]
         self.num_classes = len(self.categories) + 1
         self.num_samples = len(self.images)
-        self.transforms = [
-            data_aug.RandomHorizontalFlip(0.5),
-            data_aug.Resize(600),
-        ]
-        self.pre_transform = T.Compose([T.ToTensor()])#],T.Normalize(mean=[0.485, 0.456, 0.406],
-        #std=[0.229, 0.224, 0.225])])
     def __len__(self):
         return self.num_samples
-    def transform(self, img, bbox):
-        for x in self.transforms:
-            img, bbox = x(img, bbox)
-        img = self.pre_transform(img)
-        return img, bbox
     def __getitem__(self, idx):
         idd = self.idx_to_id[idx]
         c_image = self.images[idd]
-        # print(c_image, idx, self.validation, self.train)
-        # breakpoint()
-        image = np.asarray(cv2.imread(c_image["path"])[:,:,::-1].copy(),dtype=np.float32)
+        img_path = c_image["path"]
+        img = Image.open(img_path).convert("RGB")
         annot = c_image["annotation"]
         bbox = annot["bbox"]
-        bbox.append(annot["new_category_id"])
-        bbox = np.asarray([bbox], dtype=np.float32)
-        image, bbox = self.transform(image.copy(), bbox.copy())
-        boxes = torch.as_tensor(bbox[:,:4], dtype=torch.float32)
+        boxes = bbox
         target = dict()
-        target["boxes"] = boxes
+        target["boxes"] = torch.as_tensor([boxes])
         target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
         target['image_id'] = torch.tensor([annot['image_id']])
-        target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+        target['area'] = torch.as_tensor([annot['area']])
         target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
-        return image, target
+        if self.transforms is not None:
+            img, target = self.transforms(img, target)
+        return img, target
+# %%
+# v = iNaturalistDataset(validation=True)
 # v = iNaturalistDataset(validation= True)
 # o = v[10]
@@ -149,24 +138,45 @@ class iNaturalistDataset(torch.utils.data.Dataset):
 # plt.imshow(ox.permute([1,2,0]))
 # plt.savefig('crap2.png')
+def get_model(num_classes):
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+    num_classes = 2 # 1 class (person) + background
+    in_features = model.roi_heads.box_predictor.cls_score.in_features
+    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+    return model
+import transforms as T
+def get_transform(train):
+    transforms = []
+    transforms.append(T.ToTensor())
+    if train:
+        transforms.append(T.RandomHorizontalFlip(0.5))
+    return T.Compose(transforms)
+from engine import train_one_epoch, evaluate
+import utils
 # %%
 def run():
-    val_dataset = iNaturalistDataset(validation=True)
-    train_dataset = iNaturalistDataset(train=True)
+    val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True))
+    train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False))
     train_data_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
+        train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
     val_data_loader = torch.utils.data.DataLoader(
-        val_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
+        val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
-    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
-        pretrained=True, num_classes=train_dataset.num_classes, progress=True
-    )
+    import torchvision
+    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+    num_classes = 2
+    model = get_model(num_classes)
     model.to(device)
     # construct an optimizer
     params = [p for p in model.parameters() if p.requires_grad]
     optimizer = torch.optim.SGD(params, lr=0.005,
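For reference, the rewritten `__getitem__` now returns a PIL image plus a target dict in the format the torchvision detection models expect. A minimal sketch of that contract (editor's illustration; the image size and box values are made up):

```python
import torch
from PIL import Image

img = Image.new("RGB", (640, 480))  # stand-in for Image.open(img_path).convert("RGB")
target = {
    "boxes": torch.tensor([[100.0, 120.0, 300.0, 340.0]]),  # [N, 4] in x1, y1, x2, y2
    "labels": torch.tensor([1], dtype=torch.int64),          # 0 is reserved for background
    "image_id": torch.tensor([0]),
    "area": torch.tensor([44000.0]),
    "iscrowd": torch.zeros((1,), dtype=torch.int64),
}
# The reference-style transforms (ToTensor, RandomHorizontalFlip from
# vision/references/detection) accept and return the (img, target) pair,
# which is why __getitem__ now applies self.transforms to both.
```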

2 binary files changed (contents not shown).

data_aug/bbox_util.py (new file, 300 lines):
import cv2
import numpy as np
def draw_rect(im, cords, color = None):
"""Draw the rectangle on the image
Parameters
----------
im : numpy.ndarray
numpy image
cords: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
Returns
-------
numpy.ndarray
numpy image with bounding boxes drawn on it
"""
im = im.copy()
cords = cords[:,:4]
cords = cords.reshape(-1,4)
if not color:
color = [255,255,255]
for cord in cords:
pt1, pt2 = (cord[0], cord[1]) , (cord[2], cord[3])
pt1 = int(pt1[0]), int(pt1[1])
pt2 = int(pt2[0]), int(pt2[1])
im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2])/200))
return im
def bbox_area(bbox):
return (bbox[:,2] - bbox[:,0])*(bbox[:,3] - bbox[:,1])
def clip_box(bbox, clip_box, alpha):
"""Clip the bounding boxes to the borders of an image
Parameters
----------
bbox: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
clip_box: numpy.ndarray
An array of shape (4,) specifying the diagonal co-ordinates of the image
The coordinates are represented in the format `x1 y1 x2 y2`
alpha: float
If the fraction of a bounding box left in the image after being clipped is
less than `alpha` the bounding box is dropped.
Returns
-------
numpy.ndarray
Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the
number of bounding boxes left after clipping and the bounding boxes are represented in the
format `x1 y1 x2 y2`
"""
ar_ = (bbox_area(bbox))
x_min = np.maximum(bbox[:,0], clip_box[0]).reshape(-1,1)
y_min = np.maximum(bbox[:,1], clip_box[1]).reshape(-1,1)
x_max = np.minimum(bbox[:,2], clip_box[2]).reshape(-1,1)
y_max = np.minimum(bbox[:,3], clip_box[3]).reshape(-1,1)
bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:,4:]))
delta_area = ((ar_ - bbox_area(bbox))/ar_)
mask = (delta_area < (1 - alpha)).astype(int)
bbox = bbox[mask == 1,:]
return bbox
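# Editor's note (illustrative): with alpha = 0.25 a box survives if it keeps at
# least 25% of its area after clipping. E.g. clipping [[-50, 10, 50, 90, 0]] to an
# image box [0, 0, 100, 100] keeps 50% of the area, so the box is retained as
# [[0, 10, 50, 90, 0]]; a box left with under 25% of its area would be dropped.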
def rotate_im(image, angle):
"""Rotate the image.
Rotate the image such that the rotated image is enclosed inside the tightest
rectangle. The area not occupied by the pixels of the original image is colored
black.
Parameters
----------
image : numpy.ndarray
numpy image
angle : float
angle by which the image is to be rotated
Returns
-------
numpy.ndarray
Rotated Image
"""
# grab the dimensions of the image and then determine the
# centre
(h, w) = image.shape[:2]
(cX, cY) = (w // 2, h // 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# perform the actual rotation and return the image
image = cv2.warpAffine(image, M, (nW, nH))
# image = cv2.resize(image, (w,h))
return image
def get_corners(bboxes):
"""Get corners of bounding boxes
Parameters
----------
bboxes: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
Returns
-------
numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
"""
width = (bboxes[:,2] - bboxes[:,0]).reshape(-1,1)
height = (bboxes[:,3] - bboxes[:,1]).reshape(-1,1)
x1 = bboxes[:,0].reshape(-1,1)
y1 = bboxes[:,1].reshape(-1,1)
x2 = x1 + width
y2 = y1
x3 = x1
y3 = y1 + height
x4 = bboxes[:,2].reshape(-1,1)
y4 = bboxes[:,3].reshape(-1,1)
corners = np.hstack((x1,y1,x2,y2,x3,y3,x4,y4))
return corners
def rotate_box(corners,angle, cx, cy, h, w):
"""Rotate the bounding box.
Parameters
----------
corners : numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
angle : float
angle by which the image is to be rotated
cx : int
x coordinate of the center of image (about which the box will be rotated)
cy : int
y coordinate of the center of image (about which the box will be rotated)
h : int
height of the image
w : int
width of the image
Returns
-------
numpy.ndarray
Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
"""
corners = corners.reshape(-1,2)
corners = np.hstack((corners, np.ones((corners.shape[0],1), dtype = type(corners[0][0]))))
M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cx
M[1, 2] += (nH / 2) - cy
# Prepare the vector to be transformed
calculated = np.dot(M,corners.T).T
calculated = calculated.reshape(-1,8)
return calculated
def get_enclosing_box(corners):
"""Get an enclosing box for ratated corners of a bounding box
Parameters
----------
corners : numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
Returns
-------
numpy.ndarray
Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
"""
x_ = corners[:,[0,2,4,6]]
y_ = corners[:,[1,3,5,7]]
xmin = np.min(x_,1).reshape(-1,1)
ymin = np.min(y_,1).reshape(-1,1)
xmax = np.max(x_,1).reshape(-1,1)
ymax = np.max(y_,1).reshape(-1,1)
final = np.hstack((xmin, ymin, xmax, ymax,corners[:,8:]))
return final
def letterbox_image(img, inp_dim):
'''resize image with unchanged aspect ratio using padding
Parameters
----------
img : numpy.ndarray
Image
inp_dim: int
side length of the square canvas onto which the resized image is padded
Returns
-------
numpy.ndarray:
Resized image
'''
inp_dim = (inp_dim, inp_dim)
img_w, img_h = img.shape[1], img.shape[0]
w, h = inp_dim
new_w = int(img_w * min(w/img_w, h/img_h))
new_h = int(img_h * min(w/img_w, h/img_h))
resized_image = cv2.resize(img, (new_w,new_h))
canvas = np.full((inp_dim[1], inp_dim[0], 3), 0)
canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
return canvas
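Chained together, these helpers implement the box-rotation path used by RandomRotate in data_aug.py. A simplified sketch (editor's illustration; it skips the image warp and rescale that RandomRotate also performs):

```python
import numpy as np
from bbox_util import get_corners, rotate_box, get_enclosing_box, clip_box

# One box in [x1, y1, x2, y2, class] format on a hypothetical 200x100 image.
bboxes = np.array([[20.0, 30.0, 120.0, 80.0, 1.0]])
h, w = 100, 200
corners = get_corners(bboxes)                    # N x 8 corner coordinates
corners = np.hstack((corners, bboxes[:, 4:]))    # carry the class column along
corners[:, :8] = rotate_box(corners[:, :8], 15, w // 2, h // 2, h, w)
rotated = get_enclosing_box(corners)             # back to N x 5 axis-aligned boxes
rotated = clip_box(rotated, [0, 0, w, h], 0.25)  # drop boxes mostly rotated out
```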

data_aug/data_aug.py (new file, 856 lines):
import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
import sys
import os
lib_path = os.path.join(os.path.realpath("."), "data_aug")
sys.path.append(lib_path)
from bbox_util import *
class RandomHorizontalFlip(object):
"""Randomly horizontally flips the Image with the probability *p*
Parameters
----------
p: float
The probability with which the image is flipped
Returns
-------
numpy.ndarray
Flipped image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, p=0.5):
self.p = p
def __call__(self, img, bboxes):
img_center = np.array(img.shape[:2])[::-1]/2
img_center = np.hstack((img_center, img_center))
if random.random() < self.p:
img = img[:, ::-1, :]
bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])
box_w = abs(bboxes[:, 0] - bboxes[:, 2])
bboxes[:, 0] -= box_w
bboxes[:, 2] += box_w
return img, bboxes
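# Worked example (editor's illustration): for a 100 px wide image, img_center[0]
# is 50, so a box with x1=10, x2=30 first maps to x1=90, x2=70; the box_w
# correction then swaps the pair back into x1 < x2 order as x1=70, x2=90.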
class HorizontalFlip(object):
"""Randomly horizontally flips the Image with the probability *p*
Parameters
----------
p: float
The probability with which the image is flipped
Returns
-------
numpy.ndaaray
Flipped image in the numpy format of shape `HxWxC`
numpy.ndarray
Tranformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self):
pass
def __call__(self, img, bboxes):
img_center = np.array(img.shape[:2])[::-1]/2
img_center = np.hstack((img_center, img_center))
img = img[:, ::-1, :]
bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])
box_w = abs(bboxes[:, 0] - bboxes[:, 2])
bboxes[:, 0] -= box_w
bboxes[:, 2] += box_w
return img, bboxes
class RandomScale(object):
"""Randomly scales an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
scale: float or tuple(float)
if **float**, the image is scaled by a factor drawn
randomly from a range (1 - `scale` , 1 + `scale`). If **tuple**,
the `scale` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Scaled image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, scale = 0.2, diff = False):
self.scale = scale
if type(self.scale) == tuple:
assert len(self.scale) == 2, "Invalid range"
assert self.scale[0] > -1, "Scale factor can't be less than -1"
assert self.scale[1] > -1, "Scale factor can't be less than -1"
else:
assert self.scale > 0, "Please input a positive float"
self.scale = (max(-1, -self.scale), self.scale)
self.diff = diff
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
if self.diff:
scale_x = random.uniform(*self.scale)
scale_y = random.uniform(*self.scale)
else:
scale_x = random.uniform(*self.scale)
scale_y = scale_x
resize_scale_x = 1 + scale_x
resize_scale_y = 1 + scale_y
img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y)
bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
canvas = np.zeros(img_shape, dtype = np.uint8)
y_lim = int(min(resize_scale_y,1)*img_shape[0])
x_lim = int(min(resize_scale_x,1)*img_shape[1])
canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:]
img = canvas
bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class Scale(object):
"""Scales the image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
scale_x: float
The factor by which the image is scaled horizontally
scale_y: float
The factor by which the image is scaled vertically
Returns
-------
numpy.ndarray
Scaled image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, scale_x = 0.2, scale_y = 0.2):
self.scale_x = scale_x
self.scale_y = scale_y
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
resize_scale_x = 1 + self.scale_x
resize_scale_y = 1 + self.scale_y
img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y)
bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
canvas = np.zeros(img_shape, dtype = np.uint8)
y_lim = int(min(resize_scale_y,1)*img_shape[0])
x_lim = int(min(resize_scale_x,1)*img_shape[1])
canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:]
img = canvas
bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class RandomTranslate(object):
"""Randomly Translates the image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
translate: float or tuple(float)
if **float**, the image is translated by a fraction of its size drawn
randomly from the range (-`translate`, `translate`). If **tuple**, the
fraction is drawn randomly from the range specified by the tuple
Returns
-------
numpy.ndarray
Translated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, translate = 0.2, diff = False):
self.translate = translate
if type(self.translate) == tuple:
assert len(self.translate) == 2, "Invalid range"
assert 0 < self.translate[0] < 1
assert 0 < self.translate[1] < 1
else:
assert self.translate > 0 and self.translate < 1
self.translate = (-self.translate, self.translate)
self.diff = diff
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
#translate the image
#percentage of the dimension of the image to translate
translate_factor_x = random.uniform(*self.translate)
translate_factor_y = random.uniform(*self.translate)
if not self.diff:
translate_factor_y = translate_factor_x
canvas = np.zeros(img_shape).astype(np.uint8)
corner_x = int(translate_factor_x*img.shape[1])
corner_y = int(translate_factor_y*img.shape[0])
#change the origin to the top-left corner of the translated box
orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]
mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
img = canvas
bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class Translate(object):
"""Randomly Translates the image
Bounding boxes which have an area of less than 25% in the remaining in the
transformed image is dropped. The resolution is maintained, and the remaining
area if any is filled by black color.
Parameters
----------
translate_x: float
The fraction of the image width by which the image is translated
translate_y: float
The fraction of the image height by which the image is translated
Returns
-------
numpy.ndarray
Translated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, translate_x = 0.2, translate_y = 0.2, diff = False):
self.translate_x = translate_x
self.translate_y = translate_y
assert self.translate_x > 0 and self.translate_x < 1
assert self.translate_y > 0 and self.translate_y < 1
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
#translate the image
#percentage of the dimension of the image to translate
translate_factor_x = self.translate_x
translate_factor_y = self.translate_y
canvas = np.zeros(img_shape).astype(np.uint8)
#get the top-left corner co-ordinates of the shifted box
corner_x = int(translate_factor_x*img.shape[1])
corner_y = int(translate_factor_y*img.shape[0])
#change the origin to the top-left corner of the translated box
orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]
mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
img = canvas
bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class RandomRotate(object):
"""Randomly rotates an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
angle: float or tuple(float)
if **float**, the image is rotated by a factor drawn
randomly from a range (-`angle`, `angle`). If **tuple**,
the `angle` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Rotated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, angle = 10):
self.angle = angle
if type(self.angle) == tuple:
assert len(self.angle) == 2, "Invalid range"
else:
self.angle = (-self.angle, self.angle)
def __call__(self, img, bboxes):
angle = random.uniform(*self.angle)
w,h = img.shape[1], img.shape[0]
cx, cy = w//2, h//2
img = rotate_im(img, angle)
corners = get_corners(bboxes)
corners = np.hstack((corners, bboxes[:,4:]))
corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)
new_bbox = get_enclosing_box(corners)
scale_factor_x = img.shape[1] / w
scale_factor_y = img.shape[0] / h
img = cv2.resize(img, (w,h))
new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
bboxes = new_bbox
bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
return img, bboxes
class Rotate(object):
"""Rotates an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
angle: float
The angle by which the image is to be rotated
Returns
-------
numpy.ndarray
Rotated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, angle):
self.angle = angle
def __call__(self, img, bboxes):
"""
Args:
img (numpy.ndarray): Image to be rotated.
bboxes (numpy.ndarray): Bounding boxes to transform along with the image.
Returns:
numpy.ndarray: Rotated image and transformed bounding boxes.
"""
angle = self.angle
w,h = img.shape[1], img.shape[0]
cx, cy = w//2, h//2
corners = get_corners(bboxes)
corners = np.hstack((corners, bboxes[:,4:]))
img = rotate_im(img, angle)
corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)
new_bbox = get_enclosing_box(corners)
scale_factor_x = img.shape[1] / w
scale_factor_y = img.shape[0] / h
img = cv2.resize(img, (w,h))
new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
bboxes = new_bbox
bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
return img, bboxes
class RandomShear(object):
"""Randomly shears an image in horizontal direction
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
shear_factor: float or tuple(float)
if **float**, the image is sheared horizontally by a factor drawn
randomly from a range (-`shear_factor`, `shear_factor`). If **tuple**,
the `shear_factor` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Sheared image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, shear_factor = 0.2):
self.shear_factor = shear_factor
if type(self.shear_factor) == tuple:
assert len(self.shear_factor) == 2, "Invalid range for scaling factor"
else:
self.shear_factor = (-self.shear_factor, self.shear_factor)
def __call__(self, img, bboxes):
shear_factor = random.uniform(*self.shear_factor)
w,h = img.shape[1], img.shape[0]
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
nW = img.shape[1] + abs(shear_factor*img.shape[0])
bboxes[:,[0,2]] += ((bboxes[:,[1,3]]) * abs(shear_factor) ).astype(int)
img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
img = cv2.resize(img, (w,h))
scale_factor_x = nW / w
bboxes[:,:4] /= [scale_factor_x, 1, scale_factor_x, 1]
return img, bboxes
class Shear(object):
"""Shears an image in horizontal direction
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
shear_factor: float
Factor by which the image is sheared in the x-direction
Returns
-------
numpy.ndarray
Sheared image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, shear_factor = 0.2):
self.shear_factor = shear_factor
def __call__(self, img, bboxes):
shear_factor = self.shear_factor
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
nW = img.shape[1] + abs(shear_factor*img.shape[0])
bboxes[:,[0,2]] += ((bboxes[:,[1,3]])*abs(shear_factor)).astype(int)
img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
return img, bboxes
class Resize(object):
"""Resize the image in accordance to `image_letter_box` function in darknet
The aspect ratio is maintained. The longer side is resized to the input
size of the network, while the remaining space on the shorter side is filled
with black color. **This should be the last transform**
Parameters
----------
inp_dim : int
the side length of the square to which the image will be letterboxed
Returns
-------
numpy.ndarray
Resized image in the numpy format of shape `HxWxC`
numpy.ndarray
Resized bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, inp_dim):
self.inp_dim = inp_dim
def __call__(self, img, bboxes):
w,h = img.shape[1], img.shape[0]
img = letterbox_image(img, self.inp_dim)
scale = min(self.inp_dim/h, self.inp_dim/w)
bboxes[:,:4] *= (scale)
new_w = scale*w
new_h = scale*h
inp_dim = self.inp_dim
del_h = (inp_dim - new_h)/2
del_w = (inp_dim - new_w)/2
add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int)
bboxes[:,:4] += add_matrix
img = img.astype(np.uint8)
return img, bboxes
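# Worked example (editor's illustration): letterboxing a 200x100 image with
# inp_dim=100 gives scale = 0.5, a 100x50 resized image centred on a 100x100
# canvas, del_w = 0 and del_h = 25, so box coordinates are halved and then
# shifted down by 25 px.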
class RandomHSV(object):
"""HSV Transform to vary hue saturation and brightness
Hue has a range of 0-179
Saturation and Brightness have a range of 0-255.
Choose the amounts by which you want to change the above quantities accordingly.
Parameters
----------
hue : None or int or tuple (int)
If None, the hue of the image is left unchanged. If int,
a random int is uniformly sampled from (-hue, hue) and added to the
hue of the image. If tuple, the int is sampled from the range
specified by the tuple.
saturation : None or int or tuple(int)
If None, the saturation of the image is left unchanged. If int,
a random int is uniformly sampled from (-saturation, saturation)
and added to the saturation of the image. If tuple, the int is sampled
from the range specified by the tuple.
brightness : None or int or tuple(int)
If None, the brightness of the image is left unchanged. If int,
a random int is uniformly sampled from (-brightness, brightness)
and added to the brightness of the image. If tuple, the int is sampled
from the range specified by the tuple.
Returns
-------
numpy.ndarray
Transformed image in the numpy format of shape `HxWxC`
numpy.ndarray
Unchanged bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, hue = None, saturation = None, brightness = None):
if hue:
self.hue = hue
else:
self.hue = 0
if saturation:
self.saturation = saturation
else:
self.saturation = 0
if brightness:
self.brightness = brightness
else:
self.brightness = 0
if type(self.hue) != tuple:
self.hue = (-self.hue, self.hue)
if type(self.saturation) != tuple:
self.saturation = (-self.saturation, self.saturation)
if type(self.brightness) != tuple:
self.brightness = (-self.brightness, self.brightness)
def __call__(self, img, bboxes):
hue = random.randint(*self.hue)
saturation = random.randint(*self.saturation)
brightness = random.randint(*self.brightness)
img = img.astype(int)
a = np.array([hue, saturation, brightness]).astype(int)
img += np.reshape(a, (1,1,3))
img = np.clip(img, 0, 255)
img[:,:,0] = np.clip(img[:,:,0],0, 179)
img = img.astype(np.uint8)
return img, bboxes
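# Editor's note (illustrative): RandomHSV(hue=20, saturation=30, brightness=30)
# adds a uniform random integer in [-20, 20] to channel 0 and integers in
# [-30, 30] to channels 1 and 2, then clips channel 0 to [0, 179] and all
# channels to [0, 255]; the bounding boxes pass through unchanged.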
class Sequence(object):
"""Initialise Sequence object
Apply a Sequence of transformations to the images/boxes.
Parameters
----------
augmentations : list
List containing Transformation Objects in Sequence they are to be
applied
probs : float or list
If **int**, the probability with which each of the transformation will
be applied. If **list**, the length must be equal to *augmentations*.
Each element of this list is the probability with which each
corresponding transformation is applied
Returns
-------
Sequence
Sequence Object
"""
def __init__(self, augmentations, probs = 1):
self.augmentations = augmentations
self.probs = probs
def __call__(self, images, bboxes):
for i, augmentation in enumerate(self.augmentations):
if type(self.probs) == list:
prob = self.probs[i]
else:
prob = self.probs
if random.random() < prob:
images, bboxes = augmentation(images, bboxes)
return images, bboxes
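A minimal usage sketch for the pipeline above (editor's illustration; it assumes the data_aug folder is on sys.path so that data_aug.py is importable, as data.py arranges):

```python
import numpy as np
from data_aug import RandomHorizontalFlip, RandomScale, RandomRotate, Sequence

# Dummy image and one box in [x1, y1, x2, y2, class] format.
img = np.random.randint(0, 255, (480, 640, 3)).astype(np.float32)
bboxes = np.array([[50.0, 60.0, 200.0, 220.0, 1.0]])

seq = Sequence(
    [RandomHorizontalFlip(0.5), RandomScale(0.2), RandomRotate(10)],
    probs=[1.0, 0.5, 0.5],  # per-transform application probability
)
img, bboxes = seq(img.copy(), bboxes.copy())
```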

model.py (new file, 221 lines):
# %%
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import json
import torch
from torchvision import transforms as T
import numpy as np
import os
import sys
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
from references.detection import utils, engine
import data_aug
import bbox_util
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def create_map(list_in, from_key, to_key):
cmap = dict()
for l in list_in:
cmap[l[from_key]] = l[to_key]
return cmap
class iNaturalistDataset(torch.utils.data.Dataset):
def __init__(self, validation=False, train=False, transforms = None):
self.validation = validation
self.train = train
self.transforms = transforms
if validation:
json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
elif train:
json_path = os.path.join(
PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
)
with open(json_path, "r") as rj:
f = json.load(rj)
categories = list()
image_info = dict()
for category in f["categories"]:
if category["supercategory"] == "Aves":
if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
print(category['name'])
categories.append(category)
categories = sorted(categories, key=lambda k: k["name"])
for idx, cat in enumerate(categories):
cat["new_id"] = idx + 1
orig_to_new_id = create_map(categories, "id", "new_id")
for annot in f["annotations"]:
if annot["category_id"] in orig_to_new_id:
annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
id = annot["image_id"]
if id not in image_info:
image_info[id] = dict()
annot["bbox"][2] += annot["bbox"][0]
annot["bbox"][3] += annot["bbox"][1]
image_info[id]["annotation"] = annot
for img in f["images"]:
id = img["id"]
path = os.path.join(PATH_ROOT, img["file_name"])
height = img["height"]
width = img["width"]
if id in image_info:
image_info[id].update({"path": path, "height": height, "width": width})
for idx, (id, im_in) in enumerate(image_info.items()):
im_in["idx"] = idx
self.images = image_info
self.categories = categories
self.idx_to_id = [x for x in self.images]
self.num_classes = len(self.categories) + 1
self.num_samples = len(self.images)
def __len__(self):
return self.num_samples
def __getitem__(self, idx):
idd = self.idx_to_id[idx]
c_image = self.images[idd]
img_path = c_image["path"]
img = Image.open(img_path).convert("RGB")
annot = c_image["annotation"]
bbox = annot["bbox"]
boxes = bbox
target = dict()
target["boxes"] = torch.as_tensor([boxes])
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
target['image_id'] = torch.tensor([annot['image_id']])
target['area'] = torch.as_tensor([annot['area']])
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
# %%
# v = iNaturalistDataset(validation=True)
# v = iNaturalistDataset(validation= True)
# o = v[10]
# %%
# oimage = t.tensor(o[0]*255, dtype=t.uint8)
# import matplotlib.pyplot as plt
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
# plt.imshow(ox.permute([1,2,0]))
# plt.savefig('crap2.png')
def get_model(num_classes):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2 # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
return model
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
from engine import train_one_epoch, evaluate
import utils
# %%
def run():
val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True))
train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False))
train_data_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
val_data_loader = torch.utils.data.DataLoader(
val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
num_classes = 2
model = get_model(num_classes)
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
print(epoch)
torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth')
# train for one epoch, printing every 10 iterations
engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth')
# update the learning rate
lr_scheduler.step()
torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth')
# evaluate on the test dataset
engine.evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
run()
# # %%
# json_path = os.path.join(
# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
# )
# with open(json_path, "r") as rj:
# f = json.load(rj)
# # %%
# image_id: 2358
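run() checkpoints the weights three times per epoch; a sketch of reloading one of those files for inference (editor's illustration; the epoch index in the filename is an example):

```python
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Rebuild the same two-class architecture that get_model() produces,
# then load weights written by run().
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
model.load_state_dict(torch.load("model_weights_post_train_9.pth", map_location="cpu"))
model.eval()
with torch.no_grad():
    detections = model([torch.rand(3, 600, 800)])  # dummy image tensor
print(detections[0]["boxes"], detections[0]["scores"])
```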

old_files/README.md (new file, 82 lines):
# Object detection reference training scripts
This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, to provide baseline
training and evaluation scripts to quickly bootstrap research.
To execute the example commands below you must install the following:
```
cython
pycocotools
matplotlib
```
You must modify the following flags:
`--data-path=/path/to/coco/dataset`
`--nproc_per_node=<number_of_gpus_available>`
Unless otherwise noted, all models have been trained on 8x V100 GPUs.
### Faster R-CNN ResNet-50 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Faster R-CNN MobileNetV3-Large FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Faster R-CNN MobileNetV3-Large 320 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### RetinaNet
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model retinanet_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01
```
### SSD300 VGG16
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssd300_vgg16 --epochs 120\
--lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\
--weight-decay 0.0005 --data-augmentation ssd
```
### SSDlite320 MobileNetV3-Large
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\
--aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\
--weight-decay 0.00004 --data-augmentation ssdlite
```
### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model maskrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\
--lr-steps 36 43 --aspect-ratio-group-factor 3
```

old_files/coco_eval.py (new file, 352 lines):
import json
import tempfile
import numpy as np
import copy
import time
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from collections import defaultdict
import utils
class CocoEvaluator(object):
def __init__(self, coco_gt, iou_types):
assert isinstance(iou_types, (list, tuple))
coco_gt = copy.deepcopy(coco_gt)
self.coco_gt = coco_gt
self.iou_types = iou_types
self.coco_eval = {}
for iou_type in iou_types:
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
self.img_ids = []
self.eval_imgs = {k: [] for k in iou_types}
def update(self, predictions):
img_ids = list(np.unique(list(predictions.keys())))
self.img_ids.extend(img_ids)
for iou_type in self.iou_types:
results = self.prepare(predictions, iou_type)
coco_dt = loadRes(self.coco_gt, results) if results else COCO()
coco_eval = self.coco_eval[iou_type]
coco_eval.cocoDt = coco_dt
coco_eval.params.imgIds = list(img_ids)
img_ids, eval_imgs = evaluate(coco_eval)
self.eval_imgs[iou_type].append(eval_imgs)
def synchronize_between_processes(self):
for iou_type in self.iou_types:
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
def accumulate(self):
for coco_eval in self.coco_eval.values():
coco_eval.accumulate()
def summarize(self):
for iou_type, coco_eval in self.coco_eval.items():
print("IoU metric: {}".format(iou_type))
coco_eval.summarize()
def prepare(self, predictions, iou_type):
if iou_type == "bbox":
return self.prepare_for_coco_detection(predictions)
elif iou_type == "segm":
return self.prepare_for_coco_segmentation(predictions)
elif iou_type == "keypoints":
return self.prepare_for_coco_keypoint(predictions)
else:
raise ValueError("Unknown iou type {}".format(iou_type))
def prepare_for_coco_detection(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"bbox": box,
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
return coco_results
def prepare_for_coco_segmentation(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
scores = prediction["scores"]
labels = prediction["labels"]
masks = prediction["masks"]
masks = masks > 0.5
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
for mask in masks
]
for rle in rles:
rle["counts"] = rle["counts"].decode("utf-8")
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"segmentation": rle,
"score": scores[k],
}
for k, rle in enumerate(rles)
]
)
return coco_results
def prepare_for_coco_keypoint(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
keypoints = prediction["keypoints"]
keypoints = keypoints.flatten(start_dim=1).tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
'keypoints': keypoint,
"score": scores[k],
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results
def convert_to_xywh(boxes):
xmin, ymin, xmax, ymax = boxes.unbind(1)
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
def merge(img_ids, eval_imgs):
all_img_ids = utils.all_gather(img_ids)
all_eval_imgs = utils.all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
img_ids, eval_imgs = merge(img_ids, eval_imgs)
img_ids = list(img_ids)
eval_imgs = list(eval_imgs.flatten())
coco_eval.evalImgs = eval_imgs
coco_eval.params.imgIds = img_ids
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions
def createIndex(self):
# create index
# print('creating index...')
anns, cats, imgs = {}, {}, {}
imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
# print('index created!')
# create class members
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
maskUtils = mask_util
def loadRes(self, resFile):
"""
Load result file and return a result api object.
Args:
self (obj): coco object with ground truth annotations
resFile (str): file name of result file
Returns:
res (obj): result api object
"""
res = COCO()
res.dataset['images'] = [img for img in self.dataset['images']]
# print('Loading and preparing results...')
# tic = time.time()
if isinstance(resFile, torch._six.string_classes):
anns = json.load(open(resFile))
elif type(resFile) == np.ndarray:
anns = self.loadNumpyAnnotations(resFile)
else:
anns = resFile
assert type(anns) == list, 'results is not an array of objects'
annsImgIds = [ann['image_id'] for ann in anns]
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if 'caption' in anns[0]:
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
for id, ann in enumerate(anns):
ann['id'] = id + 1
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
bb = ann['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in ann:
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
ann['area'] = bb[2] * bb[3]
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'segmentation' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
# now only support compressed RLE format as segmentation results
ann['area'] = maskUtils.area(ann['segmentation'])
if 'bbox' not in ann:
ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'keypoints' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
s = ann['keypoints']
x = s[0::3]
y = s[1::3]
x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
ann['area'] = (x2 - x1) * (y2 - y1)
ann['id'] = id + 1
ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
# print('DONE (t={:0.2f}s)'.format(time.time()- tic))
res.dataset['annotations'] = anns
createIndex(res)
return res
def evaluate(self):
'''
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
'''
# tic = time.time()
# print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
# print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == 'segm' or p.iouType == 'bbox':
computeIoU = self.computeIoU
elif p.iouType == 'keypoints':
computeIoU = self.computeOks
self.ious = {
(imgId, catId): computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
# this is NOT in the pycocotools code, but could be done outside
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
self._paramsEval = copy.deepcopy(self.params)
# toc = time.time()
# print('DONE (t={:0.2f}s).'.format(toc-tic))
return p.imgIds, evalImgs
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
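CocoEvaluator is driven in four phases: update() with per-image predictions, synchronize_between_processes(), accumulate(), and summarize(). A sketch of that loop, mirroring engine.evaluate from the torchvision references (editor's illustration; model, device, and val_data_loader are assumed to exist, and coco_gt would come from coco_utils.get_coco_api_from_dataset):

```python
evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
for images, targets in val_data_loader:
    images = [img.to(device) for img in images]
    outputs = model(images)  # list of dicts with boxes / labels / scores
    res = {
        t["image_id"].item(): {k: v.cpu() for k, v in o.items()}
        for t, o in zip(targets, outputs)
    }
    evaluator.update(res)
evaluator.synchronize_between_processes()
evaluator.accumulate()
evaluator.summarize()
```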

old_files/coco_utils.py (new file, 252 lines):
import copy
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
import transforms as T
class FilterAndRemapCocoCategories(object):
def __init__(self, categories, remap=True):
self.categories = categories
self.remap = remap
def __call__(self, image, target):
anno = target["annotations"]
anno = [obj for obj in anno if obj["category_id"] in self.categories]
if not self.remap:
target["annotations"] = anno
return image, target
anno = copy.deepcopy(anno)
for obj in anno:
obj["category_id"] = self.categories.index(obj["category_id"])
target["annotations"] = anno
return image, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def _coco_remove_images_without_annotations(dataset, cat_list=None):
def _has_only_empty_bbox(anno):
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
min_keypoints_per_image = 10
def _has_valid_annotation(anno):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
# keypoint tasks have a slightly different criterion for considering
# whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
assert isinstance(dataset, torchvision.datasets.CocoDetection)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = dataset.coco.loadAnns(ann_ids)
if cat_list:
anno = [obj for obj in anno if obj["category_id"] in cat_list]
if _has_valid_annotation(anno):
ids.append(ds_idx)
dataset = torch.utils.data.Subset(dataset, ids)
return dataset
def convert_to_coco_api(ds):
coco_ds = COCO()
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
ann_id = 1
dataset = {'images': [], 'categories': [], 'annotations': []}
categories = set()
for img_idx in range(len(ds)):
# find better way to get target
# targets = ds.get_annotations(img_idx)
img, targets = ds[img_idx]
image_id = targets["image_id"].item()
img_dict = {}
img_dict['id'] = image_id
img_dict['height'] = img.shape[-2]
img_dict['width'] = img.shape[-1]
dataset['images'].append(img_dict)
bboxes = targets["boxes"]
bboxes[:, 2:] -= bboxes[:, :2]
bboxes = bboxes.tolist()
labels = targets['labels'].tolist()
areas = targets['area'].tolist()
iscrowd = targets['iscrowd'].tolist()
if 'masks' in targets:
masks = targets['masks']
# make masks Fortran contiguous for coco_mask
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
if 'keypoints' in targets:
keypoints = targets['keypoints']
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
num_objs = len(bboxes)
for i in range(num_objs):
ann = {}
ann['image_id'] = image_id
ann['bbox'] = bboxes[i]
ann['category_id'] = labels[i]
categories.add(labels[i])
ann['area'] = areas[i]
ann['iscrowd'] = iscrowd[i]
ann['id'] = ann_id
if 'masks' in targets:
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
if 'keypoints' in targets:
ann['keypoints'] = keypoints[i]
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
dataset['annotations'].append(ann)
ann_id += 1
dataset['categories'] = [{'id': i} for i in sorted(categories)]
coco_ds.dataset = dataset
coco_ds.createIndex()
return coco_ds
def get_coco_api_from_dataset(dataset):
for _ in range(10):
if isinstance(dataset, torchvision.datasets.CocoDetection):
break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
return convert_to_coco_api(dataset)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = dict(image_id=image_id, annotations=target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
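# Minimal usage sketch (paths are placeholders, not from this repo):
#     ds = CocoDetection("coco/val2017",
#                        "coco/annotations/instances_val2017.json",
#                        transforms=ConvertCocoPolysToMask())
#     img, target = ds[0]   # target carries boxes/labels/masks tensors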
def get_coco(root, image_set, transforms, mode='instances'):
anno_file_template = "{}_{}2017.json"
PATHS = {
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
}
t = [ConvertCocoPolysToMask()]
if transforms is not None:
t.append(transforms)
transforms = T.Compose(t)
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
dataset = _coco_remove_images_without_annotations(dataset)
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
return dataset
def get_coco_kp(root, image_set, transforms):
return get_coco(root, image_set, transforms, mode="person_keypoints")
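# Example call (hedged: assumes the standard COCO 2017 directory layout under root):
#     dataset = get_coco("/data/coco", image_set="val", transforms=None, mode="instances")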

BIN
old_files/crap.png Normal file

Binary file not shown.


BIN
old_files/crap2.png Normal file

Binary file not shown.


221
old_files/data.py Normal file
View File

@@ -0,0 +1,221 @@
# %%
import json
import os
import sys

import numpy as np
import torch
import torchvision
from PIL import Image
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
from references.detection import utils, engine
import data_aug
import bbox_util
# NOTE: the effective get_transform is defined further down, after
# `import transforms as T` rebinds T to the detection-aware transforms module.
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def create_map(list_in, from_key, to_key):
cmap = dict()
for l in list_in:
cmap[l[from_key]] = l[to_key]
return cmap
class iNaturalistDataset(torch.utils.data.Dataset):
def __init__(self, validation=False, train=False, transforms=None):
self.validation = validation
self.train = train
self.transforms = transforms
if validation:
json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
elif train:
json_path = os.path.join(
PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
)
with open(json_path, "r") as rj:
f = json.load(rj)
categories = list()
image_info = dict()
for category in f["categories"]:
if category["supercategory"] == "Aves":
if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
print(category['name'])
categories.append(category)
categories = sorted(categories, key=lambda k: k["name"])
for idx, cat in enumerate(categories):
cat["new_id"] = idx + 1
orig_to_new_id = create_map(categories, "id", "new_id")
for annot in f["annotations"]:
if annot["category_id"] in orig_to_new_id:
annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
id = annot["image_id"]
if id not in image_info:
image_info[id] = dict()
annot["bbox"][2] += annot["bbox"][0]
annot["bbox"][3] += annot["bbox"][1]
image_info[id]["annotation"] = annot
for img in f["images"]:
id = img["id"]
path = os.path.join(PATH_ROOT, img["file_name"])
height = img["height"]
width = img["width"]
if id in image_info:
image_info[id].update({"path": path, "height": height, "width": width})
for idx, (id, im_in) in enumerate(image_info.items()):
im_in["idx"] = idx
self.images = image_info
self.categories = categories
self.idx_to_id = [x for x in self.images]
self.num_classes = len(self.categories) + 1
self.num_samples = len(self.images)
def __len__(self):
return self.num_samples
def __getitem__(self, idx):
idd = self.idx_to_id[idx]
c_image = self.images[idd]
img_path = c_image["path"]
img = Image.open(img_path).convert("RGB")
annot = c_image["annotation"]
boxes = annot["bbox"]  # already converted to [x1, y1, x2, y2] above
target = dict()
target["boxes"] = torch.as_tensor([boxes], dtype=torch.float32)
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
target['image_id'] = torch.tensor([annot['image_id']])
target['area'] = torch.as_tensor([annot['area']])
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
# %%
# v = iNaturalistDataset(validation=True)
# v = iNaturalistDataset(validation= True)
# o = v[10]
# %%
# oimage = torch.tensor(o[0]*255, dtype=torch.uint8)
# import matplotlib.pyplot as plt
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
# plt.imshow(ox.permute([1,2,0]))
# plt.savefig('crap2.png')
def get_model(num_classes):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# num_classes counts the background, e.g. 2 = 1 bird class + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
return model
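# Quick smoke test for the repurposed head (illustrative sketch):
#     m = get_model(num_classes=2)
#     m.eval()
#     with torch.no_grad():
#         preds = m([torch.rand(3, 600, 800)])  # list of dicts: boxes/labels/scores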
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
from engine import train_one_epoch, evaluate
import utils
# %%
def run():
# augment only the training split; keep validation deterministic
val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=False))
train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=True))
train_data_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
val_data_loader = torch.utils.data.DataLoader(
val_dataset, batch_size=8, shuffle=False, num_workers=1, collate_fn=utils.collate_fn
)
num_classes = 2  # 1 bird class + background
model = get_model(num_classes)
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
print(epoch)
torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth')
# train for one epoch, printing every 10 iterations
engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth')
# update the learning rate
lr_scheduler.step()
torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth')
# evaluate on the test dataset
engine.evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
run()
# # %%
# json_path = os.path.join(
# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
# )
# with open(json_path, "r") as rj:
# f = json.load(rj)
# # %%
# image_id: 2358

110
old_files/engine.py Normal file
View File

@@ -0,0 +1,110 @@
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
if epoch == 0:
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
def _get_iou_types(model):
model_without_ddp = model
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model_without_ddp = model.module
iou_types = ["bbox"]
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
iou_types.append("segm")
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
iou_types.append("keypoints")
return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = 'Test:'
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
if torch.cuda.is_available():
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator
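# Typical call (sketch; assumes a DataLoader built with utils.collate_fn):
#     evaluator = evaluate(model, val_loader, device=torch.device("cuda"))
#     # evaluator.coco_eval["bbox"].stats then holds the 12 COCO AP/AR numbers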

28
old_files/fine_tuned.py Normal file
View File

@@ -0,0 +1,28 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2 # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280  # mobilenet_v2's feature extractor ends in a 1280-channel map
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
output_size=7,
sampling_ratio=2)
model = FasterRCNN(backbone,
num_classes=2,
rpn_anchor_generator=anchor_generator,
box_roi_pool=roi_pooler)
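# Smoke test for the mobilenet-backed model (illustrative sketch):
#     import torch
#     model.eval()
#     with torch.no_grad():
#         out = model([torch.rand(3, 300, 400)])  # one prediction dict per input image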
# %%

View File

@@ -15,19 +15,19 @@ sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
-model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 3)
+model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 2)
-model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_1.pth'))
+model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_9.pth'))
 model.eval()
 model.to('cuda')
-#img = r'D:\ishan\ml\inaturalist\test2017\00a903fa1d23b2f8f28248e81bc1c4a4.jpg'
-#img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210617093423.mp4_023.jpg'
-img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210609095848.mp4_133.jpg'
 import random
 rtdir = r'J:\hummingbird_imagenet\hummingbird'
 ff = os.listdir(rtdir)
+# %%
 img = os.path.join(rtdir,random.choice(ff))
 image = cv2.imread(img)[:,:,::-1].copy()
 o = T.ToTensor()(image).cuda()
@@ -43,7 +43,6 @@ plt.imshow(ox.permute([1,2,0]))
 # %%
 from data import iNaturalistDataset
 sd = iNaturalistDataset(validation=True)
-# # %%

233
old_files/train.py Normal file
View File

@@ -0,0 +1,233 @@
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco, get_coco_kp
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate
import presets
import utils
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train, data_augmentation):
return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
def get_args_parser(add_help=True):
import argparse
parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
parser.add_argument('--lr-step-size', default=8, type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-gamma', default=0.1, type=float,
help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
parser.add_argument('--trainable-backbone-layers', default=None, type=int,
help='number of trainable layers of backbone')
parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
parser.add_argument(
"--sync-bn",
dest="sync_bn",
help="Use sync batch norm",
action="store_true",
)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
return parser
def main(args):
if args.output_dir:
utils.mkdir(args.output_dir)
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
kwargs = {
"trainable_backbone_layers": args.trainable_backbone_layers
}
if "rcnn" in args.model:
if args.rpn_score_thresh is not None:
kwargs["rpn_score_thresh"] = args.rpn_score_thresh
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
**kwargs)
model.to(device)
if args.distributed and args.sync_bn:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
args.lr_scheduler = args.lr_scheduler.lower()
if args.lr_scheduler == 'multisteplr':
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
elif args.lr_scheduler == 'cosineannealinglr':
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
else:
raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
"are supported.".format(args.lr_scheduler))
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
checkpoint = {
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch
}
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'checkpoint.pth'))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
args = get_args_parser().parse_args()
main(args)

239
old_files/transforms.py Normal file
View File

@@ -0,0 +1,239 @@
import torch
import torchvision
from torch import nn, Tensor
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from typing import List, Tuple, Dict, Optional
def _flip_coco_person_keypoints(kps, width):
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
flipped_data = kps[:, flip_inds]
flipped_data[..., 0] = width - flipped_data[..., 0]
# Maintain COCO convention that if visibility == 0, then x, y = 0
inds = flipped_data[..., 2] == 0
flipped_data[inds] = 0
return flipped_data
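# Example: COCO orders keypoints [nose, l_eye, r_eye, l_ear, r_ear, ...], so
# flip_inds swaps each left/right pair (1<->2, 3<->4, ...) after mirroring x.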
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class RandomHorizontalFlip(T.RandomHorizontalFlip):
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if torch.rand(1) < self.p:
image = F.hflip(image)
if target is not None:
width, _ = F._get_image_size(image)
target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
if "keypoints" in target:
keypoints = target["keypoints"]
keypoints = _flip_coco_person_keypoints(keypoints, width)
target["keypoints"] = keypoints
return image, target
class ToTensor(nn.Module):
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
image = F.to_tensor(image)
return image, target
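# Composition sketch (hedged): these transforms take and return (image, target)
# pairs, so they chain through the Compose defined above:
#     tfm = Compose([ToTensor(), RandomHorizontalFlip(0.5)])
#     img, target = tfm(pil_image, target)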
class RandomIoUCrop(nn.Module):
def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
super().__init__()
# Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
self.min_scale = min_scale
self.max_scale = max_scale
self.min_aspect_ratio = min_aspect_ratio
self.max_aspect_ratio = max_aspect_ratio
if sampler_options is None:
sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
self.options = sampler_options
self.trials = trials
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if target is None:
raise ValueError("The targets can't be None for this transform.")
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
orig_w, orig_h = F._get_image_size(image)
while True:
# sample an option
idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
min_jaccard_overlap = self.options[idx]
if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option
return image, target
for _ in range(self.trials):
# check the aspect ratio limitations
r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
new_w = int(orig_w * r[0])
new_h = int(orig_h * r[1])
aspect_ratio = new_w / new_h
if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
continue
# check for 0 area crops
r = torch.rand(2)
left = int((orig_w - new_w) * r[0])
top = int((orig_h - new_h) * r[1])
right = left + new_w
bottom = top + new_h
if left == right or top == bottom:
continue
# check for any valid boxes with centers within the crop area
cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
if not is_within_crop_area.any():
continue
# check at least 1 box with jaccard limitations
boxes = target["boxes"][is_within_crop_area]
ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
dtype=boxes.dtype, device=boxes.device))
if ious.max() < min_jaccard_overlap:
continue
# keep only valid boxes and perform cropping
target["boxes"] = boxes
target["labels"] = target["labels"][is_within_crop_area]
target["boxes"][:, 0::2] -= left
target["boxes"][:, 1::2] -= top
target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
image = F.crop(image, top, left, new_h, new_w)
return image, target
class RandomZoomOut(nn.Module):
def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
super().__init__()
if fill is None:
fill = [0., 0., 0.]
self.fill = fill
self.side_range = side_range
if side_range[0] < 1. or side_range[0] > side_range[1]:
raise ValueError("Invalid canvas side range provided {}.".format(side_range))
self.p = p
@torch.jit.unused
def _get_fill_value(self, is_pil):
# type: (bool) -> int
# We fake the type to make it work on JIT
return tuple(int(x) for x in self.fill) if is_pil else 0
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
if torch.rand(1) < self.p:
return image, target
orig_w, orig_h = F._get_image_size(image)
r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
canvas_width = int(orig_w * r)
canvas_height = int(orig_h * r)
r = torch.rand(2)
left = int((canvas_width - orig_w) * r[0])
top = int((canvas_height - orig_h) * r[1])
right = canvas_width - (left + orig_w)
bottom = canvas_height - (top + orig_h)
if torch.jit.is_scripting():
fill = 0
else:
fill = self._get_fill_value(F._is_pil_image(image))
image = F.pad(image, [left, top, right, bottom], fill=fill)
if isinstance(image, torch.Tensor):
v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
image[..., :, (left + orig_w):] = v
if target is not None:
target["boxes"][:, 0::2] += left
target["boxes"][:, 1::2] += top
return image, target
class RandomPhotometricDistort(nn.Module):
def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
super().__init__()
self._brightness = T.ColorJitter(brightness=brightness)
self._contrast = T.ColorJitter(contrast=contrast)
self._hue = T.ColorJitter(hue=hue)
self._saturation = T.ColorJitter(saturation=saturation)
self.p = p
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
r = torch.rand(7)
if r[0] < self.p:
image = self._brightness(image)
contrast_before = r[1] < 0.5
if contrast_before:
if r[2] < self.p:
image = self._contrast(image)
if r[3] < self.p:
image = self._saturation(image)
if r[4] < self.p:
image = self._hue(image)
if not contrast_before:
if r[5] < self.p:
image = self._contrast(image)
if r[6] < self.p:
channels = F._get_image_num_channels(image)
permutation = torch.randperm(channels)
is_pil = F._is_pil_image(image)
if is_pil:
image = F.to_tensor(image)
image = image[..., permutation, :, :]
if is_pil:
image = F.to_pil_image(image)
return image, target

295
old_files/utils.py Normal file
View File

@@ -0,0 +1,295 @@
from collections import defaultdict, deque
import datetime
import errno
import os
import time
import torch
import torch.distributed as dist
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{median:.4f} ({global_avg:.4f})"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
def synchronize_between_processes(self):
"""
Warning: does not synchronize the deque!
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
self.count = int(t[0])
self.total = t[1]
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
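# Usage sketch (illustrative):
#     meter = SmoothedValue(window_size=10, fmt='{avg:.4f}')
#     meter.update(0.5); meter.update(0.7)
#     meter.avg         # windowed mean -> 0.6
#     meter.global_avg  # running mean over all updates -> 0.6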
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
data_list = [None] * world_size
dist.all_gather_object(data_list, data)
return data_list
def reduce_dict(input_dict, average=True):
"""
Args:
input_dict (dict): all the values will be reduced
average (bool): whether to do average or sum
Reduce the values in the dictionary from all processes so that all processes
have the averaged results. Returns a dict with the same fields as
input_dict, after reduction.
"""
world_size = get_world_size()
if world_size < 2:
return input_dict
with torch.no_grad():
names = []
values = []
# sort the keys so that they are consistent across processes
for k in sorted(input_dict.keys()):
names.append(k)
values.append(input_dict[k])
values = torch.stack(values, dim=0)
dist.all_reduce(values)
if average:
values /= world_size
reduced_dict = {k: v for k, v in zip(names, values)}
return reduced_dict
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
for meter in self.meters.values():
meter.synchronize_between_processes()
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
if torch.cuda.is_available():
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
else:
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
yield obj
iter_time.update(time.time() - end)
if i % print_freq == 0 or i == len(iterable) - 1:
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
def collate_fn(batch):
return tuple(zip(*batch))
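# Detection images vary in size, so the default collate (which stacks into one
# tensor) would fail; this keeps each batch as parallel tuples. Sketch:
#     batch = [(img_a, tgt_a), (img_b, tgt_b)]
#     images, targets = collate_fn(batch)   # images == (img_a, img_b)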
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
def f(x):
if x >= warmup_iters:
return 1
alpha = float(x) / warmup_iters
return warmup_factor * (1 - alpha) + alpha
return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
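# Worked example (illustrative): with warmup_iters=1000 and
# warmup_factor=1/1000, iteration x=500 gives a multiplier of
#     0.001 * (1 - 0.5) + 0.5 = 0.5005
# so the factor ramps linearly from ~0.001 up to 1.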
def mkdir(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def setup_for_distributed(is_master):
"""
This function disables printing when not in master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def save_on_master(*args, **kwargs):
if is_main_process():
torch.save(*args, **kwargs)
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)

233
train.py
View File

@@ -1,233 +0,0 @@
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco, get_coco_kp
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate
import presets
import utils
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train, data_augmentation):
return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
def get_args_parser(add_help=True):
import argparse
parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
parser.add_argument('--lr-step-size', default=8, type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-gamma', default=0.1, type=float,
help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
parser.add_argument('--trainable-backbone-layers', default=None, type=int,
help='number of trainable layers of backbone')
parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
parser.add_argument(
"--sync-bn",
dest="sync_bn",
help="Use sync batch norm",
action="store_true",
)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
return parser
def main(args):
if args.output_dir:
utils.mkdir(args.output_dir)
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
kwargs = {
"trainable_backbone_layers": args.trainable_backbone_layers
}
if "rcnn" in args.model:
if args.rpn_score_thresh is not None:
kwargs["rpn_score_thresh"] = args.rpn_score_thresh
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
**kwargs)
model.to(device)
if args.distributed and args.sync_bn:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
args.lr_scheduler = args.lr_scheduler.lower()
if args.lr_scheduler == 'multisteplr':
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
elif args.lr_scheduler == 'cosineannealinglr':
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
else:
raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
"are supported.".format(args.lr_scheduler))
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
checkpoint = {
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch
}
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'checkpoint.pth'))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
args = get_args_parser().parse_args()
main(args)

1
vision Submodule

Submodule vision added at a83b9a17e4