Commit: yacwc

.gitignore (vendored, 1 line changed)
@@ -1 +1,2 @@
 *.pth
+example
.vscode/settings.json (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
+{
+    "python.formatting.provider": "black"
+}
__pycache__/coco_eval.cpython-39.pyc (new file): binary file not shown.
__pycache__/coco_utils.cpython-39.pyc (new file): binary file not shown.
__pycache__/data.cpython-39.pyc (new file): binary file not shown.
__pycache__/engine.cpython-39.pyc (new file): binary file not shown.
__pycache__/transforms.cpython-39.pyc (new file): binary file not shown.
__pycache__/utils.cpython-39.pyc (new file): binary file not shown.
data.py (98 lines changed)
@@ -1,18 +1,19 @@
 # %%
+import os
+import numpy as np
+import torch
+from PIL import Image
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-from collections import defaultdict as ddict
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 import json
 import torch
-from torchvision import datasets, transforms as T
-import cv2
+from torchvision import transforms as T
 import numpy as np
 import os
 import sys
 
 sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
 from references.detection import utils, engine
@@ -28,9 +29,6 @@ def get_transform(train):
         transforms.append(T.RandomHorizontalFlip(0.5))
     return T.Compose(transforms)
 
-
-IMAGE_MEAN = [0.485, 0.456, 0.406]
-IMAGE_STD = [0.229, 0.224, 0.225]
 PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
 
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
@@ -43,12 +41,12 @@ def create_map(list_in, from_key, to_key):
 
 
 class iNaturalistDataset(torch.utils.data.Dataset):
-    def __init__(self, validation=False, train=False):
+    def __init__(self, validation=False, train=False, transforms=None):
 
         self.validation = validation
         self.train = train
+        self.transforms = transforms
 
-        self.transforms = T.Compose([T.Resize(600, max_size=1024), T.ToTensor()])
 
         if validation:
             json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
@@ -65,7 +63,7 @@ class iNaturalistDataset(torch.utils.data.Dataset):
 
         for category in f["categories"]:
             if category["supercategory"] == "Aves":
-                if category['name'] in ['Archilochus colubris', 'Icterus galbula']:
+                if category['name'] in ['Archilochus colubris']:  # ,'Icterus galbula']:
                     print(category['name'])
                     categories.append(category)
 
@@ -101,44 +99,35 @@ class iNaturalistDataset(torch.utils.data.Dataset):
         self.idx_to_id = [x for x in self.images]
         self.num_classes = len(self.categories) + 1
         self.num_samples = len(self.images)
-        self.transforms = [
-            data_aug.RandomHorizontalFlip(0.5),
-            data_aug.Resize(600),
-        ]
-        self.pre_transform = T.Compose([T.ToTensor()])#],T.Normalize(mean=[0.485, 0.456, 0.406],
-        #std=[0.229, 0.224, 0.225])])
 
     def __len__(self):
         return self.num_samples
 
-    def transform(self, img, bbox):
-
-        for x in self.transforms:
-            img, bbox = x(img, bbox)
-        img = self.pre_transform(img)
-        return img, bbox
-
     def __getitem__(self, idx):
         idd = self.idx_to_id[idx]
         c_image = self.images[idd]
-        # print(c_image, idx, self.validation, self.train)
-        # breakpoint()
-        image = np.asarray(cv2.imread(c_image["path"])[:, :, ::-1].copy(), dtype=np.float32)
+        img_path = c_image["path"]
+        img = Image.open(img_path).convert("RGB")
         annot = c_image["annotation"]
         bbox = annot["bbox"]
-        bbox.append(annot["new_category_id"])
-        bbox = np.asarray([bbox], dtype=np.float32)
+        boxes = bbox
 
-        image, bbox = self.transform(image.copy(), bbox.copy())
-        boxes = torch.as_tensor(bbox[:, :4], dtype=torch.float32)
         target = dict()
-        target["boxes"] = boxes
+        target["boxes"] = torch.as_tensor([boxes])
         target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
         target['image_id'] = torch.tensor([annot['image_id']])
-        target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+        target['area'] = torch.as_tensor([annot['area']])
         target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
 
-        return image, target
+        if self.transforms is not None:
+            img, target = self.transforms(img, target)
+
+        return img, target
+# %%
+# v = iNaturalistDataset(validation=True)
 
 
 # v = iNaturalistDataset(validation= True)
 # o = v[10]
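Review note on the `__getitem__` rewrite above: torchvision's detection models fix the target schema, and `boxes` is expected as a `FloatTensor[N, 4]` in `x1, y1, x2, y2` order with `0 <= x1 < x2` and `0 <= y1 < y2`. `torch.as_tensor([boxes])` produces the right `(1, 4)` shape but infers its dtype from the JSON values, so pinning the dtype is the safer construction. A minimal sketch of the target this dataset could build (the explicit dtypes are the suggested hardening, not what the diff itself does):

```python
import torch

def build_target(bbox_xyxy, label, image_id, area):
    # bbox_xyxy: [x1, y1, x2, y2] from the already-converted COCO annotation
    return {
        "boxes": torch.as_tensor([bbox_xyxy], dtype=torch.float32),  # shape (1, 4)
        "labels": torch.as_tensor([label], dtype=torch.int64),       # 0 is background
        "image_id": torch.tensor([image_id]),
        "area": torch.as_tensor([area], dtype=torch.float32),
        "iscrowd": torch.zeros((1,), dtype=torch.int64),
    }
```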
@@ -149,24 +138,45 @@ class iNaturalistDataset(torch.utils.data.Dataset):
 # plt.imshow(ox.permute([1,2,0]))
 # plt.savefig('crap2.png')
 
+def get_model(num_classes):
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+    num_classes = 2  # 1 class (person) + background
+    in_features = model.roi_heads.box_predictor.cls_score.in_features
+    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+    return model
+
+
+import transforms as T
+
+def get_transform(train):
+    transforms = []
+    transforms.append(T.ToTensor())
+    if train:
+        transforms.append(T.RandomHorizontalFlip(0.5))
+    return T.Compose(transforms)
+
+from engine import train_one_epoch, evaluate
+import utils
 # %%
 def run():
-    val_dataset = iNaturalistDataset(validation=True)
-    train_dataset = iNaturalistDataset(train=True)
+    val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=True))
+    train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=False))
 
 
     train_data_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
+        train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
     val_data_loader = torch.utils.data.DataLoader(
-        val_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
-    )
-    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
-        pretrained=True, num_classes=train_dataset.num_classes, progress=True
+        val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
 
+    import torchvision
+    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+    num_classes = 2
+
+
+    model = get_model(num_classes)
     model.to(device)
 
     # construct an optimizer
     params = [p for p in model.parameters() if p.requires_grad]
     optimizer = torch.optim.SGD(params, lr=0.005,
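Review note: three things in this hunk look worth a second glance. First, the transform wiring appears transposed: the validation set receives `get_transform(train=True)` (which includes the random flip) while the training set receives `get_transform(train=False)`. If that is unintentional, the presumably intended pairing would be:

```python
val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=False))
train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=True))
```

Second, `get_model` overwrites its `num_classes` argument with a hard-coded `2`, so the `train_dataset.num_classes` plumbing that the removed `fasterrcnn_resnet50_fpn(..., num_classes=...)` call used is now dead; that happens to be correct here (one bird species plus background), but it will silently break if more categories are re-enabled, and the "(person)" comment is a leftover from the torchvision tutorial. Third, `import transforms as T` rebinds `T` from `torchvision.transforms` to the local detection-reference module, whose `Compose`, `ToTensor`, and `RandomHorizontalFlip` take and return `(image, target)` pairs; that rebinding is what makes `self.transforms(img, target)` in `__getitem__` work.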
data_aug/__pycache__/bbox_util.cpython-39.pyc (new file): binary file not shown.
data_aug/__pycache__/data_aug.cpython-39.pyc (new file): binary file not shown.
data_aug/bbox_util.py (new file, 300 lines)
@@ -0,0 +1,300 @@
+import cv2
+import numpy as np
+
+
+def draw_rect(im, cords, color=None):
+    """Draw the rectangle on the image
+
+    Parameters
+    ----------
+    im : numpy.ndarray
+        numpy image
+    cords: numpy.ndarray
+        Numpy array containing bounding boxes of shape `N X 4` where N is the
+        number of bounding boxes and the bounding boxes are represented in the
+        format `x1 y1 x2 y2`
+
+    Returns
+    -------
+    numpy.ndarray
+        numpy image with bounding boxes drawn on it
+    """
+    im = im.copy()
+
+    cords = cords[:, :4]
+    cords = cords.reshape(-1, 4)
+    if not color:
+        color = [255, 255, 255]
+    for cord in cords:
+        pt1, pt2 = (cord[0], cord[1]), (cord[2], cord[3])
+        pt1 = int(pt1[0]), int(pt1[1])
+        pt2 = int(pt2[0]), int(pt2[1])
+        im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2]) / 200))
+    return im
+
+
+def bbox_area(bbox):
+    return (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
+
+
+def clip_box(bbox, clip_box, alpha):
+    """Clip the bounding boxes to the borders of an image
+
+    Parameters
+    ----------
+    bbox: numpy.ndarray
+        Numpy array containing bounding boxes of shape `N X 4` where N is the
+        number of bounding boxes and the bounding boxes are represented in the
+        format `x1 y1 x2 y2`
+    clip_box: numpy.ndarray
+        An array of shape (4,) specifying the diagonal co-ordinates of the image.
+        The coordinates are represented in the format `x1 y1 x2 y2`
+    alpha: float
+        If the fraction of a bounding box left in the image after being clipped is
+        less than `alpha`, the bounding box is dropped.
+
+    Returns
+    -------
+    numpy.ndarray
+        Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the
+        number of bounding boxes left after clipping, represented in the
+        format `x1 y1 x2 y2`
+    """
+    ar_ = (bbox_area(bbox))
+    x_min = np.maximum(bbox[:, 0], clip_box[0]).reshape(-1, 1)
+    y_min = np.maximum(bbox[:, 1], clip_box[1]).reshape(-1, 1)
+    x_max = np.minimum(bbox[:, 2], clip_box[2]).reshape(-1, 1)
+    y_max = np.minimum(bbox[:, 3], clip_box[3]).reshape(-1, 1)
+
+    bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:, 4:]))
+
+    delta_area = ((ar_ - bbox_area(bbox)) / ar_)
+
+    mask = (delta_area < (1 - alpha)).astype(int)
+
+    bbox = bbox[mask == 1, :]
+
+    return bbox
+
+
+def rotate_im(image, angle):
+    """Rotate the image.
+
+    Rotate the image such that the rotated image is enclosed inside the tightest
+    rectangle. The area not occupied by the pixels of the original image is colored
+    black.
+
+    Parameters
+    ----------
+    image : numpy.ndarray
+        numpy image
+    angle : float
+        angle by which the image is to be rotated
+
+    Returns
+    -------
+    numpy.ndarray
+        Rotated image
+    """
+    # grab the dimensions of the image and then determine the centre
+    (h, w) = image.shape[:2]
+    (cX, cY) = (w // 2, h // 2)
+
+    # grab the rotation matrix (applying the negative of the
+    # angle to rotate clockwise), then grab the sine and cosine
+    # (i.e., the rotation components of the matrix)
+    M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
+    cos = np.abs(M[0, 0])
+    sin = np.abs(M[0, 1])
+
+    # compute the new bounding dimensions of the image
+    nW = int((h * sin) + (w * cos))
+    nH = int((h * cos) + (w * sin))
+
+    # adjust the rotation matrix to take into account translation
+    M[0, 2] += (nW / 2) - cX
+    M[1, 2] += (nH / 2) - cY
+
+    # perform the actual rotation and return the image
+    image = cv2.warpAffine(image, M, (nW, nH))
+
+    # image = cv2.resize(image, (w,h))
+    return image
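Review note: a quick sanity check of the enclosing-rectangle arithmetic above, nW = h·|sin θ| + w·|cos θ| and nH = h·|cos θ| + w·|sin θ|: rotating a 200 × 100 image by 90° gives |sin θ| = 1 and |cos θ| = 0, so nW = 100 and nH = 200; the width and height swap, exactly as expected for a quarter turn.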
+
+def get_corners(bboxes):
+    """Get corners of bounding boxes
+
+    Parameters
+    ----------
+    bboxes: numpy.ndarray
+        Numpy array containing bounding boxes of shape `N X 4` where N is the
+        number of bounding boxes and the bounding boxes are represented in the
+        format `x1 y1 x2 y2`
+
+    Returns
+    -------
+    numpy.ndarray
+        Numpy array of shape `N x 8` containing N bounding boxes each described by their
+        corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
+    """
+    width = (bboxes[:, 2] - bboxes[:, 0]).reshape(-1, 1)
+    height = (bboxes[:, 3] - bboxes[:, 1]).reshape(-1, 1)
+
+    x1 = bboxes[:, 0].reshape(-1, 1)
+    y1 = bboxes[:, 1].reshape(-1, 1)
+
+    x2 = x1 + width
+    y2 = y1
+
+    x3 = x1
+    y3 = y1 + height
+
+    x4 = bboxes[:, 2].reshape(-1, 1)
+    y4 = bboxes[:, 3].reshape(-1, 1)
+
+    corners = np.hstack((x1, y1, x2, y2, x3, y3, x4, y4))
+
+    return corners
+
+
+def rotate_box(corners, angle, cx, cy, h, w):
+    """Rotate the bounding box.
+
+    Parameters
+    ----------
+    corners : numpy.ndarray
+        Numpy array of shape `N x 8` containing N bounding boxes each described by their
+        corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
+    angle : float
+        angle by which the image is to be rotated
+    cx : int
+        x coordinate of the center of image (about which the box will be rotated)
+    cy : int
+        y coordinate of the center of image (about which the box will be rotated)
+    h : int
+        height of the image
+    w : int
+        width of the image
+
+    Returns
+    -------
+    numpy.ndarray
+        Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their
+        corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
+    """
+    corners = corners.reshape(-1, 2)
+    corners = np.hstack((corners, np.ones((corners.shape[0], 1), dtype=type(corners[0][0]))))
+
+    M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
+
+    cos = np.abs(M[0, 0])
+    sin = np.abs(M[0, 1])
+
+    nW = int((h * sin) + (w * cos))
+    nH = int((h * cos) + (w * sin))
+    # adjust the rotation matrix to take into account translation
+    M[0, 2] += (nW / 2) - cx
+    M[1, 2] += (nH / 2) - cy
+    # prepare the vector to be transformed
+    calculated = np.dot(M, corners.T).T
+
+    calculated = calculated.reshape(-1, 8)
+
+    return calculated
+
+
+def get_enclosing_box(corners):
+    """Get an enclosing box for rotated corners of a bounding box
+
+    Parameters
+    ----------
+    corners : numpy.ndarray
+        Numpy array of shape `N x 8` containing N bounding boxes each described by their
+        corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
+
+    Returns
+    -------
+    numpy.ndarray
+        Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the
+        number of bounding boxes and the bounding boxes are represented in the
+        format `x1 y1 x2 y2`
+    """
+    x_ = corners[:, [0, 2, 4, 6]]
+    y_ = corners[:, [1, 3, 5, 7]]
+
+    xmin = np.min(x_, 1).reshape(-1, 1)
+    ymin = np.min(y_, 1).reshape(-1, 1)
+    xmax = np.max(x_, 1).reshape(-1, 1)
+    ymax = np.max(y_, 1).reshape(-1, 1)
+
+    final = np.hstack((xmin, ymin, xmax, ymax, corners[:, 8:]))
+
+    return final
+
+
+def letterbox_image(img, inp_dim):
+    """Resize image with unchanged aspect ratio using padding
+
+    Parameters
+    ----------
+    img : numpy.ndarray
+        Image
+    inp_dim: int
+        side of the square to which the image is resized (letterboxed)
+
+    Returns
+    -------
+    numpy.ndarray:
+        Resized image
+    """
+    inp_dim = (inp_dim, inp_dim)
+    img_w, img_h = img.shape[1], img.shape[0]
+    w, h = inp_dim
+    new_w = int(img_w * min(w / img_w, h / img_h))
+    new_h = int(img_h * min(w / img_w, h / img_h))
+    resized_image = cv2.resize(img, (new_w, new_h))
+
+    canvas = np.full((inp_dim[1], inp_dim[0], 3), 0)
+
+    canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image
+
+    return canvas
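Review note: a hypothetical smoke test for the helpers above (the image path and box values are placeholders, and bbox_util.py is assumed to be on the import path):

```python
import cv2
import numpy as np
from bbox_util import clip_box, draw_rect

img = cv2.imread("sample.jpg")                        # H x W x 3, BGR
boxes = np.array([[20.0, 30.0, 260.0, 200.0, 1.0]])   # x1, y1, x2, y2, class id
boxes = clip_box(boxes, [0, 0, img.shape[1], img.shape[0]], 0.25)
cv2.imwrite("sample_boxes.png", draw_rect(img, boxes))
```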
data_aug/data_aug.py (new file, 856 lines)
@@ -0,0 +1,856 @@
+import random
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import sys
+import os
+from bbox_util import *
+
+lib_path = os.path.join(os.path.realpath("."), "data_aug")
+sys.path.append(lib_path)
+
+
+class RandomHorizontalFlip(object):
+
+    """Randomly horizontally flips the image with the probability *p*
+
+    Parameters
+    ----------
+    p: float
+        The probability with which the image is flipped
+
+    Returns
+    -------
+    numpy.ndarray
+        Flipped image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img, bboxes):
+        img_center = np.array(img.shape[:2])[::-1] / 2
+        img_center = np.hstack((img_center, img_center))
+        if random.random() < self.p:
+            img = img[:, ::-1, :]
+            bboxes[:, [0, 2]] += 2 * (img_center[[0, 2]] - bboxes[:, [0, 2]])
+
+            box_w = abs(bboxes[:, 0] - bboxes[:, 2])
+
+            bboxes[:, 0] -= box_w
+            bboxes[:, 2] += box_w
+
+        return img, bboxes
+
+
+class HorizontalFlip(object):
+
+    """Horizontally flips the image (always; see RandomHorizontalFlip for the
+    probabilistic variant)
+
+    Returns
+    -------
+    numpy.ndarray
+        Flipped image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, img, bboxes):
+        img_center = np.array(img.shape[:2])[::-1] / 2
+        img_center = np.hstack((img_center, img_center))
+
+        img = img[:, ::-1, :]
+        bboxes[:, [0, 2]] += 2 * (img_center[[0, 2]] - bboxes[:, [0, 2]])
+
+        box_w = abs(bboxes[:, 0] - bboxes[:, 2])
+
+        bboxes[:, 0] -= box_w
+        bboxes[:, 2] += box_w
+
+        return img, bboxes
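Review note: the mirrored-box arithmetic in both flip transforms checks out by hand. For an image of width 100 (so the x-centre is 50) and a box (x1, y1, x2, y2) = (10, 20, 40, 80), adding 2·(50 − x) to each x coordinate gives (90, 20, 60, 80), whose x pair is now reversed; subtracting the box width 30 from x1 and adding it to x2 restores the ordering and yields (60, 20, 90, 80), the mirror image of the original box.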
+
+class RandomScale(object):
+    """Randomly scales an image
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    scale: float or tuple(float)
+        if **float**, the image is scaled by a factor drawn
+        randomly from the range (1 - `scale`, 1 + `scale`). If **tuple**,
+        `scale` is drawn randomly from values specified by the tuple
+
+    Returns
+    -------
+    numpy.ndarray
+        Scaled image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, scale=0.2, diff=False):
+        self.scale = scale
+
+        if type(self.scale) == tuple:
+            assert len(self.scale) == 2, "Invalid range"
+            assert self.scale[0] > -1, "Scale factor can't be less than -1"
+            assert self.scale[1] > -1, "Scale factor can't be less than -1"
+        else:
+            assert self.scale > 0, "Please input a positive float"
+            self.scale = (max(-1, -self.scale), self.scale)
+
+        self.diff = diff
+
+    def __call__(self, img, bboxes):
+        # choose a random factor to scale by
+        img_shape = img.shape
+
+        if self.diff:
+            scale_x = random.uniform(*self.scale)
+            scale_y = random.uniform(*self.scale)
+        else:
+            scale_x = random.uniform(*self.scale)
+            scale_y = scale_x
+
+        resize_scale_x = 1 + scale_x
+        resize_scale_y = 1 + scale_y
+
+        img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)
+
+        bboxes[:, :4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
+
+        canvas = np.zeros(img_shape, dtype=np.uint8)
+
+        y_lim = int(min(resize_scale_y, 1) * img_shape[0])
+        x_lim = int(min(resize_scale_x, 1) * img_shape[1])
+
+        canvas[:y_lim, :x_lim, :] = img[:y_lim, :x_lim, :]
+
+        img = canvas
+        bboxes = clip_box(bboxes, [0, 0, 1 + img_shape[1], img_shape[0]], 0.25)
+
+        return img, bboxes
+
+
+class Scale(object):
+    """Scales the image
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    scale_x: float
+        The factor by which the image is scaled horizontally
+    scale_y: float
+        The factor by which the image is scaled vertically
+
+    Returns
+    -------
+    numpy.ndarray
+        Scaled image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, scale_x=0.2, scale_y=0.2):
+        self.scale_x = scale_x
+        self.scale_y = scale_y
+
+    def __call__(self, img, bboxes):
+        img_shape = img.shape
+
+        resize_scale_x = 1 + self.scale_x
+        resize_scale_y = 1 + self.scale_y
+
+        img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)
+
+        bboxes[:, :4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
+
+        canvas = np.zeros(img_shape, dtype=np.uint8)
+
+        y_lim = int(min(resize_scale_y, 1) * img_shape[0])
+        x_lim = int(min(resize_scale_x, 1) * img_shape[1])
+
+        canvas[:y_lim, :x_lim, :] = img[:y_lim, :x_lim, :]
+
+        img = canvas
+        bboxes = clip_box(bboxes, [0, 0, 1 + img_shape[1], img_shape[0]], 0.25)
+
+        return img, bboxes
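Review note: a minimal usage sketch for the scaling transforms (array shapes are illustrative, data_aug.py is assumed importable, and boxes carry a trailing class column as the clipping helper expects):

```python
import numpy as np
from data_aug import RandomScale

img = np.zeros((480, 640, 3), dtype=np.uint8)
bboxes = np.array([[100.0, 120.0, 300.0, 360.0, 1.0]])  # x1, y1, x2, y2, class
img, bboxes = RandomScale(scale=0.2, diff=True)(img, bboxes)
# clip_box(..., alpha=0.25) may drop rows, so bboxes can come back shorter
```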
+
+
+class RandomTranslate(object):
+    """Randomly translates the image
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    translate: float or tuple(float)
+        if **float**, the image is translated by a factor drawn
+        randomly from the range (-`translate`, `translate`). If **tuple**,
+        `translate` is drawn randomly from values specified by the tuple
+
+    Returns
+    -------
+    numpy.ndarray
+        Translated image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, translate=0.2, diff=False):
+        self.translate = translate
+
+        if type(self.translate) == tuple:
+            assert len(self.translate) == 2, "Invalid range"
+            assert self.translate[0] > 0 & self.translate[0] < 1
+            assert self.translate[1] > 0 & self.translate[1] < 1
+        else:
+            assert self.translate > 0 and self.translate < 1
+            self.translate = (-self.translate, self.translate)
+
+        self.diff = diff
+
+    def __call__(self, img, bboxes):
+        img_shape = img.shape
+
+        # translate the image
+
+        # percentage of the dimension of the image to translate
+        translate_factor_x = random.uniform(*self.translate)
+        translate_factor_y = random.uniform(*self.translate)
+
+        if not self.diff:
+            translate_factor_y = translate_factor_x
+
+        canvas = np.zeros(img_shape).astype(np.uint8)
+
+        corner_x = int(translate_factor_x * img.shape[1])
+        corner_y = int(translate_factor_y * img.shape[0])
+
+        # change the origin to the top-left corner of the translated box
+        orig_box_cords = [max(0, corner_y), max(corner_x, 0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1], corner_x + img.shape[1])]
+
+        mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]), :]
+        canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3], :] = mask
+        img = canvas
+
+        bboxes[:, :4] += [corner_x, corner_y, corner_x, corner_y]
+
+        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)
+
+        return img, bboxes
+
+
+class Translate(object):
+    """Translates the image by fixed factors
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    translate_x: float
+        The fraction of the image width by which the image is translated
+    translate_y: float
+        The fraction of the image height by which the image is translated
+
+    Returns
+    -------
+    numpy.ndarray
+        Translated image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, translate_x=0.2, translate_y=0.2, diff=False):
+        self.translate_x = translate_x
+        self.translate_y = translate_y
+
+        assert self.translate_x > 0 and self.translate_x < 1
+        assert self.translate_y > 0 and self.translate_y < 1
+
+    def __call__(self, img, bboxes):
+        img_shape = img.shape
+
+        # translate the image by a fixed fraction of its dimensions
+        translate_factor_x = self.translate_x
+        translate_factor_y = self.translate_y
+
+        canvas = np.zeros(img_shape).astype(np.uint8)
+
+        # get the top-left corner co-ordinates of the shifted box
+        corner_x = int(translate_factor_x * img.shape[1])
+        corner_y = int(translate_factor_y * img.shape[0])
+
+        # change the origin to the top-left corner of the translated box
+        orig_box_cords = [max(0, corner_y), max(corner_x, 0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1], corner_x + img.shape[1])]
+
+        mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]), :]
+        canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3], :] = mask
+        img = canvas
+
+        bboxes[:, :4] += [corner_x, corner_y, corner_x, corner_y]
+
+        bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)
+
+        return img, bboxes
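Review note: the window arithmetic in the translate transforms is easiest to verify with a concrete positive shift. On a 640 × 480 image with translate factors of 0.25 both ways, corner_x = 160 and corner_y = 120; the mask slice selects img[0:360, 0:480], the canvas receives it at canvas[120:480, 160:640], and every box is shifted by (+160, +120) before clip_box drops anything that kept less than 25% of its area.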
+
+
+class RandomRotate(object):
+    """Randomly rotates an image
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    angle: float or tuple(float)
+        if **float**, the image is rotated by an angle drawn
+        randomly from the range (-`angle`, `angle`). If **tuple**,
+        the `angle` is drawn randomly from values specified by the tuple
+
+    Returns
+    -------
+    numpy.ndarray
+        Rotated image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, angle=10):
+        self.angle = angle
+
+        if type(self.angle) == tuple:
+            assert len(self.angle) == 2, "Invalid range"
+        else:
+            self.angle = (-self.angle, self.angle)
+
+    def __call__(self, img, bboxes):
+        angle = random.uniform(*self.angle)
+
+        w, h = img.shape[1], img.shape[0]
+        cx, cy = w // 2, h // 2
+
+        img = rotate_im(img, angle)
+
+        corners = get_corners(bboxes)
+
+        corners = np.hstack((corners, bboxes[:, 4:]))
+
+        corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)
+
+        new_bbox = get_enclosing_box(corners)
+
+        scale_factor_x = img.shape[1] / w
+        scale_factor_y = img.shape[0] / h
+
+        img = cv2.resize(img, (w, h))
+
+        new_bbox[:, :4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
+
+        bboxes = new_bbox
+
+        bboxes = clip_box(bboxes, [0, 0, w, h], 0.25)
+
+        return img, bboxes
+
+
+class Rotate(object):
+    """Rotates an image by a fixed angle
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    angle: float
+        The angle by which the image is to be rotated
+
+    Returns
+    -------
+    numpy.ndarray
+        Rotated image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, angle):
+        self.angle = angle
+
+    def __call__(self, img, bboxes):
+        """
+        Args:
+            img (numpy.ndarray): Image to be rotated.
+
+        Returns:
+            numpy.ndarray: Rotated image.
+        """
+        angle = self.angle
+        print(self.angle)  # debug print left in by the author
+
+        w, h = img.shape[1], img.shape[0]
+        cx, cy = w // 2, h // 2
+
+        corners = get_corners(bboxes)
+
+        corners = np.hstack((corners, bboxes[:, 4:]))
+
+        img = rotate_im(img, angle)
+
+        corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)
+
+        new_bbox = get_enclosing_box(corners)
+
+        scale_factor_x = img.shape[1] / w
+        scale_factor_y = img.shape[0] / h
+
+        img = cv2.resize(img, (w, h))
+
+        new_bbox[:, :4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
+
+        bboxes = new_bbox
+
+        bboxes = clip_box(bboxes, [0, 0, w, h], 0.25)
+
+        return img, bboxes
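Review note: both rotation transforms share the same box pipeline from bbox_util: expand each box to its four corners, rotate the corners with the same matrix used for the image, then take the axis-aligned enclosing box (which is generally looser than the original). A self-contained sketch with illustrative numbers:

```python
import numpy as np
from bbox_util import get_corners, get_enclosing_box, rotate_box

bboxes = np.array([[100.0, 100.0, 200.0, 150.0, 1.0]])  # x1, y1, x2, y2, class
w, h, angle = 640, 480, 15.0
corners = np.hstack((get_corners(bboxes), bboxes[:, 4:]))
corners[:, :8] = rotate_box(corners[:, :8], angle, w // 2, h // 2, h, w)
rotated = get_enclosing_box(corners)  # still x1, y1, x2, y2, class
```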
+
+
+class RandomShear(object):
+    """Randomly shears an image in the horizontal direction
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    shear_factor: float or tuple(float)
+        if **float**, the image is sheared horizontally by a factor drawn
+        randomly from the range (-`shear_factor`, `shear_factor`). If **tuple**,
+        the `shear_factor` is drawn randomly from values specified by the tuple
+
+    Returns
+    -------
+    numpy.ndarray
+        Sheared image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, shear_factor=0.2):
+        self.shear_factor = shear_factor
+
+        if type(self.shear_factor) == tuple:
+            assert len(self.shear_factor) == 2, "Invalid range for scaling factor"
+        else:
+            self.shear_factor = (-self.shear_factor, self.shear_factor)
+
+        shear_factor = random.uniform(*self.shear_factor)  # dead statement; only the draw in __call__ matters
+
+    def __call__(self, img, bboxes):
+        shear_factor = random.uniform(*self.shear_factor)
+
+        w, h = img.shape[1], img.shape[0]
+
+        if shear_factor < 0:
+            img, bboxes = HorizontalFlip()(img, bboxes)
+
+        M = np.array([[1, abs(shear_factor), 0], [0, 1, 0]])
+
+        nW = img.shape[1] + abs(shear_factor * img.shape[0])
+
+        bboxes[:, [0, 2]] += ((bboxes[:, [1, 3]]) * abs(shear_factor)).astype(int)
+
+        img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
+
+        if shear_factor < 0:
+            img, bboxes = HorizontalFlip()(img, bboxes)
+
+        img = cv2.resize(img, (w, h))
+
+        scale_factor_x = nW / w
+
+        bboxes[:, :4] /= [scale_factor_x, 1, scale_factor_x, 1]
+
+        return img, bboxes
+
+
+class Shear(object):
+    """Shears an image in the horizontal direction
+
+    Bounding boxes which have less than 25% of their area remaining in the
+    transformed image are dropped. The resolution is maintained, and any
+    remaining area is filled with black.
+
+    Parameters
+    ----------
+    shear_factor: float
+        Factor by which the image is sheared in the x-direction
+
+    Returns
+    -------
+    numpy.ndarray
+        Sheared image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Transformed bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, shear_factor=0.2):
+        self.shear_factor = shear_factor
+
+    def __call__(self, img, bboxes):
+        shear_factor = self.shear_factor
+        if shear_factor < 0:
+            img, bboxes = HorizontalFlip()(img, bboxes)
+
+        M = np.array([[1, abs(shear_factor), 0], [0, 1, 0]])
+
+        nW = img.shape[1] + abs(shear_factor * img.shape[0])
+
+        bboxes[:, [0, 2]] += ((bboxes[:, [1, 3]]) * abs(shear_factor)).astype(int)
+
+        img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
+
+        if shear_factor < 0:
+            img, bboxes = HorizontalFlip()(img, bboxes)
+
+        return img, bboxes
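Review note: the shear maps each point by x′ = x + |m|·y, with m the shear factor, so for m = 0.2 a box edge at y = 100 moves 20 px to the right while an edge at y = 0 stays put; that is why the box update adds `bboxes[:, [1, 3]] * abs(shear_factor)` to the x coordinates. Negative factors are handled by flipping, shearing with |m|, and flipping back. Note also the dead `random.uniform` draw at the end of `RandomShear.__init__`, flagged in the comment above; only the draw inside `__call__` has any effect.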
+
+class Resize(object):
+    """Resize the image in accordance with the `image_letter_box` function in darknet
+
+    The aspect ratio is maintained. The longer side is resized to the input
+    size of the network, while the remaining space on the shorter side is filled
+    with black color. **This should be the last transform.**
+
+    Parameters
+    ----------
+    inp_dim : int
+        the size to which the image will be resized (the output is a square
+        of side `inp_dim`, letterboxed with black)
+
+    Returns
+    -------
+    numpy.ndarray
+        Resized image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Resized bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, inp_dim):
+        self.inp_dim = inp_dim
+
+    def __call__(self, img, bboxes):
+        w, h = img.shape[1], img.shape[0]
+        img = letterbox_image(img, self.inp_dim)
+
+        scale = min(self.inp_dim / h, self.inp_dim / w)
+        bboxes[:, :4] *= (scale)
+
+        new_w = scale * w
+        new_h = scale * h
+        inp_dim = self.inp_dim
+
+        del_h = (inp_dim - new_h) / 2
+        del_w = (inp_dim - new_w) / 2
+
+        add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int)
+
+        bboxes[:, :4] += add_matrix
+
+        img = img.astype(np.uint8)
+
+        return img, bboxes
+
+
+class RandomHSV(object):
+    """HSV transform to vary hue, saturation and brightness
+
+    Hue has a range of 0-179.
+    Saturation and brightness have a range of 0-255.
+    Choose the amount by which you want to change the above quantities accordingly.
+
+    Parameters
+    ----------
+    hue : None or int or tuple(int)
+        If None, the hue of the image is left unchanged. If int,
+        a random int is uniformly sampled from (-hue, hue) and added to the
+        hue of the image. If tuple, the int is sampled from the range
+        specified by the tuple.
+    saturation : None or int or tuple(int)
+        If None, the saturation of the image is left unchanged. If int,
+        a random int is uniformly sampled from (-saturation, saturation)
+        and added to the saturation of the image. If tuple, the int is sampled
+        from the range specified by the tuple.
+    brightness : None or int or tuple(int)
+        If None, the brightness of the image is left unchanged. If int,
+        a random int is uniformly sampled from (-brightness, brightness)
+        and added to the brightness of the image. If tuple, the int is sampled
+        from the range specified by the tuple.
+
+    Returns
+    -------
+    numpy.ndarray
+        Transformed image in the numpy format of shape `HxWxC`
+    numpy.ndarray
+        Bounding box co-ordinates of the format `n x 4` where n is the
+        number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
+    """
+
+    def __init__(self, hue=None, saturation=None, brightness=None):
+        if hue:
+            self.hue = hue
+        else:
+            self.hue = 0
+
+        if saturation:
+            self.saturation = saturation
+        else:
+            self.saturation = 0
+
+        if brightness:
+            self.brightness = brightness
+        else:
+            self.brightness = 0
+
+        if type(self.hue) != tuple:
+            self.hue = (-self.hue, self.hue)
+
+        if type(self.saturation) != tuple:
+            self.saturation = (-self.saturation, self.saturation)
+
+        if type(brightness) != tuple:
+            self.brightness = (-self.brightness, self.brightness)
+
+    def __call__(self, img, bboxes):
+        hue = random.randint(*self.hue)
+        saturation = random.randint(*self.saturation)
+        brightness = random.randint(*self.brightness)
+
+        img = img.astype(int)
+
+        a = np.array([hue, saturation, brightness]).astype(int)
+        img += np.reshape(a, (1, 1, 3))
+
+        img = np.clip(img, 0, 255)
+        img[:, :, 0] = np.clip(img[:, :, 0], 0, 179)
+
+        img = img.astype(np.uint8)
+
+        return img, bboxes
+
+
+class Sequence(object):
+
+    """Initialise Sequence object
+
+    Apply a sequence of transformations to the images/boxes.
+
+    Parameters
+    ----------
+    augmentations : list
+        List containing Transformation objects in the sequence they are to be
+        applied
+    probs : int or list
+        If **int**, the probability with which each of the transformations will
+        be applied. If **list**, the length must be equal to *augmentations*.
+        Each element of this list is the probability with which each
+        corresponding transformation is applied
+
+    Returns
+    -------
+    Sequence
+        Sequence object
+    """
+
+    def __init__(self, augmentations, probs=1):
+        self.augmentations = augmentations
+        self.probs = probs
+
+    def __call__(self, images, bboxes):
+        for i, augmentation in enumerate(self.augmentations):
+            if type(self.probs) == list:
+                prob = self.probs[i]
+            else:
+                prob = self.probs
+
+            if random.random() < prob:
+                images, bboxes = augmentation(images, bboxes)
+        return images, bboxes
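Review note: a hypothetical end-to-end pipeline combining the classes above (assuming data_aug.py is importable; Resize deliberately comes last, per its docstring):

```python
import numpy as np
from data_aug import RandomHSV, RandomHorizontalFlip, Resize, Sequence

aug = Sequence(
    [RandomHorizontalFlip(0.5), RandomHSV(hue=20, saturation=30, brightness=30), Resize(600)],
    probs=[1.0, 0.5, 1.0],
)
img = np.zeros((480, 640, 3), dtype=np.uint8)            # placeholder image
bboxes = np.array([[100.0, 120.0, 300.0, 360.0, 1.0]])   # x1, y1, x2, y2, class
img, bboxes = aug(img, bboxes)                           # img is letterboxed to 600 x 600
```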
model.py (new file, 221 lines)
@@ -0,0 +1,221 @@
+# %%
+import os
+import numpy as np
+import torch
+from PIL import Image
+import torchvision
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+import json
+import torch
+from torchvision import transforms as T
+import numpy as np
+import os
+import sys
+
+sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
+sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
+from references.detection import utils, engine
+import data_aug
+import bbox_util
+
+
+def get_transform(train):
+    transforms = []
+    transforms.append(T.ToTensor())
+    if train:
+        transforms.append(T.RandomHorizontalFlip(0.5))
+    return T.Compose(transforms)
+
+PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
+
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
+def create_map(list_in, from_key, to_key):
+    cmap = dict()
+    for l in list_in:
+        cmap[l[from_key]] = l[to_key]
+    return cmap
+
+
+class iNaturalistDataset(torch.utils.data.Dataset):
+    def __init__(self, validation=False, train=False, transforms=None):
+
+        self.validation = validation
+        self.train = train
+        self.transforms = transforms
+
+        if validation:
+            json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
+        elif train:
+            json_path = os.path.join(
+                PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
+            )
+
+        with open(json_path, "r") as rj:
+            f = json.load(rj)
+
+        categories = list()
+        image_info = dict()
+
+        for category in f["categories"]:
+            if category["supercategory"] == "Aves":
+                if category['name'] in ['Archilochus colubris']:  # ,'Icterus galbula']:
+                    print(category['name'])
+                    categories.append(category)
+
+        categories = sorted(categories, key=lambda k: k["name"])
+        for idx, cat in enumerate(categories):
+            cat["new_id"] = idx + 1
+
+        orig_to_new_id = create_map(categories, "id", "new_id")
+
+        for annot in f["annotations"]:
+            if annot["category_id"] in orig_to_new_id:
+                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
+                id = annot["image_id"]
+                if id not in image_info:
+                    image_info[id] = dict()
+
+                # convert COCO (x, y, width, height) to (x1, y1, x2, y2)
+                annot["bbox"][2] += annot["bbox"][0]
+                annot["bbox"][3] += annot["bbox"][1]
+                image_info[id]["annotation"] = annot
+
+        for img in f["images"]:
+            id = img["id"]
+            path = os.path.join(PATH_ROOT, img["file_name"])
+            height = img["height"]
+            width = img["width"]
+            if id in image_info:
+                image_info[id].update({"path": path, "height": height, "width": width})
+
+        for idx, (id, im_in) in enumerate(image_info.items()):
+            im_in["idx"] = idx
+        self.images = image_info
+        self.categories = categories
+        self.idx_to_id = [x for x in self.images]
+        self.num_classes = len(self.categories) + 1
+        self.num_samples = len(self.images)
+
+    def __len__(self):
+        return self.num_samples
+
+    def __getitem__(self, idx):
+        idd = self.idx_to_id[idx]
+        c_image = self.images[idd]
+        img_path = c_image["path"]
+        img = Image.open(img_path).convert("RGB")
+
+        annot = c_image["annotation"]
+        bbox = annot["bbox"]
+        boxes = bbox
+        target = dict()
+        target["boxes"] = torch.as_tensor([boxes])
+        target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
+        target['image_id'] = torch.tensor([annot['image_id']])
+        target['area'] = torch.as_tensor([annot['area']])
+        target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
+
+        if self.transforms is not None:
+            img, target = self.transforms(img, target)
+
+        return img, target
+# %%
+# v = iNaturalistDataset(validation=True)
+
+
+# v = iNaturalistDataset(validation= True)
+# o = v[10]
+# %%
+# oimage = t.tensor(o[0]*255, dtype=t.uint8)
+# import matplotlib.pyplot as plt
+# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
+# plt.imshow(ox.permute([1,2,0]))
+# plt.savefig('crap2.png')
+
+def get_model(num_classes):
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+    num_classes = 2  # 1 class (person) + background
+    in_features = model.roi_heads.box_predictor.cls_score.in_features
+    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+    return model
+
+
+import transforms as T
+
+def get_transform(train):
+    transforms = []
+    transforms.append(T.ToTensor())
+    if train:
+        transforms.append(T.RandomHorizontalFlip(0.5))
+    return T.Compose(transforms)
+
+from engine import train_one_epoch, evaluate
+import utils
+# %%
+def run():
+    val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=True))
+    train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=False))
+
+    train_data_loader = torch.utils.data.DataLoader(
+        train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
+    )
+    val_data_loader = torch.utils.data.DataLoader(
+        val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
+    )
+
+    import torchvision
+    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+    num_classes = 2
+
+
+    model = get_model(num_classes)
+    model.to(device)
+    # construct an optimizer
+    params = [p for p in model.parameters() if p.requires_grad]
+    optimizer = torch.optim.SGD(params, lr=0.005,
+                                momentum=0.9, weight_decay=0.0005)
+    # and a learning rate scheduler
+    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
+                                                   step_size=3,
+                                                   gamma=0.1)
+
+    # let's train it for 10 epochs
+    num_epochs = 10
+
+    for epoch in range(num_epochs):
+        print(epoch)
+        torch.save(model.state_dict(), 'model_weights_start_' + str(epoch) + '.pth')
+        # train for one epoch, printing every 10 iterations
+        engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
+        torch.save(model.state_dict(), 'model_weights_post_train_' + str(epoch) + '.pth')
+        # update the learning rate
+        lr_scheduler.step()
+        torch.save(model.state_dict(), 'model_weights_post_step_' + str(epoch) + '.pth')
+        # evaluate on the test dataset
+        engine.evaluate(model, val_data_loader, device=device)
+
+
+if __name__ == "__main__":
+    run()
+
+
+# # %%
+# json_path = os.path.join(
+#     PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
+# )
+# with open(json_path, "r") as rj:
+#     f = json.load(rj)
+
+# # %%
+# image_id: 2358
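Review note: the training loop above writes three checkpoints per epoch (start, post-train, post-step). Restoring one later follows the usual state-dict pattern; the epoch index here is illustrative:

```python
model = get_model(num_classes=2)
model.load_state_dict(torch.load('model_weights_post_train_9.pth', map_location=device))
model.to(device)
model.eval()
```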
82
old_files/README.md
Normal file
@@ -0,0 +1,82 @@
# Object detection reference training scripts

This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, and provide baseline
training and evaluation scripts to quickly bootstrap research.

To execute the example commands below you must install the following:

```
cython
pycocotools
matplotlib
```

You must modify the following flags:

`--data-path=/path/to/coco/dataset`

`--nproc_per_node=<number_of_gpus_available>`

Unless otherwise noted, all models have been trained on 8x V100 GPUs.
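
For a quick single-GPU sanity check (a sketch, not one of the reference recipes; the learning rate follows the 0.02/8*$NGPU scaling rule from `train.py`, i.e. 0.0025 for one GPU):

```
python train.py --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.0025
```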

### Faster R-CNN ResNet-50 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### Faster R-CNN MobileNetV3-Large FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### Faster R-CNN MobileNetV3-Large 320 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### RetinaNet
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model retinanet_resnet50_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01
```

### SSD300 VGG16
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model ssd300_vgg16 --epochs 120\
    --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\
    --weight-decay 0.0005 --data-augmentation ssd
```

### SSDlite320 MobileNetV3-Large
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\
    --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\
    --weight-decay 0.00004 --data-augmentation ssdlite
```


### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```


### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\
    --lr-steps 36 43 --aspect-ratio-group-factor 3
```
352
old_files/coco_eval.py
Normal file
@@ -0,0 +1,352 @@
import json
import tempfile

import numpy as np
import copy
import time
import torch
import torch._six

from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util

from collections import defaultdict

import utils


class CocoEvaluator(object):
    def __init__(self, coco_gt, iou_types):
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            coco_dt = loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            img_ids, eval_imgs = evaluate(coco_eval)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        for iou_type in self.iou_types:
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        elif iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        elif iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"]
            labels = prediction["labels"]
            masks = prediction["masks"]

            masks = masks > 0.5

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            rles = [
                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                for mask in masks
            ]
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "segmentation": rle,
                        "score": scores[k],
                    }
                    for k, rle in enumerate(rles)
                ]
            )
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"]
            keypoints = keypoints.flatten(start_dim=1).tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        'keypoints': keypoint,
                        "score": scores[k],
                    }
                    for k, keypoint in enumerate(keypoints)
                ]
            )
        return coco_results

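
# A minimal usage sketch (assumes `coco_gt` from coco_utils.get_coco_api_from_dataset
# and `res` mapping image_id -> prediction dict, as built in engine.evaluate):
# evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
# evaluator.update(res)
# evaluator.synchronize_between_processes()
# evaluator.accumulate()
# evaluator.summarize()
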
def convert_to_xywh(boxes):
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)

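
# e.g. an xyxy box maps to COCO's xywh convention:
# convert_to_xywh(torch.tensor([[10., 20., 30., 60.]]))  # -> tensor([[10., 20., 20., 40.]])
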
def merge(img_ids, eval_imgs):
    all_img_ids = utils.all_gather(img_ids)
    all_eval_imgs = utils.all_gather(eval_imgs)

    merged_img_ids = []
    for p in all_img_ids:
        merged_img_ids.extend(p)

    merged_eval_imgs = []
    for p in all_eval_imgs:
        merged_eval_imgs.append(p)

    merged_img_ids = np.array(merged_img_ids)
    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)

    # keep only unique (and in sorted order) images
    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
    merged_eval_imgs = merged_eval_imgs[..., idx]

    return merged_img_ids, merged_eval_imgs


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    img_ids, eval_imgs = merge(img_ids, eval_imgs)
    img_ids = list(img_ids)
    eval_imgs = list(eval_imgs.flatten())

    coco_eval.evalImgs = eval_imgs
    coco_eval.params.imgIds = img_ids
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################

# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions

def createIndex(self):
    # create index
    # print('creating index...')
    anns, cats, imgs = {}, {}, {}
    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
    if 'annotations' in self.dataset:
        for ann in self.dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann

    if 'images' in self.dataset:
        for img in self.dataset['images']:
            imgs[img['id']] = img

    if 'categories' in self.dataset:
        for cat in self.dataset['categories']:
            cats[cat['id']] = cat

    if 'annotations' in self.dataset and 'categories' in self.dataset:
        for ann in self.dataset['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])

    # print('index created!')

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats


maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load result file and return a result api object.
    Args:
        self (obj): coco object with ground truth annotations
        resFile (str): file name of result file
    Returns:
        res (obj): result api object
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # print('Loading and preparing results...')
    # tic = time.time()
    if isinstance(resFile, torch._six.string_classes):
        anns = json.load(open(resFile))
    elif type(resFile) == np.ndarray:
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results is not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'
    if 'caption' in anns[0]:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for id, ann in enumerate(anns):
            ann['id'] = id + 1
    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = id + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = id + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in anns[0]:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for id, ann in enumerate(anns):
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = id + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
    # print('DONE (t={:0.2f}s)'.format(time.time()- tic))

    res.dataset['annotations'] = anns
    createIndex(res)
    return res


def evaluate(self):
    '''
    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
    :return: None
    '''
    # tic = time.time()
    # print('Running per image evaluation...')
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if p.useSegm is not None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    # print('Evaluate annotation type *{}*'.format(p.iouType))
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    self._prepare()
    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {
        (imgId, catId): computeIoU(imgId, catId)
        for imgId in p.imgIds
        for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    evalImgs = [
        evaluateImg(imgId, catId, areaRng, maxDet)
        for catId in catIds
        for areaRng in p.areaRng
        for imgId in p.imgIds
    ]
    # this is NOT in the pycocotools code, but could be done outside
    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    # toc = time.time()
    # print('DONE (t={:0.2f}s).'.format(toc-tic))
    return p.imgIds, evalImgs


#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
252
old_files/coco_utils.py
Normal file
@@ -0,0 +1,252 @@
import copy
import os
from PIL import Image

import torch
import torch.utils.data
import torchvision

from pycocotools import mask as coco_mask
from pycocotools.coco import COCO

import transforms as T


class FilterAndRemapCocoCategories(object):
    def __init__(self, categories, remap=True):
        self.categories = categories
        self.remap = remap

    def __call__(self, image, target):
        anno = target["annotations"]
        anno = [obj for obj in anno if obj["category_id"] in self.categories]
        if not self.remap:
            target["annotations"] = anno
            return image, target
        anno = copy.deepcopy(anno)
        for obj in anno:
            obj["category_id"] = self.categories.index(obj["category_id"])
        target["annotations"] = anno
        return image, target


def convert_coco_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks

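
# e.g. a single triangular polygon on a 4x4 canvas decodes to one (4, 4) mask,
# stacked into shape (1, 4, 4):
# convert_coco_poly_to_mask([[[0.0, 0.0, 3.0, 0.0, 3.0, 3.0]]], height=4, width=4).shape
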
class ConvertCocoPolysToMask(object):
    def __call__(self, image, target):
        w, h = image.size

        image_id = target["image_id"]
        image_id = torch.tensor([image_id])

        anno = target["annotations"]

        anno = [obj for obj in anno if obj['iscrowd'] == 0]

        boxes = [obj["bbox"] for obj in anno]
        # guard against no boxes via resizing
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # convert COCO xywh boxes to xyxy and clip to the image
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = [obj["category_id"] for obj in anno]
        classes = torch.tensor(classes, dtype=torch.int64)

        segmentations = [obj["segmentation"] for obj in anno]
        masks = convert_coco_poly_to_mask(segmentations, h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                keypoints = keypoints.view(num_keypoints, -1, 3)

        # drop degenerate boxes (zero width or height)
        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        masks = masks[keep]
        if keypoints is not None:
            keypoints = keypoints[keep]

        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        target["masks"] = masks
        target["image_id"] = image_id
        if keypoints is not None:
            target["keypoints"] = keypoints

        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
        target["area"] = area
        target["iscrowd"] = iscrowd

        return image, target


def _coco_remove_images_without_annotations(dataset, cat_list=None):
    def _has_only_empty_bbox(anno):
        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)

    def _count_visible_keypoints(anno):
        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)

    min_keypoints_per_image = 10

    def _has_valid_annotation(anno):
        # if it's empty, there is no annotation
        if len(anno) == 0:
            return False
        # if all boxes have close to zero area, there is no annotation
        if _has_only_empty_bbox(anno):
            return False
        # keypoint tasks have a slightly different criterion for deciding
        # whether an annotation is valid
        if "keypoints" not in anno[0]:
            return True
        # for keypoint detection tasks, only consider valid those images
        # containing at least min_keypoints_per_image
        if _count_visible_keypoints(anno) >= min_keypoints_per_image:
            return True
        return False

    assert isinstance(dataset, torchvision.datasets.CocoDetection)
    ids = []
    for ds_idx, img_id in enumerate(dataset.ids):
        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        anno = dataset.coco.loadAnns(ann_ids)
        if cat_list:
            anno = [obj for obj in anno if obj["category_id"] in cat_list]
        if _has_valid_annotation(anno):
            ids.append(ds_idx)

    dataset = torch.utils.data.Subset(dataset, ids)
    return dataset


def convert_to_coco_api(ds):
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    ann_id = 1
    dataset = {'images': [], 'categories': [], 'annotations': []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        # targets = ds.get_annotations(img_idx)
        img, targets = ds[img_idx]
        image_id = targets["image_id"].item()
        img_dict = {}
        img_dict['id'] = image_id
        img_dict['height'] = img.shape[-2]
        img_dict['width'] = img.shape[-1]
        dataset['images'].append(img_dict)
        bboxes = targets["boxes"]
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        iscrowd = targets['iscrowd'].tolist()
        if 'masks' in targets:
            masks = targets['masks']
            # make masks Fortran contiguous for coco_mask
            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
        if 'keypoints' in targets:
            keypoints = targets['keypoints']
            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
        num_objs = len(bboxes)
        for i in range(num_objs):
            ann = {}
            ann['image_id'] = image_id
            ann['bbox'] = bboxes[i]
            ann['category_id'] = labels[i]
            categories.add(labels[i])
            ann['area'] = areas[i]
            ann['iscrowd'] = iscrowd[i]
            ann['id'] = ann_id
            if 'masks' in targets:
                ann["segmentation"] = coco_mask.encode(masks[i].numpy())
            if 'keypoints' in targets:
                ann['keypoints'] = keypoints[i]
                ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
            dataset['annotations'].append(ann)
            ann_id += 1
    dataset['categories'] = [{'id': i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds


def get_coco_api_from_dataset(dataset):
    for _ in range(10):
        if isinstance(dataset, torchvision.datasets.CocoDetection):
            break
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
    if isinstance(dataset, torchvision.datasets.CocoDetection):
        return dataset.coco
    return convert_to_coco_api(dataset)


class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, ann_file, transforms):
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self._transforms = transforms

    def __getitem__(self, idx):
        img, target = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        target = dict(image_id=image_id, annotations=target)
        if self._transforms is not None:
            img, target = self._transforms(img, target)
        return img, target


def get_coco(root, image_set, transforms, mode='instances'):
    anno_file_template = "{}_{}2017.json"
    PATHS = {
        "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
        "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
        # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
    }

    t = [ConvertCocoPolysToMask()]

    if transforms is not None:
        t.append(transforms)
    transforms = T.Compose(t)

    img_folder, ann_file = PATHS[image_set]
    img_folder = os.path.join(root, img_folder)
    ann_file = os.path.join(root, ann_file)

    dataset = CocoDetection(img_folder, ann_file, transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])

    return dataset


def get_coco_kp(root, image_set, transforms):
    return get_coco(root, image_set, transforms, mode="person_keypoints")
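
# A minimal usage sketch (assumes the standard COCO 2017 layout under /data/coco):
# dataset = get_coco("/data/coco", image_set="val", transforms=None)
# img, target = dataset[0]  # PIL image plus a boxes/labels/masks target dict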
BIN
old_files/crap.png
Normal file
Binary file not shown. (357 KiB)

BIN
old_files/crap2.png
Normal file
Binary file not shown. (357 KiB)
221
old_files/data.py
Normal file
@@ -0,0 +1,221 @@
# %%
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import json
from torchvision import transforms as T
import sys

sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
from references.detection import utils, engine
import data_aug
import bbox_util


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


PATH_ROOT = r"D:\ishan\ml\inaturalist\\"

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


def create_map(list_in, from_key, to_key):
    cmap = dict()
    for l in list_in:
        cmap[l[from_key]] = l[to_key]
    return cmap

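
# e.g. create_map([{"id": 11, "new_id": 1}, {"id": 42, "new_id": 2}], "id", "new_id")
# -> {11: 1, 42: 2}
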
class iNaturalistDataset(torch.utils.data.Dataset):
    def __init__(self, validation=False, train=False, transforms=None):

        self.validation = validation
        self.train = train
        self.transforms = transforms

        if validation:
            json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
        elif train:
            json_path = os.path.join(
                PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
            )

        with open(json_path, "r") as rj:
            f = json.load(rj)

        categories = list()
        image_info = dict()

        for category in f["categories"]:
            if category["supercategory"] == "Aves":
                if category['name'] in ['Archilochus colubris']:  # ,'Icterus galbula']:
                    print(category['name'])
                    categories.append(category)

        categories = sorted(categories, key=lambda k: k["name"])
        for idx, cat in enumerate(categories):
            cat["new_id"] = idx + 1

        orig_to_new_id = create_map(categories, "id", "new_id")

        for annot in f["annotations"]:
            if annot["category_id"] in orig_to_new_id:
                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
                id = annot["image_id"]
                if id not in image_info:
                    image_info[id] = dict()

                # convert COCO xywh boxes to xyxy
                annot["bbox"][2] += annot["bbox"][0]
                annot["bbox"][3] += annot["bbox"][1]
                image_info[id]["annotation"] = annot

        for img in f["images"]:
            id = img["id"]
            path = os.path.join(PATH_ROOT, img["file_name"])
            height = img["height"]
            width = img["width"]
            if id in image_info:
                image_info[id].update({"path": path, "height": height, "width": width})

        for idx, (id, im_in) in enumerate(image_info.items()):
            im_in["idx"] = idx
        self.images = image_info
        self.categories = categories
        self.idx_to_id = [x for x in self.images]
        self.num_classes = len(self.categories) + 1
        self.num_samples = len(self.images)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        idd = self.idx_to_id[idx]
        c_image = self.images[idd]
        img_path = c_image["path"]
        img = Image.open(img_path).convert("RGB")

        annot = c_image["annotation"]
        bbox = annot["bbox"]
        boxes = bbox
        target = dict()
        target["boxes"] = torch.as_tensor([boxes])
        target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
        target['image_id'] = torch.tensor([annot['image_id']])
        target['area'] = torch.as_tensor([annot['area']])
        target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

# %%
# v = iNaturalistDataset(validation=True)


# v = iNaturalistDataset(validation= True)
# o = v[10]
# %%
# oimage = t.tensor(o[0]*255, dtype=t.uint8)
# import matplotlib.pyplot as plt
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
# plt.imshow(ox.permute([1,2,0]))
# plt.savefig('crap2.png')


def get_model(num_classes):
    # load a model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # replace the pre-trained head; num_classes includes the background class
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# re-import the detection reference transforms; this shadows the earlier
# torchvision import of T, so get_transform now works on (image, target) pairs
import transforms as T


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


from engine import train_one_epoch, evaluate
import utils

# %%
def run():
    # augmentation only on the training set
    val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=False))
    train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=True))

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
    )

    num_classes = 2  # one bird species + background

    model = get_model(num_classes)
    model.to(device)
    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    # let's train it for 10 epochs
    num_epochs = 10

    for epoch in range(num_epochs):
        print(epoch)
        torch.save(model.state_dict(), 'model_weights_start_' + str(epoch) + '.pth')
        # train for one epoch, printing every 10 iterations
        engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
        torch.save(model.state_dict(), 'model_weights_post_train_' + str(epoch) + '.pth')
        # update the learning rate
        lr_scheduler.step()
        torch.save(model.state_dict(), 'model_weights_post_step_' + str(epoch) + '.pth')
        # evaluate on the validation dataset
        engine.evaluate(model, val_data_loader, device=device)


if __name__ == "__main__":
    run()


# # %%
# json_path = os.path.join(
#     PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
# )
# with open(json_path, "r") as rj:
#     f = json.load(rj)


# # %%
# image_id: 2358

110
old_files/engine.py
Normal file
@@ -0,0 +1,110 @@
import math
import sys
import time
import torch

import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils


def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
        iou_types.append("segm")
    if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
        iou_types.append("keypoints")
    return iou_types


@torch.no_grad()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
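
# A minimal usage sketch (assumes a model and DataLoaders built as in data.py's run()):
# logger = train_one_epoch(model, optimizer, train_data_loader, device, epoch=0, print_freq=10)
# coco_evaluator = evaluate(model, val_data_loader, device=device)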
28
old_files/fine_tuned.py
Normal file
@@ -0,0 +1,28 @@

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# read the channel count of the last feature layer (1280 for mobilenet_v2)
backbone.out_channels = list(backbone.modules())[-3].out_channels

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
# %%
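
# A hedged inference sketch for the custom-backbone model above (dummy input;
# `import torch` assumed, not part of the original file):
# model.eval()
# with torch.no_grad():
#     predictions = model([torch.rand(3, 300, 400)])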
@@ -15,19 +15,19 @@ sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
 
-model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 3)
+model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 2)
 
-model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_1.pth'))
+model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_9.pth'))
 model.eval()
 model.to('cuda')
 
 #img = r'D:\ishan\ml\inaturalist\test2017\00a903fa1d23b2f8f28248e81bc1c4a4.jpg'
 #img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210617093423.mp4_023.jpg'
 img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210609095848.mp4_133.jpg'
 import random
 rtdir = r'J:\hummingbird_imagenet\hummingbird'
 
 ff = os.listdir(rtdir)
 
 # %%
 img = os.path.join(rtdir,random.choice(ff))
 image = cv2.imread(img)[:,:,::-1].copy()
 o = T.ToTensor()(image).cuda()
@@ -43,7 +43,6 @@ plt.imshow(ox.permute([1,2,0]))
 # %%
 from data import iNaturalistDataset
 sd = iNaturalistDataset(validation=True)
-# # %%
233
old_files/train.py
Normal file
@@ -0,0 +1,233 @@
r"""PyTorch Detection Training.
|
||||||
|
|
||||||
|
To run in a multi-gpu environment, use the distributed launcher::
|
||||||
|
|
||||||
|
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
|
||||||
|
train.py ... --world-size $NGPU
|
||||||
|
|
||||||
|
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
|
||||||
|
--lr 0.02 --batch-size 2 --world-size 8
|
||||||
|
If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
|
||||||
|
|
||||||
|
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
|
||||||
|
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
|
||||||
|
|
||||||
|
Also, if you train Keypoint R-CNN, the default hyperparameters are
|
||||||
|
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
|
||||||
|
Because the number of images is smaller in the person keypoint subset of COCO,
|
||||||
|
the number of epochs should be adapted so that we have the same number of iterations.
|
||||||
|
"""
|
||||||
|
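# Worked example of the scaling rule above: on 2 GPUs, lr = 0.02 / 8 * 2 = 0.005.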
import datetime
import os
import time

import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco, get_coco_kp

from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate

import presets
import utils


def get_dataset(name, image_set, transform, data_path):
    paths = {
        "coco": (data_path, get_coco, 91),
        "coco_kp": (data_path, get_coco_kp, 2)
    }
    p, ds_fn, num_classes = paths[name]

    ds = ds_fn(p, image_set=image_set, transforms=transform)
    return ds, num_classes


def get_transform(train, data_augmentation):
    return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()


def get_args_parser(add_help=True):
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)

    parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
    parser.add_argument('--dataset', default='coco', help='dataset')
    parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
    parser.add_argument('--device', default='cuda', help='device')
    parser.add_argument('-b', '--batch-size', default=2, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
    parser.add_argument('--lr-step-size', default=8, type=int,
                        help='decrease lr every step-size epochs (multisteplr scheduler only)')
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs (multisteplr scheduler only)')
    parser.add_argument('--lr-gamma', default=0.1, type=float,
                        help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
    parser.add_argument('--trainable-backbone-layers', default=None, type=int,
                        help='number of trainable layers of backbone')
    parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
    parser.add_argument(
        "--sync-bn",
        dest="sync_bn",
        help="Use sync batch norm",
        action="store_true",
    )
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )
    parser.add_argument(
        "--pretrained",
        dest="pretrained",
        help="Use pre-trained models from the modelzoo",
        action="store_true",
    )

    # distributed training parameters
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    return parser


def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")

    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
                                       args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    kwargs = {
        "trainable_backbone_layers": args.trainable_backbone_layers
    }
    if "rcnn" in args.model:
        if args.rpn_score_thresh is not None:
            kwargs["rpn_score_thresh"] = args.rpn_score_thresh
    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
                                                              **kwargs)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    args.lr_scheduler = args.lr_scheduler.lower()
    if args.lr_scheduler == 'multisteplr':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
    elif args.lr_scheduler == 'cosineannealinglr':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    else:
        raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
                           "are supported.".format(args.lr_scheduler))

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
239
old_files/transforms.py
Normal file
@@ -0,0 +1,239 @@
import torch
import torchvision

from torch import nn, Tensor
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from typing import List, Tuple, Dict, Optional


def _flip_coco_person_keypoints(kps, width):
    flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    flipped_data = kps[:, flip_inds]
    flipped_data[..., 0] = width - flipped_data[..., 0]
    # Maintain COCO convention that if visibility == 0, then x, y = 0
    inds = flipped_data[..., 2] == 0
    flipped_data[inds] = 0
    return flipped_data


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

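
# A minimal usage sketch: unlike torchvision.transforms.Compose, this Compose
# threads the (image, target) pair through every transform:
# transform = Compose([ToTensor(), RandomHorizontalFlip(0.5)])
# image, target = transform(image, target)
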
class RandomHorizontalFlip(T.RandomHorizontalFlip):
|
||||||
|
def forward(self, image: Tensor,
|
||||||
|
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
|
||||||
|
if torch.rand(1) < self.p:
|
||||||
|
image = F.hflip(image)
|
||||||
|
if target is not None:
|
||||||
|
width, _ = F._get_image_size(image)
|
||||||
|
target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
|
||||||
|
if "masks" in target:
|
||||||
|
target["masks"] = target["masks"].flip(-1)
|
||||||
|
if "keypoints" in target:
|
||||||
|
keypoints = target["keypoints"]
|
||||||
|
keypoints = _flip_coco_person_keypoints(keypoints, width)
|
||||||
|
target["keypoints"] = keypoints
|
||||||
|
return image, target
|
||||||
|
|
||||||
|
|
||||||
|
class ToTensor(nn.Module):
    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        image = F.to_tensor(image)
        return image, target
class RandomIoUCrop(nn.Module):
    def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
                 max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
        super().__init__()
        # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        if sampler_options is None:
            sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
        self.options = sampler_options
        self.trials = trials

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if target is None:
            raise ValueError("The targets can't be None for this transform.")

        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        orig_w, orig_h = F._get_image_size(image)

        while True:
            # sample an option
            idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
            min_jaccard_overlap = self.options[idx]
            if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave as-is option
                return image, target

            for _ in range(self.trials):
                # check the aspect ratio limitations
                r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
                new_w = int(orig_w * r[0])
                new_h = int(orig_h * r[1])
                aspect_ratio = new_w / new_h
                if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
                    continue

                # check for 0 area crops
                r = torch.rand(2)
                left = int((orig_w - new_w) * r[0])
                top = int((orig_h - new_h) * r[1])
                right = left + new_w
                bottom = top + new_h
                if left == right or top == bottom:
                    continue

                # check for any valid boxes with centers within the crop area
                cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
                cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
                is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
                if not is_within_crop_area.any():
                    continue

                # check at least 1 box with jaccard limitations
                boxes = target["boxes"][is_within_crop_area]
                ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
                                                                         dtype=boxes.dtype, device=boxes.device))
                if ious.max() < min_jaccard_overlap:
                    continue

                # keep only valid boxes and perform cropping
                target["boxes"] = boxes
                target["labels"] = target["labels"][is_within_crop_area]
                target["boxes"][:, 0::2] -= left
                target["boxes"][:, 1::2] -= top
                target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
                target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
                image = F.crop(image, top, left, new_h, new_w)

                return image, target
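Each loop iteration draws a minimum-IoU requirement from self.options, with the 1.0 entry acting as the leave-as-is escape hatch, so the call always terminates. A minimal smoke test (all values made up):

    crop = RandomIoUCrop()
    image = torch.rand(3, 300, 300)
    target = {"boxes": torch.tensor([[50., 50., 150., 150.]]),
              "labels": torch.tensor([1])}
    image, target = crop(image, target)  # surviving boxes are shifted and clamped to the crop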
class RandomZoomOut(nn.Module):
    def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
        super().__init__()
        if fill is None:
            fill = [0., 0., 0.]
        self.fill = fill
        self.side_range = side_range
        if side_range[0] < 1. or side_range[0] > side_range[1]:
            raise ValueError("Invalid canvas side range provided {}.".format(side_range))
        self.p = p

    @torch.jit.unused
    def _get_fill_value(self, is_pil):
        # type: (bool) -> int
        # We fake the type to make it work on JIT
        return tuple(int(x) for x in self.fill) if is_pil else 0

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        if torch.rand(1) < self.p:
            return image, target

        orig_w, orig_h = F._get_image_size(image)

        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
        canvas_width = int(orig_w * r)
        canvas_height = int(orig_h * r)

        r = torch.rand(2)
        left = int((canvas_width - orig_w) * r[0])
        top = int((canvas_height - orig_h) * r[1])
        right = canvas_width - (left + orig_w)
        bottom = canvas_height - (top + orig_h)

        if torch.jit.is_scripting():
            fill = 0
        else:
            fill = self._get_fill_value(F._is_pil_image(image))

        image = F.pad(image, [left, top, right, bottom], fill=fill)
        if isinstance(image, torch.Tensor):
            v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
            image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
                image[..., :, (left + orig_w):] = v

        if target is not None:
            target["boxes"][:, 0::2] += left
            target["boxes"][:, 1::2] += top

        return image, target
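Because the original image is pasted at offset (left, top) on the enlarged canvas, boxes only need that constant shift. For example, with left=30 and top=50:

    boxes = torch.tensor([[10., 10., 40., 40.]])
    boxes[:, 0::2] += 30  # x1, x2
    boxes[:, 1::2] += 50  # y1, y2
    assert boxes.tolist() == [[40.0, 60.0, 70.0, 90.0]]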
class RandomPhotometricDistort(nn.Module):
    def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
                 hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
        super().__init__()
        self._brightness = T.ColorJitter(brightness=brightness)
        self._contrast = T.ColorJitter(contrast=contrast)
        self._hue = T.ColorJitter(hue=hue)
        self._saturation = T.ColorJitter(saturation=saturation)
        self.p = p

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        r = torch.rand(7)

        if r[0] < self.p:
            image = self._brightness(image)

        contrast_before = r[1] < 0.5
        if contrast_before:
            if r[2] < self.p:
                image = self._contrast(image)

        if r[3] < self.p:
            image = self._saturation(image)

        if r[4] < self.p:
            image = self._hue(image)

        if not contrast_before:
            if r[5] < self.p:
                image = self._contrast(image)

        if r[6] < self.p:
            channels = F._get_image_num_channels(image)
            permutation = torch.randperm(channels)

            is_pil = F._is_pil_image(image)
            if is_pil:
                image = F.to_tensor(image)
            image = image[..., permutation, :, :]
            if is_pil:
                image = F.to_pil_image(image)

        return image, target
295
old_files/utils.py
Normal file
@@ -0,0 +1,295 @@
from collections import defaultdict, deque
import datetime
import errno
import os
import time

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)
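Illustrative use of the meter (made-up loss values): the window median smooths per-iteration noise while global_avg tracks the whole run:

    meter = SmoothedValue(window_size=20)
    for loss in [0.9, 0.7, 0.8]:
        meter.update(loss)
    print(meter)  # default fmt prints median then global average, e.g. 0.8000 (0.8000)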
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    data_list = [None] * world_size
    dist.all_gather_object(data_list, data)
    return data_list
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
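A sketch of one way to use it, averaging a detection loss dict across ranks (single-process here, where the function is a no-op; with an initialized process group the values are averaged over all ranks):

    loss_dict = {'loss_classifier': torch.tensor(0.6), 'loss_box_reg': torch.tensor(0.2)}
    loss_dict_reduced = reduce_dict(loss_dict)
    losses_reduced = sum(loss for loss in loss_dict_reduced.values())  # tensor(0.8000)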
class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))
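Illustrative use with a dummy iterable: log_every wraps any sized iterable, yields its items unchanged, and prints smoothed timings plus meter values every print_freq steps:

    logger = MetricLogger(delimiter="  ")
    for step in logger.log_every(range(100), print_freq=10, header='Epoch: [0]'):
        logger.update(loss=0.5)  # normally a per-step tensor or float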
def collate_fn(batch):
    return tuple(zip(*batch))
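Detection batches mix variably sized images with per-image target dicts, so the default stacking collate cannot be used; this one simply transposes the list of (image, target) pairs. A toy illustration:

    batch = [('img0', {'boxes': 0}), ('img1', {'boxes': 1})]
    images, targets = collate_fn(batch)
    assert images == ('img0', 'img1')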
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
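The LambdaLR multiplier ramps linearly from warmup_factor up to 1 over warmup_iters steps. A minimal hook-up with a hypothetical one-parameter optimizer:

    param = torch.nn.Parameter(torch.zeros(1))
    opt = torch.optim.SGD([param], lr=0.02)
    sched = warmup_lr_scheduler(opt, warmup_iters=1000, warmup_factor=0.001)
    # at creation the lr is 0.02 * 0.001; it reaches the full 0.02 by iteration 1000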
def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print
def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)
def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
233
train.py
@@ -1,233 +0,0 @@
r"""PyTorch Detection Training.

To run in a multi-gpu environment, use the distributed launcher::

    python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
        train.py ... --world-size $NGPU

The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
    --lr 0.02 --batch-size 2 --world-size 8
If you use a different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.

On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
    --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3

Also, if you train Keypoint R-CNN, the default hyperparameters are
    --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
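The linear-scaling rule from the docstring in one line (e.g. two gpus):

    lr = 0.02 / 8 * 2  # -> 0.005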
import datetime
import os
import time

import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco, get_coco_kp

from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate

import presets
import utils
def get_dataset(name, image_set, transform, data_path):
    paths = {
        "coco": (data_path, get_coco, 91),
        "coco_kp": (data_path, get_coco_kp, 2)
    }
    p, ds_fn, num_classes = paths[name]

    ds = ds_fn(p, image_set=image_set, transforms=transform)
    return ds, num_classes
def get_transform(train, data_augmentation):
    return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
def get_args_parser(add_help=True):
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)

    parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
    parser.add_argument('--dataset', default='coco', help='dataset')
    parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
    parser.add_argument('--device', default='cuda', help='device')
    parser.add_argument('-b', '--batch-size', default=2, type=int,
                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument('--epochs', default=26, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--lr', default=0.02, type=float,
                        help='initial learning rate, 0.02 is the default value for training '
                             'on 8 gpus and 2 images_per_gpu')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
    parser.add_argument('--lr-step-size', default=8, type=int,
                        help='decrease lr every step-size epochs (multisteplr scheduler only)')
    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
                        help='decrease lr every step-size epochs (multisteplr scheduler only)')
    parser.add_argument('--lr-gamma', default=0.1, type=float,
                        help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
    parser.add_argument('--trainable-backbone-layers', default=None, type=int,
                        help='number of trainable layers of backbone')
    parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
    parser.add_argument(
        "--sync-bn",
        dest="sync_bn",
        help="Use sync batch norm",
        action="store_true",
    )
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )
    parser.add_argument(
        "--pretrained",
        dest="pretrained",
        help="Use pre-trained models from the modelzoo",
        action="store_true",
    )

    # distributed training parameters
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    return parser
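For reference, a single-gpu invocation assembled from the defaults above and the docstring's recommended R-CNN settings (the data path is illustrative):

    python train.py --data-path /datasets01/COCO/022719/ --model maskrcnn_resnet50_fpn \
        --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3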
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")

    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
                                       args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    kwargs = {
        "trainable_backbone_layers": args.trainable_backbone_layers
    }
    if "rcnn" in args.model:
        if args.rpn_score_thresh is not None:
            kwargs["rpn_score_thresh"] = args.rpn_score_thresh
    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
                                                              **kwargs)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    args.lr_scheduler = args.lr_scheduler.lower()
    if args.lr_scheduler == 'multisteplr':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
    elif args.lr_scheduler == 'cosineannealinglr':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    else:
        raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
                           "are supported.".format(args.lr_scheduler))

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
1
vision
Submodule
Submodule vision added at a83b9a17e4