diff --git a/.gitignore b/.gitignore index 936da7d..b845ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.pth +example diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..324d91c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/__pycache__/coco_eval.cpython-39.pyc b/__pycache__/coco_eval.cpython-39.pyc new file mode 100644 index 0000000..b0ed73a Binary files /dev/null and b/__pycache__/coco_eval.cpython-39.pyc differ diff --git a/__pycache__/coco_utils.cpython-39.pyc b/__pycache__/coco_utils.cpython-39.pyc new file mode 100644 index 0000000..d3730f1 Binary files /dev/null and b/__pycache__/coco_utils.cpython-39.pyc differ diff --git a/__pycache__/data.cpython-39.pyc b/__pycache__/data.cpython-39.pyc new file mode 100644 index 0000000..e8f6917 Binary files /dev/null and b/__pycache__/data.cpython-39.pyc differ diff --git a/__pycache__/engine.cpython-39.pyc b/__pycache__/engine.cpython-39.pyc new file mode 100644 index 0000000..0b7cfdc Binary files /dev/null and b/__pycache__/engine.cpython-39.pyc differ diff --git a/__pycache__/transforms.cpython-39.pyc b/__pycache__/transforms.cpython-39.pyc new file mode 100644 index 0000000..138d151 Binary files /dev/null and b/__pycache__/transforms.cpython-39.pyc differ diff --git a/__pycache__/utils.cpython-39.pyc b/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000..0efb69b Binary files /dev/null and b/__pycache__/utils.cpython-39.pyc differ diff --git a/data.py b/data.py index bfcb7e1..8d00650 100644 --- a/data.py +++ b/data.py @@ -1,18 +1,19 @@ # %% +import os +import numpy as np +import torch +from PIL import Image import torchvision from torchvision.models.detection.faster_rcnn import FastRCNNPredictor -from collections import defaultdict as ddict +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor import json import torch -from torchvision import datasets, transforms as T -import cv2 +from torchvision import transforms as T import numpy as np import os import sys - - - sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") from references.detection import utils, engine @@ -28,9 +29,6 @@ def get_transform(train): transforms.append(T.RandomHorizontalFlip(0.5)) return T.Compose(transforms) - -IMAGE_MEAN = [0.485, 0.456, 0.406] -IMAGE_STD = [0.229, 0.224, 0.225] PATH_ROOT = r"D:\ishan\ml\inaturalist\\" device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') @@ -43,12 +41,12 @@ def create_map(list_in, from_key, to_key): class iNaturalistDataset(torch.utils.data.Dataset): - def __init__(self, validation=False, train=False): + def __init__(self, validation=False, train=False, transforms = None): self.validation = validation self.train = train + self.transforms = transforms - self.transforms = T.Compose([T.Resize(600, max_size=1024), T.ToTensor()]) if validation: json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json") @@ -65,7 +63,7 @@ class iNaturalistDataset(torch.utils.data.Dataset): for category in f["categories"]: if category["supercategory"] == "Aves": - if category['name'] in ['Archilochus colubris','Icterus galbula']: + if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']: print(category['name']) categories.append(category) @@ -101,44 +99,35 @@ class 
iNaturalistDataset(torch.utils.data.Dataset): self.idx_to_id = [x for x in self.images] self.num_classes = len(self.categories) + 1 self.num_samples = len(self.images) - self.transforms = [ - data_aug.RandomHorizontalFlip(0.5), - data_aug.Resize(600), - ] - self.pre_transform = T.Compose([T.ToTensor()])#],T.Normalize(mean=[0.485, 0.456, 0.406], - #std=[0.229, 0.224, 0.225])]) + def __len__(self): return self.num_samples - def transform(self, img, bbox): - - for x in self.transforms: - img, bbox = x(img, bbox) - img = self.pre_transform(img) - return img, bbox - def __getitem__(self, idx): idd = self.idx_to_id[idx] c_image = self.images[idd] - # print(c_image, idx, self.validation, self.train) - # breakpoint() - image = np.asarray(cv2.imread(c_image["path"])[:,:,::-1].copy(),dtype=np.float32) + img_path = c_image["path"] + img = Image.open(img_path).convert("RGB") + annot = c_image["annotation"] bbox = annot["bbox"] - bbox.append(annot["new_category_id"]) - bbox = np.asarray([bbox], dtype=np.float32) - - image, bbox = self.transform(image.copy(), bbox.copy()) - boxes = torch.as_tensor(bbox[:,:4], dtype=torch.float32) + boxes = bbox target = dict() - target["boxes"] = boxes + target["boxes"] = torch.as_tensor([boxes]) target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64) target['image_id'] = torch.tensor([annot['image_id']]) - target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) + target['area'] = torch.as_tensor([annot['area']]) target['iscrowd'] = torch.zeros((1,), dtype=torch.int64) - return image, target + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target +# %% +# v = iNaturalistDataset(validation=True) + # v = iNaturalistDataset(validation= True) # o = v[10] @@ -149,24 +138,45 @@ class iNaturalistDataset(torch.utils.data.Dataset): # plt.imshow(ox.permute([1,2,0])) # plt.savefig('crap2.png') +def get_model(num_classes): + model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) + num_classes = 2 # 1 class (person) + background + in_features = model.roi_heads.box_predictor.cls_score.in_features + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + return model + + +import transforms as T + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + +from engine import train_one_epoch, evaluate +import utils # %% def run(): - val_dataset = iNaturalistDataset(validation=True) - train_dataset = iNaturalistDataset(train=True) + val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True)) + train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False)) train_data_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn + train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn ) val_data_loader = torch.utils.data.DataLoader( - val_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn - ) - model = torchvision.models.detection.fasterrcnn_resnet50_fpn( - pretrained=True, num_classes=train_dataset.num_classes, progress=True + val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn ) + import torchvision + from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + num_classes = 2 + + + model = get_model(num_classes) 
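Replacing the direct `fasterrcnn_resnet50_fpn(num_classes=...)` construction with `get_model()` plus a plain target dict in `__getitem__` follows the standard torchvision fine-tuning recipe. A minimal smoke test of that input/target contract, assuming the torchvision 0.8+ detection API (the image and box values below are placeholders, not project data):

```
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model(num_classes):
    # same head swap as in the diff: keep the pretrained backbone/RPN,
    # replace the box predictor with one sized for our classes
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

model = get_model(num_classes=2)   # one bird species + background
model.train()
images = [torch.rand(3, 600, 800)]                          # list of CHW float tensors
targets = [{
    "boxes": torch.tensor([[100.0, 120.0, 300.0, 360.0]]),  # x1, y1, x2, y2
    "labels": torch.tensor([1], dtype=torch.int64),
    "image_id": torch.tensor([0]),
    "area": torch.tensor([200.0 * 240.0]),
    "iscrowd": torch.zeros((1,), dtype=torch.int64),
}]
loss_dict = model(images, targets)   # dict of RPN / box-head losses in train mode
print({k: float(v) for k, v in loss_dict.items()})
```

Running a real batch from `train_data_loader` through the same call is a cheap way to validate the dataset's target dicts before committing to a full training run.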
model.to(device) - # construct an optimizer params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005, diff --git a/data_aug/__pycache__/bbox_util.cpython-39.pyc b/data_aug/__pycache__/bbox_util.cpython-39.pyc new file mode 100644 index 0000000..6396696 Binary files /dev/null and b/data_aug/__pycache__/bbox_util.cpython-39.pyc differ diff --git a/data_aug/__pycache__/data_aug.cpython-39.pyc b/data_aug/__pycache__/data_aug.cpython-39.pyc new file mode 100644 index 0000000..9bd9a79 Binary files /dev/null and b/data_aug/__pycache__/data_aug.cpython-39.pyc differ diff --git a/data_aug/bbox_util.py b/data_aug/bbox_util.py new file mode 100644 index 0000000..67ab648 --- /dev/null +++ b/data_aug/bbox_util.py @@ -0,0 +1,300 @@ +import cv2 +import numpy as np + + +def draw_rect(im, cords, color = None): + """Draw the rectangle on the image + + Parameters + ---------- + + im : numpy.ndarray + numpy image + + cords: numpy.ndarray + Numpy array containing bounding boxes of shape `N X 4` where N is the + number of bounding boxes and the bounding boxes are represented in the + format `x1 y1 x2 y2` + + Returns + ------- + + numpy.ndarray + numpy image with bounding boxes drawn on it + + """ + + im = im.copy() + + cords = cords[:,:4] + cords = cords.reshape(-1,4) + if not color: + color = [255,255,255] + for cord in cords: + + pt1, pt2 = (cord[0], cord[1]) , (cord[2], cord[3]) + + pt1 = int(pt1[0]), int(pt1[1]) + pt2 = int(pt2[0]), int(pt2[1]) + + im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2])/200)) + return im + +def bbox_area(bbox): + return (bbox[:,2] - bbox[:,0])*(bbox[:,3] - bbox[:,1]) + +def clip_box(bbox, clip_box, alpha): + """Clip the bounding boxes to the borders of an image + + Parameters + ---------- + + bbox: numpy.ndarray + Numpy array containing bounding boxes of shape `N X 4` where N is the + number of bounding boxes and the bounding boxes are represented in the + format `x1 y1 x2 y2` + + clip_box: numpy.ndarray + An array of shape (4,) specifying the diagonal co-ordinates of the image + The coordinates are represented in the format `x1 y1 x2 y2` + + alpha: float + If the fraction of a bounding box left in the image after being clipped is + less than `alpha` the bounding box is dropped. + + Returns + ------- + + numpy.ndarray + Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the + number of bounding boxes left are being clipped and the bounding boxes are represented in the + format `x1 y1 x2 y2` + + """ + ar_ = (bbox_area(bbox)) + x_min = np.maximum(bbox[:,0], clip_box[0]).reshape(-1,1) + y_min = np.maximum(bbox[:,1], clip_box[1]).reshape(-1,1) + x_max = np.minimum(bbox[:,2], clip_box[2]).reshape(-1,1) + y_max = np.minimum(bbox[:,3], clip_box[3]).reshape(-1,1) + + bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:,4:])) + + delta_area = ((ar_ - bbox_area(bbox))/ar_) + + mask = (delta_area < (1 - alpha)).astype(int) + + bbox = bbox[mask == 1,:] + + + return bbox + + +def rotate_im(image, angle): + """Rotate the image. + + Rotate the image such that the rotated image is enclosed inside the tightest + rectangle. The area not occupied by the pixels of the original image is colored + black. 
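`clip_box`, defined just above, is what most of the geometric transforms in `data_aug.py` rely on: boxes are clipped to the image rectangle and dropped entirely once less than `alpha` of their original area survives. A small usage sketch with made-up coordinates (run from the `data_aug` directory, or with it on `sys.path`, so `bbox_util` imports):

```
import numpy as np
from bbox_util import clip_box

# boxes are rows of [x1, y1, x2, y2, class]
bboxes = np.array([[-50., -50., 100., 100., 1.],      # ~44% of its area is inside
                   [900., 900., 1100., 1100., 1.]])   # lies outside a 1024x768 image
img_rect = [0, 0, 1024, 768]                           # x1, y1, x2, y2 of the image

print(clip_box(bboxes, img_rect, alpha=0.25))
# first box is kept and clipped to [0, 0, 100, 100]; second box is dropped
```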
+ + Parameters + ---------- + + image : numpy.ndarray + numpy image + + angle : float + angle by which the image is to be rotated + + Returns + ------- + + numpy.ndarray + Rotated Image + + """ + # grab the dimensions of the image and then determine the + # centre + (h, w) = image.shape[:2] + (cX, cY) = (w // 2, h // 2) + + # grab the rotation matrix (applying the negative of the + # angle to rotate clockwise), then grab the sine and cosine + # (i.e., the rotation components of the matrix) + M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0) + cos = np.abs(M[0, 0]) + sin = np.abs(M[0, 1]) + + # compute the new bounding dimensions of the image + nW = int((h * sin) + (w * cos)) + nH = int((h * cos) + (w * sin)) + + # adjust the rotation matrix to take into account translation + M[0, 2] += (nW / 2) - cX + M[1, 2] += (nH / 2) - cY + + # perform the actual rotation and return the image + image = cv2.warpAffine(image, M, (nW, nH)) + +# image = cv2.resize(image, (w,h)) + return image + +def get_corners(bboxes): + + """Get corners of bounding boxes + + Parameters + ---------- + + bboxes: numpy.ndarray + Numpy array containing bounding boxes of shape `N X 4` where N is the + number of bounding boxes and the bounding boxes are represented in the + format `x1 y1 x2 y2` + + returns + ------- + + numpy.ndarray + Numpy array of shape `N x 8` containing N bounding boxes each described by their + corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` + + """ + width = (bboxes[:,2] - bboxes[:,0]).reshape(-1,1) + height = (bboxes[:,3] - bboxes[:,1]).reshape(-1,1) + + x1 = bboxes[:,0].reshape(-1,1) + y1 = bboxes[:,1].reshape(-1,1) + + x2 = x1 + width + y2 = y1 + + x3 = x1 + y3 = y1 + height + + x4 = bboxes[:,2].reshape(-1,1) + y4 = bboxes[:,3].reshape(-1,1) + + corners = np.hstack((x1,y1,x2,y2,x3,y3,x4,y4)) + + return corners + +def rotate_box(corners,angle, cx, cy, h, w): + + """Rotate the bounding box. 
+ + + Parameters + ---------- + + corners : numpy.ndarray + Numpy array of shape `N x 8` containing N bounding boxes each described by their + corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` + + angle : float + angle by which the image is to be rotated + + cx : int + x coordinate of the center of image (about which the box will be rotated) + + cy : int + y coordinate of the center of image (about which the box will be rotated) + + h : int + height of the image + + w : int + width of the image + + Returns + ------- + + numpy.ndarray + Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their + corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` + """ + + corners = corners.reshape(-1,2) + corners = np.hstack((corners, np.ones((corners.shape[0],1), dtype = type(corners[0][0])))) + + M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0) + + + cos = np.abs(M[0, 0]) + sin = np.abs(M[0, 1]) + + nW = int((h * sin) + (w * cos)) + nH = int((h * cos) + (w * sin)) + # adjust the rotation matrix to take into account translation + M[0, 2] += (nW / 2) - cx + M[1, 2] += (nH / 2) - cy + # Prepare the vector to be transformed + calculated = np.dot(M,corners.T).T + + calculated = calculated.reshape(-1,8) + + return calculated + + +def get_enclosing_box(corners): + """Get an enclosing box for ratated corners of a bounding box + + Parameters + ---------- + + corners : numpy.ndarray + Numpy array of shape `N x 8` containing N bounding boxes each described by their + corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4` + + Returns + ------- + + numpy.ndarray + Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the + number of bounding boxes and the bounding boxes are represented in the + format `x1 y1 x2 y2` + + """ + x_ = corners[:,[0,2,4,6]] + y_ = corners[:,[1,3,5,7]] + + xmin = np.min(x_,1).reshape(-1,1) + ymin = np.min(y_,1).reshape(-1,1) + xmax = np.max(x_,1).reshape(-1,1) + ymax = np.max(y_,1).reshape(-1,1) + + final = np.hstack((xmin, ymin, xmax, ymax,corners[:,8:])) + + return final + + +def letterbox_image(img, inp_dim): + '''resize image with unchanged aspect ratio using padding + + Parameters + ---------- + + img : numpy.ndarray + Image + + inp_dim: tuple(int) + shape of the reszied image + + Returns + ------- + + numpy.ndarray: + Resized image + + ''' + + inp_dim = (inp_dim, inp_dim) + img_w, img_h = img.shape[1], img.shape[0] + w, h = inp_dim + new_w = int(img_w * min(w/img_w, h/img_h)) + new_h = int(img_h * min(w/img_w, h/img_h)) + resized_image = cv2.resize(img, (new_w,new_h)) + + canvas = np.full((inp_dim[1], inp_dim[0], 3), 0) + + canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image + + return canvas \ No newline at end of file diff --git a/data_aug/data_aug.py b/data_aug/data_aug.py new file mode 100644 index 0000000..9b5262b --- /dev/null +++ b/data_aug/data_aug.py @@ -0,0 +1,856 @@ +import random +import numpy as np +import cv2 +import matplotlib.pyplot as plt +import sys +import os +from bbox_util import * + +lib_path = os.path.join(os.path.realpath("."), "data_aug") +sys.path.append(lib_path) + + +class RandomHorizontalFlip(object): + + """Randomly horizontally flips the Image with the probability *p* + + Parameters + ---------- + p: float + The probability with which the image is flipped + + + Returns + ------- + + numpy.ndaaray + Flipped image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 
represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img, bboxes): + img_center = np.array(img.shape[:2])[::-1]/2 + img_center = np.hstack((img_center, img_center)) + if random.random() < self.p: + img = img[:, ::-1, :] + bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]]) + + box_w = abs(bboxes[:, 0] - bboxes[:, 2]) + + bboxes[:, 0] -= box_w + bboxes[:, 2] += box_w + + return img, bboxes + + +class HorizontalFlip(object): + + """Randomly horizontally flips the Image with the probability *p* + + Parameters + ---------- + p: float + The probability with which the image is flipped + + + Returns + ------- + + numpy.ndaaray + Flipped image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self): + pass + + def __call__(self, img, bboxes): + img_center = np.array(img.shape[:2])[::-1]/2 + img_center = np.hstack((img_center, img_center)) + + img = img[:, ::-1, :] + bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]]) + + box_w = abs(bboxes[:, 0] - bboxes[:, 2]) + + bboxes[:, 0] -= box_w + bboxes[:, 2] += box_w + + return img, bboxes + + +class RandomScale(object): + """Randomly scales an image + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + scale: float or tuple(float) + if **float**, the image is scaled by a factor drawn + randomly from a range (1 - `scale` , 1 + `scale`). If **tuple**, + the `scale` is drawn randomly from values specified by the + tuple + + Returns + ------- + + numpy.ndaaray + Scaled image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, scale = 0.2, diff = False): + self.scale = scale + + + if type(self.scale) == tuple: + assert len(self.scale) == 2, "Invalid range" + assert self.scale[0] > -1, "Scale factor can't be less than -1" + assert self.scale[1] > -1, "Scale factor can't be less than -1" + else: + assert self.scale > 0, "Please input a positive float" + self.scale = (max(-1, -self.scale), self.scale) + + self.diff = diff + + + + def __call__(self, img, bboxes): + + + #Chose a random digit to scale by + + img_shape = img.shape + + if self.diff: + scale_x = random.uniform(*self.scale) + scale_y = random.uniform(*self.scale) + else: + scale_x = random.uniform(*self.scale) + scale_y = scale_x + + + + resize_scale_x = 1 + scale_x + resize_scale_y = 1 + scale_y + + img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y) + + bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y] + + + + canvas = np.zeros(img_shape, dtype = np.uint8) + + y_lim = int(min(resize_scale_y,1)*img_shape[0]) + x_lim = int(min(resize_scale_x,1)*img_shape[1]) + + + canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:] + + img = canvas + bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25) + + + return img, bboxes + + +class Scale(object): + """Scales the image + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. 
The resolution is maintained, and the remaining + area if any is filled by black color. + + + Parameters + ---------- + scale_x: float + The factor by which the image is scaled horizontally + + scale_y: float + The factor by which the image is scaled vertically + + Returns + ------- + + numpy.ndaaray + Scaled image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, scale_x = 0.2, scale_y = 0.2): + self.scale_x = scale_x + self.scale_y = scale_y + + + def __call__(self, img, bboxes): + + + #Chose a random digit to scale by + + img_shape = img.shape + + + resize_scale_x = 1 + self.scale_x + resize_scale_y = 1 + self.scale_y + + img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y) + + bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y] + + + + canvas = np.zeros(img_shape, dtype = np.uint8) + + y_lim = int(min(resize_scale_y,1)*img_shape[0]) + x_lim = int(min(resize_scale_x,1)*img_shape[1]) + + + canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:] + + img = canvas + bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25) + + + return img, bboxes + + +class RandomTranslate(object): + """Randomly Translates the image + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + translate: float or tuple(float) + if **float**, the image is translated by a factor drawn + randomly from a range (1 - `translate` , 1 + `translate`). If **tuple**, + `translate` is drawn randomly from values specified by the + tuple + + Returns + ------- + + numpy.ndaaray + Translated image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, translate = 0.2, diff = False): + self.translate = translate + + if type(self.translate) == tuple: + assert len(self.translate) == 2, "Invalid range" + assert self.translate[0] > 0 & self.translate[0] < 1 + assert self.translate[1] > 0 & self.translate[1] < 1 + + + else: + assert self.translate > 0 and self.translate < 1 + self.translate = (-self.translate, self.translate) + + + self.diff = diff + + def __call__(self, img, bboxes): + #Chose a random digit to scale by + img_shape = img.shape + + #translate the image + + #percentage of the dimension of the image to translate + translate_factor_x = random.uniform(*self.translate) + translate_factor_y = random.uniform(*self.translate) + + if not self.diff: + translate_factor_y = translate_factor_x + + canvas = np.zeros(img_shape).astype(np.uint8) + + + corner_x = int(translate_factor_x*img.shape[1]) + corner_y = int(translate_factor_y*img.shape[0]) + + + + #change the origin to the top-left corner of the translated box + orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])] + + + + + mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:] + canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask + img = canvas + + bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y] + + + 
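The pattern here is the one every transform in this file repeats: the boxes receive exactly the geometric shift applied to the pixels (here `(corner_x, corner_y)`), and `clip_box` then prunes whatever ends up mostly outside the frame. The deterministic `Translate` variant defined below makes this easy to check by hand (again assuming the `data_aug` directory is on `sys.path`; the sizes are arbitrary):

```
import numpy as np
from data_aug import Translate   # deterministic counterpart of RandomTranslate

img = np.zeros((200, 300, 3), dtype=np.uint8)   # H=200, W=300 dummy image
bboxes = np.array([[10., 10., 60., 60., 1.]])   # x1, y1, x2, y2, class

img_t, boxes_t = Translate(translate_x=0.1, translate_y=0.1)(img, bboxes)
print(boxes_t)   # shifted by (0.1*W, 0.1*H) = (30, 20): [[40. 30. 90. 80. 1.]]
```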
bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25) + + + + + + return img, bboxes + + +class Translate(object): + """Randomly Translates the image + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + translate: float or tuple(float) + if **float**, the image is translated by a factor drawn + randomly from a range (1 - `translate` , 1 + `translate`). If **tuple**, + `translate` is drawn randomly from values specified by the + tuple + + Returns + ------- + + numpy.ndaaray + Translated image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, translate_x = 0.2, translate_y = 0.2, diff = False): + self.translate_x = translate_x + self.translate_y = translate_y + + assert self.translate_x > 0 and self.translate_x < 1 + assert self.translate_y > 0 and self.translate_y < 1 + + + def __call__(self, img, bboxes): + #Chose a random digit to scale by + img_shape = img.shape + + #translate the image + + #percentage of the dimension of the image to translate + translate_factor_x = self.translate_x + translate_factor_y = self.translate_y + + + canvas = np.zeros(img_shape).astype(np.uint8) + + + #get the top-left corner co-ordinates of the shifted box + corner_x = int(translate_factor_x*img.shape[1]) + corner_y = int(translate_factor_y*img.shape[0]) + + + + #change the origin to the top-left corner of the translated box + orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])] + + + + + mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:] + canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask + img = canvas + + bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y] + + + bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25) + + + + + + return img, bboxes + + +class RandomRotate(object): + """Randomly rotates an image + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + angle: float or tuple(float) + if **float**, the image is rotated by a factor drawn + randomly from a range (-`angle`, `angle`). 
If **tuple**, + the `angle` is drawn randomly from values specified by the + tuple + + Returns + ------- + + numpy.ndaaray + Rotated image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, angle = 10): + self.angle = angle + + if type(self.angle) == tuple: + assert len(self.angle) == 2, "Invalid range" + + else: + self.angle = (-self.angle, self.angle) + + def __call__(self, img, bboxes): + + angle = random.uniform(*self.angle) + + w,h = img.shape[1], img.shape[0] + cx, cy = w//2, h//2 + + img = rotate_im(img, angle) + + corners = get_corners(bboxes) + + corners = np.hstack((corners, bboxes[:,4:])) + + + corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w) + + new_bbox = get_enclosing_box(corners) + + + scale_factor_x = img.shape[1] / w + + scale_factor_y = img.shape[0] / h + + img = cv2.resize(img, (w,h)) + + new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y] + + bboxes = new_bbox + + bboxes = clip_box(bboxes, [0,0,w, h], 0.25) + + return img, bboxes + + +class Rotate(object): + """Rotates an image + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + angle: float + The angle by which the image is to be rotated + + + Returns + ------- + + numpy.ndaaray + Rotated image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, angle): + self.angle = angle + + + def __call__(self, img, bboxes): + """ + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Randomly flipped image. + + + """ + + angle = self.angle + print(self.angle) + + w,h = img.shape[1], img.shape[0] + cx, cy = w//2, h//2 + + corners = get_corners(bboxes) + + corners = np.hstack((corners, bboxes[:,4:])) + + img = rotate_im(img, angle) + + corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w) + + + + + new_bbox = get_enclosing_box(corners) + + + scale_factor_x = img.shape[1] / w + + scale_factor_y = img.shape[0] / h + + img = cv2.resize(img, (w,h)) + + new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y] + + + bboxes = new_bbox + + bboxes = clip_box(bboxes, [0,0,w, h], 0.25) + + return img, bboxes + + + +class RandomShear(object): + """Randomly shears an image in horizontal direction + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + shear_factor: float or tuple(float) + if **float**, the image is sheared horizontally by a factor drawn + randomly from a range (-`shear_factor`, `shear_factor`). 
If **tuple**, + the `shear_factor` is drawn randomly from values specified by the + tuple + + Returns + ------- + + numpy.ndaaray + Sheared image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, shear_factor = 0.2): + self.shear_factor = shear_factor + + if type(self.shear_factor) == tuple: + assert len(self.shear_factor) == 2, "Invalid range for scaling factor" + else: + self.shear_factor = (-self.shear_factor, self.shear_factor) + + shear_factor = random.uniform(*self.shear_factor) + + def __call__(self, img, bboxes): + + shear_factor = random.uniform(*self.shear_factor) + + w,h = img.shape[1], img.shape[0] + + if shear_factor < 0: + img, bboxes = HorizontalFlip()(img, bboxes) + + M = np.array([[1, abs(shear_factor), 0],[0,1,0]]) + + nW = img.shape[1] + abs(shear_factor*img.shape[0]) + + bboxes[:,[0,2]] += ((bboxes[:,[1,3]]) * abs(shear_factor) ).astype(int) + + + img = cv2.warpAffine(img, M, (int(nW), img.shape[0])) + + if shear_factor < 0: + img, bboxes = HorizontalFlip()(img, bboxes) + + img = cv2.resize(img, (w,h)) + + scale_factor_x = nW / w + + bboxes[:,:4] /= [scale_factor_x, 1, scale_factor_x, 1] + + + return img, bboxes + +class Shear(object): + """Shears an image in horizontal direction + + + Bounding boxes which have an area of less than 25% in the remaining in the + transformed image is dropped. The resolution is maintained, and the remaining + area if any is filled by black color. + + Parameters + ---------- + shear_factor: float + Factor by which the image is sheared in the x-direction + + Returns + ------- + + numpy.ndaaray + Sheared image in the numpy format of shape `HxWxC` + + numpy.ndarray + Tranformed bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, shear_factor = 0.2): + self.shear_factor = shear_factor + + + def __call__(self, img, bboxes): + + shear_factor = self.shear_factor + if shear_factor < 0: + img, bboxes = HorizontalFlip()(img, bboxes) + + + M = np.array([[1, abs(shear_factor), 0],[0,1,0]]) + + nW = img.shape[1] + abs(shear_factor*img.shape[0]) + + bboxes[:,[0,2]] += ((bboxes[:,[1,3]])*abs(shear_factor)).astype(int) + + + img = cv2.warpAffine(img, M, (int(nW), img.shape[0])) + + if shear_factor < 0: + img, bboxes = HorizontalFlip()(img, bboxes) + + + return img, bboxes + +class Resize(object): + """Resize the image in accordance to `image_letter_box` function in darknet + + The aspect ratio is maintained. The longer side is resized to the input + size of the network, while the remaining space on the shorter side is filled + with black color. **This should be the last transform** + + + Parameters + ---------- + inp_dim : tuple(int) + tuple containing the size to which the image will be resized. 
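Because `Resize` letterboxes onto a square `inp_dim x inp_dim` canvas, the boxes get both a scale and a padding offset. A quick check with a dummy image (same `sys.path` assumption as above; the sizes are arbitrary):

```
import numpy as np
from data_aug import Resize

img = np.zeros((300, 600, 3), dtype=np.uint8)      # H=300, W=600
bboxes = np.array([[100., 50., 300., 250., 1.]])

img_r, boxes_r = Resize(416)(img, bboxes)
print(img_r.shape)   # (416, 416, 3): longer side scaled to 416, the rest padded
print(boxes_r)       # boxes scaled by 416/600 and shifted down by the top padding
```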
+ + Returns + ------- + + numpy.ndaaray + Sheared image in the numpy format of shape `HxWxC` + + numpy.ndarray + Resized bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, inp_dim): + self.inp_dim = inp_dim + + def __call__(self, img, bboxes): + w,h = img.shape[1], img.shape[0] + img = letterbox_image(img, self.inp_dim) + + + scale = min(self.inp_dim/h, self.inp_dim/w) + bboxes[:,:4] *= (scale) + + new_w = scale*w + new_h = scale*h + inp_dim = self.inp_dim + + del_h = (inp_dim - new_h)/2 + del_w = (inp_dim - new_w)/2 + + add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int) + + bboxes[:,:4] += add_matrix + + img = img.astype(np.uint8) + + return img, bboxes + + +class RandomHSV(object): + """HSV Transform to vary hue saturation and brightness + + Hue has a range of 0-179 + Saturation and Brightness have a range of 0-255. + Chose the amount you want to change thhe above quantities accordingly. + + + + + Parameters + ---------- + hue : None or int or tuple (int) + If None, the hue of the image is left unchanged. If int, + a random int is uniformly sampled from (-hue, hue) and added to the + hue of the image. If tuple, the int is sampled from the range + specified by the tuple. + + saturation : None or int or tuple(int) + If None, the saturation of the image is left unchanged. If int, + a random int is uniformly sampled from (-saturation, saturation) + and added to the hue of the image. If tuple, the int is sampled + from the range specified by the tuple. + + brightness : None or int or tuple(int) + If None, the brightness of the image is left unchanged. If int, + a random int is uniformly sampled from (-brightness, brightness) + and added to the hue of the image. If tuple, the int is sampled + from the range specified by the tuple. + + Returns + ------- + + numpy.ndaaray + Transformed image in the numpy format of shape `HxWxC` + + numpy.ndarray + Resized bounding box co-ordinates of the format `n x 4` where n is + number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box + + """ + + def __init__(self, hue = None, saturation = None, brightness = None): + if hue: + self.hue = hue + else: + self.hue = 0 + + if saturation: + self.saturation = saturation + else: + self.saturation = 0 + + if brightness: + self.brightness = brightness + else: + self.brightness = 0 + + + + if type(self.hue) != tuple: + self.hue = (-self.hue, self.hue) + + if type(self.saturation) != tuple: + self.saturation = (-self.saturation, self.saturation) + + if type(brightness) != tuple: + self.brightness = (-self.brightness, self.brightness) + + def __call__(self, img, bboxes): + + hue = random.randint(*self.hue) + saturation = random.randint(*self.saturation) + brightness = random.randint(*self.brightness) + + img = img.astype(int) + + a = np.array([hue, saturation, brightness]).astype(int) + img += np.reshape(a, (1,1,3)) + + img = np.clip(img, 0, 255) + img[:,:,0] = np.clip(img[:,:,0],0, 179) + + img = img.astype(np.uint8) + + + + return img, bboxes + +class Sequence(object): + + """Initialise Sequence object + + Apply a Sequence of transformations to the images/boxes. + + Parameters + ---------- + augemnetations : list + List containing Transformation Objects in Sequence they are to be + applied + + probs : int or list + If **int**, the probability with which each of the transformation will + be applied. If **list**, the length must be equal to *augmentations*. 
+ Each element of this list is the probability with which each + corresponding transformation is applied + + Returns + ------- + + Sequence + Sequence Object + + """ + def __init__(self, augmentations, probs = 1): + + + self.augmentations = augmentations + self.probs = probs + + def __call__(self, images, bboxes): + for i, augmentation in enumerate(self.augmentations): + if type(self.probs) == list: + prob = self.probs[i] + else: + prob = self.probs + + if random.random() < prob: + images, bboxes = augmentation(images, bboxes) + return images, bboxes diff --git a/model.py b/model.py new file mode 100644 index 0000000..8d00650 --- /dev/null +++ b/model.py @@ -0,0 +1,221 @@ +# %% +import os +import numpy as np +import torch +from PIL import Image +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +import json +import torch +from torchvision import transforms as T +import numpy as np +import os +import sys + +sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") +sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") +from references.detection import utils, engine +import data_aug +import bbox_util + + + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + +PATH_ROOT = r"D:\ishan\ml\inaturalist\\" + +device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + +def create_map(list_in, from_key, to_key): + cmap = dict() + for l in list_in: + cmap[l[from_key]] = l[to_key] + return cmap + + +class iNaturalistDataset(torch.utils.data.Dataset): + def __init__(self, validation=False, train=False, transforms = None): + + self.validation = validation + self.train = train + self.transforms = transforms + + + if validation: + json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json") + elif train: + json_path = os.path.join( + PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" + ) + + with open(json_path, "r") as rj: + f = json.load(rj) + + categories = list() + image_info = dict() + + for category in f["categories"]: + if category["supercategory"] == "Aves": + if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']: + print(category['name']) + categories.append(category) + + categories = sorted(categories, key=lambda k: k["name"]) + for idx, cat in enumerate(categories): + cat["new_id"] = idx + 1 + + orig_to_new_id = create_map(categories, "id", "new_id") + + for annot in f["annotations"]: + if annot["category_id"] in orig_to_new_id: + annot["new_category_id"] = orig_to_new_id[annot["category_id"]] + id = annot["image_id"] + if id not in image_info: + image_info[id] = dict() + + annot["bbox"][2] += annot["bbox"][0] + annot["bbox"][3] += annot["bbox"][1] + image_info[id]["annotation"] = annot + + for img in f["images"]: + id = img["id"] + path = os.path.join(PATH_ROOT, img["file_name"]) + height = img["height"] + width = img["width"] + if id in image_info: + image_info[id].update({"path": path, "height": height, "width": width}) + + for idx, (id, im_in) in enumerate(image_info.items()): + im_in["idx"] = idx + self.images = image_info + self.categories = categories + self.idx_to_id = [x for x in self.images] + self.num_classes = len(self.categories) + 1 + self.num_samples = len(self.images) + + + def __len__(self): + return 
self.num_samples + + def __getitem__(self, idx): + idd = self.idx_to_id[idx] + c_image = self.images[idd] + img_path = c_image["path"] + img = Image.open(img_path).convert("RGB") + + annot = c_image["annotation"] + bbox = annot["bbox"] + boxes = bbox + target = dict() + target["boxes"] = torch.as_tensor([boxes]) + target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64) + target['image_id'] = torch.tensor([annot['image_id']]) + target['area'] = torch.as_tensor([annot['area']]) + target['iscrowd'] = torch.zeros((1,), dtype=torch.int64) + + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target +# %% +# v = iNaturalistDataset(validation=True) + + +# v = iNaturalistDataset(validation= True) +# o = v[10] +# %% +# oimage = t.tensor(o[0]*255, dtype=t.uint8) +# import matplotlib.pyplot as plt +# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1) +# plt.imshow(ox.permute([1,2,0])) +# plt.savefig('crap2.png') + +def get_model(num_classes): + model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) + num_classes = 2 # 1 class (person) + background + in_features = model.roi_heads.box_predictor.cls_score.in_features + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + return model + + +import transforms as T + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + +from engine import train_one_epoch, evaluate +import utils +# %% +def run(): + val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True)) + train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False)) + + + train_data_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn + ) + val_data_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn + ) + + import torchvision + from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + num_classes = 2 + + + model = get_model(num_classes) + model.to(device) + # construct an optimizer + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(params, lr=0.005, + momentum=0.9, weight_decay=0.0005) + # and a learning rate scheduler + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, + step_size=3, + gamma=0.1) + + # let's train it for 10 epochs + num_epochs = 10 + + for epoch in range(num_epochs): + print(epoch) + torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth') + # train for one epoch, printing every 10 iterations + engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10) + torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth') + # update the learning rate + lr_scheduler.step() + torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth') + # evaluate on the test dataset + engine.evaluate(model, val_data_loader, device=device) + + + +if __name__ == "__main__": + run() + + + +# # %% +# json_path = os.path.join( +# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" +# ) +# with open(json_path, "r") as rj: +# f = json.load(rj) + + +# # %% +# image_id: 2358 + diff --git a/old_files/README.md b/old_files/README.md new file mode 100644 index 0000000..ea5be6e --- /dev/null +++ b/old_files/README.md @@ -0,0 +1,82 @@ +# 
Object detection reference training scripts + +This folder contains reference training scripts for object detection. +They serve as a log of how to train specific models, to provide baseline +training and evaluation scripts to quickly bootstrap research. + +To execute the example commands below you must install the following: + +``` +cython +pycocotools +matplotlib +``` + +You must modify the following flags: + +`--data-path=/path/to/coco/dataset` + +`--nproc_per_node=` + +Except otherwise noted, all models have been trained on 8x V100 GPUs. + +### Faster R-CNN ResNet-50 FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### Faster R-CNN MobileNetV3-Large FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### Faster R-CNN MobileNetV3-Large 320 FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### RetinaNet +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model retinanet_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 +``` + +### SSD300 VGG16 +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model ssd300_vgg16 --epochs 120\ + --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\ + --weight-decay 0.0005 --data-augmentation ssd +``` + +### SSDlite320 MobileNetV3-Large +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite +``` + + +### Mask R-CNN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + + +### Keypoint R-CNN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\ + --lr-steps 36 43 --aspect-ratio-group-factor 3 +``` + diff --git a/old_files/coco_eval.py b/old_files/coco_eval.py new file mode 100644 index 0000000..09648f2 --- /dev/null +++ b/old_files/coco_eval.py @@ -0,0 +1,352 @@ +import json +import tempfile + +import numpy as np +import copy +import time +import torch +import torch._six + +from pycocotools.cocoeval import COCOeval +from pycocotools.coco import COCO +import pycocotools.mask as mask_util + +from collections import defaultdict + +import utils + + +class CocoEvaluator(object): + def __init__(self, coco_gt, iou_types): + assert isinstance(iou_types, (list, tuple)) + coco_gt = copy.deepcopy(coco_gt) + self.coco_gt = coco_gt + + self.iou_types = iou_types + self.coco_eval = {} + for iou_type in iou_types: + self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) + + self.img_ids = [] + self.eval_imgs = {k: [] for k in iou_types} + + def update(self, predictions): + img_ids = list(np.unique(list(predictions.keys()))) + self.img_ids.extend(img_ids) + + for iou_type in 
self.iou_types: + results = self.prepare(predictions, iou_type) + coco_dt = loadRes(self.coco_gt, results) if results else COCO() + coco_eval = self.coco_eval[iou_type] + + coco_eval.cocoDt = coco_dt + coco_eval.params.imgIds = list(img_ids) + img_ids, eval_imgs = evaluate(coco_eval) + + self.eval_imgs[iou_type].append(eval_imgs) + + def synchronize_between_processes(self): + for iou_type in self.iou_types: + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) + create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) + + def accumulate(self): + for coco_eval in self.coco_eval.values(): + coco_eval.accumulate() + + def summarize(self): + for iou_type, coco_eval in self.coco_eval.items(): + print("IoU metric: {}".format(iou_type)) + coco_eval.summarize() + + def prepare(self, predictions, iou_type): + if iou_type == "bbox": + return self.prepare_for_coco_detection(predictions) + elif iou_type == "segm": + return self.prepare_for_coco_segmentation(predictions) + elif iou_type == "keypoints": + return self.prepare_for_coco_keypoint(predictions) + else: + raise ValueError("Unknown iou type {}".format(iou_type)) + + def prepare_for_coco_detection(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "bbox": box, + "score": scores[k], + } + for k, box in enumerate(boxes) + ] + ) + return coco_results + + def prepare_for_coco_segmentation(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + scores = prediction["scores"] + labels = prediction["labels"] + masks = prediction["masks"] + + masks = masks > 0.5 + + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "segmentation": rle, + "score": scores[k], + } + for k, rle in enumerate(rles) + ] + ) + return coco_results + + def prepare_for_coco_keypoint(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + keypoints = prediction["keypoints"] + keypoints = keypoints.flatten(start_dim=1).tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + 'keypoints': keypoint, + "score": scores[k], + } + for k, keypoint in enumerate(keypoints) + ] + ) + return coco_results + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + + +def merge(img_ids, eval_imgs): + all_img_ids = utils.all_gather(img_ids) + all_eval_imgs = utils.all_gather(eval_imgs) + + merged_img_ids = [] + for p in all_img_ids: + merged_img_ids.extend(p) + + merged_eval_imgs = [] + for p in all_eval_imgs: + merged_eval_imgs.append(p) + + merged_img_ids = np.array(merged_img_ids) + 
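`convert_to_xywh`, defined just above `merge`, is the counterpart of the dataset code that turns the raw iNaturalist `[x, y, w, h]` annotations into `[x1, y1, x2, y2]`: the model predicts corner format, while pycocotools scores `[x, y, w, h]`. A self-contained sketch of the round trip (values are placeholders):

```
import torch

def to_xywh(boxes):
    # same unbind/stack trick as convert_to_xywh above
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)

def to_xyxy(boxes):
    # the dataset-side conversion (bbox[2] += bbox[0]; bbox[3] += bbox[1]), vectorised
    x, y, w, h = boxes.unbind(1)
    return torch.stack((x, y, x + w, y + h), dim=1)

coco_box = torch.tensor([[40., 30., 50., 50.]])   # x, y, w, h as stored in the JSON
print(to_xyxy(coco_box))                          # tensor([[40., 30., 90., 80.]])
print(to_xywh(to_xyxy(coco_box)))                 # back to the original
```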
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) + + # keep only unique (and in sorted order) images + merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) + merged_eval_imgs = merged_eval_imgs[..., idx] + + return merged_img_ids, merged_eval_imgs + + +def create_common_coco_eval(coco_eval, img_ids, eval_imgs): + img_ids, eval_imgs = merge(img_ids, eval_imgs) + img_ids = list(img_ids) + eval_imgs = list(eval_imgs.flatten()) + + coco_eval.evalImgs = eval_imgs + coco_eval.params.imgIds = img_ids + coco_eval._paramsEval = copy.deepcopy(coco_eval.params) + + +################################################################# +# From pycocotools, just removed the prints and fixed +# a Python3 bug about unicode not defined +################################################################# + +# Ideally, pycocotools wouldn't have hard-coded prints +# so that we could avoid copy-pasting those two functions + +def createIndex(self): + # create index + # print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns, catToImgs = defaultdict(list), defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + # print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + +maskUtils = mask_util + + +def loadRes(self, resFile): + """ + Load result file and return a result api object. 
+ Args: + self (obj): coco object with ground truth annotations + resFile (str): file name of result file + Returns: + res (obj): result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + # print('Loading and preparing results...') + # tic = time.time() + if isinstance(resFile, torch._six.string_classes): + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id + 1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] + if 'segmentation' not in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2] * bb[3] + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if 'bbox' not in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x2 - x1) * (y2 - y1) + ann['id'] = id + 1 + ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] + # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + createIndex(res) + return res + + +def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + # tic = time.time() + # print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) + # print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + evalImgs = [ + evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + # this is NOT in the pycocotools code, but could be done outside + evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) + self._paramsEval = copy.deepcopy(self.params) + # toc = time.time() + # print('DONE (t={:0.2f}s).'.format(toc-tic)) + return p.imgIds, evalImgs + +################################################################# +# end of straight copy from pycocotools, just removing the prints +################################################################# diff --git a/old_files/coco_utils.py b/old_files/coco_utils.py new file mode 100644 index 0000000..26701a2 --- /dev/null +++ b/old_files/coco_utils.py @@ -0,0 +1,252 @@ +import copy +import os +from PIL import Image + +import torch +import torch.utils.data +import torchvision + +from pycocotools import mask as coco_mask +from pycocotools.coco import COCO + +import transforms as T + + +class FilterAndRemapCocoCategories(object): + def __init__(self, categories, remap=True): + self.categories = categories + self.remap = remap + + def __call__(self, image, target): + anno = target["annotations"] + anno = [obj for obj in anno if obj["category_id"] in self.categories] + if not self.remap: + target["annotations"] = anno + return image, target + anno = copy.deepcopy(anno) + for obj in anno: + obj["category_id"] = self.categories.index(obj["category_id"]) + target["annotations"] = anno + return image, target + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +class ConvertCocoPolysToMask(object): + def __call__(self, image, target): + w, h = image.size + + image_id = target["image_id"] + image_id = torch.tensor([image_id]) + + anno = target["annotations"] + + anno = [obj for obj in anno if obj['iscrowd'] == 0] + + boxes = [obj["bbox"] for obj in anno] + # guard against no boxes via resizing + boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) + boxes[:, 2:] += boxes[:, :2] + boxes[:, 0::2].clamp_(min=0, max=w) + boxes[:, 1::2].clamp_(min=0, max=h) + + classes = [obj["category_id"] for obj in anno] + classes = torch.tensor(classes, dtype=torch.int64) + + segmentations = [obj["segmentation"] for obj in anno] + masks = convert_coco_poly_to_mask(segmentations, h, w) + + keypoints = None + if anno and "keypoints" in anno[0]: + keypoints = [obj["keypoints"] for obj in 
anno] + keypoints = torch.as_tensor(keypoints, dtype=torch.float32) + num_keypoints = keypoints.shape[0] + if num_keypoints: + keypoints = keypoints.view(num_keypoints, -1, 3) + + keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + + target = {} + target["boxes"] = boxes + target["labels"] = classes + target["masks"] = masks + target["image_id"] = image_id + if keypoints is not None: + target["keypoints"] = keypoints + + # for conversion to coco api + area = torch.tensor([obj["area"] for obj in anno]) + iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) + target["area"] = area + target["iscrowd"] = iscrowd + + return image, target + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_only_empty_bbox(anno): + return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) + + def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + min_keypoints_per_image = 10 + + def _has_valid_annotation(anno): + # if it's empty, there is no annotation + if len(anno) == 0: + return False + # if all boxes have close to zero area, there is no annotation + if _has_only_empty_bbox(anno): + return False + # keypoints task have a slight different critera for considering + # if an annotation is valid + if "keypoints" not in anno[0]: + return True + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) >= min_keypoints_per_image: + return True + return False + + assert isinstance(dataset, torchvision.datasets.CocoDetection) + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset + + +def convert_to_coco_api(ds): + coco_ds = COCO() + # annotation IDs need to start at 1, not 0, see torchvision issue #1530 + ann_id = 1 + dataset = {'images': [], 'categories': [], 'annotations': []} + categories = set() + for img_idx in range(len(ds)): + # find better way to get target + # targets = ds.get_annotations(img_idx) + img, targets = ds[img_idx] + image_id = targets["image_id"].item() + img_dict = {} + img_dict['id'] = image_id + img_dict['height'] = img.shape[-2] + img_dict['width'] = img.shape[-1] + dataset['images'].append(img_dict) + bboxes = targets["boxes"] + bboxes[:, 2:] -= bboxes[:, :2] + bboxes = bboxes.tolist() + labels = targets['labels'].tolist() + areas = targets['area'].tolist() + iscrowd = targets['iscrowd'].tolist() + if 'masks' in targets: + masks = targets['masks'] + # make masks Fortran contiguous for coco_mask + masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) + if 'keypoints' in targets: + keypoints = targets['keypoints'] + keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() + num_objs = len(bboxes) + for i in range(num_objs): + ann = {} + ann['image_id'] = image_id + ann['bbox'] = bboxes[i] + ann['category_id'] = labels[i] + categories.add(labels[i]) + ann['area'] = areas[i] + ann['iscrowd'] = iscrowd[i] + ann['id'] = ann_id + if 'masks' in targets: + ann["segmentation"] = coco_mask.encode(masks[i].numpy()) + if 'keypoints' in targets: + 
ann['keypoints'] = keypoints[i] + ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) + dataset['annotations'].append(ann) + ann_id += 1 + dataset['categories'] = [{'id': i} for i in sorted(categories)] + coco_ds.dataset = dataset + coco_ds.createIndex() + return coco_ds + + +def get_coco_api_from_dataset(dataset): + for _ in range(10): + if isinstance(dataset, torchvision.datasets.CocoDetection): + break + if isinstance(dataset, torch.utils.data.Subset): + dataset = dataset.dataset + if isinstance(dataset, torchvision.datasets.CocoDetection): + return dataset.coco + return convert_to_coco_api(dataset) + + +class CocoDetection(torchvision.datasets.CocoDetection): + def __init__(self, img_folder, ann_file, transforms): + super(CocoDetection, self).__init__(img_folder, ann_file) + self._transforms = transforms + + def __getitem__(self, idx): + img, target = super(CocoDetection, self).__getitem__(idx) + image_id = self.ids[idx] + target = dict(image_id=image_id, annotations=target) + if self._transforms is not None: + img, target = self._transforms(img, target) + return img, target + + +def get_coco(root, image_set, transforms, mode='instances'): + anno_file_template = "{}_{}2017.json" + PATHS = { + "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), + "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), + # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) + } + + t = [ConvertCocoPolysToMask()] + + if transforms is not None: + t.append(transforms) + transforms = T.Compose(t) + + img_folder, ann_file = PATHS[image_set] + img_folder = os.path.join(root, img_folder) + ann_file = os.path.join(root, ann_file) + + dataset = CocoDetection(img_folder, ann_file, transforms=transforms) + + if image_set == "train": + dataset = _coco_remove_images_without_annotations(dataset) + + # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) + + return dataset + + +def get_coco_kp(root, image_set, transforms): + return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git a/old_files/crap.png b/old_files/crap.png new file mode 100644 index 0000000..fe3667a Binary files /dev/null and b/old_files/crap.png differ diff --git a/old_files/crap2.png b/old_files/crap2.png new file mode 100644 index 0000000..fe3667a Binary files /dev/null and b/old_files/crap2.png differ diff --git a/old_files/data.py b/old_files/data.py new file mode 100644 index 0000000..8d00650 --- /dev/null +++ b/old_files/data.py @@ -0,0 +1,221 @@ +# %% +import os +import numpy as np +import torch +from PIL import Image +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +import json +import torch +from torchvision import transforms as T +import numpy as np +import os +import sys + +sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") +sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") +from references.detection import utils, engine +import data_aug +import bbox_util + + + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + +PATH_ROOT = r"D:\ishan\ml\inaturalist\\" + +device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + +def 
create_map(list_in, from_key, to_key): + cmap = dict() + for l in list_in: + cmap[l[from_key]] = l[to_key] + return cmap + + +class iNaturalistDataset(torch.utils.data.Dataset): + def __init__(self, validation=False, train=False, transforms = None): + + self.validation = validation + self.train = train + self.transforms = transforms + + + if validation: + json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json") + elif train: + json_path = os.path.join( + PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" + ) + + with open(json_path, "r") as rj: + f = json.load(rj) + + categories = list() + image_info = dict() + + for category in f["categories"]: + if category["supercategory"] == "Aves": + if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']: + print(category['name']) + categories.append(category) + + categories = sorted(categories, key=lambda k: k["name"]) + for idx, cat in enumerate(categories): + cat["new_id"] = idx + 1 + + orig_to_new_id = create_map(categories, "id", "new_id") + + for annot in f["annotations"]: + if annot["category_id"] in orig_to_new_id: + annot["new_category_id"] = orig_to_new_id[annot["category_id"]] + id = annot["image_id"] + if id not in image_info: + image_info[id] = dict() + + annot["bbox"][2] += annot["bbox"][0] + annot["bbox"][3] += annot["bbox"][1] + image_info[id]["annotation"] = annot + + for img in f["images"]: + id = img["id"] + path = os.path.join(PATH_ROOT, img["file_name"]) + height = img["height"] + width = img["width"] + if id in image_info: + image_info[id].update({"path": path, "height": height, "width": width}) + + for idx, (id, im_in) in enumerate(image_info.items()): + im_in["idx"] = idx + self.images = image_info + self.categories = categories + self.idx_to_id = [x for x in self.images] + self.num_classes = len(self.categories) + 1 + self.num_samples = len(self.images) + + + def __len__(self): + return self.num_samples + + def __getitem__(self, idx): + idd = self.idx_to_id[idx] + c_image = self.images[idd] + img_path = c_image["path"] + img = Image.open(img_path).convert("RGB") + + annot = c_image["annotation"] + bbox = annot["bbox"] + boxes = bbox + target = dict() + target["boxes"] = torch.as_tensor([boxes]) + target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64) + target['image_id'] = torch.tensor([annot['image_id']]) + target['area'] = torch.as_tensor([annot['area']]) + target['iscrowd'] = torch.zeros((1,), dtype=torch.int64) + + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target +# %% +# v = iNaturalistDataset(validation=True) + + +# v = iNaturalistDataset(validation= True) +# o = v[10] +# %% +# oimage = t.tensor(o[0]*255, dtype=t.uint8) +# import matplotlib.pyplot as plt +# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1) +# plt.imshow(ox.permute([1,2,0])) +# plt.savefig('crap2.png') + +def get_model(num_classes): + model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) + num_classes = 2 # 1 class (person) + background + in_features = model.roi_heads.box_predictor.cls_score.in_features + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + return model + + +import transforms as T + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + +from engine import train_one_epoch, evaluate +import utils +# %% +def run(): + val_dataset = 
iNaturalistDataset(validation=True, transforms = get_transform(train=True)) + train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False)) + + + train_data_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn + ) + val_data_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn + ) + + import torchvision + from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + num_classes = 2 + + + model = get_model(num_classes) + model.to(device) + # construct an optimizer + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(params, lr=0.005, + momentum=0.9, weight_decay=0.0005) + # and a learning rate scheduler + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, + step_size=3, + gamma=0.1) + + # let's train it for 10 epochs + num_epochs = 10 + + for epoch in range(num_epochs): + print(epoch) + torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth') + # train for one epoch, printing every 10 iterations + engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10) + torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth') + # update the learning rate + lr_scheduler.step() + torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth') + # evaluate on the test dataset + engine.evaluate(model, val_data_loader, device=device) + + + +if __name__ == "__main__": + run() + + + +# # %% +# json_path = os.path.join( +# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" +# ) +# with open(json_path, "r") as rj: +# f = json.load(rj) + + +# # %% +# image_id: 2358 + diff --git a/old_files/engine.py b/old_files/engine.py new file mode 100644 index 0000000..49992af --- /dev/null +++ b/old_files/engine.py @@ -0,0 +1,110 @@ +import math +import sys +import time +import torch + +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco_api_from_dataset +from coco_eval import CocoEvaluator +import utils + + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + + lr_scheduler = None + if epoch == 0: + warmup_factor = 1. 
/ 1000 + warmup_iters = min(1000, len(data_loader) - 1) + + lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) + + for images, targets in metric_logger.log_every(data_loader, print_freq, header): + images = list(image.to(device) for image in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + loss_dict = model(images, targets) + + losses = sum(loss for loss in loss_dict.values()) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + + loss_value = losses_reduced.item() + + if not math.isfinite(loss_value): + print("Loss is {}, stopping training".format(loss_value)) + print(loss_dict_reduced) + sys.exit(1) + + optimizer.zero_grad() + losses.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + metric_logger.update(loss=losses_reduced, **loss_dict_reduced) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + + return metric_logger + + +def _get_iou_types(model): + model_without_ddp = model + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model_without_ddp = model.module + iou_types = ["bbox"] + if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): + iou_types.append("segm") + if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): + iou_types.append("keypoints") + return iou_types + + +@torch.no_grad() +def evaluate(model, data_loader, device): + n_threads = torch.get_num_threads() + # FIXME remove this and make paste_masks_in_image run on the GPU + torch.set_num_threads(1) + cpu_device = torch.device("cpu") + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + + coco = get_coco_api_from_dataset(data_loader.dataset) + iou_types = _get_iou_types(model) + coco_evaluator = CocoEvaluator(coco, iou_types) + + for images, targets in metric_logger.log_every(data_loader, 100, header): + images = list(img.to(device) for img in images) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + model_time = time.time() + outputs = model(images) + + outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] + model_time = time.time() - model_time + + res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} + evaluator_time = time.time() + coco_evaluator.update(res) + evaluator_time = time.time() - evaluator_time + metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + coco_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + coco_evaluator.accumulate() + coco_evaluator.summarize() + torch.set_num_threads(n_threads) + return coco_evaluator diff --git a/old_files/fine_tuned.py b/old_files/fine_tuned.py new file mode 100644 index 0000000..1a57455 --- /dev/null +++ b/old_files/fine_tuned.py @@ -0,0 +1,28 @@ + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + + +model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) +num_classes = 1 # 1 class (person) + background +in_features = model.roi_heads.box_predictor.cls_score.in_features +model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + +import torchvision +from torchvision.models.detection import FasterRCNN +from 
torchvision.models.detection.rpn import AnchorGenerator +backbone = torchvision.models.mobilenet_v2(pretrained=True).features +backbone.out_channels = list(backbone.modules())[-3].out_channels + +anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), + aspect_ratios=((0.5, 1.0, 2.0),)) + +roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + output_size=7, + sampling_ratio=2) + +model = FasterRCNN(backbone, + num_classes=2, + rpn_anchor_generator=anchor_generator, + box_roi_pool=roi_pooler) +# %% \ No newline at end of file diff --git a/group_by_aspect_ratio.py b/old_files/group_by_aspect_ratio.py similarity index 100% rename from group_by_aspect_ratio.py rename to old_files/group_by_aspect_ratio.py diff --git a/presets.py b/old_files/presets.py similarity index 100% rename from presets.py rename to old_files/presets.py diff --git a/test.py b/old_files/test.py similarity index 75% rename from test.py rename to old_files/test.py index fce8894..c367069 100644 --- a/test.py +++ b/old_files/test.py @@ -15,19 +15,19 @@ sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") -model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 3) +model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 2) -model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_1.pth')) +model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_9.pth')) model.eval() model.to('cuda') -#img = r'D:\ishan\ml\inaturalist\test2017\00a903fa1d23b2f8f28248e81bc1c4a4.jpg' -#img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210617093423.mp4_023.jpg' -img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210609095848.mp4_133.jpg' + import random rtdir = r'J:\hummingbird_imagenet\hummingbird' ff = os.listdir(rtdir) + +# %% img = os.path.join(rtdir,random.choice(ff)) image = cv2.imread(img)[:,:,::-1].copy() o = T.ToTensor()(image).cuda() @@ -43,7 +43,6 @@ plt.imshow(ox.permute([1,2,0])) # %% from data import iNaturalistDataset sd = iNaturalistDataset(validation=True) -# # %% diff --git a/old_files/train.py b/old_files/train.py new file mode 100644 index 0000000..cd4148e --- /dev/null +++ b/old_files/train.py @@ -0,0 +1,233 @@ +r"""PyTorch Detection Training. + +To run in a multi-gpu environment, use the distributed launcher:: + + python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ + train.py ... --world-size $NGPU + +The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. + --lr 0.02 --batch-size 2 --world-size 8 +If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. + +On top of that, for training Faster/Mask R-CNN, the default hyperparameters are + --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 + +Also, if you train Keypoint R-CNN, the default hyperparameters are + --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 +Because the number of images is smaller in the person keypoint subset of COCO, +the number of epochs should be adapted so that we have the same number of iterations. 
+""" +import datetime +import os +import time + +import torch +import torch.utils.data +import torchvision +import torchvision.models.detection +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco, get_coco_kp + +from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups +from engine import train_one_epoch, evaluate + +import presets +import utils + + +def get_dataset(name, image_set, transform, data_path): + paths = { + "coco": (data_path, get_coco, 91), + "coco_kp": (data_path, get_coco_kp, 2) + } + p, ds_fn, num_classes = paths[name] + + ds = ds_fn(p, image_set=image_set, transforms=transform) + return ds, num_classes + + +def get_transform(train, data_augmentation): + return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval() + + +def get_args_parser(add_help=True): + import argparse + parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help) + + parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset') + parser.add_argument('--dataset', default='coco', help='dataset') + parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model') + parser.add_argument('--device', default='cuda', help='device') + parser.add_argument('-b', '--batch-size', default=2, type=int, + help='images per gpu, the total batch size is $NGPU x batch_size') + parser.add_argument('--epochs', default=26, type=int, metavar='N', + help='number of total epochs to run') + parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') + parser.add_argument('--lr', default=0.02, type=float, + help='initial learning rate, 0.02 is the default value for training ' + 'on 8 gpus and 2 images_per_gpu') + parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') + parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') + parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)') + parser.add_argument('--lr-step-size', default=8, type=int, + help='decrease lr every step-size epochs (multisteplr scheduler only)') + parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, + help='decrease lr every step-size epochs (multisteplr scheduler only)') + parser.add_argument('--lr-gamma', default=0.1, type=float, + help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)') + parser.add_argument('--print-freq', default=20, type=int, help='print frequency') + parser.add_argument('--output-dir', default='.', help='path where to save') + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') + parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) + parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn') + parser.add_argument('--trainable-backbone-layers', default=None, type=int, + help='number of trainable layers of backbone') + parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)') + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + 
action="store_true", + ) + parser.add_argument( + "--pretrained", + dest="pretrained", + help="Use pre-trained models from the modelzoo", + action="store_true", + ) + + # distributed training parameters + parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + + return parser + + +def main(args): + if args.output_dir: + utils.mkdir(args.output_dir) + + utils.init_distributed_mode(args) + print(args) + + device = torch.device(args.device) + + # Data loading code + print("Loading data") + + dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation), + args.data_path) + dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path) + + print("Creating data loaders") + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + else: + train_sampler = torch.utils.data.RandomSampler(dataset) + test_sampler = torch.utils.data.SequentialSampler(dataset_test) + + if args.aspect_ratio_group_factor >= 0: + group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) + train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) + else: + train_batch_sampler = torch.utils.data.BatchSampler( + train_sampler, args.batch_size, drop_last=True) + + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, + collate_fn=utils.collate_fn) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=1, + sampler=test_sampler, num_workers=args.workers, + collate_fn=utils.collate_fn) + + print("Creating model") + kwargs = { + "trainable_backbone_layers": args.trainable_backbone_layers + } + if "rcnn" in args.model: + if args.rpn_score_thresh is not None: + kwargs["rpn_score_thresh"] = args.rpn_score_thresh + model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained, + **kwargs) + model.to(device) + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model_without_ddp = model + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + model_without_ddp = model.module + + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD( + params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + args.lr_scheduler = args.lr_scheduler.lower() + if args.lr_scheduler == 'multisteplr': + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosineannealinglr': + lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + else: + raise RuntimeError("Invalid lr scheduler '{}'. 
Only MultiStepLR and CosineAnnealingLR " + "are supported.".format(args.lr_scheduler)) + + if args.resume: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model']) + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + + if args.test_only: + evaluate(model, data_loader_test, device=device) + return + + print("Start training") + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + lr_scheduler.step() + if args.output_dir: + checkpoint = { + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'args': args, + 'epoch': epoch + } + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'checkpoint.pth')) + + # evaluate after every epoch + evaluate(model, data_loader_test, device=device) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/old_files/transforms.py b/old_files/transforms.py new file mode 100644 index 0000000..8e4b887 --- /dev/null +++ b/old_files/transforms.py @@ -0,0 +1,239 @@ +import torch +import torchvision + +from torch import nn, Tensor +from torchvision.transforms import functional as F +from torchvision.transforms import transforms as T +from typing import List, Tuple, Dict, Optional + + +def _flip_coco_person_keypoints(kps, width): + flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + flipped_data = kps[:, flip_inds] + flipped_data[..., 0] = width - flipped_data[..., 0] + # Maintain COCO convention that if visibility == 0, then x, y = 0 + inds = flipped_data[..., 2] == 0 + flipped_data[inds] = 0 + return flipped_data + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + return image, target + + +class RandomHorizontalFlip(T.RandomHorizontalFlip): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torch.rand(1) < self.p: + image = F.hflip(image) + if target is not None: + width, _ = F._get_image_size(image) + target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] + if "masks" in target: + target["masks"] = target["masks"].flip(-1) + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = _flip_coco_person_keypoints(keypoints, width) + target["keypoints"] = keypoints + return image, target + + +class ToTensor(nn.Module): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.to_tensor(image) + return image, target + + +class RandomIoUCrop(nn.Module): + def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40): + super().__init__() + # Configuration similar to 
https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + if sampler_options is None: + sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] + self.options = sampler_options + self.trials = trials + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if target is None: + raise ValueError("The targets can't be None for this transform.") + + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + orig_w, orig_h = F._get_image_size(image) + + while True: + # sample an option + idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) + min_jaccard_overlap = self.options[idx] + if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option + return image, target + + for _ in range(self.trials): + # check the aspect ratio limitations + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) + new_w = int(orig_w * r[0]) + new_h = int(orig_h * r[1]) + aspect_ratio = new_w / new_h + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): + continue + + # check for 0 area crops + r = torch.rand(2) + left = int((orig_w - new_w) * r[0]) + top = int((orig_h - new_h) * r[1]) + right = left + new_w + bottom = top + new_h + if left == right or top == bottom: + continue + + # check for any valid boxes with centers within the crop area + cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) + cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) + is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) + if not is_within_crop_area.any(): + continue + + # check at least 1 box with jaccard limitations + boxes = target["boxes"][is_within_crop_area] + ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]], + dtype=boxes.dtype, device=boxes.device)) + if ious.max() < min_jaccard_overlap: + continue + + # keep only valid boxes and perform cropping + target["boxes"] = boxes + target["labels"] = target["labels"][is_within_crop_area] + target["boxes"][:, 0::2] -= left + target["boxes"][:, 1::2] -= top + target["boxes"][:, 0::2].clamp_(min=0, max=new_w) + target["boxes"][:, 1::2].clamp_(min=0, max=new_h) + image = F.crop(image, top, left, new_h, new_w) + + return image, target + + +class RandomZoomOut(nn.Module): + def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5): + super().__init__() + if fill is None: + fill = [0., 0., 0.] + self.fill = fill + self.side_range = side_range + if side_range[0] < 1. or side_range[0] > side_range[1]: + raise ValueError("Invalid canvas side range provided {}.".format(side_range)) + self.p = p + + @torch.jit.unused + def _get_fill_value(self, is_pil): + # type: (bool) -> int + # We fake the type to make it work on JIT + return tuple(int(x) for x in self.fill) if is_pil else 0 + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. 
Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + if torch.rand(1) < self.p: + return image, target + + orig_w, orig_h = F._get_image_size(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width = int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + if torch.jit.is_scripting(): + fill = 0 + else: + fill = self._get_fill_value(F._is_pil_image(image)) + + image = F.pad(image, [left, top, right, bottom], fill=fill) + if isinstance(image, torch.Tensor): + v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) + image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \ + image[..., :, (left + orig_w):] = v + + if target is not None: + target["boxes"][:, 0::2] += left + target["boxes"][:, 1::2] += top + + return image, target + + +class RandomPhotometricDistort(nn.Module): + def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5), + hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5): + super().__init__() + self._brightness = T.ColorJitter(brightness=brightness) + self._contrast = T.ColorJitter(contrast=contrast) + self._hue = T.ColorJitter(hue=hue) + self._saturation = T.ColorJitter(saturation=saturation) + self.p = p + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + r = torch.rand(7) + + if r[0] < self.p: + image = self._brightness(image) + + contrast_before = r[1] < 0.5 + if contrast_before: + if r[2] < self.p: + image = self._contrast(image) + + if r[3] < self.p: + image = self._saturation(image) + + if r[4] < self.p: + image = self._hue(image) + + if not contrast_before: + if r[5] < self.p: + image = self._contrast(image) + + if r[6] < self.p: + channels = F._get_image_num_channels(image) + permutation = torch.randperm(channels) + + is_pil = F._is_pil_image(image) + if is_pil: + image = F.to_tensor(image) + image = image[..., permutation, :, :] + if is_pil: + image = F.to_pil_image(image) + + return image, target diff --git a/old_files/utils.py b/old_files/utils.py new file mode 100644 index 0000000..3c52abb --- /dev/null +++ b/old_files/utils.py @@ -0,0 +1,295 @@ +from collections import defaultdict, deque +import datetime +import errno +import os +import time + +import torch +import torch.distributed as dist + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + data_list = [None] * world_size + dist.all_gather_object(data_list, data) + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 
1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) diff --git a/train.py b/train.py index cd4148e..e69de29 100644 --- a/train.py +++ b/train.py @@ -1,233 +0,0 @@ -r"""PyTorch Detection Training. - -To run in a multi-gpu environment, use the distributed launcher:: - - python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ - train.py ... --world-size $NGPU - -The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. - --lr 0.02 --batch-size 2 --world-size 8 -If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. 
- -On top of that, for training Faster/Mask R-CNN, the default hyperparameters are - --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 - -Also, if you train Keypoint R-CNN, the default hyperparameters are - --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 -Because the number of images is smaller in the person keypoint subset of COCO, -the number of epochs should be adapted so that we have the same number of iterations. -""" -import datetime -import os -import time - -import torch -import torch.utils.data -import torchvision -import torchvision.models.detection -import torchvision.models.detection.mask_rcnn - -from coco_utils import get_coco, get_coco_kp - -from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups -from engine import train_one_epoch, evaluate - -import presets -import utils - - -def get_dataset(name, image_set, transform, data_path): - paths = { - "coco": (data_path, get_coco, 91), - "coco_kp": (data_path, get_coco_kp, 2) - } - p, ds_fn, num_classes = paths[name] - - ds = ds_fn(p, image_set=image_set, transforms=transform) - return ds, num_classes - - -def get_transform(train, data_augmentation): - return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval() - - -def get_args_parser(add_help=True): - import argparse - parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help) - - parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset') - parser.add_argument('--dataset', default='coco', help='dataset') - parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=2, type=int, - help='images per gpu, the total batch size is $NGPU x batch_size') - parser.add_argument('--epochs', default=26, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') - parser.add_argument('--lr', default=0.02, type=float, - help='initial learning rate, 0.02 is the default value for training ' - 'on 8 gpus and 2 images_per_gpu') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)') - parser.add_argument('--lr-step-size', default=8, type=int, - help='decrease lr every step-size epochs (multisteplr scheduler only)') - parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, - help='decrease lr every step-size epochs (multisteplr scheduler only)') - parser.add_argument('--lr-gamma', default=0.1, type=float, - help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)') - parser.add_argument('--print-freq', default=20, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') - parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) - parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn') - 
parser.add_argument('--trainable-backbone-layers', default=None, type=int, - help='number of trainable layers of backbone') - parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)') - parser.add_argument( - "--sync-bn", - dest="sync_bn", - help="Use sync batch norm", - action="store_true", - ) - parser.add_argument( - "--test-only", - dest="test_only", - help="Only test the model", - action="store_true", - ) - parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", - ) - - # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') - - return parser - - -def main(args): - if args.output_dir: - utils.mkdir(args.output_dir) - - utils.init_distributed_mode(args) - print(args) - - device = torch.device(args.device) - - # Data loading code - print("Loading data") - - dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation), - args.data_path) - dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path) - - print("Creating data loaders") - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) - else: - train_sampler = torch.utils.data.RandomSampler(dataset) - test_sampler = torch.utils.data.SequentialSampler(dataset_test) - - if args.aspect_ratio_group_factor >= 0: - group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) - train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) - else: - train_batch_sampler = torch.utils.data.BatchSampler( - train_sampler, args.batch_size, drop_last=True) - - data_loader = torch.utils.data.DataLoader( - dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn) - - data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=1, - sampler=test_sampler, num_workers=args.workers, - collate_fn=utils.collate_fn) - - print("Creating model") - kwargs = { - "trainable_backbone_layers": args.trainable_backbone_layers - } - if "rcnn" in args.model: - if args.rpn_score_thresh is not None: - kwargs["rpn_score_thresh"] = args.rpn_score_thresh - model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained, - **kwargs) - model.to(device) - if args.distributed and args.sync_bn: - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - - model_without_ddp = model - if args.distributed: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) - model_without_ddp = model.module - - params = [p for p in model.parameters() if p.requires_grad] - optimizer = torch.optim.SGD( - params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - args.lr_scheduler = args.lr_scheduler.lower() - if args.lr_scheduler == 'multisteplr': - lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) - elif args.lr_scheduler == 'cosineannealinglr': - lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) - else: - raise RuntimeError("Invalid lr scheduler '{}'. 
Only MultiStepLR and CosineAnnealingLR " - "are supported.".format(args.lr_scheduler)) - - if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 - - if args.test_only: - evaluate(model, data_loader_test, device=device) - return - - print("Start training") - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) - lr_scheduler.step() - if args.output_dir: - checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'args': args, - 'epoch': epoch - } - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'checkpoint.pth')) - - # evaluate after every epoch - evaluate(model, data_loader_test, device=device) - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) - - -if __name__ == "__main__": - args = get_args_parser().parse_args() - main(args) diff --git a/vision b/vision new file mode 160000 index 0000000..a83b9a1 --- /dev/null +++ b/vision @@ -0,0 +1 @@ +Subproject commit a83b9a17e441e6d77e9d59ca14d75fb5ba8c31f6
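
Addendum (not part of the patch above): a minimal, hedged inference sketch for the checkpoints that the training loop in old_files/data.py writes out (model_weights_start_<epoch>.pth, model_weights_post_train_<epoch>.pth, model_weights_post_step_<epoch>.pth). The checkpoint filename and the image path used below are placeholders, not files guaranteed to exist in this repo; the model construction simply mirrors get_model() from the diff (Faster R-CNN ResNet50-FPN with its box predictor replaced by a 2-class FastRCNNPredictor, i.e. one bird class plus background), so a state dict saved by that code should load without key mismatches.

    import torch
    import torchvision
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
    from torchvision.transforms import functional as F
    from PIL import Image


    def load_detector(weights_path, num_classes=2, device="cpu"):
        # Rebuild the same architecture as get_model() in the patch:
        # stock Faster R-CNN ResNet50-FPN with the classification head
        # swapped for a (1 bird class + background) = 2-class predictor.
        # pretrained=False here because the saved state dict overwrites
        # every parameter anyway.
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        model.load_state_dict(torch.load(weights_path, map_location=device))
        model.eval()
        return model.to(device)


    @torch.no_grad()
    def detect(model, image_path, device="cpu", score_thresh=0.5):
        # Same preprocessing the dataset uses: PIL RGB image -> float tensor in [0, 1].
        img = Image.open(image_path).convert("RGB")
        tensor = F.to_tensor(img).to(device)
        # In eval mode the model returns one dict per input image with
        # xyxy "boxes", integer "labels" and confidence "scores".
        output = model([tensor])[0]
        keep = output["scores"] >= score_thresh
        return output["boxes"][keep].cpu(), output["scores"][keep].cpu()


    if __name__ == "__main__":
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Placeholder paths: substitute a checkpoint produced by the training
        # loop in this patch and any test image on disk.
        model = load_detector("model_weights_post_train_9.pth", device=device)
        boxes, scores = detect(model, "example.jpg", device=device)
        print(boxes, scores)

This is only a sketch under the assumptions stated above; old_files/test.py in the patch does the equivalent with cv2 and hard-coded Windows paths, and building the model via fasterrcnn_resnet50_fpn(num_classes=2), as test.py does, loads the same weights just as well.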