2021-07-01 15:22:44 -04:00
parent b0a8d52855
commit 83195da92c
30 changed files with 3253 additions and 283 deletions

.gitignore (vendored): 1 changed line

@@ -1 +1,2 @@
 *.pth
+example

.vscode/settings.json (vendored, new file, 3 lines):
{
    "python.formatting.provider": "black"
}

6 binary files changed (contents not shown).

data.py: 98 changed lines

@@ -1,18 +1,19 @@
 # %%
+import os
+import numpy as np
+import torch
+from PIL import Image
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-from collections import defaultdict as ddict
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 import json
 import torch
-from torchvision import datasets, transforms as T
-import cv2
+from torchvision import transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
 from references.detection import utils, engine
@@ -28,9 +29,6 @@ def get_transform(train):
     transforms.append(T.RandomHorizontalFlip(0.5))
     return T.Compose(transforms)
-IMAGE_MEAN = [0.485, 0.456, 0.406]
-IMAGE_STD = [0.229, 0.224, 0.225]
 PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
@@ -43,12 +41,12 @@ def create_map(list_in, from_key, to_key):
 class iNaturalistDataset(torch.utils.data.Dataset):
-    def __init__(self, validation=False, train=False):
+    def __init__(self, validation=False, train=False, transforms = None):
         self.validation = validation
         self.train = train
-        self.transforms = T.Compose([T.Resize(600, max_size=1024), T.ToTensor()])
+        self.transforms = transforms
         if validation:
             json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
@@ -65,7 +63,7 @@ class iNaturalistDataset(torch.utils.data.Dataset):
         for category in f["categories"]:
             if category["supercategory"] == "Aves":
-                if category['name'] in ['Archilochus colubris','Icterus galbula']:
+                if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
                     print(category['name'])
                     categories.append(category)
@@ -101,44 +99,35 @@ class iNaturalistDataset(torch.utils.data.Dataset):
         self.idx_to_id = [x for x in self.images]
         self.num_classes = len(self.categories) + 1
         self.num_samples = len(self.images)
-        self.transforms = [
-            data_aug.RandomHorizontalFlip(0.5),
-            data_aug.Resize(600),
-        ]
-        self.pre_transform = T.Compose([T.ToTensor()])#],T.Normalize(mean=[0.485, 0.456, 0.406],
-        #std=[0.229, 0.224, 0.225])])
     def __len__(self):
         return self.num_samples
-    def transform(self, img, bbox):
-        for x in self.transforms:
-            img, bbox = x(img, bbox)
-        img = self.pre_transform(img)
-        return img, bbox
     def __getitem__(self, idx):
         idd = self.idx_to_id[idx]
         c_image = self.images[idd]
-        # print(c_image, idx, self.validation, self.train)
-        # breakpoint()
-        image = np.asarray(cv2.imread(c_image["path"])[:,:,::-1].copy(),dtype=np.float32)
+        img_path = c_image["path"]
+        img = Image.open(img_path).convert("RGB")
         annot = c_image["annotation"]
         bbox = annot["bbox"]
-        bbox.append(annot["new_category_id"])
-        bbox = np.asarray([bbox], dtype=np.float32)
-        image, bbox = self.transform(image.copy(), bbox.copy())
-        boxes = torch.as_tensor(bbox[:,:4], dtype=torch.float32)
+        boxes = bbox
         target = dict()
-        target["boxes"] = boxes
+        target["boxes"] = torch.as_tensor([boxes])
         target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
         target['image_id'] = torch.tensor([annot['image_id']])
-        target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+        target['area'] = torch.as_tensor([annot['area']])
         target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
-        return image, target
+        if self.transforms is not None:
+            img, target = self.transforms(img, target)
+        return img, target
+# %%
+# v = iNaturalistDataset(validation=True)
 # v = iNaturalistDataset(validation= True)
 # o = v[10]
@@ -149,24 +138,45 @@ class iNaturalistDataset(torch.utils.data.Dataset):
 # plt.imshow(ox.permute([1,2,0]))
 # plt.savefig('crap2.png')
+def get_model(num_classes):
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+    num_classes = 2 # 1 class (person) + background
+    in_features = model.roi_heads.box_predictor.cls_score.in_features
+    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+    return model
+import transforms as T
+def get_transform(train):
+    transforms = []
+    transforms.append(T.ToTensor())
+    if train:
+        transforms.append(T.RandomHorizontalFlip(0.5))
+    return T.Compose(transforms)
+from engine import train_one_epoch, evaluate
+import utils
 # %%
 def run():
-    val_dataset = iNaturalistDataset(validation=True)
-    train_dataset = iNaturalistDataset(train=True)
+    val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True))
+    train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False))
     train_data_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
+        train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
     val_data_loader = torch.utils.data.DataLoader(
-        val_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn
+        val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
     )
-    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
-        pretrained=True, num_classes=train_dataset.num_classes, progress=True
-    )
+    import torchvision
+    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+    num_classes = 2
+    model = get_model(num_classes)
     model.to(device)
     # construct an optimizer
     params = [p for p in model.parameters() if p.requires_grad]
     optimizer = torch.optim.SGD(params, lr=0.005,
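For reference, the rewritten `__getitem__` now returns a PIL image plus a target dict in the format the torchvision detection models expect. A minimal sketch of that contract (editor's illustration; the image size and box values are made up):

```python
import torch
from PIL import Image

img = Image.new("RGB", (640, 480))  # stand-in for Image.open(img_path).convert("RGB")
target = {
    "boxes": torch.tensor([[100.0, 120.0, 300.0, 340.0]]),  # [N, 4] in x1, y1, x2, y2
    "labels": torch.tensor([1], dtype=torch.int64),          # 0 is reserved for background
    "image_id": torch.tensor([0]),
    "area": torch.tensor([44000.0]),
    "iscrowd": torch.zeros((1,), dtype=torch.int64),
}
# The reference-style transforms (ToTensor, RandomHorizontalFlip from
# vision/references/detection) accept and return the (img, target) pair,
# which is why __getitem__ now applies self.transforms to both.
```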

2 binary files changed (contents not shown).

data_aug/bbox_util.py (new file, 300 lines):
import cv2
import numpy as np
def draw_rect(im, cords, color = None):
"""Draw the rectangle on the image
Parameters
----------
im : numpy.ndarray
numpy image
cords: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
Returns
-------
numpy.ndarray
numpy image with bounding boxes drawn on it
"""
im = im.copy()
cords = cords[:,:4]
cords = cords.reshape(-1,4)
if not color:
color = [255,255,255]
for cord in cords:
pt1, pt2 = (cord[0], cord[1]) , (cord[2], cord[3])
pt1 = int(pt1[0]), int(pt1[1])
pt2 = int(pt2[0]), int(pt2[1])
im = cv2.rectangle(im.copy(), pt1, pt2, color, int(max(im.shape[:2])/200))
return im
def bbox_area(bbox):
return (bbox[:,2] - bbox[:,0])*(bbox[:,3] - bbox[:,1])
def clip_box(bbox, clip_box, alpha):
"""Clip the bounding boxes to the borders of an image
Parameters
----------
bbox: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
clip_box: numpy.ndarray
An array of shape (4,) specifying the diagonal co-ordinates of the image
The coordinates are represented in the format `x1 y1 x2 y2`
alpha: float
If the fraction of a bounding box left in the image after being clipped is
less than `alpha` the bounding box is dropped.
Returns
-------
numpy.ndarray
Numpy array containing **clipped** bounding boxes of shape `N X 4` where N is the
number of bounding boxes left after clipping and the bounding boxes are represented in the
format `x1 y1 x2 y2`
"""
ar_ = (bbox_area(bbox))
x_min = np.maximum(bbox[:,0], clip_box[0]).reshape(-1,1)
y_min = np.maximum(bbox[:,1], clip_box[1]).reshape(-1,1)
x_max = np.minimum(bbox[:,2], clip_box[2]).reshape(-1,1)
y_max = np.minimum(bbox[:,3], clip_box[3]).reshape(-1,1)
bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:,4:]))
delta_area = ((ar_ - bbox_area(bbox))/ar_)
mask = (delta_area < (1 - alpha)).astype(int)
bbox = bbox[mask == 1,:]
return bbox
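# Editor's note (illustrative): with alpha = 0.25 a box survives if it keeps at
# least 25% of its area after clipping. E.g. clipping [[-50, 10, 50, 90, 0]] to an
# image box [0, 0, 100, 100] keeps 50% of the area, so the box is retained as
# [[0, 10, 50, 90, 0]]; a box left with under 25% of its area would be dropped.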
def rotate_im(image, angle):
"""Rotate the image.
Rotate the image such that the rotated image is enclosed inside the tightest
rectangle. The area not occupied by the pixels of the original image is colored
black.
Parameters
----------
image : numpy.ndarray
numpy image
angle : float
angle by which the image is to be rotated
Returns
-------
numpy.ndarray
Rotated Image
"""
# grab the dimensions of the image and then determine the
# centre
(h, w) = image.shape[:2]
(cX, cY) = (w // 2, h // 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# perform the actual rotation and return the image
image = cv2.warpAffine(image, M, (nW, nH))
# image = cv2.resize(image, (w,h))
return image
def get_corners(bboxes):
"""Get corners of bounding boxes
Parameters
----------
bboxes: numpy.ndarray
Numpy array containing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
Returns
-------
numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
"""
width = (bboxes[:,2] - bboxes[:,0]).reshape(-1,1)
height = (bboxes[:,3] - bboxes[:,1]).reshape(-1,1)
x1 = bboxes[:,0].reshape(-1,1)
y1 = bboxes[:,1].reshape(-1,1)
x2 = x1 + width
y2 = y1
x3 = x1
y3 = y1 + height
x4 = bboxes[:,2].reshape(-1,1)
y4 = bboxes[:,3].reshape(-1,1)
corners = np.hstack((x1,y1,x2,y2,x3,y3,x4,y4))
return corners
def rotate_box(corners,angle, cx, cy, h, w):
"""Rotate the bounding box.
Parameters
----------
corners : numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
angle : float
angle by which the image is to be rotated
cx : int
x coordinate of the center of image (about which the box will be rotated)
cy : int
y coordinate of the center of image (about which the box will be rotated)
h : int
height of the image
w : int
width of the image
Returns
-------
numpy.ndarray
Numpy array of shape `N x 8` containing N rotated bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
"""
corners = corners.reshape(-1,2)
corners = np.hstack((corners, np.ones((corners.shape[0],1), dtype = type(corners[0][0]))))
M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cx
M[1, 2] += (nH / 2) - cy
# Prepare the vector to be transformed
calculated = np.dot(M,corners.T).T
calculated = calculated.reshape(-1,8)
return calculated
def get_enclosing_box(corners):
"""Get an enclosing box for ratated corners of a bounding box
Parameters
----------
corners : numpy.ndarray
Numpy array of shape `N x 8` containing N bounding boxes each described by their
corner co-ordinates `x1 y1 x2 y2 x3 y3 x4 y4`
Returns
-------
numpy.ndarray
Numpy array containing enclosing bounding boxes of shape `N X 4` where N is the
number of bounding boxes and the bounding boxes are represented in the
format `x1 y1 x2 y2`
"""
x_ = corners[:,[0,2,4,6]]
y_ = corners[:,[1,3,5,7]]
xmin = np.min(x_,1).reshape(-1,1)
ymin = np.min(y_,1).reshape(-1,1)
xmax = np.max(x_,1).reshape(-1,1)
ymax = np.max(y_,1).reshape(-1,1)
final = np.hstack((xmin, ymin, xmax, ymax,corners[:,8:]))
return final
def letterbox_image(img, inp_dim):
'''resize image with unchanged aspect ratio using padding
Parameters
----------
img : numpy.ndarray
Image
inp_dim: int
side length of the square canvas onto which the resized image is padded
Returns
-------
numpy.ndarray:
Resized image
'''
inp_dim = (inp_dim, inp_dim)
img_w, img_h = img.shape[1], img.shape[0]
w, h = inp_dim
new_w = int(img_w * min(w/img_w, h/img_h))
new_h = int(img_h * min(w/img_w, h/img_h))
resized_image = cv2.resize(img, (new_w,new_h))
canvas = np.full((inp_dim[1], inp_dim[0], 3), 0)
canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
return canvas
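Chained together, these helpers implement the box-rotation path used by RandomRotate in data_aug.py. A simplified sketch (editor's illustration; it skips the image warp and rescale that RandomRotate also performs):

```python
import numpy as np
from bbox_util import get_corners, rotate_box, get_enclosing_box, clip_box

# One box in [x1, y1, x2, y2, class] format on a hypothetical 200x100 image.
bboxes = np.array([[20.0, 30.0, 120.0, 80.0, 1.0]])
h, w = 100, 200
corners = get_corners(bboxes)                    # N x 8 corner coordinates
corners = np.hstack((corners, bboxes[:, 4:]))    # carry the class column along
corners[:, :8] = rotate_box(corners[:, :8], 15, w // 2, h // 2, h, w)
rotated = get_enclosing_box(corners)             # back to N x 5 axis-aligned boxes
rotated = clip_box(rotated, [0, 0, w, h], 0.25)  # drop boxes mostly rotated out
```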

data_aug/data_aug.py (new file, 856 lines):
import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
import sys
import os
lib_path = os.path.join(os.path.realpath("."), "data_aug")
sys.path.append(lib_path)
from bbox_util import *
class RandomHorizontalFlip(object):
"""Randomly horizontally flips the Image with the probability *p*
Parameters
----------
p: float
The probability with which the image is flipped
Returns
-------
numpy.ndarray
Flipped image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, p=0.5):
self.p = p
def __call__(self, img, bboxes):
img_center = np.array(img.shape[:2])[::-1]/2
img_center = np.hstack((img_center, img_center))
if random.random() < self.p:
img = img[:, ::-1, :]
bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])
box_w = abs(bboxes[:, 0] - bboxes[:, 2])
bboxes[:, 0] -= box_w
bboxes[:, 2] += box_w
return img, bboxes
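# Worked example (editor's illustration): for a 100 px wide image, img_center[0]
# is 50, so a box with x1=10, x2=30 first maps to x1=90, x2=70; the box_w
# correction then swaps the pair back into x1 < x2 order as x1=70, x2=90.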
class HorizontalFlip(object):
"""Randomly horizontally flips the Image with the probability *p*
Parameters
----------
p: float
The probability with which the image is flipped
Returns
-------
numpy.ndaaray
Flipped image in the numpy format of shape `HxWxC`
numpy.ndarray
Tranformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self):
pass
def __call__(self, img, bboxes):
img_center = np.array(img.shape[:2])[::-1]/2
img_center = np.hstack((img_center, img_center))
img = img[:, ::-1, :]
bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])
box_w = abs(bboxes[:, 0] - bboxes[:, 2])
bboxes[:, 0] -= box_w
bboxes[:, 2] += box_w
return img, bboxes
class RandomScale(object):
"""Randomly scales an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
scale: float or tuple(float)
if **float**, the image is scaled by a factor drawn
randomly from a range (1 - `scale` , 1 + `scale`). If **tuple**,
the `scale` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Scaled image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, scale = 0.2, diff = False):
self.scale = scale
if type(self.scale) == tuple:
assert len(self.scale) == 2, "Invalid range"
assert self.scale[0] > -1, "Scale factor can't be less than -1"
assert self.scale[1] > -1, "Scale factor can't be less than -1"
else:
assert self.scale > 0, "Please input a positive float"
self.scale = (max(-1, -self.scale), self.scale)
self.diff = diff
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
if self.diff:
scale_x = random.uniform(*self.scale)
scale_y = random.uniform(*self.scale)
else:
scale_x = random.uniform(*self.scale)
scale_y = scale_x
resize_scale_x = 1 + scale_x
resize_scale_y = 1 + scale_y
img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y)
bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
canvas = np.zeros(img_shape, dtype = np.uint8)
y_lim = int(min(resize_scale_y,1)*img_shape[0])
x_lim = int(min(resize_scale_x,1)*img_shape[1])
canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:]
img = canvas
bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class Scale(object):
"""Scales the image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
scale_x: float
The factor by which the image is scaled horizontally
scale_y: float
The factor by which the image is scaled vertically
Returns
-------
numpy.ndarray
Scaled image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, scale_x = 0.2, scale_y = 0.2):
self.scale_x = scale_x
self.scale_y = scale_y
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
resize_scale_x = 1 + self.scale_x
resize_scale_y = 1 + self.scale_y
img= cv2.resize(img, None, fx = resize_scale_x, fy = resize_scale_y)
bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
canvas = np.zeros(img_shape, dtype = np.uint8)
y_lim = int(min(resize_scale_y,1)*img_shape[0])
x_lim = int(min(resize_scale_x,1)*img_shape[1])
canvas[:y_lim,:x_lim,:] = img[:y_lim,:x_lim,:]
img = canvas
bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class RandomTranslate(object):
"""Randomly Translates the image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
translate: float or tuple(float)
if **float**, the image is translated by a fraction of its size drawn
randomly from the range (-`translate`, `translate`). If **tuple**, the
fraction is drawn randomly from the range specified by the tuple
Returns
-------
numpy.ndarray
Translated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, translate = 0.2, diff = False):
self.translate = translate
if type(self.translate) == tuple:
assert len(self.translate) == 2, "Invalid range"
assert 0 < self.translate[0] < 1
assert 0 < self.translate[1] < 1
else:
assert self.translate > 0 and self.translate < 1
self.translate = (-self.translate, self.translate)
self.diff = diff
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
#translate the image
#percentage of the dimension of the image to translate
translate_factor_x = random.uniform(*self.translate)
translate_factor_y = random.uniform(*self.translate)
if not self.diff:
translate_factor_y = translate_factor_x
canvas = np.zeros(img_shape).astype(np.uint8)
corner_x = int(translate_factor_x*img.shape[1])
corner_y = int(translate_factor_y*img.shape[0])
#change the origin to the top-left corner of the translated box
orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]
mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
img = canvas
bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class Translate(object):
"""Randomly Translates the image
Bounding boxes which have an area of less than 25% in the remaining in the
transformed image is dropped. The resolution is maintained, and the remaining
area if any is filled by black color.
Parameters
----------
translate_x: float
The fraction of the image width by which the image is translated
translate_y: float
The fraction of the image height by which the image is translated
Returns
-------
numpy.ndarray
Translated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, translate_x = 0.2, translate_y = 0.2, diff = False):
self.translate_x = translate_x
self.translate_y = translate_y
assert self.translate_x > 0 and self.translate_x < 1
assert self.translate_y > 0 and self.translate_y < 1
def __call__(self, img, bboxes):
#Chose a random digit to scale by
img_shape = img.shape
#translate the image
#percentage of the dimension of the image to translate
translate_factor_x = self.translate_x
translate_factor_y = self.translate_y
canvas = np.zeros(img_shape).astype(np.uint8)
#get the top-left corner co-ordinates of the shifted box
corner_x = int(translate_factor_x*img.shape[1])
corner_y = int(translate_factor_y*img.shape[0])
#change the origin to the top-left corner of the translated box
orig_box_cords = [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]
mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
img = canvas
bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
return img, bboxes
class RandomRotate(object):
"""Randomly rotates an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
angle: float or tuple(float)
if **float**, the image is rotated by a factor drawn
randomly from a range (-`angle`, `angle`). If **tuple**,
the `angle` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Rotated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, angle = 10):
self.angle = angle
if type(self.angle) == tuple:
assert len(self.angle) == 2, "Invalid range"
else:
self.angle = (-self.angle, self.angle)
def __call__(self, img, bboxes):
angle = random.uniform(*self.angle)
w,h = img.shape[1], img.shape[0]
cx, cy = w//2, h//2
img = rotate_im(img, angle)
corners = get_corners(bboxes)
corners = np.hstack((corners, bboxes[:,4:]))
corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)
new_bbox = get_enclosing_box(corners)
scale_factor_x = img.shape[1] / w
scale_factor_y = img.shape[0] / h
img = cv2.resize(img, (w,h))
new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
bboxes = new_bbox
bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
return img, bboxes
class Rotate(object):
"""Rotates an image
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
angle: float
The angle by which the image is to be rotated
Returns
-------
numpy.ndarray
Rotated image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, angle):
self.angle = angle
def __call__(self, img, bboxes):
"""
Args:
img (numpy.ndarray): Image to be rotated.
bboxes (numpy.ndarray): Bounding boxes to transform along with the image.
Returns:
numpy.ndarray: Rotated image and transformed bounding boxes.
"""
angle = self.angle
w,h = img.shape[1], img.shape[0]
cx, cy = w//2, h//2
corners = get_corners(bboxes)
corners = np.hstack((corners, bboxes[:,4:]))
img = rotate_im(img, angle)
corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)
new_bbox = get_enclosing_box(corners)
scale_factor_x = img.shape[1] / w
scale_factor_y = img.shape[0] / h
img = cv2.resize(img, (w,h))
new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y]
bboxes = new_bbox
bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
return img, bboxes
class RandomShear(object):
"""Randomly shears an image in horizontal direction
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
shear_factor: float or tuple(float)
if **float**, the image is sheared horizontally by a factor drawn
randomly from a range (-`shear_factor`, `shear_factor`). If **tuple**,
the `shear_factor` is drawn randomly from values specified by the
tuple
Returns
-------
numpy.ndarray
Sheared image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, shear_factor = 0.2):
self.shear_factor = shear_factor
if type(self.shear_factor) == tuple:
assert len(self.shear_factor) == 2, "Invalid range for scaling factor"
else:
self.shear_factor = (-self.shear_factor, self.shear_factor)
def __call__(self, img, bboxes):
shear_factor = random.uniform(*self.shear_factor)
w,h = img.shape[1], img.shape[0]
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
nW = img.shape[1] + abs(shear_factor*img.shape[0])
bboxes[:,[0,2]] += ((bboxes[:,[1,3]]) * abs(shear_factor) ).astype(int)
img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
img = cv2.resize(img, (w,h))
scale_factor_x = nW / w
bboxes[:,:4] /= [scale_factor_x, 1, scale_factor_x, 1]
return img, bboxes
class Shear(object):
"""Shears an image in horizontal direction
Bounding boxes that retain less than 25% of their area in the
transformed image are dropped. The resolution is maintained, and any
remaining area is filled with black color.
Parameters
----------
shear_factor: float
Factor by which the image is sheared in the x-direction
Returns
-------
numpy.ndarray
Sheared image in the numpy format of shape `HxWxC`
numpy.ndarray
Transformed bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, shear_factor = 0.2):
self.shear_factor = shear_factor
def __call__(self, img, bboxes):
shear_factor = self.shear_factor
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
nW = img.shape[1] + abs(shear_factor*img.shape[0])
bboxes[:,[0,2]] += ((bboxes[:,[1,3]])*abs(shear_factor)).astype(int)
img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))
if shear_factor < 0:
img, bboxes = HorizontalFlip()(img, bboxes)
return img, bboxes
class Resize(object):
"""Resize the image in accordance to `image_letter_box` function in darknet
The aspect ratio is maintained. The longer side is resized to the input
size of the network, while the remaining space on the shorter side is filled
with black color. **This should be the last transform**
Parameters
----------
inp_dim : int
the side length of the square to which the image will be letterboxed
Returns
-------
numpy.ndarray
Resized image in the numpy format of shape `HxWxC`
numpy.ndarray
Resized bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, inp_dim):
self.inp_dim = inp_dim
def __call__(self, img, bboxes):
w,h = img.shape[1], img.shape[0]
img = letterbox_image(img, self.inp_dim)
scale = min(self.inp_dim/h, self.inp_dim/w)
bboxes[:,:4] *= (scale)
new_w = scale*w
new_h = scale*h
inp_dim = self.inp_dim
del_h = (inp_dim - new_h)/2
del_w = (inp_dim - new_w)/2
add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int)
bboxes[:,:4] += add_matrix
img = img.astype(np.uint8)
return img, bboxes
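# Worked example (editor's illustration): letterboxing a 200x100 image with
# inp_dim=100 gives scale = 0.5, a 100x50 resized image centred on a 100x100
# canvas, del_w = 0 and del_h = 25, so box coordinates are halved and then
# shifted down by 25 px.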
class RandomHSV(object):
"""HSV Transform to vary hue saturation and brightness
Hue has a range of 0-179
Saturation and Brightness have a range of 0-255.
Choose the amounts by which you want to change the above quantities accordingly.
Parameters
----------
hue : None or int or tuple (int)
If None, the hue of the image is left unchanged. If int,
a random int is uniformly sampled from (-hue, hue) and added to the
hue of the image. If tuple, the int is sampled from the range
specified by the tuple.
saturation : None or int or tuple(int)
If None, the saturation of the image is left unchanged. If int,
a random int is uniformly sampled from (-saturation, saturation)
and added to the saturation of the image. If tuple, the int is sampled
from the range specified by the tuple.
brightness : None or int or tuple(int)
If None, the brightness of the image is left unchanged. If int,
a random int is uniformly sampled from (-brightness, brightness)
and added to the brightness of the image. If tuple, the int is sampled
from the range specified by the tuple.
Returns
-------
numpy.ndarray
Transformed image in the numpy format of shape `HxWxC`
numpy.ndarray
Unchanged bounding box co-ordinates of the format `n x 4` where n is
number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
"""
def __init__(self, hue = None, saturation = None, brightness = None):
if hue:
self.hue = hue
else:
self.hue = 0
if saturation:
self.saturation = saturation
else:
self.saturation = 0
if brightness:
self.brightness = brightness
else:
self.brightness = 0
if type(self.hue) != tuple:
self.hue = (-self.hue, self.hue)
if type(self.saturation) != tuple:
self.saturation = (-self.saturation, self.saturation)
if type(self.brightness) != tuple:
self.brightness = (-self.brightness, self.brightness)
def __call__(self, img, bboxes):
hue = random.randint(*self.hue)
saturation = random.randint(*self.saturation)
brightness = random.randint(*self.brightness)
img = img.astype(int)
a = np.array([hue, saturation, brightness]).astype(int)
img += np.reshape(a, (1,1,3))
img = np.clip(img, 0, 255)
img[:,:,0] = np.clip(img[:,:,0],0, 179)
img = img.astype(np.uint8)
return img, bboxes
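# Editor's note (illustrative): RandomHSV(hue=20, saturation=30, brightness=30)
# adds a uniform random integer in [-20, 20] to channel 0 and integers in
# [-30, 30] to channels 1 and 2, then clips channel 0 to [0, 179] and all
# channels to [0, 255]; the bounding boxes pass through unchanged.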
class Sequence(object):
"""Initialise Sequence object
Apply a Sequence of transformations to the images/boxes.
Parameters
----------
augmentations : list
List containing Transformation Objects in Sequence they are to be
applied
probs : float or list
If **int**, the probability with which each of the transformation will
be applied. If **list**, the length must be equal to *augmentations*.
Each element of this list is the probability with which each
corresponding transformation is applied
Returns
-------
Sequence
Sequence Object
"""
def __init__(self, augmentations, probs = 1):
self.augmentations = augmentations
self.probs = probs
def __call__(self, images, bboxes):
for i, augmentation in enumerate(self.augmentations):
if type(self.probs) == list:
prob = self.probs[i]
else:
prob = self.probs
if random.random() < prob:
images, bboxes = augmentation(images, bboxes)
return images, bboxes
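A minimal usage sketch for the pipeline above (editor's illustration; it assumes the data_aug folder is on sys.path so that data_aug.py is importable, as data.py arranges):

```python
import numpy as np
from data_aug import RandomHorizontalFlip, RandomScale, RandomRotate, Sequence

# Dummy image and one box in [x1, y1, x2, y2, class] format.
img = np.random.randint(0, 255, (480, 640, 3)).astype(np.float32)
bboxes = np.array([[50.0, 60.0, 200.0, 220.0, 1.0]])

seq = Sequence(
    [RandomHorizontalFlip(0.5), RandomScale(0.2), RandomRotate(10)],
    probs=[1.0, 0.5, 0.5],  # per-transform application probability
)
img, bboxes = seq(img.copy(), bboxes.copy())
```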

model.py (new file, 221 lines):
# %%
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import json
import torch
from torchvision import transforms as T
import numpy as np
import os
import sys
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
from references.detection import utils, engine
import data_aug
import bbox_util
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def create_map(list_in, from_key, to_key):
cmap = dict()
for l in list_in:
cmap[l[from_key]] = l[to_key]
return cmap
class iNaturalistDataset(torch.utils.data.Dataset):
def __init__(self, validation=False, train=False, transforms = None):
self.validation = validation
self.train = train
self.transforms = transforms
if validation:
json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
elif train:
json_path = os.path.join(
PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
)
with open(json_path, "r") as rj:
f = json.load(rj)
categories = list()
image_info = dict()
for category in f["categories"]:
if category["supercategory"] == "Aves":
if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
print(category['name'])
categories.append(category)
categories = sorted(categories, key=lambda k: k["name"])
for idx, cat in enumerate(categories):
cat["new_id"] = idx + 1
orig_to_new_id = create_map(categories, "id", "new_id")
for annot in f["annotations"]:
if annot["category_id"] in orig_to_new_id:
annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
id = annot["image_id"]
if id not in image_info:
image_info[id] = dict()
annot["bbox"][2] += annot["bbox"][0]
annot["bbox"][3] += annot["bbox"][1]
image_info[id]["annotation"] = annot
for img in f["images"]:
id = img["id"]
path = os.path.join(PATH_ROOT, img["file_name"])
height = img["height"]
width = img["width"]
if id in image_info:
image_info[id].update({"path": path, "height": height, "width": width})
for idx, (id, im_in) in enumerate(image_info.items()):
im_in["idx"] = idx
self.images = image_info
self.categories = categories
self.idx_to_id = [x for x in self.images]
self.num_classes = len(self.categories) + 1
self.num_samples = len(self.images)
def __len__(self):
return self.num_samples
def __getitem__(self, idx):
idd = self.idx_to_id[idx]
c_image = self.images[idd]
img_path = c_image["path"]
img = Image.open(img_path).convert("RGB")
annot = c_image["annotation"]
bbox = annot["bbox"]
boxes = bbox
target = dict()
target["boxes"] = torch.as_tensor([boxes])
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
target['image_id'] = torch.tensor([annot['image_id']])
target['area'] = torch.as_tensor([annot['area']])
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
# %%
# v = iNaturalistDataset(validation=True)
# v = iNaturalistDataset(validation= True)
# o = v[10]
# %%
# oimage = t.tensor(o[0]*255, dtype=t.uint8)
# import matplotlib.pyplot as plt
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
# plt.imshow(ox.permute([1,2,0]))
# plt.savefig('crap2.png')
def get_model(num_classes):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2 # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
return model
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
from engine import train_one_epoch, evaluate
import utils
# %%
def run():
val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True))
train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False))
train_data_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
val_data_loader = torch.utils.data.DataLoader(
val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
num_classes = 2
model = get_model(num_classes)
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
print(epoch)
torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth')
# train for one epoch, printing every 10 iterations
engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth')
# update the learning rate
lr_scheduler.step()
torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth')
# evaluate on the test dataset
engine.evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
run()
# # %%
# json_path = os.path.join(
# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
# )
# with open(json_path, "r") as rj:
# f = json.load(rj)
# # %%
# image_id: 2358
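run() checkpoints the weights three times per epoch; a sketch of reloading one of those files for inference (editor's illustration; the epoch index in the filename is an example):

```python
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Rebuild the same two-class architecture that get_model() produces,
# then load weights written by run().
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
model.load_state_dict(torch.load("model_weights_post_train_9.pth", map_location="cpu"))
model.eval()
with torch.no_grad():
    detections = model([torch.rand(3, 600, 800)])  # dummy image tensor
print(detections[0]["boxes"], detections[0]["scores"])
```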

old_files/README.md (new file, 82 lines):
# Object detection reference training scripts
This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, to provide baseline
training and evaluation scripts to quickly bootstrap research.
To execute the example commands below you must install the following:
```
cython
pycocotools
matplotlib
```
You must modify the following flags:
`--data-path=/path/to/coco/dataset`
`--nproc_per_node=<number_of_gpus_available>`
Unless otherwise noted, all models have been trained on 8x V100 GPUs.
### Faster R-CNN ResNet-50 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Faster R-CNN MobileNetV3-Large FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Faster R-CNN MobileNetV3-Large 320 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### RetinaNet
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model retinanet_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01
```
### SSD300 VGG16
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssd300_vgg16 --epochs 120\
--lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4\
--weight-decay 0.0005 --data-augmentation ssd
```
### SSDlite320 MobileNetV3-Large
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660\
--aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\
--weight-decay 0.00004 --data-augmentation ssdlite
```
### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model maskrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\
--lr-steps 36 43 --aspect-ratio-group-factor 3
```

old_files/coco_eval.py (new file, 352 lines):
import json
import tempfile
import numpy as np
import copy
import time
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from collections import defaultdict
import utils
class CocoEvaluator(object):
def __init__(self, coco_gt, iou_types):
assert isinstance(iou_types, (list, tuple))
coco_gt = copy.deepcopy(coco_gt)
self.coco_gt = coco_gt
self.iou_types = iou_types
self.coco_eval = {}
for iou_type in iou_types:
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
self.img_ids = []
self.eval_imgs = {k: [] for k in iou_types}
def update(self, predictions):
img_ids = list(np.unique(list(predictions.keys())))
self.img_ids.extend(img_ids)
for iou_type in self.iou_types:
results = self.prepare(predictions, iou_type)
coco_dt = loadRes(self.coco_gt, results) if results else COCO()
coco_eval = self.coco_eval[iou_type]
coco_eval.cocoDt = coco_dt
coco_eval.params.imgIds = list(img_ids)
img_ids, eval_imgs = evaluate(coco_eval)
self.eval_imgs[iou_type].append(eval_imgs)
def synchronize_between_processes(self):
for iou_type in self.iou_types:
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
def accumulate(self):
for coco_eval in self.coco_eval.values():
coco_eval.accumulate()
def summarize(self):
for iou_type, coco_eval in self.coco_eval.items():
print("IoU metric: {}".format(iou_type))
coco_eval.summarize()
def prepare(self, predictions, iou_type):
if iou_type == "bbox":
return self.prepare_for_coco_detection(predictions)
elif iou_type == "segm":
return self.prepare_for_coco_segmentation(predictions)
elif iou_type == "keypoints":
return self.prepare_for_coco_keypoint(predictions)
else:
raise ValueError("Unknown iou type {}".format(iou_type))
def prepare_for_coco_detection(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"bbox": box,
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
return coco_results
def prepare_for_coco_segmentation(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
scores = prediction["scores"]
labels = prediction["labels"]
masks = prediction["masks"]
masks = masks > 0.5
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
for mask in masks
]
for rle in rles:
rle["counts"] = rle["counts"].decode("utf-8")
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"segmentation": rle,
"score": scores[k],
}
for k, rle in enumerate(rles)
]
)
return coco_results
def prepare_for_coco_keypoint(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
keypoints = prediction["keypoints"]
keypoints = keypoints.flatten(start_dim=1).tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
'keypoints': keypoint,
"score": scores[k],
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results
def convert_to_xywh(boxes):
xmin, ymin, xmax, ymax = boxes.unbind(1)
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
def merge(img_ids, eval_imgs):
all_img_ids = utils.all_gather(img_ids)
all_eval_imgs = utils.all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
img_ids, eval_imgs = merge(img_ids, eval_imgs)
img_ids = list(img_ids)
eval_imgs = list(eval_imgs.flatten())
coco_eval.evalImgs = eval_imgs
coco_eval.params.imgIds = img_ids
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions
def createIndex(self):
# create index
# print('creating index...')
anns, cats, imgs = {}, {}, {}
imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
# print('index created!')
# create class members
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
maskUtils = mask_util
def loadRes(self, resFile):
"""
Load result file and return a result api object.
Args:
self (obj): coco object with ground truth annotations
resFile (str): file name of result file
Returns:
res (obj): result api object
"""
res = COCO()
res.dataset['images'] = [img for img in self.dataset['images']]
# print('Loading and preparing results...')
# tic = time.time()
if isinstance(resFile, torch._six.string_classes):
anns = json.load(open(resFile))
elif type(resFile) == np.ndarray:
anns = self.loadNumpyAnnotations(resFile)
else:
anns = resFile
assert type(anns) == list, 'results is not an array of objects'
annsImgIds = [ann['image_id'] for ann in anns]
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if 'caption' in anns[0]:
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
for id, ann in enumerate(anns):
ann['id'] = id + 1
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
bb = ann['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in ann:
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
ann['area'] = bb[2] * bb[3]
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'segmentation' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
# now only support compressed RLE format as segmentation results
ann['area'] = maskUtils.area(ann['segmentation'])
if 'bbox' not in ann:
ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'keypoints' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
s = ann['keypoints']
x = s[0::3]
y = s[1::3]
x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
ann['area'] = (x2 - x1) * (y2 - y1)
ann['id'] = id + 1
ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
# print('DONE (t={:0.2f}s)'.format(time.time()- tic))
res.dataset['annotations'] = anns
createIndex(res)
return res
def evaluate(self):
'''
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
'''
# tic = time.time()
# print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
# print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == 'segm' or p.iouType == 'bbox':
computeIoU = self.computeIoU
elif p.iouType == 'keypoints':
computeIoU = self.computeOks
self.ious = {
(imgId, catId): computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
# this is NOT in the pycocotools code, but could be done outside
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
self._paramsEval = copy.deepcopy(self.params)
# toc = time.time()
# print('DONE (t={:0.2f}s).'.format(toc-tic))
return p.imgIds, evalImgs
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
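CocoEvaluator is driven in four phases: update() with per-image predictions, synchronize_between_processes(), accumulate(), and summarize(). A sketch of that loop, mirroring engine.evaluate from the torchvision references (editor's illustration; model, device, and val_data_loader are assumed to exist, and coco_gt would come from coco_utils.get_coco_api_from_dataset):

```python
evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
for images, targets in val_data_loader:
    images = [img.to(device) for img in images]
    outputs = model(images)  # list of dicts with boxes / labels / scores
    res = {
        t["image_id"].item(): {k: v.cpu() for k, v in o.items()}
        for t, o in zip(targets, outputs)
    }
    evaluator.update(res)
evaluator.synchronize_between_processes()
evaluator.accumulate()
evaluator.summarize()
```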

old_files/coco_utils.py (new file, 252 lines):
import copy
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
import transforms as T
class FilterAndRemapCocoCategories(object):
def __init__(self, categories, remap=True):
self.categories = categories
self.remap = remap
def __call__(self, image, target):
anno = target["annotations"]
anno = [obj for obj in anno if obj["category_id"] in self.categories]
if not self.remap:
target["annotations"] = anno
return image, target
anno = copy.deepcopy(anno)
for obj in anno:
obj["category_id"] = self.categories.index(obj["category_id"])
target["annotations"] = anno
return image, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def _coco_remove_images_without_annotations(dataset, cat_list=None):
def _has_only_empty_bbox(anno):
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
min_keypoints_per_image = 10
def _has_valid_annotation(anno):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
# keypoint tasks have a slightly different criterion for considering
# whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
assert isinstance(dataset, torchvision.datasets.CocoDetection)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = dataset.coco.loadAnns(ann_ids)
if cat_list:
anno = [obj for obj in anno if obj["category_id"] in cat_list]
if _has_valid_annotation(anno):
ids.append(ds_idx)
dataset = torch.utils.data.Subset(dataset, ids)
return dataset
def convert_to_coco_api(ds):
coco_ds = COCO()
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
ann_id = 1
dataset = {'images': [], 'categories': [], 'annotations': []}
categories = set()
for img_idx in range(len(ds)):
# find better way to get target
# targets = ds.get_annotations(img_idx)
img, targets = ds[img_idx]
image_id = targets["image_id"].item()
img_dict = {}
img_dict['id'] = image_id
img_dict['height'] = img.shape[-2]
img_dict['width'] = img.shape[-1]
dataset['images'].append(img_dict)
bboxes = targets["boxes"]
bboxes[:, 2:] -= bboxes[:, :2]
bboxes = bboxes.tolist()
labels = targets['labels'].tolist()
areas = targets['area'].tolist()
iscrowd = targets['iscrowd'].tolist()
if 'masks' in targets:
masks = targets['masks']
# make masks Fortran contiguous for coco_mask
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
if 'keypoints' in targets:
keypoints = targets['keypoints']
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
num_objs = len(bboxes)
for i in range(num_objs):
ann = {}
ann['image_id'] = image_id
ann['bbox'] = bboxes[i]
ann['category_id'] = labels[i]
categories.add(labels[i])
ann['area'] = areas[i]
ann['iscrowd'] = iscrowd[i]
ann['id'] = ann_id
if 'masks' in targets:
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
if 'keypoints' in targets:
ann['keypoints'] = keypoints[i]
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
dataset['annotations'].append(ann)
ann_id += 1
dataset['categories'] = [{'id': i} for i in sorted(categories)]
coco_ds.dataset = dataset
coco_ds.createIndex()
return coco_ds
def get_coco_api_from_dataset(dataset):
for _ in range(10):
if isinstance(dataset, torchvision.datasets.CocoDetection):
break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
return convert_to_coco_api(dataset)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = dict(image_id=image_id, annotations=target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
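# Minimal usage sketch (paths are placeholders, not from this repo):
#     ds = CocoDetection("coco/val2017",
#                        "coco/annotations/instances_val2017.json",
#                        transforms=ConvertCocoPolysToMask())
#     img, target = ds[0]   # target carries boxes/labels/masks tensors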
def get_coco(root, image_set, transforms, mode='instances'):
anno_file_template = "{}_{}2017.json"
PATHS = {
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
}
t = [ConvertCocoPolysToMask()]
if transforms is not None:
t.append(transforms)
transforms = T.Compose(t)
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
dataset = _coco_remove_images_without_annotations(dataset)
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
return dataset
def get_coco_kp(root, image_set, transforms):
return get_coco(root, image_set, transforms, mode="person_keypoints")
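# Example call (hedged: assumes the standard COCO 2017 directory layout under root):
#     dataset = get_coco("/data/coco", image_set="val", transforms=None, mode="instances")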

BIN
old_files/crap.png Normal file

Binary file not shown.


BIN
old_files/crap2.png Normal file

Binary file not shown.


221
old_files/data.py Normal file
View File

@@ -0,0 +1,221 @@
# %%
import json
import os
import sys

import numpy as np
import torch
import torchvision
from PIL import Image
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
from references.detection import utils, engine
import data_aug
import bbox_util
# NOTE: the effective get_transform is defined further down, after
# `import transforms as T` rebinds T to the detection-aware transforms module.
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def create_map(list_in, from_key, to_key):
cmap = dict()
for l in list_in:
cmap[l[from_key]] = l[to_key]
return cmap
class iNaturalistDataset(torch.utils.data.Dataset):
def __init__(self, validation=False, train=False, transforms=None):
self.validation = validation
self.train = train
self.transforms = transforms
if validation:
json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
elif train:
json_path = os.path.join(
PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
)
with open(json_path, "r") as rj:
f = json.load(rj)
categories = list()
image_info = dict()
for category in f["categories"]:
if category["supercategory"] == "Aves":
if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
print(category['name'])
categories.append(category)
categories = sorted(categories, key=lambda k: k["name"])
for idx, cat in enumerate(categories):
cat["new_id"] = idx + 1
orig_to_new_id = create_map(categories, "id", "new_id")
for annot in f["annotations"]:
if annot["category_id"] in orig_to_new_id:
annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
id = annot["image_id"]
if id not in image_info:
image_info[id] = dict()
annot["bbox"][2] += annot["bbox"][0]
annot["bbox"][3] += annot["bbox"][1]
image_info[id]["annotation"] = annot
for img in f["images"]:
id = img["id"]
path = os.path.join(PATH_ROOT, img["file_name"])
height = img["height"]
width = img["width"]
if id in image_info:
image_info[id].update({"path": path, "height": height, "width": width})
for idx, (id, im_in) in enumerate(image_info.items()):
im_in["idx"] = idx
self.images = image_info
self.categories = categories
self.idx_to_id = [x for x in self.images]
self.num_classes = len(self.categories) + 1
self.num_samples = len(self.images)
def __len__(self):
return self.num_samples
def __getitem__(self, idx):
idd = self.idx_to_id[idx]
c_image = self.images[idd]
img_path = c_image["path"]
img = Image.open(img_path).convert("RGB")
annot = c_image["annotation"]
boxes = annot["bbox"]  # already converted to [x1, y1, x2, y2] above
target = dict()
target["boxes"] = torch.as_tensor([boxes], dtype=torch.float32)
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
target['image_id'] = torch.tensor([annot['image_id']])
target['area'] = torch.as_tensor([annot['area']])
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
# %%
# v = iNaturalistDataset(validation=True)
# v = iNaturalistDataset(validation= True)
# o = v[10]
# %%
# oimage = torch.tensor(o[0]*255, dtype=torch.uint8)
# import matplotlib.pyplot as plt
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
# plt.imshow(ox.permute([1,2,0]))
# plt.savefig('crap2.png')
def get_model(num_classes):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# num_classes counts the background, e.g. 2 = 1 bird class + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
return model
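# Quick smoke test for the repurposed head (illustrative sketch):
#     m = get_model(num_classes=2)
#     m.eval()
#     with torch.no_grad():
#         preds = m([torch.rand(3, 600, 800)])  # list of dicts: boxes/labels/scores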
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
from engine import train_one_epoch, evaluate
import utils
# %%
def run():
# augment only the training split; keep validation deterministic
val_dataset = iNaturalistDataset(validation=True, transforms=get_transform(train=False))
train_dataset = iNaturalistDataset(train=True, transforms=get_transform(train=True))
train_data_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
)
val_data_loader = torch.utils.data.DataLoader(
val_dataset, batch_size=8, shuffle=False, num_workers=1, collate_fn=utils.collate_fn
)
num_classes = 2  # 1 bird class + background
model = get_model(num_classes)
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
print(epoch)
torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth')
# train for one epoch, printing every 10 iterations
engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth')
# update the learning rate
lr_scheduler.step()
torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth')
# evaluate on the test dataset
engine.evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
run()
# # %%
# json_path = os.path.join(
# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
# )
# with open(json_path, "r") as rj:
# f = json.load(rj)
# # %%
# image_id: 2358

110
old_files/engine.py Normal file
View File

@@ -0,0 +1,110 @@
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
if epoch == 0:
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
def _get_iou_types(model):
model_without_ddp = model
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model_without_ddp = model.module
iou_types = ["bbox"]
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
iou_types.append("segm")
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
iou_types.append("keypoints")
return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = 'Test:'
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
if torch.cuda.is_available():
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator
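# Typical call (sketch; assumes a DataLoader built with utils.collate_fn):
#     evaluator = evaluate(model, val_loader, device=torch.device("cuda"))
#     # evaluator.coco_eval["bbox"].stats then holds the 12 COCO AP/AR numbers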

28
old_files/fine_tuned.py Normal file
View File

@@ -0,0 +1,28 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2 # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280  # mobilenet_v2's feature extractor ends in a 1280-channel map
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
output_size=7,
sampling_ratio=2)
model = FasterRCNN(backbone,
num_classes=2,
rpn_anchor_generator=anchor_generator,
box_roi_pool=roi_pooler)
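# Smoke test for the mobilenet-backed model (illustrative sketch):
#     import torch
#     model.eval()
#     with torch.no_grad():
#         out = model([torch.rand(3, 300, 400)])  # one prediction dict per input image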
# %%

View File

@@ -15,19 +15,19 @@ sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
 sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
-model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 3)
+model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 2)
-model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_1.pth'))
+model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_9.pth'))
 model.eval()
 model.to('cuda')
-#img = r'D:\ishan\ml\inaturalist\test2017\00a903fa1d23b2f8f28248e81bc1c4a4.jpg'
-#img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210617093423.mp4_023.jpg'
-img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210609095848.mp4_133.jpg'
 import random
 rtdir = r'J:\hummingbird_imagenet\hummingbird'
 ff = os.listdir(rtdir)
+# %%
 img = os.path.join(rtdir,random.choice(ff))
 image = cv2.imread(img)[:,:,::-1].copy()
 o = T.ToTensor()(image).cuda()
@@ -43,7 +43,6 @@ plt.imshow(ox.permute([1,2,0]))
 # %%
 from data import iNaturalistDataset
 sd = iNaturalistDataset(validation=True)
-# # %%

233
old_files/train.py Normal file
View File

@@ -0,0 +1,233 @@
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco, get_coco_kp
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate
import presets
import utils
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train, data_augmentation):
return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
def get_args_parser(add_help=True):
import argparse
parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
parser.add_argument('--lr-step-size', default=8, type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-gamma', default=0.1, type=float,
help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
parser.add_argument('--trainable-backbone-layers', default=None, type=int,
help='number of trainable layers of backbone')
parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
parser.add_argument(
"--sync-bn",
dest="sync_bn",
help="Use sync batch norm",
action="store_true",
)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
return parser
def main(args):
if args.output_dir:
utils.mkdir(args.output_dir)
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
kwargs = {
"trainable_backbone_layers": args.trainable_backbone_layers
}
if "rcnn" in args.model:
if args.rpn_score_thresh is not None:
kwargs["rpn_score_thresh"] = args.rpn_score_thresh
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
**kwargs)
model.to(device)
if args.distributed and args.sync_bn:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
args.lr_scheduler = args.lr_scheduler.lower()
if args.lr_scheduler == 'multisteplr':
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
elif args.lr_scheduler == 'cosineannealinglr':
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
else:
raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
"are supported.".format(args.lr_scheduler))
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
checkpoint = {
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch
}
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'checkpoint.pth'))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
args = get_args_parser().parse_args()
main(args)

239
old_files/transforms.py Normal file
View File

@@ -0,0 +1,239 @@
import torch
import torchvision
from torch import nn, Tensor
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from typing import List, Tuple, Dict, Optional
def _flip_coco_person_keypoints(kps, width):
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
flipped_data = kps[:, flip_inds]
flipped_data[..., 0] = width - flipped_data[..., 0]
# Maintain COCO convention that if visibility == 0, then x, y = 0
inds = flipped_data[..., 2] == 0
flipped_data[inds] = 0
return flipped_data
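# Example: COCO orders keypoints [nose, l_eye, r_eye, l_ear, r_ear, ...], so
# flip_inds swaps each left/right pair (1<->2, 3<->4, ...) after mirroring x.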
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class RandomHorizontalFlip(T.RandomHorizontalFlip):
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if torch.rand(1) < self.p:
image = F.hflip(image)
if target is not None:
width, _ = F._get_image_size(image)
target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
if "keypoints" in target:
keypoints = target["keypoints"]
keypoints = _flip_coco_person_keypoints(keypoints, width)
target["keypoints"] = keypoints
return image, target
class ToTensor(nn.Module):
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
image = F.to_tensor(image)
return image, target
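# Composition sketch (hedged): these transforms take and return (image, target)
# pairs, so they chain through the Compose defined above:
#     tfm = Compose([ToTensor(), RandomHorizontalFlip(0.5)])
#     img, target = tfm(pil_image, target)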
class RandomIoUCrop(nn.Module):
def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
super().__init__()
# Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
self.min_scale = min_scale
self.max_scale = max_scale
self.min_aspect_ratio = min_aspect_ratio
self.max_aspect_ratio = max_aspect_ratio
if sampler_options is None:
sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
self.options = sampler_options
self.trials = trials
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if target is None:
raise ValueError("The targets can't be None for this transform.")
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
orig_w, orig_h = F._get_image_size(image)
while True:
# sample an option
idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
min_jaccard_overlap = self.options[idx]
if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option
return image, target
for _ in range(self.trials):
# check the aspect ratio limitations
r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
new_w = int(orig_w * r[0])
new_h = int(orig_h * r[1])
aspect_ratio = new_w / new_h
if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
continue
# check for 0 area crops
r = torch.rand(2)
left = int((orig_w - new_w) * r[0])
top = int((orig_h - new_h) * r[1])
right = left + new_w
bottom = top + new_h
if left == right or top == bottom:
continue
# check for any valid boxes with centers within the crop area
cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
if not is_within_crop_area.any():
continue
# check at least 1 box with jaccard limitations
boxes = target["boxes"][is_within_crop_area]
ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
dtype=boxes.dtype, device=boxes.device))
if ious.max() < min_jaccard_overlap:
continue
# keep only valid boxes and perform cropping
target["boxes"] = boxes
target["labels"] = target["labels"][is_within_crop_area]
target["boxes"][:, 0::2] -= left
target["boxes"][:, 1::2] -= top
target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
image = F.crop(image, top, left, new_h, new_w)
return image, target
class RandomZoomOut(nn.Module):
def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
super().__init__()
if fill is None:
fill = [0., 0., 0.]
self.fill = fill
self.side_range = side_range
if side_range[0] < 1. or side_range[0] > side_range[1]:
raise ValueError("Invalid canvas side range provided {}.".format(side_range))
self.p = p
@torch.jit.unused
def _get_fill_value(self, is_pil):
# type: (bool) -> int
# We fake the type to make it work on JIT
return tuple(int(x) for x in self.fill) if is_pil else 0
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
if torch.rand(1) < self.p:
return image, target
orig_w, orig_h = F._get_image_size(image)
r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
canvas_width = int(orig_w * r)
canvas_height = int(orig_h * r)
r = torch.rand(2)
left = int((canvas_width - orig_w) * r[0])
top = int((canvas_height - orig_h) * r[1])
right = canvas_width - (left + orig_w)
bottom = canvas_height - (top + orig_h)
if torch.jit.is_scripting():
fill = 0
else:
fill = self._get_fill_value(F._is_pil_image(image))
image = F.pad(image, [left, top, right, bottom], fill=fill)
if isinstance(image, torch.Tensor):
v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
image[..., :, (left + orig_w):] = v
if target is not None:
target["boxes"][:, 0::2] += left
target["boxes"][:, 1::2] += top
return image, target
class RandomPhotometricDistort(nn.Module):
def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
super().__init__()
self._brightness = T.ColorJitter(brightness=brightness)
self._contrast = T.ColorJitter(contrast=contrast)
self._hue = T.ColorJitter(hue=hue)
self._saturation = T.ColorJitter(saturation=saturation)
self.p = p
def forward(self, image: Tensor,
target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
if isinstance(image, torch.Tensor):
if image.ndimension() not in {2, 3}:
raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
elif image.ndimension() == 2:
image = image.unsqueeze(0)
r = torch.rand(7)
if r[0] < self.p:
image = self._brightness(image)
contrast_before = r[1] < 0.5
if contrast_before:
if r[2] < self.p:
image = self._contrast(image)
if r[3] < self.p:
image = self._saturation(image)
if r[4] < self.p:
image = self._hue(image)
if not contrast_before:
if r[5] < self.p:
image = self._contrast(image)
if r[6] < self.p:
channels = F._get_image_num_channels(image)
permutation = torch.randperm(channels)
is_pil = F._is_pil_image(image)
if is_pil:
image = F.to_tensor(image)
image = image[..., permutation, :, :]
if is_pil:
image = F.to_pil_image(image)
return image, target

295
old_files/utils.py Normal file
View File

@@ -0,0 +1,295 @@
from collections import defaultdict, deque
import datetime
import errno
import os
import time
import torch
import torch.distributed as dist
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{median:.4f} ({global_avg:.4f})"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
def synchronize_between_processes(self):
"""
Warning: does not synchronize the deque!
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
self.count = int(t[0])
self.total = t[1]
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
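# Usage sketch (illustrative):
#     meter = SmoothedValue(window_size=10, fmt='{avg:.4f}')
#     meter.update(0.5); meter.update(0.7)
#     meter.avg         # windowed mean -> 0.6
#     meter.global_avg  # running mean over all updates -> 0.6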
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
data_list = [None] * world_size
dist.all_gather_object(data_list, data)
return data_list
def reduce_dict(input_dict, average=True):
"""
Args:
input_dict (dict): all the values will be reduced
average (bool): whether to do average or sum
Reduce the values in the dictionary from all processes so that all processes
have the averaged results. Returns a dict with the same fields as
input_dict, after reduction.
"""
world_size = get_world_size()
if world_size < 2:
return input_dict
with torch.no_grad():
names = []
values = []
# sort the keys so that they are consistent across processes
for k in sorted(input_dict.keys()):
names.append(k)
values.append(input_dict[k])
values = torch.stack(values, dim=0)
dist.all_reduce(values)
if average:
values /= world_size
reduced_dict = {k: v for k, v in zip(names, values)}
return reduced_dict
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
for meter in self.meters.values():
meter.synchronize_between_processes()
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
if torch.cuda.is_available():
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
else:
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
yield obj
iter_time.update(time.time() - end)
if i % print_freq == 0 or i == len(iterable) - 1:
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
def collate_fn(batch):
return tuple(zip(*batch))
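# Detection images vary in size, so the default collate (which stacks into one
# tensor) would fail; this keeps each batch as parallel tuples. Sketch:
#     batch = [(img_a, tgt_a), (img_b, tgt_b)]
#     images, targets = collate_fn(batch)   # images == (img_a, img_b)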
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
def f(x):
if x >= warmup_iters:
return 1
alpha = float(x) / warmup_iters
return warmup_factor * (1 - alpha) + alpha
return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
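# Worked example (illustrative): with warmup_iters=1000 and
# warmup_factor=1/1000, iteration x=500 gives a multiplier of
#     0.001 * (1 - 0.5) + 0.5 = 0.5005
# so the factor ramps linearly from ~0.001 up to 1.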
def mkdir(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def setup_for_distributed(is_master):
"""
This function disables printing when not in master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def save_on_master(*args, **kwargs):
if is_main_process():
torch.save(*args, **kwargs)
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)

233
train.py
View File

@@ -1,233 +0,0 @@
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco, get_coco_kp
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate
import presets
import utils
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train, data_augmentation):
return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
def get_args_parser(add_help=True):
import argparse
parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
parser.add_argument('--lr-step-size', default=8, type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
help='decrease lr every step-size epochs (multisteplr scheduler only)')
parser.add_argument('--lr-gamma', default=0.1, type=float,
help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
parser.add_argument('--trainable-backbone-layers', default=None, type=int,
help='number of trainable layers of backbone')
parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
parser.add_argument(
"--sync-bn",
dest="sync_bn",
help="Use sync batch norm",
action="store_true",
)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
return parser
def main(args):
if args.output_dir:
utils.mkdir(args.output_dir)
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
kwargs = {
"trainable_backbone_layers": args.trainable_backbone_layers
}
if "rcnn" in args.model:
if args.rpn_score_thresh is not None:
kwargs["rpn_score_thresh"] = args.rpn_score_thresh
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
**kwargs)
model.to(device)
if args.distributed and args.sync_bn:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
args.lr_scheduler = args.lr_scheduler.lower()
if args.lr_scheduler == 'multisteplr':
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
elif args.lr_scheduler == 'cosineannealinglr':
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
else:
raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
"are supported.".format(args.lr_scheduler))
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
checkpoint = {
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch
}
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
utils.save_on_master(
checkpoint,
os.path.join(args.output_dir, 'checkpoint.pth'))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
args = get_args_parser().parse_args()
main(args)

1
vision Submodule

Submodule vision added at a83b9a17e4