commit 7f8a3eb6b0db5a1cc19157761f976f6f0347d51a Author: Ishan S. Patel Date: Thu Jul 1 12:41:54 2021 -0400 initial diff --git a/coco_eval.py b/coco_eval.py new file mode 100644 index 0000000..09648f2 --- /dev/null +++ b/coco_eval.py @@ -0,0 +1,352 @@ +import json +import tempfile + +import numpy as np +import copy +import time +import torch +import torch._six + +from pycocotools.cocoeval import COCOeval +from pycocotools.coco import COCO +import pycocotools.mask as mask_util + +from collections import defaultdict + +import utils + + +class CocoEvaluator(object): + def __init__(self, coco_gt, iou_types): + assert isinstance(iou_types, (list, tuple)) + coco_gt = copy.deepcopy(coco_gt) + self.coco_gt = coco_gt + + self.iou_types = iou_types + self.coco_eval = {} + for iou_type in iou_types: + self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) + + self.img_ids = [] + self.eval_imgs = {k: [] for k in iou_types} + + def update(self, predictions): + img_ids = list(np.unique(list(predictions.keys()))) + self.img_ids.extend(img_ids) + + for iou_type in self.iou_types: + results = self.prepare(predictions, iou_type) + coco_dt = loadRes(self.coco_gt, results) if results else COCO() + coco_eval = self.coco_eval[iou_type] + + coco_eval.cocoDt = coco_dt + coco_eval.params.imgIds = list(img_ids) + img_ids, eval_imgs = evaluate(coco_eval) + + self.eval_imgs[iou_type].append(eval_imgs) + + def synchronize_between_processes(self): + for iou_type in self.iou_types: + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) + create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) + + def accumulate(self): + for coco_eval in self.coco_eval.values(): + coco_eval.accumulate() + + def summarize(self): + for iou_type, coco_eval in self.coco_eval.items(): + print("IoU metric: {}".format(iou_type)) + coco_eval.summarize() + + def prepare(self, predictions, iou_type): + if iou_type == "bbox": + return self.prepare_for_coco_detection(predictions) + elif iou_type == "segm": + return self.prepare_for_coco_segmentation(predictions) + elif iou_type == "keypoints": + return self.prepare_for_coco_keypoint(predictions) + else: + raise ValueError("Unknown iou type {}".format(iou_type)) + + def prepare_for_coco_detection(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "bbox": box, + "score": scores[k], + } + for k, box in enumerate(boxes) + ] + ) + return coco_results + + def prepare_for_coco_segmentation(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + scores = prediction["scores"] + labels = prediction["labels"] + masks = prediction["masks"] + + masks = masks > 0.5 + + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] + for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "segmentation": rle, + "score": scores[k], + } + for k, rle in enumerate(rles) + ] + ) + return coco_results + + def 
prepare_for_coco_keypoint(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + keypoints = prediction["keypoints"] + keypoints = keypoints.flatten(start_dim=1).tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + 'keypoints': keypoint, + "score": scores[k], + } + for k, keypoint in enumerate(keypoints) + ] + ) + return coco_results + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + + +def merge(img_ids, eval_imgs): + all_img_ids = utils.all_gather(img_ids) + all_eval_imgs = utils.all_gather(eval_imgs) + + merged_img_ids = [] + for p in all_img_ids: + merged_img_ids.extend(p) + + merged_eval_imgs = [] + for p in all_eval_imgs: + merged_eval_imgs.append(p) + + merged_img_ids = np.array(merged_img_ids) + merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) + + # keep only unique (and in sorted order) images + merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) + merged_eval_imgs = merged_eval_imgs[..., idx] + + return merged_img_ids, merged_eval_imgs + + +def create_common_coco_eval(coco_eval, img_ids, eval_imgs): + img_ids, eval_imgs = merge(img_ids, eval_imgs) + img_ids = list(img_ids) + eval_imgs = list(eval_imgs.flatten()) + + coco_eval.evalImgs = eval_imgs + coco_eval.params.imgIds = img_ids + coco_eval._paramsEval = copy.deepcopy(coco_eval.params) + + +################################################################# +# From pycocotools, just removed the prints and fixed +# a Python3 bug about unicode not defined +################################################################# + +# Ideally, pycocotools wouldn't have hard-coded prints +# so that we could avoid copy-pasting those two functions + +def createIndex(self): + # create index + # print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns, catToImgs = defaultdict(list), defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + # print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + +maskUtils = mask_util + + +def loadRes(self, resFile): + """ + Load result file and return a result api object. 
+ Args: + self (obj): coco object with ground truth annotations + resFile (str): file name of result file + Returns: + res (obj): result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + # print('Loading and preparing results...') + # tic = time.time() + if isinstance(resFile, torch._six.string_classes): + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id + 1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] + if 'segmentation' not in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2] * bb[3] + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if 'bbox' not in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id + 1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x2 - x1) * (y2 - y1) + ann['id'] = id + 1 + ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] + # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + createIndex(res) + return res + + +def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + # tic = time.time() + # print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) + # print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + evalImgs = [ + evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + # this is NOT in the pycocotools code, but could be done outside + evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) + self._paramsEval = copy.deepcopy(self.params) + # toc = time.time() + # print('DONE (t={:0.2f}s).'.format(toc-tic)) + return p.imgIds, evalImgs + +################################################################# +# end of straight copy from pycocotools, just removing the prints +################################################################# diff --git a/coco_utils.py b/coco_utils.py new file mode 100644 index 0000000..26701a2 --- /dev/null +++ b/coco_utils.py @@ -0,0 +1,252 @@ +import copy +import os +from PIL import Image + +import torch +import torch.utils.data +import torchvision + +from pycocotools import mask as coco_mask +from pycocotools.coco import COCO + +import transforms as T + + +class FilterAndRemapCocoCategories(object): + def __init__(self, categories, remap=True): + self.categories = categories + self.remap = remap + + def __call__(self, image, target): + anno = target["annotations"] + anno = [obj for obj in anno if obj["category_id"] in self.categories] + if not self.remap: + target["annotations"] = anno + return image, target + anno = copy.deepcopy(anno) + for obj in anno: + obj["category_id"] = self.categories.index(obj["category_id"]) + target["annotations"] = anno + return image, target + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +class ConvertCocoPolysToMask(object): + def __call__(self, image, target): + w, h = image.size + + image_id = target["image_id"] + image_id = torch.tensor([image_id]) + + anno = target["annotations"] + + anno = [obj for obj in anno if obj['iscrowd'] == 0] + + boxes = [obj["bbox"] for obj in anno] + # guard against no boxes via resizing + boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) + boxes[:, 2:] += boxes[:, :2] + boxes[:, 0::2].clamp_(min=0, max=w) + boxes[:, 1::2].clamp_(min=0, max=h) + + classes = [obj["category_id"] for obj in anno] + classes = torch.tensor(classes, dtype=torch.int64) + + segmentations = [obj["segmentation"] for obj in anno] + masks = convert_coco_poly_to_mask(segmentations, h, w) + + keypoints = None + if anno and "keypoints" in anno[0]: + keypoints = [obj["keypoints"] for obj in anno] + keypoints = 
torch.as_tensor(keypoints, dtype=torch.float32) + num_keypoints = keypoints.shape[0] + if num_keypoints: + keypoints = keypoints.view(num_keypoints, -1, 3) + + keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + + target = {} + target["boxes"] = boxes + target["labels"] = classes + target["masks"] = masks + target["image_id"] = image_id + if keypoints is not None: + target["keypoints"] = keypoints + + # for conversion to coco api + area = torch.tensor([obj["area"] for obj in anno]) + iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) + target["area"] = area + target["iscrowd"] = iscrowd + + return image, target + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_only_empty_bbox(anno): + return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) + + def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + min_keypoints_per_image = 10 + + def _has_valid_annotation(anno): + # if it's empty, there is no annotation + if len(anno) == 0: + return False + # if all boxes have close to zero area, there is no annotation + if _has_only_empty_bbox(anno): + return False + # keypoints task have a slight different critera for considering + # if an annotation is valid + if "keypoints" not in anno[0]: + return True + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) >= min_keypoints_per_image: + return True + return False + + assert isinstance(dataset, torchvision.datasets.CocoDetection) + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset + + +def convert_to_coco_api(ds): + coco_ds = COCO() + # annotation IDs need to start at 1, not 0, see torchvision issue #1530 + ann_id = 1 + dataset = {'images': [], 'categories': [], 'annotations': []} + categories = set() + for img_idx in range(len(ds)): + # find better way to get target + # targets = ds.get_annotations(img_idx) + img, targets = ds[img_idx] + image_id = targets["image_id"].item() + img_dict = {} + img_dict['id'] = image_id + img_dict['height'] = img.shape[-2] + img_dict['width'] = img.shape[-1] + dataset['images'].append(img_dict) + bboxes = targets["boxes"] + bboxes[:, 2:] -= bboxes[:, :2] + bboxes = bboxes.tolist() + labels = targets['labels'].tolist() + areas = targets['area'].tolist() + iscrowd = targets['iscrowd'].tolist() + if 'masks' in targets: + masks = targets['masks'] + # make masks Fortran contiguous for coco_mask + masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) + if 'keypoints' in targets: + keypoints = targets['keypoints'] + keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() + num_objs = len(bboxes) + for i in range(num_objs): + ann = {} + ann['image_id'] = image_id + ann['bbox'] = bboxes[i] + ann['category_id'] = labels[i] + categories.add(labels[i]) + ann['area'] = areas[i] + ann['iscrowd'] = iscrowd[i] + ann['id'] = ann_id + if 'masks' in targets: + ann["segmentation"] = coco_mask.encode(masks[i].numpy()) + if 'keypoints' in targets: + ann['keypoints'] = 
keypoints[i] + ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) + dataset['annotations'].append(ann) + ann_id += 1 + dataset['categories'] = [{'id': i} for i in sorted(categories)] + coco_ds.dataset = dataset + coco_ds.createIndex() + return coco_ds + + +def get_coco_api_from_dataset(dataset): + for _ in range(10): + if isinstance(dataset, torchvision.datasets.CocoDetection): + break + if isinstance(dataset, torch.utils.data.Subset): + dataset = dataset.dataset + if isinstance(dataset, torchvision.datasets.CocoDetection): + return dataset.coco + return convert_to_coco_api(dataset) + + +class CocoDetection(torchvision.datasets.CocoDetection): + def __init__(self, img_folder, ann_file, transforms): + super(CocoDetection, self).__init__(img_folder, ann_file) + self._transforms = transforms + + def __getitem__(self, idx): + img, target = super(CocoDetection, self).__getitem__(idx) + image_id = self.ids[idx] + target = dict(image_id=image_id, annotations=target) + if self._transforms is not None: + img, target = self._transforms(img, target) + return img, target + + +def get_coco(root, image_set, transforms, mode='instances'): + anno_file_template = "{}_{}2017.json" + PATHS = { + "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), + "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), + # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) + } + + t = [ConvertCocoPolysToMask()] + + if transforms is not None: + t.append(transforms) + transforms = T.Compose(t) + + img_folder, ann_file = PATHS[image_set] + img_folder = os.path.join(root, img_folder) + ann_file = os.path.join(root, ann_file) + + dataset = CocoDetection(img_folder, ann_file, transforms=transforms) + + if image_set == "train": + dataset = _coco_remove_images_without_annotations(dataset) + + # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) + + return dataset + + +def get_coco_kp(root, image_set, transforms): + return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git a/data.py b/data.py new file mode 100644 index 0000000..bfcb7e1 --- /dev/null +++ b/data.py @@ -0,0 +1,211 @@ +# %% +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import cv2 +import numpy as np +import os +import sys + + + + +sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") +sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") +from references.detection import utils, engine +import data_aug +import bbox_util + + + +def get_transform(train): + transforms = [] + transforms.append(T.ToTensor()) + if train: + transforms.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(transforms) + + +IMAGE_MEAN = [0.485, 0.456, 0.406] +IMAGE_STD = [0.229, 0.224, 0.225] +PATH_ROOT = r"D:\ishan\ml\inaturalist\\" + +device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + +def create_map(list_in, from_key, to_key): + cmap = dict() + for l in list_in: + cmap[l[from_key]] = l[to_key] + return cmap + + +class iNaturalistDataset(torch.utils.data.Dataset): + def __init__(self, validation=False, train=False): + + self.validation = validation + self.train = train + + self.transforms = T.Compose([T.Resize(600, max_size=1024), T.ToTensor()]) + + if validation: + json_path = os.path.join(PATH_ROOT, 
r"val_2017_bboxes\val_2017_bboxes.json") + elif train: + json_path = os.path.join( + PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" + ) + + with open(json_path, "r") as rj: + f = json.load(rj) + + categories = list() + image_info = dict() + + for category in f["categories"]: + if category["supercategory"] == "Aves": + if category['name'] in ['Archilochus colubris','Icterus galbula']: + print(category['name']) + categories.append(category) + + categories = sorted(categories, key=lambda k: k["name"]) + for idx, cat in enumerate(categories): + cat["new_id"] = idx + 1 + + orig_to_new_id = create_map(categories, "id", "new_id") + + for annot in f["annotations"]: + if annot["category_id"] in orig_to_new_id: + annot["new_category_id"] = orig_to_new_id[annot["category_id"]] + id = annot["image_id"] + if id not in image_info: + image_info[id] = dict() + + annot["bbox"][2] += annot["bbox"][0] + annot["bbox"][3] += annot["bbox"][1] + image_info[id]["annotation"] = annot + + for img in f["images"]: + id = img["id"] + path = os.path.join(PATH_ROOT, img["file_name"]) + height = img["height"] + width = img["width"] + if id in image_info: + image_info[id].update({"path": path, "height": height, "width": width}) + + for idx, (id, im_in) in enumerate(image_info.items()): + im_in["idx"] = idx + self.images = image_info + self.categories = categories + self.idx_to_id = [x for x in self.images] + self.num_classes = len(self.categories) + 1 + self.num_samples = len(self.images) + self.transforms = [ + data_aug.RandomHorizontalFlip(0.5), + data_aug.Resize(600), + ] + self.pre_transform = T.Compose([T.ToTensor()])#],T.Normalize(mean=[0.485, 0.456, 0.406], + #std=[0.229, 0.224, 0.225])]) + + def __len__(self): + return self.num_samples + + def transform(self, img, bbox): + + for x in self.transforms: + img, bbox = x(img, bbox) + img = self.pre_transform(img) + return img, bbox + + def __getitem__(self, idx): + idd = self.idx_to_id[idx] + c_image = self.images[idd] + # print(c_image, idx, self.validation, self.train) + # breakpoint() + image = np.asarray(cv2.imread(c_image["path"])[:,:,::-1].copy(),dtype=np.float32) + annot = c_image["annotation"] + bbox = annot["bbox"] + bbox.append(annot["new_category_id"]) + bbox = np.asarray([bbox], dtype=np.float32) + + image, bbox = self.transform(image.copy(), bbox.copy()) + boxes = torch.as_tensor(bbox[:,:4], dtype=torch.float32) + target = dict() + target["boxes"] = boxes + target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64) + target['image_id'] = torch.tensor([annot['image_id']]) + target['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) + target['iscrowd'] = torch.zeros((1,), dtype=torch.int64) + + return image, target + +# v = iNaturalistDataset(validation= True) +# o = v[10] +# %% +# oimage = t.tensor(o[0]*255, dtype=t.uint8) +# import matplotlib.pyplot as plt +# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1) +# plt.imshow(ox.permute([1,2,0])) +# plt.savefig('crap2.png') + +# %% +def run(): + val_dataset = iNaturalistDataset(validation=True) + train_dataset = iNaturalistDataset(train=True) + + + train_data_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn + ) + val_data_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=utils.collate_fn + ) + model = torchvision.models.detection.fasterrcnn_resnet50_fpn( + pretrained=True, num_classes=train_dataset.num_classes, 
progress=True + ) + + model.to(device) + + # construct an optimizer + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(params, lr=0.005, + momentum=0.9, weight_decay=0.0005) + # and a learning rate scheduler + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, + step_size=3, + gamma=0.1) + + # let's train it for 10 epochs + num_epochs = 10 + + for epoch in range(num_epochs): + print(epoch) + torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth') + # train for one epoch, printing every 10 iterations + engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10) + torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth') + # update the learning rate + lr_scheduler.step() + torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth') + # evaluate on the test dataset + engine.evaluate(model, val_data_loader, device=device) + + + +if __name__ == "__main__": + run() + + + +# # %% +# json_path = os.path.join( +# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json" +# ) +# with open(json_path, "r") as rj: +# f = json.load(rj) + + +# # %% +# image_id: 2358 + diff --git a/engine.py b/engine.py new file mode 100644 index 0000000..49992af --- /dev/null +++ b/engine.py @@ -0,0 +1,110 @@ +import math +import sys +import time +import torch + +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco_api_from_dataset +from coco_eval import CocoEvaluator +import utils + + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + + lr_scheduler = None + if epoch == 0: + warmup_factor = 1. 
/ 1000 + warmup_iters = min(1000, len(data_loader) - 1) + + lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) + + for images, targets in metric_logger.log_every(data_loader, print_freq, header): + images = list(image.to(device) for image in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + loss_dict = model(images, targets) + + losses = sum(loss for loss in loss_dict.values()) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + + loss_value = losses_reduced.item() + + if not math.isfinite(loss_value): + print("Loss is {}, stopping training".format(loss_value)) + print(loss_dict_reduced) + sys.exit(1) + + optimizer.zero_grad() + losses.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + metric_logger.update(loss=losses_reduced, **loss_dict_reduced) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + + return metric_logger + + +def _get_iou_types(model): + model_without_ddp = model + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model_without_ddp = model.module + iou_types = ["bbox"] + if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): + iou_types.append("segm") + if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): + iou_types.append("keypoints") + return iou_types + + +@torch.no_grad() +def evaluate(model, data_loader, device): + n_threads = torch.get_num_threads() + # FIXME remove this and make paste_masks_in_image run on the GPU + torch.set_num_threads(1) + cpu_device = torch.device("cpu") + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + + coco = get_coco_api_from_dataset(data_loader.dataset) + iou_types = _get_iou_types(model) + coco_evaluator = CocoEvaluator(coco, iou_types) + + for images, targets in metric_logger.log_every(data_loader, 100, header): + images = list(img.to(device) for img in images) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + model_time = time.time() + outputs = model(images) + + outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] + model_time = time.time() - model_time + + res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} + evaluator_time = time.time() + coco_evaluator.update(res) + evaluator_time = time.time() - evaluator_time + metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + coco_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + coco_evaluator.accumulate() + coco_evaluator.summarize() + torch.set_num_threads(n_threads) + return coco_evaluator diff --git a/group_by_aspect_ratio.py b/group_by_aspect_ratio.py new file mode 100644 index 0000000..1b76f4c --- /dev/null +++ b/group_by_aspect_ratio.py @@ -0,0 +1,195 @@ +import bisect +from collections import defaultdict +import copy +from itertools import repeat, chain +import math +import numpy as np + +import torch +import torch.utils.data +from torch.utils.data.sampler import BatchSampler, Sampler +from torch.utils.model_zoo import tqdm +import torchvision + +from PIL import Image + + +def _repeat_to_at_least(iterable, n): + repeat_times = math.ceil(n / len(iterable)) + repeated = 
chain.from_iterable(repeat(iterable, repeat_times)) + return list(repeated) + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. + Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a continuous set of integers starting from + 0, i.e. they must be in the range [0, num_groups). + batch_size (int): Size of mini-batch. + """ + def __init__(self, sampler, group_ids, batch_size): + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = group_ids + self.batch_size = batch_size + + def __iter__(self): + buffer_per_group = defaultdict(list) + samples_per_group = defaultdict(list) + + num_batches = 0 + for idx in self.sampler: + group_id = self.group_ids[idx] + buffer_per_group[group_id].append(idx) + samples_per_group[group_id].append(idx) + if len(buffer_per_group[group_id]) == self.batch_size: + yield buffer_per_group[group_id] + num_batches += 1 + del buffer_per_group[group_id] + assert len(buffer_per_group[group_id]) < self.batch_size + + # now we have run out of elements that satisfy + # the group criteria, let's return the remaining + # elements so that the size of the sampler is + # deterministic + expected_num_batches = len(self) + num_remaining = expected_num_batches - num_batches + if num_remaining > 0: + # for the remaining batches, take first the buffers with largest number + # of elements + for group_id, _ in sorted(buffer_per_group.items(), + key=lambda x: len(x[1]), reverse=True): + remaining = self.batch_size - len(buffer_per_group[group_id]) + samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) + buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) + assert len(buffer_per_group[group_id]) == self.batch_size + yield buffer_per_group[group_id] + num_remaining -= 1 + if num_remaining == 0: + break + assert num_remaining == 0 + + def __len__(self): + return len(self.sampler) // self.batch_size + + +def _compute_aspect_ratios_slow(dataset, indices=None): + print("Your dataset doesn't support the fast path for " + "computing the aspect ratios, so will iterate over " + "the full dataset and load every image instead. 
" + "This might take some time...") + if indices is None: + indices = range(len(dataset)) + + class SubsetSampler(Sampler): + def __init__(self, indices): + self.indices = indices + + def __iter__(self): + return iter(self.indices) + + def __len__(self): + return len(self.indices) + + sampler = SubsetSampler(indices) + data_loader = torch.utils.data.DataLoader( + dataset, batch_size=1, sampler=sampler, + num_workers=14, # you might want to increase it for faster processing + collate_fn=lambda x: x[0]) + aspect_ratios = [] + with tqdm(total=len(dataset)) as pbar: + for _i, (img, _) in enumerate(data_loader): + pbar.update(1) + height, width = img.shape[-2:] + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_custom_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + height, width = dataset.get_height_and_width(i) + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_coco_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + img_info = dataset.coco.imgs[dataset.ids[i]] + aspect_ratio = float(img_info["width"]) / float(img_info["height"]) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_voc_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + # this doesn't load the data into memory, because PIL loads it lazily + width, height = Image.open(dataset.images[i]).size + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_subset_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + + ds_indices = [dataset.indices[i] for i in indices] + return compute_aspect_ratios(dataset.dataset, ds_indices) + + +def compute_aspect_ratios(dataset, indices=None): + if hasattr(dataset, "get_height_and_width"): + return _compute_aspect_ratios_custom_dataset(dataset, indices) + + if isinstance(dataset, torchvision.datasets.CocoDetection): + return _compute_aspect_ratios_coco_dataset(dataset, indices) + + if isinstance(dataset, torchvision.datasets.VOCDetection): + return _compute_aspect_ratios_voc_dataset(dataset, indices) + + if isinstance(dataset, torch.utils.data.Subset): + return _compute_aspect_ratios_subset_dataset(dataset, indices) + + # slow path + return _compute_aspect_ratios_slow(dataset, indices) + + +def _quantize(x, bins): + bins = copy.deepcopy(bins) + bins = sorted(bins) + quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) + return quantized + + +def create_aspect_ratio_groups(dataset, k=0): + aspect_ratios = compute_aspect_ratios(dataset) + bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] + groups = _quantize(aspect_ratios, bins) + # count number of elements per group + counts = np.unique(groups, return_counts=True)[1] + fbins = [0] + bins + [np.inf] + print("Using {} as bins for aspect ratio quantization".format(fbins)) + print("Count of instances per bin: {}".format(counts)) + return groups diff --git a/presets.py b/presets.py new file mode 100644 index 0000000..1fac69a --- /dev/null +++ b/presets.py @@ -0,0 +1,37 @@ +import transforms as T + + +class DetectionPresetTrain: + def __init__(self, data_augmentation, 
hflip_prob=0.5, mean=(123., 117., 104.)): + if data_augmentation == 'hflip': + self.transforms = T.Compose([ + T.RandomHorizontalFlip(p=hflip_prob), + T.ToTensor(), + ]) + elif data_augmentation == 'ssd': + self.transforms = T.Compose([ + T.RandomPhotometricDistort(), + T.RandomZoomOut(fill=list(mean)), + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + T.ToTensor(), + ]) + elif data_augmentation == 'ssdlite': + self.transforms = T.Compose([ + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + T.ToTensor(), + ]) + else: + raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') + + def __call__(self, img, target): + return self.transforms(img, target) + + +class DetectionPresetEval: + def __init__(self): + self.transforms = T.ToTensor() + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/test.py b/test.py new file mode 100644 index 0000000..fce8894 --- /dev/null +++ b/test.py @@ -0,0 +1,50 @@ +# %% +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import cv2 +import numpy as np +import os +import sys + + +sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug") +sys.path.append(r"K:\Designs\ML\inaturalist_models\vision") + + +model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 3) + +model.load_state_dict(torch.load('K:\Designs\ML\inaturalist_models\model_weights_start_1.pth')) +model.eval() +model.to('cuda') + +#img = r'D:\ishan\ml\inaturalist\test2017\00a903fa1d23b2f8f28248e81bc1c4a4.jpg' +#img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210617093423.mp4_023.jpg' +img = r'J:\hummingbird_imagenet\hummingbird\Hummingbird_01_20210609095848.mp4_133.jpg' +import random +rtdir = r'J:\hummingbird_imagenet\hummingbird' + +ff = os.listdir(rtdir) +img = os.path.join(rtdir,random.choice(ff)) +image = cv2.imread(img)[:,:,::-1].copy() +o = T.ToTensor()(image).cuda() +img = o[None, :, :, :] + +ou = model(img) +from torchvision.utils import draw_bounding_boxes +import torch as t +oimage = t.tensor(image, dtype=t.uint8).permute([2,0,1]) +import matplotlib.pyplot as plt +ox = draw_bounding_boxes(oimage, ou[0]['boxes'], width=1) +plt.imshow(ox.permute([1,2,0])) +# %% +from data import iNaturalistDataset +sd = iNaturalistDataset(validation=True) +# # %% + + + +# %% diff --git a/train.py b/train.py new file mode 100644 index 0000000..cd4148e --- /dev/null +++ b/train.py @@ -0,0 +1,233 @@ +r"""PyTorch Detection Training. + +To run in a multi-gpu environment, use the distributed launcher:: + + python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ + train.py ... --world-size $NGPU + +The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. + --lr 0.02 --batch-size 2 --world-size 8 +If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. + +On top of that, for training Faster/Mask R-CNN, the default hyperparameters are + --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 + +Also, if you train Keypoint R-CNN, the default hyperparameters are + --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 +Because the number of images is smaller in the person keypoint subset of COCO, +the number of epochs should be adapted so that we have the same number of iterations. 
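+For example, on a single GPU with the default 2 images per gpu, the scaling rule above gives --lr 0.02 / 8 * 1 = 0.0025.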
+""" +import datetime +import os +import time + +import torch +import torch.utils.data +import torchvision +import torchvision.models.detection +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco, get_coco_kp + +from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups +from engine import train_one_epoch, evaluate + +import presets +import utils + + +def get_dataset(name, image_set, transform, data_path): + paths = { + "coco": (data_path, get_coco, 91), + "coco_kp": (data_path, get_coco_kp, 2) + } + p, ds_fn, num_classes = paths[name] + + ds = ds_fn(p, image_set=image_set, transforms=transform) + return ds, num_classes + + +def get_transform(train, data_augmentation): + return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval() + + +def get_args_parser(add_help=True): + import argparse + parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help) + + parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset') + parser.add_argument('--dataset', default='coco', help='dataset') + parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model') + parser.add_argument('--device', default='cuda', help='device') + parser.add_argument('-b', '--batch-size', default=2, type=int, + help='images per gpu, the total batch size is $NGPU x batch_size') + parser.add_argument('--epochs', default=26, type=int, metavar='N', + help='number of total epochs to run') + parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') + parser.add_argument('--lr', default=0.02, type=float, + help='initial learning rate, 0.02 is the default value for training ' + 'on 8 gpus and 2 images_per_gpu') + parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') + parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') + parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)') + parser.add_argument('--lr-step-size', default=8, type=int, + help='decrease lr every step-size epochs (multisteplr scheduler only)') + parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, + help='decrease lr every step-size epochs (multisteplr scheduler only)') + parser.add_argument('--lr-gamma', default=0.1, type=float, + help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)') + parser.add_argument('--print-freq', default=20, type=int, help='print frequency') + parser.add_argument('--output-dir', default='.', help='path where to save') + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') + parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) + parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn') + parser.add_argument('--trainable-backbone-layers', default=None, type=int, + help='number of trainable layers of backbone') + parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)') + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + 
action="store_true", + ) + parser.add_argument( + "--pretrained", + dest="pretrained", + help="Use pre-trained models from the modelzoo", + action="store_true", + ) + + # distributed training parameters + parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + + return parser + + +def main(args): + if args.output_dir: + utils.mkdir(args.output_dir) + + utils.init_distributed_mode(args) + print(args) + + device = torch.device(args.device) + + # Data loading code + print("Loading data") + + dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation), + args.data_path) + dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path) + + print("Creating data loaders") + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + else: + train_sampler = torch.utils.data.RandomSampler(dataset) + test_sampler = torch.utils.data.SequentialSampler(dataset_test) + + if args.aspect_ratio_group_factor >= 0: + group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) + train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) + else: + train_batch_sampler = torch.utils.data.BatchSampler( + train_sampler, args.batch_size, drop_last=True) + + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, + collate_fn=utils.collate_fn) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=1, + sampler=test_sampler, num_workers=args.workers, + collate_fn=utils.collate_fn) + + print("Creating model") + kwargs = { + "trainable_backbone_layers": args.trainable_backbone_layers + } + if "rcnn" in args.model: + if args.rpn_score_thresh is not None: + kwargs["rpn_score_thresh"] = args.rpn_score_thresh + model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained, + **kwargs) + model.to(device) + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model_without_ddp = model + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + model_without_ddp = model.module + + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD( + params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + args.lr_scheduler = args.lr_scheduler.lower() + if args.lr_scheduler == 'multisteplr': + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosineannealinglr': + lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + else: + raise RuntimeError("Invalid lr scheduler '{}'. 
Only MultiStepLR and CosineAnnealingLR " + "are supported.".format(args.lr_scheduler)) + + if args.resume: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model']) + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + + if args.test_only: + evaluate(model, data_loader_test, device=device) + return + + print("Start training") + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + lr_scheduler.step() + if args.output_dir: + checkpoint = { + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'args': args, + 'epoch': epoch + } + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'checkpoint.pth')) + + # evaluate after every epoch + evaluate(model, data_loader_test, device=device) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/transforms.py b/transforms.py new file mode 100644 index 0000000..8e4b887 --- /dev/null +++ b/transforms.py @@ -0,0 +1,239 @@ +import torch +import torchvision + +from torch import nn, Tensor +from torchvision.transforms import functional as F +from torchvision.transforms import transforms as T +from typing import List, Tuple, Dict, Optional + + +def _flip_coco_person_keypoints(kps, width): + flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + flipped_data = kps[:, flip_inds] + flipped_data[..., 0] = width - flipped_data[..., 0] + # Maintain COCO convention that if visibility == 0, then x, y = 0 + inds = flipped_data[..., 2] == 0 + flipped_data[inds] = 0 + return flipped_data + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + return image, target + + +class RandomHorizontalFlip(T.RandomHorizontalFlip): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torch.rand(1) < self.p: + image = F.hflip(image) + if target is not None: + width, _ = F._get_image_size(image) + target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] + if "masks" in target: + target["masks"] = target["masks"].flip(-1) + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = _flip_coco_person_keypoints(keypoints, width) + target["keypoints"] = keypoints + return image, target + + +class ToTensor(nn.Module): + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.to_tensor(image) + return image, target + + +class RandomIoUCrop(nn.Module): + def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40): + super().__init__() + # Configuration similar to 
https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + if sampler_options is None: + sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] + self.options = sampler_options + self.trials = trials + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if target is None: + raise ValueError("The targets can't be None for this transform.") + + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + orig_w, orig_h = F._get_image_size(image) + + while True: + # sample an option + idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) + min_jaccard_overlap = self.options[idx] + if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option + return image, target + + for _ in range(self.trials): + # check the aspect ratio limitations + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) + new_w = int(orig_w * r[0]) + new_h = int(orig_h * r[1]) + aspect_ratio = new_w / new_h + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): + continue + + # check for 0 area crops + r = torch.rand(2) + left = int((orig_w - new_w) * r[0]) + top = int((orig_h - new_h) * r[1]) + right = left + new_w + bottom = top + new_h + if left == right or top == bottom: + continue + + # check for any valid boxes with centers within the crop area + cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) + cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) + is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) + if not is_within_crop_area.any(): + continue + + # check at least 1 box with jaccard limitations + boxes = target["boxes"][is_within_crop_area] + ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]], + dtype=boxes.dtype, device=boxes.device)) + if ious.max() < min_jaccard_overlap: + continue + + # keep only valid boxes and perform cropping + target["boxes"] = boxes + target["labels"] = target["labels"][is_within_crop_area] + target["boxes"][:, 0::2] -= left + target["boxes"][:, 1::2] -= top + target["boxes"][:, 0::2].clamp_(min=0, max=new_w) + target["boxes"][:, 1::2].clamp_(min=0, max=new_h) + image = F.crop(image, top, left, new_h, new_w) + + return image, target + + +class RandomZoomOut(nn.Module): + def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5): + super().__init__() + if fill is None: + fill = [0., 0., 0.] + self.fill = fill + self.side_range = side_range + if side_range[0] < 1. or side_range[0] > side_range[1]: + raise ValueError("Invalid canvas side range provided {}.".format(side_range)) + self.p = p + + @torch.jit.unused + def _get_fill_value(self, is_pil): + # type: (bool) -> int + # We fake the type to make it work on JIT + return tuple(int(x) for x in self.fill) if is_pil else 0 + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. 
Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + if torch.rand(1) < self.p: + return image, target + + orig_w, orig_h = F._get_image_size(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width = int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + if torch.jit.is_scripting(): + fill = 0 + else: + fill = self._get_fill_value(F._is_pil_image(image)) + + image = F.pad(image, [left, top, right, bottom], fill=fill) + if isinstance(image, torch.Tensor): + v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) + image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \ + image[..., :, (left + orig_w):] = v + + if target is not None: + target["boxes"][:, 0::2] += left + target["boxes"][:, 1::2] += top + + return image, target + + +class RandomPhotometricDistort(nn.Module): + def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5), + hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5): + super().__init__() + self._brightness = T.ColorJitter(brightness=brightness) + self._contrast = T.ColorJitter(contrast=contrast) + self._hue = T.ColorJitter(hue=hue) + self._saturation = T.ColorJitter(saturation=saturation) + self.p = p + + def forward(self, image: Tensor, + target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension())) + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + r = torch.rand(7) + + if r[0] < self.p: + image = self._brightness(image) + + contrast_before = r[1] < 0.5 + if contrast_before: + if r[2] < self.p: + image = self._contrast(image) + + if r[3] < self.p: + image = self._saturation(image) + + if r[4] < self.p: + image = self._hue(image) + + if not contrast_before: + if r[5] < self.p: + image = self._contrast(image) + + if r[6] < self.p: + channels = F._get_image_num_channels(image) + permutation = torch.randperm(channels) + + is_pil = F._is_pil_image(image) + if is_pil: + image = F.to_tensor(image) + image = image[..., permutation, :, :] + if is_pil: + image = F.to_pil_image(image) + + return image, target diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..3c52abb --- /dev/null +++ b/utils.py @@ -0,0 +1,295 @@ +from collections import defaultdict, deque +import datetime +import errno +import os +import time + +import torch +import torch.distributed as dist + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
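+        Only the running count and total are all-reduced across processes; the deque of recent values stays local to each rank.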
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + data_list = [None] * world_size + dist.all_gather_object(data_list, data) + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 
1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0)
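Note on data.py: run() imports FastRCNNPredictor but never uses it, and instead passes num_classes directly to fasterrcnn_resnet50_fpn together with pretrained=True; with torchvision releases from around this commit that combination generally conflicts with loading the COCO-pretrained box-predictor weights. Below is a minimal sketch of the usual fine-tuning pattern (load the pretrained detector, then swap the box predictor); the num_classes value of 3 mirrors test.py, and the sketch is an assumption about intent, not part of this patch.

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

num_classes = 3  # 2 bird species + background, matching test.py; adjust as needed

# Load the COCO-pretrained detector with its original 91-way head ...
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# ... then replace the box predictor with one sized for the new label set.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)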