yacwc
old_files/README.md (new file, 82 lines)
@@ -0,0 +1,82 @@
# Object detection reference training scripts

This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models and provide baseline
training and evaluation scripts to quickly bootstrap research.

To execute the example commands below you must install the following:

```
cython
pycocotools
matplotlib
```

You must modify the following flags:

`--data-path=/path/to/coco/dataset`

`--nproc_per_node=<number_of_gpus_available>`

Unless otherwise noted, all models have been trained on 8x V100 GPUs.

### Faster R-CNN ResNet-50 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### Faster R-CNN MobileNetV3-Large FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### Faster R-CNN MobileNetV3-Large 320 FPN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### RetinaNet
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model retinanet_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01
```

### SSD300 VGG16
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model ssd300_vgg16 --epochs 120 \
    --lr-steps 80 110 --aspect-ratio-group-factor 3 --lr 0.002 --batch-size 4 \
    --weight-decay 0.0005 --data-augmentation ssd
```

### SSDlite320 MobileNetV3-Large
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 \
    --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24 \
    --weight-decay 0.00004 --data-augmentation ssdlite
```

### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model maskrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```

### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46 \
    --lr-steps 36 43 --aspect-ratio-group-factor 3
```
old_files/coco_eval.py (new file, 352 lines)
@@ -0,0 +1,352 @@
|
||||
import json
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
import copy
|
||||
import time
|
||||
import torch
|
||||
import torch._six
|
||||
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
from pycocotools.coco import COCO
|
||||
import pycocotools.mask as mask_util
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import utils
|
||||
|
||||
|
||||
class CocoEvaluator(object):
|
||||
def __init__(self, coco_gt, iou_types):
|
||||
assert isinstance(iou_types, (list, tuple))
|
||||
coco_gt = copy.deepcopy(coco_gt)
|
||||
self.coco_gt = coco_gt
|
||||
|
||||
self.iou_types = iou_types
|
||||
self.coco_eval = {}
|
||||
for iou_type in iou_types:
|
||||
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
|
||||
|
||||
self.img_ids = []
|
||||
self.eval_imgs = {k: [] for k in iou_types}
|
||||
|
||||
def update(self, predictions):
|
||||
img_ids = list(np.unique(list(predictions.keys())))
|
||||
self.img_ids.extend(img_ids)
|
||||
|
||||
for iou_type in self.iou_types:
|
||||
results = self.prepare(predictions, iou_type)
|
||||
coco_dt = loadRes(self.coco_gt, results) if results else COCO()
|
||||
coco_eval = self.coco_eval[iou_type]
|
||||
|
||||
coco_eval.cocoDt = coco_dt
|
||||
coco_eval.params.imgIds = list(img_ids)
|
||||
img_ids, eval_imgs = evaluate(coco_eval)
|
||||
|
||||
self.eval_imgs[iou_type].append(eval_imgs)
|
||||
|
||||
def synchronize_between_processes(self):
|
||||
for iou_type in self.iou_types:
|
||||
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
|
||||
create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
|
||||
|
||||
def accumulate(self):
|
||||
for coco_eval in self.coco_eval.values():
|
||||
coco_eval.accumulate()
|
||||
|
||||
def summarize(self):
|
||||
for iou_type, coco_eval in self.coco_eval.items():
|
||||
print("IoU metric: {}".format(iou_type))
|
||||
coco_eval.summarize()
|
||||
|
||||
def prepare(self, predictions, iou_type):
|
||||
if iou_type == "bbox":
|
||||
return self.prepare_for_coco_detection(predictions)
|
||||
elif iou_type == "segm":
|
||||
return self.prepare_for_coco_segmentation(predictions)
|
||||
elif iou_type == "keypoints":
|
||||
return self.prepare_for_coco_keypoint(predictions)
|
||||
else:
|
||||
raise ValueError("Unknown iou type {}".format(iou_type))
|
||||
|
||||
def prepare_for_coco_detection(self, predictions):
|
||||
coco_results = []
|
||||
for original_id, prediction in predictions.items():
|
||||
if len(prediction) == 0:
|
||||
continue
|
||||
|
||||
boxes = prediction["boxes"]
|
||||
boxes = convert_to_xywh(boxes).tolist()
|
||||
scores = prediction["scores"].tolist()
|
||||
labels = prediction["labels"].tolist()
|
||||
|
||||
coco_results.extend(
|
||||
[
|
||||
{
|
||||
"image_id": original_id,
|
||||
"category_id": labels[k],
|
||||
"bbox": box,
|
||||
"score": scores[k],
|
||||
}
|
||||
for k, box in enumerate(boxes)
|
||||
]
|
||||
)
|
||||
return coco_results
|
||||
|
||||
def prepare_for_coco_segmentation(self, predictions):
|
||||
coco_results = []
|
||||
for original_id, prediction in predictions.items():
|
||||
if len(prediction) == 0:
|
||||
continue
|
||||
|
||||
scores = prediction["scores"]
|
||||
labels = prediction["labels"]
|
||||
masks = prediction["masks"]
|
||||
|
||||
masks = masks > 0.5
|
||||
|
||||
scores = prediction["scores"].tolist()
|
||||
labels = prediction["labels"].tolist()
|
||||
|
||||
rles = [
|
||||
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
|
||||
for mask in masks
|
||||
]
|
||||
for rle in rles:
|
||||
rle["counts"] = rle["counts"].decode("utf-8")
|
||||
|
||||
coco_results.extend(
|
||||
[
|
||||
{
|
||||
"image_id": original_id,
|
||||
"category_id": labels[k],
|
||||
"segmentation": rle,
|
||||
"score": scores[k],
|
||||
}
|
||||
for k, rle in enumerate(rles)
|
||||
]
|
||||
)
|
||||
return coco_results
|
||||
|
||||
def prepare_for_coco_keypoint(self, predictions):
|
||||
coco_results = []
|
||||
for original_id, prediction in predictions.items():
|
||||
if len(prediction) == 0:
|
||||
continue
|
||||
|
||||
boxes = prediction["boxes"]
|
||||
boxes = convert_to_xywh(boxes).tolist()
|
||||
scores = prediction["scores"].tolist()
|
||||
labels = prediction["labels"].tolist()
|
||||
keypoints = prediction["keypoints"]
|
||||
keypoints = keypoints.flatten(start_dim=1).tolist()
|
||||
|
||||
coco_results.extend(
|
||||
[
|
||||
{
|
||||
"image_id": original_id,
|
||||
"category_id": labels[k],
|
||||
'keypoints': keypoint,
|
||||
"score": scores[k],
|
||||
}
|
||||
for k, keypoint in enumerate(keypoints)
|
||||
]
|
||||
)
|
||||
return coco_results
|
||||
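# Usage sketch for CocoEvaluator (it mirrors the evaluation loop in engine.py below;
# `model` and `data_loader` are assumed to exist and get_coco_api_from_dataset comes
# from coco_utils):
#
#     coco_gt = get_coco_api_from_dataset(data_loader.dataset)
#     evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
#     for images, targets in data_loader:
#         outputs = model(images)
#         evaluator.update({t["image_id"].item(): o for t, o in zip(targets, outputs)})
#     evaluator.synchronize_between_processes()
#     evaluator.accumulate()
#     evaluator.summarize()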
|
||||
|
||||
def convert_to_xywh(boxes):
|
||||
xmin, ymin, xmax, ymax = boxes.unbind(1)
|
||||
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
|
||||
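# Example: convert_to_xywh(torch.tensor([[10., 20., 30., 60.]]))
# returns tensor([[10., 20., 20., 40.]]), i.e. (xmin, ymin, width, height), the box format COCO expects.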
|
||||
|
||||
def merge(img_ids, eval_imgs):
|
||||
all_img_ids = utils.all_gather(img_ids)
|
||||
all_eval_imgs = utils.all_gather(eval_imgs)
|
||||
|
||||
merged_img_ids = []
|
||||
for p in all_img_ids:
|
||||
merged_img_ids.extend(p)
|
||||
|
||||
merged_eval_imgs = []
|
||||
for p in all_eval_imgs:
|
||||
merged_eval_imgs.append(p)
|
||||
|
||||
merged_img_ids = np.array(merged_img_ids)
|
||||
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
|
||||
|
||||
# keep only unique (and in sorted order) images
|
||||
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
|
||||
merged_eval_imgs = merged_eval_imgs[..., idx]
|
||||
|
||||
return merged_img_ids, merged_eval_imgs
|
||||
|
||||
|
||||
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
|
||||
img_ids, eval_imgs = merge(img_ids, eval_imgs)
|
||||
img_ids = list(img_ids)
|
||||
eval_imgs = list(eval_imgs.flatten())
|
||||
|
||||
coco_eval.evalImgs = eval_imgs
|
||||
coco_eval.params.imgIds = img_ids
|
||||
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
|
||||
|
||||
|
||||
#################################################################
|
||||
# From pycocotools, just removed the prints and fixed
|
||||
# a Python3 bug about unicode not defined
|
||||
#################################################################
|
||||
|
||||
# Ideally, pycocotools wouldn't have hard-coded prints
|
||||
# so that we could avoid copy-pasting those two functions
|
||||
|
||||
def createIndex(self):
|
||||
# create index
|
||||
# print('creating index...')
|
||||
anns, cats, imgs = {}, {}, {}
|
||||
imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
|
||||
if 'annotations' in self.dataset:
|
||||
for ann in self.dataset['annotations']:
|
||||
imgToAnns[ann['image_id']].append(ann)
|
||||
anns[ann['id']] = ann
|
||||
|
||||
if 'images' in self.dataset:
|
||||
for img in self.dataset['images']:
|
||||
imgs[img['id']] = img
|
||||
|
||||
if 'categories' in self.dataset:
|
||||
for cat in self.dataset['categories']:
|
||||
cats[cat['id']] = cat
|
||||
|
||||
if 'annotations' in self.dataset and 'categories' in self.dataset:
|
||||
for ann in self.dataset['annotations']:
|
||||
catToImgs[ann['category_id']].append(ann['image_id'])
|
||||
|
||||
# print('index created!')
|
||||
|
||||
# create class members
|
||||
self.anns = anns
|
||||
self.imgToAnns = imgToAnns
|
||||
self.catToImgs = catToImgs
|
||||
self.imgs = imgs
|
||||
self.cats = cats
|
||||
|
||||
|
||||
maskUtils = mask_util
|
||||
|
||||
|
||||
def loadRes(self, resFile):
|
||||
"""
|
||||
Load result file and return a result api object.
|
||||
Args:
|
||||
self (obj): coco object with ground truth annotations
|
||||
resFile (str): file name of result file
|
||||
Returns:
|
||||
res (obj): result api object
|
||||
"""
|
||||
res = COCO()
|
||||
res.dataset['images'] = [img for img in self.dataset['images']]
|
||||
|
||||
# print('Loading and preparing results...')
|
||||
# tic = time.time()
|
||||
if isinstance(resFile, torch._six.string_classes):
|
||||
anns = json.load(open(resFile))
|
||||
elif type(resFile) == np.ndarray:
|
||||
anns = self.loadNumpyAnnotations(resFile)
|
||||
else:
|
||||
anns = resFile
|
||||
assert type(anns) == list, 'results is not an array of objects'
|
||||
annsImgIds = [ann['image_id'] for ann in anns]
|
||||
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
|
||||
'Results do not correspond to current coco set'
|
||||
if 'caption' in anns[0]:
|
||||
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
|
||||
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
|
||||
for id, ann in enumerate(anns):
|
||||
ann['id'] = id + 1
|
||||
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
|
||||
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
|
||||
for id, ann in enumerate(anns):
|
||||
bb = ann['bbox']
|
||||
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
|
||||
if 'segmentation' not in ann:
|
||||
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
|
||||
ann['area'] = bb[2] * bb[3]
|
||||
ann['id'] = id + 1
|
||||
ann['iscrowd'] = 0
|
||||
elif 'segmentation' in anns[0]:
|
||||
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
|
||||
for id, ann in enumerate(anns):
|
||||
# now only support compressed RLE format as segmentation results
|
||||
ann['area'] = maskUtils.area(ann['segmentation'])
|
||||
if 'bbox' not in ann:
|
||||
ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
|
||||
ann['id'] = id + 1
|
||||
ann['iscrowd'] = 0
|
||||
elif 'keypoints' in anns[0]:
|
||||
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
|
||||
for id, ann in enumerate(anns):
|
||||
s = ann['keypoints']
|
||||
x = s[0::3]
|
||||
y = s[1::3]
|
||||
x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
|
||||
ann['area'] = (x2 - x1) * (y2 - y1)
|
||||
ann['id'] = id + 1
|
||||
ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
|
||||
# print('DONE (t={:0.2f}s)'.format(time.time()- tic))
|
||||
|
||||
res.dataset['annotations'] = anns
|
||||
createIndex(res)
|
||||
return res
|
||||
|
||||
|
||||
def evaluate(self):
|
||||
'''
|
||||
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
|
||||
:return: None
|
||||
'''
|
||||
# tic = time.time()
|
||||
# print('Running per image evaluation...')
|
||||
p = self.params
|
||||
# add backward compatibility if useSegm is specified in params
|
||||
if p.useSegm is not None:
|
||||
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
|
||||
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
|
||||
# print('Evaluate annotation type *{}*'.format(p.iouType))
|
||||
p.imgIds = list(np.unique(p.imgIds))
|
||||
if p.useCats:
|
||||
p.catIds = list(np.unique(p.catIds))
|
||||
p.maxDets = sorted(p.maxDets)
|
||||
self.params = p
|
||||
|
||||
self._prepare()
|
||||
# loop through images, area range, max detection number
|
||||
catIds = p.catIds if p.useCats else [-1]
|
||||
|
||||
if p.iouType == 'segm' or p.iouType == 'bbox':
|
||||
computeIoU = self.computeIoU
|
||||
elif p.iouType == 'keypoints':
|
||||
computeIoU = self.computeOks
|
||||
self.ious = {
|
||||
(imgId, catId): computeIoU(imgId, catId)
|
||||
for imgId in p.imgIds
|
||||
for catId in catIds}
|
||||
|
||||
evaluateImg = self.evaluateImg
|
||||
maxDet = p.maxDets[-1]
|
||||
evalImgs = [
|
||||
evaluateImg(imgId, catId, areaRng, maxDet)
|
||||
for catId in catIds
|
||||
for areaRng in p.areaRng
|
||||
for imgId in p.imgIds
|
||||
]
|
||||
# this is NOT in the pycocotools code, but could be done outside
|
||||
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
|
||||
self._paramsEval = copy.deepcopy(self.params)
|
||||
# toc = time.time()
|
||||
# print('DONE (t={:0.2f}s).'.format(toc-tic))
|
||||
return p.imgIds, evalImgs
|
||||
|
||||
#################################################################
|
||||
# end of straight copy from pycocotools, just removing the prints
|
||||
#################################################################
|
||||
old_files/coco_utils.py (new file, 252 lines)
@@ -0,0 +1,252 @@
|
||||
import copy
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import torchvision
|
||||
|
||||
from pycocotools import mask as coco_mask
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
import transforms as T
|
||||
|
||||
|
||||
class FilterAndRemapCocoCategories(object):
|
||||
def __init__(self, categories, remap=True):
|
||||
self.categories = categories
|
||||
self.remap = remap
|
||||
|
||||
def __call__(self, image, target):
|
||||
anno = target["annotations"]
|
||||
anno = [obj for obj in anno if obj["category_id"] in self.categories]
|
||||
if not self.remap:
|
||||
target["annotations"] = anno
|
||||
return image, target
|
||||
anno = copy.deepcopy(anno)
|
||||
for obj in anno:
|
||||
obj["category_id"] = self.categories.index(obj["category_id"])
|
||||
target["annotations"] = anno
|
||||
return image, target
|
||||
|
||||
|
||||
def convert_coco_poly_to_mask(segmentations, height, width):
|
||||
masks = []
|
||||
for polygons in segmentations:
|
||||
rles = coco_mask.frPyObjects(polygons, height, width)
|
||||
mask = coco_mask.decode(rles)
|
||||
if len(mask.shape) < 3:
|
||||
mask = mask[..., None]
|
||||
mask = torch.as_tensor(mask, dtype=torch.uint8)
|
||||
mask = mask.any(dim=2)
|
||||
masks.append(mask)
|
||||
if masks:
|
||||
masks = torch.stack(masks, dim=0)
|
||||
else:
|
||||
masks = torch.zeros((0, height, width), dtype=torch.uint8)
|
||||
return masks
|
||||
|
||||
|
||||
class ConvertCocoPolysToMask(object):
|
||||
def __call__(self, image, target):
|
||||
w, h = image.size
|
||||
|
||||
image_id = target["image_id"]
|
||||
image_id = torch.tensor([image_id])
|
||||
|
||||
anno = target["annotations"]
|
||||
|
||||
anno = [obj for obj in anno if obj['iscrowd'] == 0]
|
||||
|
||||
boxes = [obj["bbox"] for obj in anno]
|
||||
# guard against no boxes via resizing
|
||||
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
|
||||
boxes[:, 2:] += boxes[:, :2]
|
||||
boxes[:, 0::2].clamp_(min=0, max=w)
|
||||
boxes[:, 1::2].clamp_(min=0, max=h)
|
||||
|
||||
classes = [obj["category_id"] for obj in anno]
|
||||
classes = torch.tensor(classes, dtype=torch.int64)
|
||||
|
||||
segmentations = [obj["segmentation"] for obj in anno]
|
||||
masks = convert_coco_poly_to_mask(segmentations, h, w)
|
||||
|
||||
keypoints = None
|
||||
if anno and "keypoints" in anno[0]:
|
||||
keypoints = [obj["keypoints"] for obj in anno]
|
||||
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
|
||||
num_keypoints = keypoints.shape[0]
|
||||
if num_keypoints:
|
||||
keypoints = keypoints.view(num_keypoints, -1, 3)
|
||||
|
||||
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
|
||||
boxes = boxes[keep]
|
||||
classes = classes[keep]
|
||||
masks = masks[keep]
|
||||
if keypoints is not None:
|
||||
keypoints = keypoints[keep]
|
||||
|
||||
target = {}
|
||||
target["boxes"] = boxes
|
||||
target["labels"] = classes
|
||||
target["masks"] = masks
|
||||
target["image_id"] = image_id
|
||||
if keypoints is not None:
|
||||
target["keypoints"] = keypoints
|
||||
|
||||
# for conversion to coco api
|
||||
area = torch.tensor([obj["area"] for obj in anno])
|
||||
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
|
||||
target["area"] = area
|
||||
target["iscrowd"] = iscrowd
|
||||
|
||||
return image, target
|
||||
|
||||
|
||||
def _coco_remove_images_without_annotations(dataset, cat_list=None):
|
||||
def _has_only_empty_bbox(anno):
|
||||
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
|
||||
|
||||
def _count_visible_keypoints(anno):
|
||||
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
|
||||
|
||||
min_keypoints_per_image = 10
|
||||
|
||||
def _has_valid_annotation(anno):
|
||||
# if it's empty, there is no annotation
|
||||
if len(anno) == 0:
|
||||
return False
|
||||
# if all boxes have close to zero area, there is no annotation
|
||||
if _has_only_empty_bbox(anno):
|
||||
return False
|
||||
# keypoint tasks have a slightly different criterion for
# considering an annotation valid
|
||||
if "keypoints" not in anno[0]:
|
||||
return True
|
||||
# for keypoint detection tasks, only consider valid images those
|
||||
# containing at least min_keypoints_per_image
|
||||
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
|
||||
return True
|
||||
return False
|
||||
|
||||
assert isinstance(dataset, torchvision.datasets.CocoDetection)
|
||||
ids = []
|
||||
for ds_idx, img_id in enumerate(dataset.ids):
|
||||
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
|
||||
anno = dataset.coco.loadAnns(ann_ids)
|
||||
if cat_list:
|
||||
anno = [obj for obj in anno if obj["category_id"] in cat_list]
|
||||
if _has_valid_annotation(anno):
|
||||
ids.append(ds_idx)
|
||||
|
||||
dataset = torch.utils.data.Subset(dataset, ids)
|
||||
return dataset
|
||||
|
||||
|
||||
def convert_to_coco_api(ds):
|
||||
coco_ds = COCO()
|
||||
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
|
||||
ann_id = 1
|
||||
dataset = {'images': [], 'categories': [], 'annotations': []}
|
||||
categories = set()
|
||||
for img_idx in range(len(ds)):
|
||||
# find better way to get target
|
||||
# targets = ds.get_annotations(img_idx)
|
||||
img, targets = ds[img_idx]
|
||||
image_id = targets["image_id"].item()
|
||||
img_dict = {}
|
||||
img_dict['id'] = image_id
|
||||
img_dict['height'] = img.shape[-2]
|
||||
img_dict['width'] = img.shape[-1]
|
||||
dataset['images'].append(img_dict)
|
||||
bboxes = targets["boxes"]
|
||||
bboxes[:, 2:] -= bboxes[:, :2]
|
||||
bboxes = bboxes.tolist()
|
||||
labels = targets['labels'].tolist()
|
||||
areas = targets['area'].tolist()
|
||||
iscrowd = targets['iscrowd'].tolist()
|
||||
if 'masks' in targets:
|
||||
masks = targets['masks']
|
||||
# make masks Fortran contiguous for coco_mask
|
||||
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
|
||||
if 'keypoints' in targets:
|
||||
keypoints = targets['keypoints']
|
||||
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
|
||||
num_objs = len(bboxes)
|
||||
for i in range(num_objs):
|
||||
ann = {}
|
||||
ann['image_id'] = image_id
|
||||
ann['bbox'] = bboxes[i]
|
||||
ann['category_id'] = labels[i]
|
||||
categories.add(labels[i])
|
||||
ann['area'] = areas[i]
|
||||
ann['iscrowd'] = iscrowd[i]
|
||||
ann['id'] = ann_id
|
||||
if 'masks' in targets:
|
||||
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
|
||||
if 'keypoints' in targets:
|
||||
ann['keypoints'] = keypoints[i]
|
||||
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
|
||||
dataset['annotations'].append(ann)
|
||||
ann_id += 1
|
||||
dataset['categories'] = [{'id': i} for i in sorted(categories)]
|
||||
coco_ds.dataset = dataset
|
||||
coco_ds.createIndex()
|
||||
return coco_ds
|
||||
|
||||
|
||||
def get_coco_api_from_dataset(dataset):
|
||||
for _ in range(10):
|
||||
if isinstance(dataset, torchvision.datasets.CocoDetection):
|
||||
break
|
||||
if isinstance(dataset, torch.utils.data.Subset):
|
||||
dataset = dataset.dataset
|
||||
if isinstance(dataset, torchvision.datasets.CocoDetection):
|
||||
return dataset.coco
|
||||
return convert_to_coco_api(dataset)
|
||||
|
||||
|
||||
class CocoDetection(torchvision.datasets.CocoDetection):
|
||||
def __init__(self, img_folder, ann_file, transforms):
|
||||
super(CocoDetection, self).__init__(img_folder, ann_file)
|
||||
self._transforms = transforms
|
||||
|
||||
def __getitem__(self, idx):
|
||||
img, target = super(CocoDetection, self).__getitem__(idx)
|
||||
image_id = self.ids[idx]
|
||||
target = dict(image_id=image_id, annotations=target)
|
||||
if self._transforms is not None:
|
||||
img, target = self._transforms(img, target)
|
||||
return img, target
|
||||
|
||||
|
||||
def get_coco(root, image_set, transforms, mode='instances'):
|
||||
anno_file_template = "{}_{}2017.json"
|
||||
PATHS = {
|
||||
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
|
||||
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
|
||||
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
|
||||
}
|
||||
|
||||
t = [ConvertCocoPolysToMask()]
|
||||
|
||||
if transforms is not None:
|
||||
t.append(transforms)
|
||||
transforms = T.Compose(t)
|
||||
|
||||
img_folder, ann_file = PATHS[image_set]
|
||||
img_folder = os.path.join(root, img_folder)
|
||||
ann_file = os.path.join(root, ann_file)
|
||||
|
||||
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
|
||||
|
||||
if image_set == "train":
|
||||
dataset = _coco_remove_images_without_annotations(dataset)
|
||||
|
||||
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
def get_coco_kp(root, image_set, transforms):
|
||||
return get_coco(root, image_set, transforms, mode="person_keypoints")
|
||||
old_files/crap.png (new binary file, 357 KiB, not shown)
old_files/crap2.png (new binary file, 357 KiB, not shown)
old_files/data.py (new file, 221 lines)
@@ -0,0 +1,221 @@
|
||||
# %%
|
||||
import os
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
import torchvision
|
||||
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
||||
import json
|
||||
import torch
|
||||
from torchvision import transforms as T
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
|
||||
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
|
||||
from references.detection import utils, engine
|
||||
import data_aug
|
||||
import bbox_util
|
||||
|
||||
|
||||
|
||||
def get_transform(train):
|
||||
transforms = []
|
||||
transforms.append(T.ToTensor())
|
||||
if train:
|
||||
transforms.append(T.RandomHorizontalFlip(0.5))
|
||||
return T.Compose(transforms)
|
||||
|
||||
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
|
||||
|
||||
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
||||
|
||||
def create_map(list_in, from_key, to_key):
|
||||
cmap = dict()
|
||||
for l in list_in:
|
||||
cmap[l[from_key]] = l[to_key]
|
||||
return cmap
|
||||
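# Illustrative example (made-up values): create_map maps one key of each dict to another.
#
#     create_map([{"id": 9, "new_id": 1}, {"id": 42, "new_id": 2}], "id", "new_id")
#     # -> {9: 1, 42: 2}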
|
||||
|
||||
class iNaturalistDataset(torch.utils.data.Dataset):
|
||||
def __init__(self, validation=False, train=False, transforms = None):
|
||||
|
||||
self.validation = validation
|
||||
self.train = train
|
||||
self.transforms = transforms
|
||||
|
||||
|
||||
if validation:
|
||||
json_path = os.path.join(PATH_ROOT, r"val_2017_bboxes\val_2017_bboxes.json")
|
||||
elif train:
|
||||
json_path = os.path.join(
|
||||
PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
|
||||
)
|
||||
|
||||
with open(json_path, "r") as rj:
|
||||
f = json.load(rj)
|
||||
|
||||
categories = list()
|
||||
image_info = dict()
|
||||
|
||||
for category in f["categories"]:
|
||||
if category["supercategory"] == "Aves":
|
||||
if category['name'] in ['Archilochus colubris']:#,'Icterus galbula']:
|
||||
print(category['name'])
|
||||
categories.append(category)
|
||||
|
||||
categories = sorted(categories, key=lambda k: k["name"])
|
||||
for idx, cat in enumerate(categories):
|
||||
cat["new_id"] = idx + 1
|
||||
|
||||
orig_to_new_id = create_map(categories, "id", "new_id")
|
||||
|
||||
for annot in f["annotations"]:
|
||||
if annot["category_id"] in orig_to_new_id:
|
||||
annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
|
||||
id = annot["image_id"]
|
||||
if id not in image_info:
|
||||
image_info[id] = dict()
|
||||
|
||||
annot["bbox"][2] += annot["bbox"][0]
|
||||
annot["bbox"][3] += annot["bbox"][1]
|
||||
image_info[id]["annotation"] = annot
|
||||
|
||||
for img in f["images"]:
|
||||
id = img["id"]
|
||||
path = os.path.join(PATH_ROOT, img["file_name"])
|
||||
height = img["height"]
|
||||
width = img["width"]
|
||||
if id in image_info:
|
||||
image_info[id].update({"path": path, "height": height, "width": width})
|
||||
|
||||
for idx, (id, im_in) in enumerate(image_info.items()):
|
||||
im_in["idx"] = idx
|
||||
self.images = image_info
|
||||
self.categories = categories
|
||||
self.idx_to_id = [x for x in self.images]
|
||||
self.num_classes = len(self.categories) + 1
|
||||
self.num_samples = len(self.images)
|
||||
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
|
||||
def __getitem__(self, idx):
|
||||
idd = self.idx_to_id[idx]
|
||||
c_image = self.images[idd]
|
||||
img_path = c_image["path"]
|
||||
img = Image.open(img_path).convert("RGB")
|
||||
|
||||
annot = c_image["annotation"]
|
||||
bbox = annot["bbox"]
|
||||
boxes = bbox
|
||||
target = dict()
|
||||
target["boxes"] = torch.as_tensor([boxes])
|
||||
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
|
||||
target['image_id'] = torch.tensor([annot['image_id']])
|
||||
target['area'] = torch.as_tensor([annot['area']])
|
||||
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
|
||||
|
||||
|
||||
if self.transforms is not None:
|
||||
img, target = self.transforms(img, target)
|
||||
|
||||
return img, target
|
||||
# %%
|
||||
# v = iNaturalistDataset(validation=True)
|
||||
|
||||
|
||||
# v = iNaturalistDataset(validation= True)
|
||||
# o = v[10]
|
||||
# %%
|
||||
# oimage = t.tensor(o[0]*255, dtype=t.uint8)
|
||||
# import matplotlib.pyplot as plt
|
||||
# ox = draw_bounding_boxes(oimage, o[1]['boxes'], width=1)
|
||||
# plt.imshow(ox.permute([1,2,0]))
|
||||
# plt.savefig('crap2.png')
|
||||
|
||||
def get_model(num_classes):
|
||||
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
|
||||
# num_classes comes from the function argument and already includes the background class
|
||||
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
||||
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
||||
return model
|
||||
|
||||
|
||||
import transforms as T
|
||||
|
||||
def get_transform(train):
|
||||
transforms = []
|
||||
transforms.append(T.ToTensor())
|
||||
if train:
|
||||
transforms.append(T.RandomHorizontalFlip(0.5))
|
||||
return T.Compose(transforms)
|
||||
|
||||
from engine import train_one_epoch, evaluate
|
||||
import utils
|
||||
# %%
|
||||
def run():
|
||||
val_dataset = iNaturalistDataset(validation=True, transforms = get_transform(train=True))
|
||||
train_dataset = iNaturalistDataset(train=True, transforms = get_transform(train=False))
|
||||
|
||||
|
||||
train_data_loader = torch.utils.data.DataLoader(
|
||||
train_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
|
||||
)
|
||||
val_data_loader = torch.utils.data.DataLoader(
|
||||
val_dataset, batch_size=8, shuffle=True, num_workers=1, collate_fn=utils.collate_fn
|
||||
)
|
||||
|
||||
import torchvision
|
||||
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
||||
num_classes = 2
|
||||
|
||||
|
||||
model = get_model(num_classes)
|
||||
model.to(device)
|
||||
# construct an optimizer
|
||||
params = [p for p in model.parameters() if p.requires_grad]
|
||||
optimizer = torch.optim.SGD(params, lr=0.005,
|
||||
momentum=0.9, weight_decay=0.0005)
|
||||
# and a learning rate scheduler
|
||||
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
|
||||
step_size=3,
|
||||
gamma=0.1)
|
||||
|
||||
# let's train it for 10 epochs
|
||||
num_epochs = 10
|
||||
|
||||
for epoch in range(num_epochs):
|
||||
print(epoch)
|
||||
torch.save(model.state_dict(), 'model_weights_start_'+str(epoch)+ '.pth')
|
||||
# train for one epoch, printing every 10 iterations
|
||||
engine.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
|
||||
torch.save(model.state_dict(), 'model_weights_post_train_'+str(epoch)+ '.pth')
|
||||
# update the learning rate
|
||||
lr_scheduler.step()
|
||||
torch.save(model.state_dict(), 'model_weights_post_step_'+str(epoch)+ '.pth')
|
||||
# evaluate on the test dataset
|
||||
engine.evaluate(model, val_data_loader, device=device)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
|
||||
|
||||
|
||||
# # %%
|
||||
# json_path = os.path.join(
|
||||
# PATH_ROOT, r"train_2017_bboxes\train_2017_bboxes.json"
|
||||
# )
|
||||
# with open(json_path, "r") as rj:
|
||||
# f = json.load(rj)
|
||||
|
||||
|
||||
# # %%
|
||||
# image_id: 2358
|
||||
|
||||
old_files/engine.py (new file, 110 lines)
@@ -0,0 +1,110 @@
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
import torch
|
||||
|
||||
import torchvision.models.detection.mask_rcnn
|
||||
|
||||
from coco_utils import get_coco_api_from_dataset
|
||||
from coco_eval import CocoEvaluator
|
||||
import utils
|
||||
|
||||
|
||||
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
|
||||
model.train()
|
||||
metric_logger = utils.MetricLogger(delimiter=" ")
|
||||
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
|
||||
header = 'Epoch: [{}]'.format(epoch)
|
||||
|
||||
lr_scheduler = None
|
||||
if epoch == 0:
|
||||
warmup_factor = 1. / 1000
|
||||
warmup_iters = min(1000, len(data_loader) - 1)
|
||||
|
||||
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
|
||||
|
||||
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
|
||||
images = list(image.to(device) for image in images)
|
||||
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
|
||||
|
||||
loss_dict = model(images, targets)
|
||||
|
||||
losses = sum(loss for loss in loss_dict.values())
|
||||
|
||||
# reduce losses over all GPUs for logging purposes
|
||||
loss_dict_reduced = utils.reduce_dict(loss_dict)
|
||||
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
|
||||
|
||||
loss_value = losses_reduced.item()
|
||||
|
||||
if not math.isfinite(loss_value):
|
||||
print("Loss is {}, stopping training".format(loss_value))
|
||||
print(loss_dict_reduced)
|
||||
sys.exit(1)
|
||||
|
||||
optimizer.zero_grad()
|
||||
losses.backward()
|
||||
optimizer.step()
|
||||
|
||||
if lr_scheduler is not None:
|
||||
lr_scheduler.step()
|
||||
|
||||
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
|
||||
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
|
||||
|
||||
return metric_logger
|
||||
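# utils.warmup_lr_scheduler (used above) is assumed to behave like the sketch below:
# a LambdaLR that ramps the learning rate linearly from warmup_factor * lr up to lr
# over the first warmup_iters optimizer steps.
#
#     def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
#         def f(step):
#             if step >= warmup_iters:
#                 return 1.0
#             alpha = float(step) / warmup_iters
#             return warmup_factor * (1 - alpha) + alpha
#         return torch.optim.lr_scheduler.LambdaLR(optimizer, f)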
|
||||
|
||||
def _get_iou_types(model):
|
||||
model_without_ddp = model
|
||||
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
|
||||
model_without_ddp = model.module
|
||||
iou_types = ["bbox"]
|
||||
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
|
||||
iou_types.append("segm")
|
||||
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
|
||||
iou_types.append("keypoints")
|
||||
return iou_types
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def evaluate(model, data_loader, device):
|
||||
n_threads = torch.get_num_threads()
|
||||
# FIXME remove this and make paste_masks_in_image run on the GPU
|
||||
torch.set_num_threads(1)
|
||||
cpu_device = torch.device("cpu")
|
||||
model.eval()
|
||||
metric_logger = utils.MetricLogger(delimiter=" ")
|
||||
header = 'Test:'
|
||||
|
||||
coco = get_coco_api_from_dataset(data_loader.dataset)
|
||||
iou_types = _get_iou_types(model)
|
||||
coco_evaluator = CocoEvaluator(coco, iou_types)
|
||||
|
||||
for images, targets in metric_logger.log_every(data_loader, 100, header):
|
||||
images = list(img.to(device) for img in images)
|
||||
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.synchronize()
|
||||
model_time = time.time()
|
||||
outputs = model(images)
|
||||
|
||||
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
|
||||
model_time = time.time() - model_time
|
||||
|
||||
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
|
||||
evaluator_time = time.time()
|
||||
coco_evaluator.update(res)
|
||||
evaluator_time = time.time() - evaluator_time
|
||||
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
|
||||
|
||||
# gather the stats from all processes
|
||||
metric_logger.synchronize_between_processes()
|
||||
print("Averaged stats:", metric_logger)
|
||||
coco_evaluator.synchronize_between_processes()
|
||||
|
||||
# accumulate predictions from all images
|
||||
coco_evaluator.accumulate()
|
||||
coco_evaluator.summarize()
|
||||
torch.set_num_threads(n_threads)
|
||||
return coco_evaluator
|
||||
old_files/fine_tuned.py (new file, 28 lines)
@@ -0,0 +1,28 @@
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator


# Fine-tune a pre-trained Faster R-CNN by swapping its box predictor head.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # 1 object class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


# Build a Faster R-CNN on top of a custom (MobileNetV2) backbone.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# The last conv block of the MobileNetV2 feature extractor defines the backbone's channel count.
backbone.out_channels = list(backbone.modules())[-3].out_channels

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
# %%
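# Quick inference sanity check on the model built above. Assumption: in eval mode,
# torchvision detection models take a list of 3xHxW tensors and return one dict
# per image with "boxes", "labels" and "scores".
import torch

model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])
print(predictions[0]["boxes"].shape, predictions[0]["scores"][:5])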
old_files/group_by_aspect_ratio.py (new file, 195 lines)
@@ -0,0 +1,195 @@
|
||||
import bisect
|
||||
from collections import defaultdict
|
||||
import copy
|
||||
from itertools import repeat, chain
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.utils.data
|
||||
from torch.utils.data.sampler import BatchSampler, Sampler
|
||||
from torch.utils.model_zoo import tqdm
|
||||
import torchvision
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def _repeat_to_at_least(iterable, n):
|
||||
repeat_times = math.ceil(n / len(iterable))
|
||||
repeated = chain.from_iterable(repeat(iterable, repeat_times))
|
||||
return list(repeated)
|
||||
|
||||
|
||||
class GroupedBatchSampler(BatchSampler):
|
||||
"""
|
||||
Wraps another sampler to yield a mini-batch of indices.
|
||||
It enforces that the batch only contain elements from the same group.
|
||||
It also tries to provide mini-batches which follows an ordering which is
|
||||
as close as possible to the ordering from the original sampler.
|
||||
Args:
|
||||
sampler (Sampler): Base sampler.
|
||||
group_ids (list[int]): If the sampler produces indices in range [0, N),
|
||||
`group_ids` must be a list of `N` ints which contains the group id of each sample.
|
||||
The group ids must be a continuous set of integers starting from
|
||||
0, i.e. they must be in the range [0, num_groups).
|
||||
batch_size (int): Size of mini-batch.
|
||||
"""
|
||||
def __init__(self, sampler, group_ids, batch_size):
|
||||
if not isinstance(sampler, Sampler):
|
||||
raise ValueError(
|
||||
"sampler should be an instance of "
|
||||
"torch.utils.data.Sampler, but got sampler={}".format(sampler)
|
||||
)
|
||||
self.sampler = sampler
|
||||
self.group_ids = group_ids
|
||||
self.batch_size = batch_size
|
||||
|
||||
def __iter__(self):
|
||||
buffer_per_group = defaultdict(list)
|
||||
samples_per_group = defaultdict(list)
|
||||
|
||||
num_batches = 0
|
||||
for idx in self.sampler:
|
||||
group_id = self.group_ids[idx]
|
||||
buffer_per_group[group_id].append(idx)
|
||||
samples_per_group[group_id].append(idx)
|
||||
if len(buffer_per_group[group_id]) == self.batch_size:
|
||||
yield buffer_per_group[group_id]
|
||||
num_batches += 1
|
||||
del buffer_per_group[group_id]
|
||||
assert len(buffer_per_group[group_id]) < self.batch_size
|
||||
|
||||
# now we have run out of elements that satisfy
|
||||
# the group criteria, let's return the remaining
|
||||
# elements so that the size of the sampler is
|
||||
# deterministic
|
||||
expected_num_batches = len(self)
|
||||
num_remaining = expected_num_batches - num_batches
|
||||
if num_remaining > 0:
|
||||
# for the remaining batches, take first the buffers with largest number
|
||||
# of elements
|
||||
for group_id, _ in sorted(buffer_per_group.items(),
|
||||
key=lambda x: len(x[1]), reverse=True):
|
||||
remaining = self.batch_size - len(buffer_per_group[group_id])
|
||||
samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
|
||||
buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
|
||||
assert len(buffer_per_group[group_id]) == self.batch_size
|
||||
yield buffer_per_group[group_id]
|
||||
num_remaining -= 1
|
||||
if num_remaining == 0:
|
||||
break
|
||||
assert num_remaining == 0
|
||||
|
||||
def __len__(self):
|
||||
return len(self.sampler) // self.batch_size
|
||||
|
||||
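# Usage sketch for GroupedBatchSampler (hypothetical wiring; train.py below consumes
# the class the same way, with collate_fn coming from the reference utils module):
#
#     sampler = torch.utils.data.RandomSampler(dataset)
#     group_ids = create_aspect_ratio_groups(dataset, k=3)
#     batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)
#     loader = torch.utils.data.DataLoader(
#         dataset, batch_sampler=batch_sampler, collate_fn=utils.collate_fn)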
|
||||
def _compute_aspect_ratios_slow(dataset, indices=None):
|
||||
print("Your dataset doesn't support the fast path for "
|
||||
"computing the aspect ratios, so will iterate over "
|
||||
"the full dataset and load every image instead. "
|
||||
"This might take some time...")
|
||||
if indices is None:
|
||||
indices = range(len(dataset))
|
||||
|
||||
class SubsetSampler(Sampler):
|
||||
def __init__(self, indices):
|
||||
self.indices = indices
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.indices)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.indices)
|
||||
|
||||
sampler = SubsetSampler(indices)
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset, batch_size=1, sampler=sampler,
|
||||
num_workers=14, # you might want to increase it for faster processing
|
||||
collate_fn=lambda x: x[0])
|
||||
aspect_ratios = []
|
||||
with tqdm(total=len(dataset)) as pbar:
|
||||
for _i, (img, _) in enumerate(data_loader):
|
||||
pbar.update(1)
|
||||
height, width = img.shape[-2:]
|
||||
aspect_ratio = float(width) / float(height)
|
||||
aspect_ratios.append(aspect_ratio)
|
||||
return aspect_ratios
|
||||
|
||||
|
||||
def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
|
||||
if indices is None:
|
||||
indices = range(len(dataset))
|
||||
aspect_ratios = []
|
||||
for i in indices:
|
||||
height, width = dataset.get_height_and_width(i)
|
||||
aspect_ratio = float(width) / float(height)
|
||||
aspect_ratios.append(aspect_ratio)
|
||||
return aspect_ratios
|
||||
|
||||
|
||||
def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
|
||||
if indices is None:
|
||||
indices = range(len(dataset))
|
||||
aspect_ratios = []
|
||||
for i in indices:
|
||||
img_info = dataset.coco.imgs[dataset.ids[i]]
|
||||
aspect_ratio = float(img_info["width"]) / float(img_info["height"])
|
||||
aspect_ratios.append(aspect_ratio)
|
||||
return aspect_ratios
|
||||
|
||||
|
||||
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
|
||||
if indices is None:
|
||||
indices = range(len(dataset))
|
||||
aspect_ratios = []
|
||||
for i in indices:
|
||||
# this doesn't load the data into memory, because PIL loads it lazily
|
||||
width, height = Image.open(dataset.images[i]).size
|
||||
aspect_ratio = float(width) / float(height)
|
||||
aspect_ratios.append(aspect_ratio)
|
||||
return aspect_ratios
|
||||
|
||||
|
||||
def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
|
||||
if indices is None:
|
||||
indices = range(len(dataset))
|
||||
|
||||
ds_indices = [dataset.indices[i] for i in indices]
|
||||
return compute_aspect_ratios(dataset.dataset, ds_indices)
|
||||
|
||||
|
||||
def compute_aspect_ratios(dataset, indices=None):
|
||||
if hasattr(dataset, "get_height_and_width"):
|
||||
return _compute_aspect_ratios_custom_dataset(dataset, indices)
|
||||
|
||||
if isinstance(dataset, torchvision.datasets.CocoDetection):
|
||||
return _compute_aspect_ratios_coco_dataset(dataset, indices)
|
||||
|
||||
if isinstance(dataset, torchvision.datasets.VOCDetection):
|
||||
return _compute_aspect_ratios_voc_dataset(dataset, indices)
|
||||
|
||||
if isinstance(dataset, torch.utils.data.Subset):
|
||||
return _compute_aspect_ratios_subset_dataset(dataset, indices)
|
||||
|
||||
# slow path
|
||||
return _compute_aspect_ratios_slow(dataset, indices)
|
||||
|
||||
|
||||
def _quantize(x, bins):
|
||||
bins = copy.deepcopy(bins)
|
||||
bins = sorted(bins)
|
||||
quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
|
||||
return quantized
|
||||
|
||||
|
||||
def create_aspect_ratio_groups(dataset, k=0):
|
||||
aspect_ratios = compute_aspect_ratios(dataset)
|
||||
bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
|
||||
groups = _quantize(aspect_ratios, bins)
|
||||
# count number of elements per group
|
||||
counts = np.unique(groups, return_counts=True)[1]
|
||||
fbins = [0] + bins + [np.inf]
|
||||
print("Using {} as bins for aspect ratio quantization".format(fbins))
|
||||
print("Count of instances per bin: {}".format(counts))
|
||||
return groups
|
||||
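# Worked example: with k=3, create_aspect_ratio_groups quantizes aspect ratios with the bins
#     2 ** np.linspace(-1, 1, 7) ≈ [0.50, 0.63, 0.79, 1.00, 1.26, 1.59, 2.00]
# so every image is assigned, via bisect_right over these 7 edges, to one of 8 groups.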
old_files/presets.py (new file, 37 lines)
@@ -0,0 +1,37 @@
import transforms as T


class DetectionPresetTrain:
    def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123., 117., 104.)):
        if data_augmentation == 'hflip':
            self.transforms = T.Compose([
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        elif data_augmentation == 'ssd':
            self.transforms = T.Compose([
                T.RandomPhotometricDistort(),
                T.RandomZoomOut(fill=list(mean)),
                T.RandomIoUCrop(),
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        elif data_augmentation == 'ssdlite':
            self.transforms = T.Compose([
                T.RandomIoUCrop(),
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        else:
            raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"')

    def __call__(self, img, target):
        return self.transforms(img, target)


class DetectionPresetEval:
    def __init__(self):
        self.transforms = T.ToTensor()

    def __call__(self, img, target):
        return self.transforms(img, target)
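# Usage sketch (mirrors get_transform in train.py below; `image` and `target` are
# assumed to be a PIL image and a dict of detection tensors):
#
#     import presets
#
#     def get_transform(train, data_augmentation):
#         return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
#
#     transform = get_transform(train=True, data_augmentation="ssd")
#     image, target = transform(image, target)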
old_files/test.py (new file, 49 lines)
@@ -0,0 +1,49 @@
|
||||
# %%
|
||||
import torchvision
|
||||
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
||||
from collections import defaultdict as ddict
|
||||
import json
|
||||
import torch
|
||||
from torchvision import datasets, transforms as T
|
||||
import cv2
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.append(r"K:\Designs\ML\inaturalist_models\data_aug")
|
||||
sys.path.append(r"K:\Designs\ML\inaturalist_models\vision")
|
||||
|
||||
|
||||
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes = 2)
|
||||
|
||||
model.load_state_dict(torch.load(r'K:\Designs\ML\inaturalist_models\model_weights_start_9.pth'))
|
||||
model.eval()
|
||||
model.to('cuda')
|
||||
|
||||
|
||||
import random
|
||||
rtdir = r'J:\hummingbird_imagenet\hummingbird'
|
||||
|
||||
ff = os.listdir(rtdir)
|
||||
|
||||
# %%
|
||||
img = os.path.join(rtdir,random.choice(ff))
|
||||
image = cv2.imread(img)[:,:,::-1].copy()
|
||||
o = T.ToTensor()(image).cuda()
|
||||
img = o[None, :, :, :]
|
||||
|
||||
ou = model(img)
|
||||
from torchvision.utils import draw_bounding_boxes
|
||||
import torch as t
|
||||
oimage = t.tensor(image, dtype=t.uint8).permute([2,0,1])
|
||||
import matplotlib.pyplot as plt
|
||||
ox = draw_bounding_boxes(oimage, ou[0]['boxes'], width=1)
|
||||
plt.imshow(ox.permute([1,2,0]))
|
||||
# %%
|
||||
from data import iNaturalistDataset
|
||||
sd = iNaturalistDataset(validation=True)
|
||||
|
||||
|
||||
|
||||
# %%
|
||||
old_files/train.py (new file, 233 lines)
@@ -0,0 +1,233 @@
r"""PyTorch Detection Training.

To run in a multi-gpu environment, use the distributed launcher::

    python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
        train.py ... --world-size $NGPU

The default hyperparameters are tuned for training on 8 GPUs and 2 images per GPU.
    --lr 0.02 --batch-size 2 --world-size 8
If you use a different number of GPUs, the learning rate should be changed to 0.02/8*$NGPU.

On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
    --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3

Also, if you train Keypoint R-CNN, the default hyperparameters are
    --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
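# The linear scaling rule from the docstring, written out (illustrative only,
# not used by the script):
#
#     lr = 0.02 / 8 * NGPU        # e.g. NGPU = 4  ->  pass --lr 0.01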
import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import torchvision
|
||||
import torchvision.models.detection
|
||||
import torchvision.models.detection.mask_rcnn
|
||||
|
||||
from coco_utils import get_coco, get_coco_kp
|
||||
|
||||
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
|
||||
from engine import train_one_epoch, evaluate
|
||||
|
||||
import presets
|
||||
import utils
|
||||
|
||||
|
||||
def get_dataset(name, image_set, transform, data_path):
|
||||
paths = {
|
||||
"coco": (data_path, get_coco, 91),
|
||||
"coco_kp": (data_path, get_coco_kp, 2)
|
||||
}
|
||||
p, ds_fn, num_classes = paths[name]
|
||||
|
||||
ds = ds_fn(p, image_set=image_set, transforms=transform)
|
||||
return ds, num_classes
|
||||
|
||||
|
||||
def get_transform(train, data_augmentation):
|
||||
return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
|
||||
|
||||
|
||||
def get_args_parser(add_help=True):
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='PyTorch Detection Training', add_help=add_help)
|
||||
|
||||
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
|
||||
parser.add_argument('--dataset', default='coco', help='dataset')
|
||||
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
|
||||
parser.add_argument('--device', default='cuda', help='device')
|
||||
parser.add_argument('-b', '--batch-size', default=2, type=int,
|
||||
help='images per gpu, the total batch size is $NGPU x batch_size')
|
||||
parser.add_argument('--epochs', default=26, type=int, metavar='N',
|
||||
help='number of total epochs to run')
|
||||
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
|
||||
help='number of data loading workers (default: 4)')
|
||||
parser.add_argument('--lr', default=0.02, type=float,
|
||||
help='initial learning rate, 0.02 is the default value for training '
|
||||
'on 8 gpus and 2 images_per_gpu')
|
||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
|
||||
help='momentum')
|
||||
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
|
||||
metavar='W', help='weight decay (default: 1e-4)',
|
||||
dest='weight_decay')
|
||||
parser.add_argument('--lr-scheduler', default="multisteplr", help='the lr scheduler (default: multisteplr)')
|
||||
parser.add_argument('--lr-step-size', default=8, type=int,
|
||||
help='decrease lr every step-size epochs (multisteplr scheduler only)')
|
||||
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,
|
||||
help='decrease lr every step-size epochs (multisteplr scheduler only)')
|
||||
parser.add_argument('--lr-gamma', default=0.1, type=float,
|
||||
help='decrease lr by a factor of lr-gamma (multisteplr scheduler only)')
|
||||
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
|
||||
parser.add_argument('--output-dir', default='.', help='path where to save')
|
||||
parser.add_argument('--resume', default='', help='resume from checkpoint')
|
||||
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
|
||||
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
|
||||
parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn')
|
||||
parser.add_argument('--trainable-backbone-layers', default=None, type=int,
|
||||
help='number of trainable layers of backbone')
|
||||
parser.add_argument('--data-augmentation', default="hflip", help='data augmentation policy (default: hflip)')
|
||||
parser.add_argument(
|
||||
"--sync-bn",
|
||||
dest="sync_bn",
|
||||
help="Use sync batch norm",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test-only",
|
||||
dest="test_only",
|
||||
help="Only test the model",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pretrained",
|
||||
dest="pretrained",
|
||||
help="Use pre-trained models from the modelzoo",
|
||||
action="store_true",
|
||||
)
|
||||
|
||||
# distributed training parameters
|
||||
parser.add_argument('--world-size', default=1, type=int,
|
||||
help='number of distributed processes')
|
||||
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def main(args):
|
||||
if args.output_dir:
|
||||
utils.mkdir(args.output_dir)
|
||||
|
||||
utils.init_distributed_mode(args)
|
||||
print(args)
|
||||
|
||||
device = torch.device(args.device)
|
||||
|
||||
# Data loading code
|
||||
print("Loading data")
|
||||
|
||||
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args.data_augmentation),
|
||||
args.data_path)
|
||||
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
|
||||
|
||||
print("Creating data loaders")
|
||||
if args.distributed:
|
||||
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
|
||||
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
|
||||
else:
|
||||
train_sampler = torch.utils.data.RandomSampler(dataset)
|
||||
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
|
||||
|
||||
if args.aspect_ratio_group_factor >= 0:
|
||||
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
|
||||
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
|
||||
else:
|
||||
train_batch_sampler = torch.utils.data.BatchSampler(
|
||||
train_sampler, args.batch_size, drop_last=True)
|
||||
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
|
||||
collate_fn=utils.collate_fn)
|
||||
|
||||
data_loader_test = torch.utils.data.DataLoader(
|
||||
dataset_test, batch_size=1,
|
||||
sampler=test_sampler, num_workers=args.workers,
|
||||
collate_fn=utils.collate_fn)
|
||||
|
||||
print("Creating model")
|
||||
kwargs = {
|
||||
"trainable_backbone_layers": args.trainable_backbone_layers
|
||||
}
|
||||
if "rcnn" in args.model:
|
||||
if args.rpn_score_thresh is not None:
|
||||
kwargs["rpn_score_thresh"] = args.rpn_score_thresh
|
||||
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
|
||||
**kwargs)
|
||||
model.to(device)
|
||||
if args.distributed and args.sync_bn:
|
||||
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
|
||||
|
||||
model_without_ddp = model
|
||||
if args.distributed:
|
||||
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
|
||||
model_without_ddp = model.module
|
||||
|
||||
params = [p for p in model.parameters() if p.requires_grad]
|
||||
optimizer = torch.optim.SGD(
|
||||
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
|
||||
args.lr_scheduler = args.lr_scheduler.lower()
|
||||
if args.lr_scheduler == 'multisteplr':
|
||||
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
|
||||
elif args.lr_scheduler == 'cosineannealinglr':
|
||||
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
|
||||
else:
|
||||
raise RuntimeError("Invalid lr scheduler '{}'. Only MultiStepLR and CosineAnnealingLR "
|
||||
"are supported.".format(args.lr_scheduler))
|
||||
|
||||
    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

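        # checkpoint.pth is rewritten every epoch, so an interrupted run can be
        # restarted with --resume; the keys stored here ('model', 'optimizer',
        # 'lr_scheduler', 'epoch') are exactly the ones the resume branch above
        # loads back.
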
        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
239
old_files/transforms.py
Normal file
@@ -0,0 +1,239 @@
import torch
import torchvision

from torch import nn, Tensor
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from typing import List, Tuple, Dict, Optional


def _flip_coco_person_keypoints(kps, width):
    flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    flipped_data = kps[:, flip_inds]
    flipped_data[..., 0] = width - flipped_data[..., 0]
    # Maintain COCO convention that if visibility == 0, then x, y = 0
    inds = flipped_data[..., 2] == 0
    flipped_data[inds] = 0
    return flipped_data


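# flip_inds in _flip_coco_person_keypoints swaps each left/right pair in the
# standard COCO person keypoint ordering (nose, eyes, ears, shoulders, elbows,
# wrists, hips, knees, ankles), so that after mirroring the x coordinates a
# "left_eye" annotation still points at the person's left eye; index 0 (the
# nose) has no mirror partner and stays in place.
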
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target


class RandomHorizontalFlip(T.RandomHorizontalFlip):
    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if torch.rand(1) < self.p:
            image = F.hflip(image)
            if target is not None:
                width, _ = F._get_image_size(image)
                target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
                if "masks" in target:
                    target["masks"] = target["masks"].flip(-1)
                if "keypoints" in target:
                    keypoints = target["keypoints"]
                    keypoints = _flip_coco_person_keypoints(keypoints, width)
                    target["keypoints"] = keypoints
        return image, target


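# Box flipping in RandomHorizontalFlip works in xyxy coordinates: for a box
# (x1, y1, x2, y2) the mirrored box is (W - x2, y1, W - x1, y2), which is what
# the fancy-indexed assignment `boxes[:, [0, 2]] = W - boxes[:, [2, 0]]`
# computes while keeping x1 <= x2.
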
class ToTensor(nn.Module):
    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        image = F.to_tensor(image)
        return image, target


class RandomIoUCrop(nn.Module):
    def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
                 max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
        super().__init__()
        # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        if sampler_options is None:
            sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
        self.options = sampler_options
        self.trials = trials

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if target is None:
            raise ValueError("The targets can't be None for this transform.")

        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        orig_w, orig_h = F._get_image_size(image)

        while True:
            # sample an option
            idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
            min_jaccard_overlap = self.options[idx]
            if min_jaccard_overlap >= 1.0:  # a value of 1 (or larger) encodes the leave-as-is option
                return image, target

            for _ in range(self.trials):
                # check the aspect ratio limitations
                r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
                new_w = int(orig_w * r[0])
                new_h = int(orig_h * r[1])
                aspect_ratio = new_w / new_h
                if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
                    continue

                # check for 0 area crops
                r = torch.rand(2)
                left = int((orig_w - new_w) * r[0])
                top = int((orig_h - new_h) * r[1])
                right = left + new_w
                bottom = top + new_h
                if left == right or top == bottom:
                    continue

                # check for any valid boxes with centers within the crop area
                cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
                cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
                is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
                if not is_within_crop_area.any():
                    continue

                # check at least 1 box with jaccard limitations
                boxes = target["boxes"][is_within_crop_area]
                ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
                                                     dtype=boxes.dtype, device=boxes.device))
                if ious.max() < min_jaccard_overlap:
                    continue

                # keep only valid boxes and perform cropping
                target["boxes"] = boxes
                target["labels"] = target["labels"][is_within_crop_area]
                target["boxes"][:, 0::2] -= left
                target["boxes"][:, 1::2] -= top
                target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
                target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
                image = F.crop(image, top, left, new_h, new_w)

                return image, target


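# RandomIoUCrop follows the SSD-style "min IoU" sampling strategy: each round
# picks a minimum-IoU threshold from sampler_options at random (1.0 meaning
# "keep the original image"), samples a crop with scale in
# [min_scale, max_scale] and aspect ratio in [min_aspect_ratio,
# max_aspect_ratio], and accepts it only if some ground-truth box center lies
# inside the crop and the crop overlaps at least one of those boxes by the
# sampled threshold. A rough usage sketch with the names defined in this file:
#   crop = RandomIoUCrop()
#   image, target = crop(image, {"boxes": boxes_xyxy, "labels": labels})
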
class RandomZoomOut(nn.Module):
    def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
        super().__init__()
        if fill is None:
            fill = [0., 0., 0.]
        self.fill = fill
        self.side_range = side_range
        if side_range[0] < 1. or side_range[0] > side_range[1]:
            raise ValueError("Invalid canvas side range provided {}.".format(side_range))
        self.p = p

    @torch.jit.unused
    def _get_fill_value(self, is_pil):
        # type: (bool) -> int
        # We fake the type to make it work on JIT
        return tuple(int(x) for x in self.fill) if is_pil else 0

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        if torch.rand(1) < self.p:
            return image, target

        orig_w, orig_h = F._get_image_size(image)

        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
        canvas_width = int(orig_w * r)
        canvas_height = int(orig_h * r)

        r = torch.rand(2)
        left = int((canvas_width - orig_w) * r[0])
        top = int((canvas_height - orig_h) * r[1])
        right = canvas_width - (left + orig_w)
        bottom = canvas_height - (top + orig_h)

        if torch.jit.is_scripting():
            fill = 0
        else:
            fill = self._get_fill_value(F._is_pil_image(image))

        image = F.pad(image, [left, top, right, bottom], fill=fill)
        if isinstance(image, torch.Tensor):
            v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
            image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
                image[..., :, (left + orig_w):] = v

        if target is not None:
            target["boxes"][:, 0::2] += left
            target["boxes"][:, 1::2] += top

        return image, target


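# RandomZoomOut implements the SSD-style "expansion" augmentation: the image is
# pasted onto a larger canvas whose sides are r times the original (r drawn
# from side_range) at a random offset (left, top), and the padded border is
# filled with self.fill. Boxes only need to be translated by that offset
# because the image content itself is not rescaled. Note that the early
# `if torch.rand(1) < self.p: return` means p is the probability of *skipping*
# the zoom-out.
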
class RandomPhotometricDistort(nn.Module):
    def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
                 hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
        super().__init__()
        self._brightness = T.ColorJitter(brightness=brightness)
        self._contrast = T.ColorJitter(contrast=contrast)
        self._hue = T.ColorJitter(hue=hue)
        self._saturation = T.ColorJitter(saturation=saturation)
        self.p = p

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        r = torch.rand(7)

        if r[0] < self.p:
            image = self._brightness(image)

        contrast_before = r[1] < 0.5
        if contrast_before:
            if r[2] < self.p:
                image = self._contrast(image)

        if r[3] < self.p:
            image = self._saturation(image)

        if r[4] < self.p:
            image = self._hue(image)

        if not contrast_before:
            if r[5] < self.p:
                image = self._contrast(image)

        if r[6] < self.p:
            channels = F._get_image_num_channels(image)
            permutation = torch.randperm(channels)

            is_pil = F._is_pil_image(image)
            if is_pil:
                image = F.to_tensor(image)
            image = image[..., permutation, :, :]
            if is_pil:
                image = F.to_pil_image(image)

        return image, target
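
# RandomPhotometricDistort mirrors the SSD photometric pipeline: brightness,
# contrast, saturation and hue jitters are each applied independently with
# probability p, the contrast jitter randomly happens either before or after
# the saturation/hue jitters, and with probability p the color channels are
# randomly permuted. PIL inputs are temporarily converted to tensors for the
# channel shuffle and converted back afterwards.
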
295
old_files/utils.py
Normal file
@@ -0,0 +1,295 @@
from collections import defaultdict, deque
import datetime
import errno
import os
import time

import torch
import torch.distributed as dist


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)


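# SmoothedValue keeps two views of a metric: a sliding window of the last
# `window_size` raw values (median/avg/max/value) and running totals for the
# global average. synchronize_between_processes() all-reduces only count and
# total, so global_avg becomes consistent across ranks while the windowed
# statistics stay local to each process.
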
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    data_list = [None] * world_size
    dist.all_gather_object(data_list, data)
    return data_list


def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


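# reduce_dict is typically applied to the per-iteration loss dictionary so that
# the values printed by rank 0 reflect all workers, e.g. (illustrative):
#   loss_dict = model(images, targets)          # {'loss_classifier': ..., 'loss_box_reg': ...}
#   loss_dict_reduced = reduce_dict(loss_dict)  # averaged over world_size ranks
# Keys are sorted before stacking so every rank reduces the tensors in the same
# order.
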
class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))


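# log_every is a generator wrapper around the data loader; a training loop
# might use it roughly as
#   for images, targets in metric_logger.log_every(data_loader, print_freq, header):
#       ...
# and every print_freq iterations it prints the header, the progress counter,
# an ETA based on the smoothed iteration time, all registered meters, and (on
# CUDA) the peak memory allocated.
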
def collate_fn(batch):
    return tuple(zip(*batch))


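# collate_fn transposes a list of (image, target) samples into a pair of
# tuples, e.g. [(img0, tgt0), (img1, tgt1)] -> ((img0, img1), (tgt0, tgt1)).
# Detection images can have different sizes, so they are deliberately not
# stacked into a single batch tensor here; the models accept lists of images.
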
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)


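# The warmup multiplier ramps linearly from warmup_factor to 1 over
# warmup_iters steps: at iteration x the lr is scaled by
#   warmup_factor * (1 - x / warmup_iters) + x / warmup_iters.
# For example (illustrative numbers), warmup_factor=0.001 and warmup_iters=1000
# scale the lr by 0.001 at step 0, ~0.5 at step 500, and 1.0 from step 1000 on.
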
def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
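
# When the scripts are launched with a distributed launcher in --use_env mode,
# each worker is expected to have RANK, WORLD_SIZE and LOCAL_RANK exported, so
# the first branch above picks them up and the process group is initialized
# over NCCL via the --dist-url rendezvous (default 'env://'). Under SLURM the
# rank comes from SLURM_PROCID instead, and the local GPU index is derived from
# it modulo the number of visible devices.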