yacwc
This commit is contained in:
125
data.py
125
data.py
@@ -1,27 +1,29 @@
|
||||
# %%
|
||||
import os
|
||||
from unicodedata import category
|
||||
import torch
|
||||
from PIL import Image
|
||||
import torchvision
|
||||
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
||||
import sys
|
||||
import json
|
||||
import torch
|
||||
from torchvision import transforms as T
|
||||
import transforms as T
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
||||
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
||||
|
||||
if sys.platform == 'win32':
|
||||
if sys.platform == "win32":
|
||||
PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
|
||||
else:
|
||||
raise NotImplementedError("Not defined for this platform")
|
||||
|
||||
|
||||
def get_transform(train):
|
||||
transforms = []
|
||||
transforms.append(T.ToTensor())
|
||||
trsf = []
|
||||
trsf.append(T.ToTensor())
|
||||
if train:
|
||||
transforms.append(T.RandomHorizontalFlip(0.5))
|
||||
return T.Compose(transforms)
|
||||
trsf.append(T.RandomHorizontalFlip(0.5))
|
||||
return T.Compose(trsf)
|
||||
|
||||
|
||||
def create_map(list_in, from_key, to_key):
|
||||
@@ -32,40 +34,45 @@ def create_map(list_in, from_key, to_key):
|
||||
|
||||
|
||||
class iNaturalistDataset(torch.utils.data.Dataset):
|
||||
def __init__(self, validation=False, train=False, transforms = None, species = None):
|
||||
def __init__(self, validation=False, train=False, species=None):
|
||||
|
||||
self.validation = validation
|
||||
self.train = train
|
||||
|
||||
if (self.train or self.validation) or (self.train and self.validation)
|
||||
if (not self.train and not self.validation) or (self.train and self.validation):
|
||||
raise Exception("Need to do either train or validation")
|
||||
|
||||
self.transforms = get_transform(self.train)
|
||||
|
||||
self.transform = get_transform(self.train)
|
||||
|
||||
if validation:
|
||||
json_path = os.path.join(PATH_ROOT, "val_2017_bboxes","val_2017_bboxes.json")
|
||||
json_path = os.path.join(
|
||||
PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json"
|
||||
)
|
||||
elif train:
|
||||
json_path = os.path.join(
|
||||
PATH_ROOT, "train_2017_bboxes","train_2017_bboxes.json"
|
||||
PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json"
|
||||
)
|
||||
|
||||
with open(json_path, "r") as rj:
|
||||
f = json.load(rj)
|
||||
|
||||
self.raw_data = f
|
||||
categories = list()
|
||||
image_info = dict()
|
||||
|
||||
|
||||
|
||||
orig_id_to_name = dict()
|
||||
for category in f["categories"]:
|
||||
do_add = False
|
||||
orig_id_to_name[category["id"]] = category
|
||||
if species is None:
|
||||
do_add = True
|
||||
if category['name'] in species:
|
||||
print(category['name'])
|
||||
elif category["name"] in species:
|
||||
print(category["name"])
|
||||
do_add = True
|
||||
|
||||
if do_add:
|
||||
categories.append(category)
|
||||
|
||||
|
||||
categories = sorted(categories, key=lambda k: k["name"])
|
||||
for idx, cat in enumerate(categories):
|
||||
cat["new_id"] = idx + 1
|
||||
@@ -93,13 +100,13 @@ class iNaturalistDataset(torch.utils.data.Dataset):
|
||||
|
||||
for idx, (id, im_in) in enumerate(image_info.items()):
|
||||
im_in["idx"] = idx
|
||||
|
||||
|
||||
self.images = image_info
|
||||
self.categories = categories
|
||||
self.orig_id_to_name = orig_id_to_name
|
||||
self.idx_to_id = [x for x in self.images]
|
||||
self.num_classes = len(self.categories) + 1
|
||||
self.num_samples = len(self.images)
|
||||
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
@@ -109,18 +116,74 @@ class iNaturalistDataset(torch.utils.data.Dataset):
|
||||
c_image = self.images[idd]
|
||||
img_path = c_image["path"]
|
||||
img = Image.open(img_path).convert("RGB")
|
||||
|
||||
|
||||
annot = c_image["annotation"]
|
||||
bbox = annot["bbox"]
|
||||
boxes = bbox
|
||||
target = dict()
|
||||
target["boxes"] = torch.as_tensor([boxes])
|
||||
target["labels"] = torch.as_tensor([annot["new_category_id"]], dtype=torch.int64)
|
||||
target['image_id'] = torch.tensor([annot['image_id']])
|
||||
target['area'] = torch.as_tensor([annot['area']])
|
||||
target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)
|
||||
|
||||
if self.transforms is not None:
|
||||
img, target = self.transforms(img, target)
|
||||
target["labels"] = torch.as_tensor(
|
||||
[annot["new_category_id"]], dtype=torch.int64
|
||||
)
|
||||
target["image_id"] = torch.tensor([annot["image_id"]])
|
||||
target["area"] = torch.as_tensor([annot["area"]])
|
||||
target["iscrowd"] = torch.zeros((1,), dtype=torch.int64)
|
||||
|
||||
return img, target
|
||||
if self.transform is not None:
|
||||
img, target = self.transform(img, target)
|
||||
|
||||
return img, target
|
||||
|
||||
|
||||
if False:
|
||||
|
||||
train_dataset = iNaturalistDataset(train=True)
|
||||
loc_path = os.path.join(PATH_ROOT, "inat2017_locations", "train2017_locations.json")
|
||||
with open(loc_path, "r") as lfile:
|
||||
locs = json.load(lfile)
|
||||
|
||||
from bear_utils import get_distance_from_home
|
||||
|
||||
# %%
|
||||
category_distances = dict()
|
||||
inserts = 0
|
||||
for loc in locs:
|
||||
lat = loc["lat"]
|
||||
lon = loc["lon"]
|
||||
im_id = loc["id"]
|
||||
if lat is None or lon is None:
|
||||
continue
|
||||
|
||||
ff = get_distance_from_home(lat, lon)
|
||||
if im_id in train_dataset.images:
|
||||
inserts += 1
|
||||
train_dataset.images[im_id]["distance"] = ff
|
||||
category_id = train_dataset.images[im_id]["annotation"]["category_id"]
|
||||
|
||||
if category_id not in category_distances:
|
||||
category_distances[category_id] = list()
|
||||
|
||||
category_distances[category_id].append(ff)
|
||||
|
||||
# %%
|
||||
from EcoNameTranslator import to_common
|
||||
|
||||
for k, v in category_distances.items():
|
||||
name = train_dataset.orig_id_to_name[k]
|
||||
if np.average(np.asarray(v) < 250) > 0.1:
|
||||
if name["supercategory"] == "Aves":
|
||||
print(len(v), to_common([name["name"]]))
|
||||
|
||||
# %%
|
||||
|
||||
fc = sorted(
|
||||
category_distances, key=lambda x: len(category_distances[x]), reverse=True
|
||||
)
|
||||
for x in fc:
|
||||
cc = train_dataset.orig_id_to_name[x]
|
||||
if cc["supercategory"] == "Aves":
|
||||
ou = to_common([cc["name"]])
|
||||
print(ou, len(category_distances[x]))
|
||||
|
||||
|
||||
# %%
|
||||
|
||||
Reference in New Issue
Block a user