inaturalist_pytorch_model/data (SFConflict ispatel@live.com 2021-09-27-14-36-56).py
# %%
import json
import os
import sys

import numpy as np
import torch
from PIL import Image

import transforms as T  # local module, presumably the torchvision detection reference transforms

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Dataset root differs between the Windows and Linux machines.
if sys.platform == "win32":
    PATH_ROOT = r"D:\ishan\ml\inaturalist"
else:
    PATH_ROOT = "/home/thebears/data/ml/inaturalist"
def get_transform(train):
    """Build the image/target transform pipeline; flip only during training."""
    trsf = [T.ToTensor()]
    if train:
        trsf.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(trsf)
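# Illustrative note (not in the original): get_transform(True) composes
# ToTensor followed by RandomHorizontalFlip(0.5), while get_transform(False)
# composes ToTensor alone, so validation images are never flipped.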
def create_map(list_in, from_key, to_key):
    """Map each dict's `from_key` value to its `to_key` value."""
    return {item[from_key]: item[to_key] for item in list_in}
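# Usage sketch (added for illustration): create_map builds a lookup table
# from a list of records, e.g. mapping original ids to remapped ids:
#   create_map([{"id": 7, "new_id": 1}], "id", "new_id")  ->  {7: 1}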
class iNaturalistDataset(torch.utils.data.Dataset):
    def __init__(self, validation=False, train=False, species=None):
        self.validation = validation
        self.train = train
        # Exactly one of train/validation must be requested.
        if (not self.train and not self.validation) or (self.train and self.validation):
            raise ValueError("Need exactly one of train or validation")
        self.transform = get_transform(self.train)
        if validation:
            json_path = os.path.join(
                PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json"
            )
        else:
            json_path = os.path.join(
                PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json"
            )
        with open(json_path, "r") as rj:
            f = json.load(rj)
        self.raw_data = f
        categories = list()
        image_info = dict()
        orig_id_to_name = dict()
        # Keep a category if no species filter was given, if its name was
        # explicitly requested, or if it is a mammal.
        for category in f["categories"]:
            orig_id_to_name[category["id"]] = category
            do_add = (
                species is None
                or category["name"] in species
                or category["supercategory"] == "Mammalia"
            )
            if do_add:
                print("Adding " + str(category))
                categories.append(category)
        # Re-index the kept categories as 1..N (0 is left for background,
        # matching num_classes below).
        categories = sorted(categories, key=lambda k: k["name"])
        for idx, cat in enumerate(categories):
            cat["new_id"] = idx + 1
        orig_to_new_id = create_map(categories, "id", "new_id")
        for annot in f["annotations"]:
            if annot["category_id"] in orig_to_new_id:
                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
                img_id = annot["image_id"]
                if img_id not in image_info:
                    image_info[img_id] = dict()
                # Convert COCO-style [x, y, w, h] boxes to [x1, y1, x2, y2].
                annot["bbox"][2] += annot["bbox"][0]
                annot["bbox"][3] += annot["bbox"][1]
                image_info[img_id]["annotation"] = annot
        for img in f["images"]:
            img_id = img["id"]
            if img_id in image_info:
                image_info[img_id].update(
                    {
                        "path": os.path.join(PATH_ROOT, img["file_name"]),
                        "height": img["height"],
                        "width": img["width"],
                    }
                )
        for idx, (img_id, im_in) in enumerate(image_info.items()):
            im_in["idx"] = idx
        self.images = image_info
        self.categories = categories
        self.orig_id_to_name = orig_id_to_name
        self.idx_to_id = list(self.images)
        self.num_classes = len(self.categories) + 1  # +1 for background class
        self.num_samples = len(self.images)
    def __len__(self):
        return self.num_samples
    def __getitem__(self, idx):
        img_id = self.idx_to_id[idx]
        c_image = self.images[img_id]
        img = Image.open(c_image["path"]).convert("RGB")
        annot = c_image["annotation"]
        # Build a torchvision-detection-style target for the single box;
        # boxes must be float for the detection models.
        target = dict()
        target["boxes"] = torch.as_tensor([annot["bbox"]], dtype=torch.float32)
        target["labels"] = torch.as_tensor(
            [annot["new_category_id"]], dtype=torch.int64
        )
        target["image_id"] = torch.tensor([annot["image_id"]])
        target["area"] = torch.as_tensor([annot["area"]])
        target["iscrowd"] = torch.zeros((1,), dtype=torch.int64)
        if self.transform is not None:
            img, target = self.transform(img, target)
        return img, target
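# %%
# Usage sketch (added for illustration, not part of the original script):
# torchvision detection models expect lists of images and targets rather
# than stacked tensors, so the DataLoader needs a collate_fn that zips the
# batch items. Guarded with `if False:` like the scratch block below.
if False:
    from torch.utils.data import DataLoader

    def detection_collate(batch):
        # Keep images/targets as per-sample tuples instead of stacking.
        return tuple(zip(*batch))

    dataset = iNaturalistDataset(train=True)
    loader = DataLoader(
        dataset, batch_size=4, shuffle=True, collate_fn=detection_collate
    )
    images, targets = next(iter(loader))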
# Disabled scratch block: flip to True to build the dataset and load the
# per-image location metadata used by the cells below.
if False:
    train_dataset = iNaturalistDataset(train=True)
    loc_path = os.path.join(
        PATH_ROOT, "inat2017_locations", "train2017_locations.json"
    )
    with open(loc_path, "r") as lfile:
        locs = json.load(lfile)
    from bear_utils import get_distance_from_home
# %%
# Distance of each training image from home, grouped by original category id.
category_distances = dict()
inserts = 0
for loc in locs:
    lat = loc["lat"]
    lon = loc["lon"]
    im_id = loc["id"]
    if lat is None or lon is None:
        continue
    ff = get_distance_from_home(lat, lon)
    if im_id in train_dataset.images:
        inserts += 1
        train_dataset.images[im_id]["distance"] = ff
        category_id = train_dataset.images[im_id]["annotation"]["category_id"]
        if category_id not in category_distances:
            category_distances[category_id] = list()
        category_distances[category_id].append(ff)
# %%
from EcoNameTranslator import to_common

# Print bird categories where more than 10% of located sightings fall within
# 250 distance units of home (units depend on get_distance_from_home).
for k, v in category_distances.items():
    name = train_dataset.orig_id_to_name[k]
    if np.average(np.asarray(v) < 250) > 0.1:
        if name["supercategory"] == "Aves":
            print(len(v), to_common([name["name"]]))
# %%
fc = sorted(
category_distances, key=lambda x: len(category_distances[x]), reverse=True
)
for x in fc:
cc = train_dataset.orig_id_to_name[x]
if cc["supercategory"] == "Aves":
ou = to_common([cc["name"]])
print(ou, len(category_distances[x]))
# %%