# %%
import json
import os
import sys

import numpy as np
import torch
from PIL import Image

import transforms as T  # local module; detection-style transforms that operate on (img, target)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

if sys.platform == "win32":
    PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
else:
    PATH_ROOT = "/home/thebears/data/ml/inaturalist"


def get_transform(train):
    """Compose detection transforms; flip augmentation only when training."""
    trsf = [T.ToTensor()]
    if train:
        trsf.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(trsf)


def create_map(list_in, from_key, to_key):
    """Map each entry's from_key value to its to_key value."""
    return {entry[from_key]: entry[to_key] for entry in list_in}


class iNaturalistDataset(torch.utils.data.Dataset):
    def __init__(self, validation=False, train=False, species=None):
        self.validation = validation
        self.train = train
        if self.train == self.validation:
            raise ValueError("Need exactly one of train or validation")
        self.transform = get_transform(self.train)
        if validation:
            json_path = os.path.join(
                PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json"
            )
        else:
            json_path = os.path.join(
                PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json"
            )
        with open(json_path, "r") as rj:
            f = json.load(rj)
        self.raw_data = f

        # Keep every category (or only the requested species) and give each a
        # contiguous new id; id 0 is reserved for the background class.
        categories = list()
        image_info = dict()
        orig_id_to_name = dict()
        for category in f["categories"]:
            orig_id_to_name[category["id"]] = category
            do_add = False
            if species is None:
                do_add = True
            elif category["name"] in species:
                print(category["name"])
                do_add = True
            if do_add:
                categories.append(category)
        categories = sorted(categories, key=lambda k: k["name"])
        for idx, cat in enumerate(categories):
            cat["new_id"] = idx + 1
        orig_to_new_id = create_map(categories, "id", "new_id")

        # Index annotations by image id, converting each box in place from
        # COCO-style [x, y, w, h] to the [x1, y1, x2, y2] form torchvision
        # expects. If an image has several annotations, only the last is kept.
        for annot in f["annotations"]:
            if annot["category_id"] in orig_to_new_id:
                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
                image_id = annot["image_id"]
                if image_id not in image_info:
                    image_info[image_id] = dict()
                annot["bbox"][2] += annot["bbox"][0]
                annot["bbox"][3] += annot["bbox"][1]
                image_info[image_id]["annotation"] = annot

        # Attach path and size metadata to every annotated image.
        for img in f["images"]:
            image_id = img["id"]
            if image_id in image_info:
                image_info[image_id].update(
                    {
                        "path": os.path.join(PATH_ROOT, img["file_name"]),
                        "height": img["height"],
                        "width": img["width"],
                    }
                )
        for idx, (image_id, im_in) in enumerate(image_info.items()):
            im_in["idx"] = idx

        self.images = image_info
        self.categories = categories
        self.orig_id_to_name = orig_id_to_name
        self.idx_to_id = list(self.images)
        self.num_classes = len(self.categories) + 1  # +1 for background
        self.num_samples = len(self.images)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        image_id = self.idx_to_id[idx]
        c_image = self.images[image_id]
        img = Image.open(c_image["path"]).convert("RGB")
        annot = c_image["annotation"]
        # Single-box target in the dict format torchvision detection models expect.
        target = dict()
        target["boxes"] = torch.as_tensor([annot["bbox"]], dtype=torch.float32)
        target["labels"] = torch.as_tensor(
            [annot["new_category_id"]], dtype=torch.int64
        )
        target["image_id"] = torch.tensor([annot["image_id"]])
        target["area"] = torch.as_tensor([annot["area"]])
        target["iscrowd"] = torch.zeros((1,), dtype=torch.int64)
        if self.transform is not None:
            img, target = self.transform(img, target)
        return img, target
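# %%
# Minimal usage sketch: torchvision detection targets vary per image, so the
# DataLoader needs a collate_fn that keeps images and targets as tuples
# instead of stacking them. demo_dataset/demo_loader are illustrative names,
# not part of the pipeline below.
if False:  # flip to True to smoke-test the dataset
    demo_dataset = iNaturalistDataset(train=True)
    demo_loader = torch.utils.data.DataLoader(
        demo_dataset,
        batch_size=2,
        shuffle=True,
        collate_fn=lambda batch: tuple(zip(*batch)),
    )
    imgs, targets = next(iter(demo_loader))
    print(len(imgs), targets[0]["boxes"].shape)  # 2 images, each with a [1, 4] boxes tensor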
# %%
# Build the training index and load the per-image observation locations.
train_dataset = iNaturalistDataset(train=True)

loc_path = os.path.join(PATH_ROOT, "inat2017_locations", "train2017_locations.json")
with open(loc_path, "r") as lfile:
    locs = json.load(lfile)

from bear_utils import get_distance_from_home  # local helper; a hedged sketch is in the last cell

# %%
# Bucket each located training image's distance from home by original category id.
category_distances = dict()
inserts = 0
for loc in locs:
    lat = loc["lat"]
    lon = loc["lon"]
    im_id = loc["id"]
    if lat is None or lon is None:
        continue
    ff = get_distance_from_home(lat, lon)
    if im_id in train_dataset.images:
        inserts += 1
        train_dataset.images[im_id]["distance"] = ff
        category_id = train_dataset.images[im_id]["annotation"]["category_id"]
        if category_id not in category_distances:
            category_distances[category_id] = list()
        category_distances[category_id].append(ff)

# %%
# Print bird species where more than 10% of located sightings fall within 250
# distance units of home (units depend on get_distance_from_home).
from EcoNameTranslator import to_common

for k, v in category_distances.items():
    name = train_dataset.orig_id_to_name[k]
    # Averaging a boolean array gives the fraction of True entries.
    if np.average(np.asarray(v) < 250) > 0.1:
        if name["supercategory"] == "Aves":
            print(len(v), to_common([name["name"]]))

# %%
# Bird species ranked by number of located training sightings.
fc = sorted(
    category_distances, key=lambda x: len(category_distances[x]), reverse=True
)
for x in fc:
    cc = train_dataset.orig_id_to_name[x]
    if cc["supercategory"] == "Aves":
        ou = to_common([cc["name"]])
        print(ou, len(category_distances[x]))

# %%
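# get_distance_from_home comes from the local bear_utils module, which is not
# shown in this file. The sketch below is a guess at what it plausibly
# computes: great-circle (haversine) distance from a fixed home coordinate.
# HOME_LAT/HOME_LON, the kilometer unit, and the body are assumptions, not
# the real helper.
import math

HOME_LAT, HOME_LON = 40.0, -105.0  # placeholder home coordinate (assumed)


def get_distance_from_home_sketch(lat, lon):
    """Haversine distance in km from (HOME_LAT, HOME_LON) to (lat, lon)."""
    r = 6371.0  # mean Earth radius, km
    p1, p2 = math.radians(HOME_LAT), math.radians(lat)
    dp = math.radians(lat - HOME_LAT)
    dl = math.radians(lon - HOME_LON)
    a = math.sin(dp / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dl / 2) ** 2
    return 2 * r * math.asin(math.sqrt(a))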