diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4f41cc1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +models/20210720_155640.oth filter=lfs diff=lfs merge=lfs -text +models/20210921_190116.json filter=lfs diff=lfs merge=lfs -text +models/20210921_190419.json filter=lfs diff=lfs merge=lfs -text +models/birds_only.json filter=lfs diff=lfs merge=lfs -text +models/hummingbird.json filter=lfs diff=lfs merge=lfs -text +models/20210701_202822.json filter=lfs diff=lfs merge=lfs -text +models/20210720_155640.json filter=lfs diff=lfs merge=lfs -text +models/hummingbird.pth filter=lfs diff=lfs merge=lfs -text +models/20210921_190436.oth filter=lfs diff=lfs merge=lfs -text +models/20210921_190436.pth filter=lfs diff=lfs merge=lfs -text +models/20210720_155640.pth filter=lfs diff=lfs merge=lfs -text +models/20210810_104206.json filter=lfs diff=lfs merge=lfs -text +models/20210921_190257.json filter=lfs diff=lfs merge=lfs -text +models/20210921_190436.json filter=lfs diff=lfs merge=lfs -text +models/20210921_190436.sth filter=lfs diff=lfs merge=lfs -text +models/20210923_153848.json filter=lfs diff=lfs merge=lfs -text +models/20210701_202822.pth filter=lfs diff=lfs merge=lfs -text +models/20210720_155509.json filter=lfs diff=lfs merge=lfs -text +models/20210923_174607.json filter=lfs diff=lfs merge=lfs -text +models/birds_only.pth filter=lfs diff=lfs merge=lfs -text +models/birds_only.oth filter=lfs diff=lfs merge=lfs -text +models/20210923_153808.json filter=lfs diff=lfs merge=lfs -text +models/20210923_174546.json filter=lfs diff=lfs merge=lfs -text diff --git a/anaylze_visits.py b/anaylze_visits.py new file mode 100644 index 0000000..5008be7 --- /dev/null +++ b/anaylze_visits.py @@ -0,0 +1,106 @@ +# %% +folder = r'\\192.168.1.242\ftp\hummingbird\2021\08\\08' +import os +dest_path = r'C:\\Users\\TheBears\\Desktop\\' + +files = os.listdir(folder) + +import numpy as np +f_jsons = list() +for f in files: + if f.endswith('.json'): + f_jsons.append(os.path.join(folder,f)) + + +import json +import datetime as dt + + + +def get_obs_in_json(curr_js): + all_obs = list() + curf = os.path.basename(curr_js) + curf = curf.split('.')[0].replace('_trimmed','').split('_')[-1] + dt_obj = dt.datetime.strptime(curf, '%Y%m%d%H%M%S') + with open(curr_js,'r') as ff: + data = json.load(ff) + + for i in data: + + test_field = 'frame_number_original' + + if test_field in i: + sec_offset = i[test_field]/30 + else: + sec_offset = i['frame_number'] + if sec_offset == 'thumbnail': + continue + + if len(i['boxes']) > 0: + obs = [dt_obj + dt.timedelta(seconds=sec_offset/2), max(i['scores'])] + all_obs.append(obs) + + cob = all_obs + ret_this = [] + if len(cob) > 0: + tie = np.asarray([x[0].timestamp() for x in cob]) + tavg = dt.datetime.fromtimestamp(int(np.average(tie))) + scavg = np.max([x[1] for x in cob]) + ret_this = [tavg, scavg] + + + return all_obs, ret_this + + +entire_obs = list() +obs_sep = list() +obs_fnames = list() +for curr_js in f_jsons: + cc, avgg = get_obs_in_json(curr_js) + if len(avgg) > 0: + obs_sep.append(avgg) + obs_fnames.append(curr_js.replace('.json','.mp4')) + entire_obs.extend(cc) + + + +def scatter_dt(entire_obs, fname, hover_data = None): + import numpy as np + obs_array = np.asarray(entire_obs) + x = np.asarray(obs_array[:,0], dtype=np.datetime64) + y = obs_array[:,1] + import plotly.express as px + + #fig = px.scatter(x = x,y = y, hovertext=obs_fnames) + import plotly.graph_objects as go + + fig = go.Figure() + + fig.add_trace(go.Scatter( + x=x, + y=y, + 
hovertext=hover_data,
+        hoverinfo="text",
+        marker=dict(
+            color="green"
+        ),
+        mode='markers',
+        showlegend=False
+    ))
+    fig.write_html(os.path.join(dest_path, fname))
+
+
+scatter_dt(entire_obs, 'hbirds.html')
+scatter_dt(obs_sep, 'indiv.html', hover_data = obs_fnames)
+import plotly.express as px
+
+scores = np.asarray([x[1] for x in obs_sep])
+
+scors = np.sort(scores)
+cumu_total = len(scores) - np.arange(0, len(scores))
+
+fig = px.scatter(x=scors, y=cumu_total)
+fig.write_html(os.path.join(dest_path, 'cumul.html'))
+
+
+# %%
diff --git a/convert_to_onnx.py b/convert_to_onnx.py
new file mode 100644
index 0000000..3419f34
--- /dev/null
+++ b/convert_to_onnx.py
@@ -0,0 +1,42 @@
+import os
+import json
+import sys
+sys.path.append('/home/thebears/Seafile/Designs/ML')
+from model import Model
+import torch
+device = 'cpu'
+
+model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json
+newest_model = os.path.join(model_rt_path, max(os.listdir(model_rt_path)).replace('.pth',''))
+with open(newest_model + '.json','r') as nmj:
+    model_json = json.load(nmj)
+
+cats = model_json['categories']
+cats.sort(key=lambda x: x['new_id'])
+num_cat = len(cats) + 1
+model_type = model_json['model_type']
+model = Model(num_cat, model_type)
+labels = [x['name'] for x in cats]
+model.load_state_dict(
+    torch.load(newest_model + '.pth', map_location = torch.device(device))
+)
+model.eval()
+
+
+
+# %%
+
+onnx_model_path = "models"
+onnx_model_name = "hbirds.onnx"
+os.makedirs(onnx_model_path, exist_ok=True)
+full_model_path = os.path.join(onnx_model_path, onnx_model_name)
+
+
+# model export into ONNX format
+x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+torch.onnx.export(model, x, full_model_path, opset_version = 12)
+# %%
+
+import cv2
+opencv_net = cv2.dnn.readNetFromONNX(full_model_path)
+print("OpenCV model was successfully read.
Layer IDs: \n", opencv_net.getLayerNames()) diff --git a/data (SFConflict ispatel@live.com 2021-09-27-14-36-56).py b/data (SFConflict ispatel@live.com 2021-09-27-14-36-56).py new file mode 100644 index 0000000..9250127 --- /dev/null +++ b/data (SFConflict ispatel@live.com 2021-09-27-14-36-56).py @@ -0,0 +1,192 @@ +# %% +import os +from unicodedata import category +import torch +from PIL import Image +import sys +import json +import torch +import transforms as T +import os +import numpy as np + +device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + +if sys.platform == "win32": + PATH_ROOT = r"D:\ishan\ml\inaturalist\\" +else: + PATH_ROOT = '/home/thebears/data/ml/inaturalist' + + + +def get_transform(train): + trsf = [] + trsf.append(T.ToTensor()) + if train: + trsf.append(T.RandomHorizontalFlip(0.5)) + return T.Compose(trsf) + + +def create_map(list_in, from_key, to_key): + cmap = dict() + for l in list_in: + cmap[l[from_key]] = l[to_key] + return cmap + + +class iNaturalistDataset(torch.utils.data.Dataset): + def __init__(self, validation=False, train=False, species=None): + + self.validation = validation + self.train = train + + if (not self.train and not self.validation) or (self.train and self.validation): + raise Exception("Need to do either train or validation") + + self.transform = get_transform(self.train) + + if validation: + json_path = os.path.join( + PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json" + ) + elif train: + json_path = os.path.join( + PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json" + ) + + with open(json_path, "r") as rj: + f = json.load(rj) + + self.raw_data = f + categories = list() + image_info = dict() + + orig_id_to_name = dict() + for idx, category in enumerate(f["categories"]): + do_add = False + orig_id_to_name[category["id"]] = category + if species is None: + do_add = True + elif category["name"] in species: + do_add = True + elif category['supercategory'] == 'Mammalia': + do_add = True + + if do_add: + print('Adding '+str(category)) + categories.append(category) + + categories = sorted(categories, key=lambda k: k["name"]) + for idx, cat in enumerate(categories): + cat["new_id"] = idx + 1 + + orig_to_new_id = create_map(categories, "id", "new_id") + + for annot in f["annotations"]: + if annot["category_id"] in orig_to_new_id: + annot["new_category_id"] = orig_to_new_id[annot["category_id"]] + id = annot["image_id"] + if id not in image_info: + image_info[id] = dict() + + annot["bbox"][2] += annot["bbox"][0] + annot["bbox"][3] += annot["bbox"][1] + image_info[id]["annotation"] = annot + + for img in f["images"]: + id = img["id"] + path = os.path.join(PATH_ROOT, img["file_name"]) + height = img["height"] + width = img["width"] + if id in image_info: + image_info[id].update({"path": path, "height": height, "width": width}) + + for idx, (id, im_in) in enumerate(image_info.items()): + im_in["idx"] = idx + + self.images = image_info + self.categories = categories + self.orig_id_to_name = orig_id_to_name + self.idx_to_id = [x for x in self.images] + self.num_classes = len(self.categories) + 1 + self.num_samples = len(self.images) + + def __len__(self): + return self.num_samples + + def __getitem__(self, idx): + idd = self.idx_to_id[idx] + c_image = self.images[idd] + img_path = c_image["path"] + img = Image.open(img_path).convert("RGB") + + annot = c_image["annotation"] + bbox = annot["bbox"] + boxes = bbox + target = dict() + target["boxes"] = torch.as_tensor([boxes]) + target["labels"] = torch.as_tensor( + 
[annot["new_category_id"]], dtype=torch.int64 + ) + target["image_id"] = torch.tensor([annot["image_id"]]) + target["area"] = torch.as_tensor([annot["area"]]) + target["iscrowd"] = torch.zeros((1,), dtype=torch.int64) + + if self.transform is not None: + img, target = self.transform(img, target) + + return img, target + + +if False: + + train_dataset = iNaturalistDataset(train=True) + loc_path = os.path.join(PATH_ROOT, "inat2017_locations", "train2017_locations.json") + with open(loc_path, "r") as lfile: + locs = json.load(lfile) + + from bear_utils import get_distance_from_home + + # %% + category_distances = dict() + inserts = 0 + for loc in locs: + lat = loc["lat"] + lon = loc["lon"] + im_id = loc["id"] + if lat is None or lon is None: + continue + + ff = get_distance_from_home(lat, lon) + if im_id in train_dataset.images: + inserts += 1 + train_dataset.images[im_id]["distance"] = ff + category_id = train_dataset.images[im_id]["annotation"]["category_id"] + + if category_id not in category_distances: + category_distances[category_id] = list() + + category_distances[category_id].append(ff) + + # %% + from EcoNameTranslator import to_common + + for k, v in category_distances.items(): + name = train_dataset.orig_id_to_name[k] + if np.average(np.asarray(v) < 250) > 0.1: + if name["supercategory"] == "Aves": + print(len(v), to_common([name["name"]])) + + # %% + + fc = sorted( + category_distances, key=lambda x: len(category_distances[x]), reverse=True + ) + for x in fc: + cc = train_dataset.orig_id_to_name[x] + if cc["supercategory"] == "Aves": + ou = to_common([cc["name"]]) + print(ou, len(category_distances[x])) + + +# %% diff --git a/data.py b/data.py index b9bb93f..257531c 100644 --- a/data.py +++ b/data.py @@ -68,7 +68,6 @@ class iNaturalistDataset(torch.utils.data.Dataset): if species is None: do_add = True elif category["name"] in species: - print(category["name"]) do_add = True if do_add: diff --git a/engine (SFConflict ispatel@live.com 2021-09-27-14-36-56).py b/engine (SFConflict ispatel@live.com 2021-09-27-14-36-56).py new file mode 100644 index 0000000..9510a87 --- /dev/null +++ b/engine (SFConflict ispatel@live.com 2021-09-27-14-36-56).py @@ -0,0 +1,110 @@ +import math +import sys +import time +import torch + +import torchvision.models.detection.mask_rcnn + +from coco_utils import get_coco_api_from_dataset +from coco_eval import CocoEvaluator +import utils + + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + header = 'Epoch: [{}]'.format(epoch) + + lr_scheduler = None + if epoch == 0: + warmup_factor = 1. 
/ 1000 + warmup_iters = min(1000, len(data_loader) - 1) + lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) + + + for images, targets in metric_logger.log_every(data_loader, print_freq, header): + images = list(image.to(device) for image in images) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + loss_dict = model(images, targets) + + losses = sum(loss for loss in loss_dict.values()) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + + loss_value = losses_reduced.item() + + if not math.isfinite(loss_value): + print("Loss is {}, stopping training".format(loss_value)) + print(loss_dict_reduced) + sys.exit(1) + + optimizer.zero_grad() + losses.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + metric_logger.update(loss=losses_reduced, **loss_dict_reduced) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + + return metric_logger + + +def _get_iou_types(model): + model_without_ddp = model + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model_without_ddp = model.module + iou_types = ["bbox"] + if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): + iou_types.append("segm") + if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): + iou_types.append("keypoints") + return iou_types + + +@torch.no_grad() +def evaluate(model, data_loader, device): + n_threads = torch.get_num_threads() + # FIXME remove this and make paste_masks_in_image run on the GPU + torch.set_num_threads(1) + cpu_device = torch.device("cpu") + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + + coco = get_coco_api_from_dataset(data_loader.dataset) + iou_types = _get_iou_types(model) + coco_evaluator = CocoEvaluator(coco, iou_types) + + for images, targets in metric_logger.log_every(data_loader, 100, header): + images = list(img.to(device) for img in images) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + model_time = time.time() + outputs = model(images) + + outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] + model_time = time.time() - model_time + + res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} + evaluator_time = time.time() + coco_evaluator.update(res) + evaluator_time = time.time() - evaluator_time + metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + coco_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + coco_evaluator.accumulate() + coco_evaluator.summarize() + torch.set_num_threads(n_threads) + return coco_evaluator diff --git a/engine.py b/engine.py index 49992af..e5c2051 100644 --- a/engine.py +++ b/engine.py @@ -20,15 +20,15 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): if epoch == 0: warmup_factor = 1. 
/ 1000
         warmup_iters = min(1000, len(data_loader) - 1)
-        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
+
 
     for images, targets in metric_logger.log_every(data_loader, print_freq, header):
         images = list(image.to(device) for image in images)
         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
 
         loss_dict = model(images, targets)
 
         losses = sum(loss for loss in loss_dict.values())
 
         # reduce losses over all GPUs for logging purposes
diff --git a/filter_species.py b/filter_species.py
new file mode 100644
index 0000000..e3ca41a
--- /dev/null
+++ b/filter_species.py
@@ -0,0 +1,46 @@
+import csv
+
+
+csv_path = '/home/thebears/data/ebirddata/output_mi/ebd_US-MI_relMay-2021.txt'
+
+fields = ['COMMON NAME','SCIENTIFIC NAME','LATITUDE', 'LONGITUDE','OBSERVATION DATE']
+
+tokeep = list()
+idx = 0
+with open(csv_path) as csvfile:
+    data = csv.DictReader(csvfile, delimiter='\t')
+    for idx, row in enumerate(data):
+        tokeep.append({k: row[k] for k in fields})  # keep only the fields of interest
+        if idx % 1000 == 0:
+            print(idx)
+
+# %%
+
+sc = dict()
+
+
+
+for x in tokeep:
+    scientific_name = x['SCIENTIFIC NAME']
+
+    if scientific_name not in sc:
+        sc[scientific_name] = [x['COMMON NAME'], 0]
+
+    sc[scientific_name][1] += 1
+
+# %%
+
+
+
+sor = {x: sc[x] for x in sorted(sc, key=lambda x: sc[x][1], reverse=True)}
+
+with open('/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv','w') as csvfile:
+    finames = ['species','name','count']
+    writer = csv.DictWriter(csvfile, fieldnames=finames)
+    writer.writeheader()
+    for key, values in sor.items():
+        fn = {'species': key, 'name': values[0], 'count': values[1]}
+        writer.writerow(fn)
+
+
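filter_species.py above collects rows in one pass and tallies species in a second. The same occurrence count can be done in a single streaming pass; a minimal sketch with collections.Counter, assuming the same tab-separated eBird export and column names used in the script:

import csv
from collections import Counter

csv_path = '/home/thebears/data/ebirddata/output_mi/ebd_US-MI_relMay-2021.txt'

counts = Counter()   # scientific name -> number of observations
common_names = {}    # scientific name -> common name (first one seen)
with open(csv_path, newline='') as csvfile:
    for row in csv.DictReader(csvfile, delimiter='\t'):
        sci = row['SCIENTIFIC NAME']
        counts[sci] += 1
        common_names.setdefault(sci, row['COMMON NAME'])

for sci, n in counts.most_common(20):
    print(sci, common_names[sci], n)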
diff --git a/flag_videos_to_keep.py b/flag_videos_to_keep.py
index 6b33031..dbe1851 100644
--- a/flag_videos_to_keep.py
+++ b/flag_videos_to_keep.py
@@ -1,11 +1,15 @@
 import json
 import shutil
 import os
+import numpy as np
 
 source_path = '/srv/ftp/hummingbird/2021'
 #target_path = '/home/thebears/Videos/ftp'
-target_path = '/home/thebears/ftp_links'
-
+target_path = '/home/thebears/data/ftplinks'
+import scipy.stats
+target_mean = 0.4
+target_std = 1
+gauss = scipy.stats.norm(target_mean, target_std)
 
 have_json = set()
 for di, _, fns in os.walk(source_path):
@@ -16,38 +20,69 @@
 
 
 
+def box_area(box):
+    return (box[3]-box[1]) * (box[2] - box[0]) / 100000
 
 do_stop = False
 fracs = dict()
+saveo = None
+saveb = None
+
+def gaussian(x, mu = target_mean, sig = target_std):
+    return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
+
+scc = list()
 
 for c_js in have_json:
     hits = 0
     total = 0
     o = json.load(open(c_js,'r'))
+    if c_js.endswith('Hummingbird_01_20210701105440.json'):
+        saveo = o
+
+    if c_js.endswith('Hummingbird_01_20210627111405.json'):
+        saveb = o
+
+    avg = 0
+    max_sc = 0
+
     for i in o:
-        total += 1
-#        if len([x for x in i['scores'] if x > 0.1]) > 0:
-        if len(i['boxes']) > 0:
-            hits += 1
-
-    fracs[c_js] = [hits, total]
+#        for x,b in zip(i['scores'], i['boxes']):
+#            scc.append((x,box_area(b)))
+
+        if len(i['scores']) > 0:
+            css = max(i['scores'])
+            mf = gaussian(box_area(i['boxes'][0]))
+            avg += css * mf / len(o)
 
-    if do_stop:
-        break
+    fracs[c_js] = avg
+
 
 ratios = dict()
 for x,y in fracs.items():
-    ratios[x] = y[0]/y[1]
-# %%
+    ratios[x] = y
+
+
+
+
+sorted_ratios = {x: ratios[x] for x in sorted(ratios, key=lambda x: ratios[x])}
+
+
+import shutil
+for d in os.listdir(target_path):
+    shutil.rmtree(target_path + '/' + d)
+
+
 
 import math
 dir_created = set()
-for fname, ratio in ratios.items():
-    cr = math.floor(ratio * 10)/10
-    target_dir = os.path.join(target_path, str(cr))
+for idx, (fname, ratio) in enumerate(sorted_ratios.items()):
+    cr = math.floor(100*ratio)
+    target_dir = os.path.join(target_path, '{0:02g}'.format(cr))
     if not os.path.exists(target_dir) and target_dir not in dir_created:
         os.mkdir(target_dir)
         dir_created.add(target_dir)
@@ -58,5 +93,32 @@
 
     source_file = fname.replace('.json','.mp4')
     target_file = os.path.join(target_dir, os.path.basename(source_file))
-
+
+    os.symlink(source_file, target_file)
+
+    source_file = fname  # link the json alongside the mp4
+    target_file = os.path.join(target_dir, os.path.basename(source_file))
+    os.symlink(source_file, target_file)
+# %%
+
+if False:
+    import matplotlib.pyplot as plt
+    plt.close('all')
+    inp = saveo
+
+    sco = list()
+    for x in inp:
+        if len(x['scores']) > 0:
+            cscore = max(x['scores'])
+            ar = box_area(x['boxes'][0])  # box_area already rescales by 100000
+        else:
+            cscore = 0
+            ar = 0
+        sco.append((cscore, ar))
+
+    plt.plot(sco)
+
+# %%
+    distr = np.asarray(scc)
+
diff --git a/generate_matrix_sets.py b/generate_matrix_sets.py
new file mode 100644
index 0000000..ffe9cd2
--- /dev/null
+++ b/generate_matrix_sets.py
@@ -0,0 +1,28 @@
+import os
+import cv2
+import numpy as np
+import torch
+
+vpath = '/srv/ftp/railing/2021/09/04/Railing_00_20210904070617.mp4'
+stack_path = os.path.splitext(vpath)[0]
+cap = cv2.VideoCapture(vpath)
+frame_num = 0
+step_frame = 5
+total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+# %%
+images = list()
+stack_num = 0
+for frame_num in range(0, total_frames, step_frame):
+    img = cap.read()[1].copy()
+
+    images.append(img)
+    if len(images) == 16:
+
+        stack_num += 1
+        break
+# %%
+imgs = np.moveaxis(np.stack(images), 3, 1)
+bt = torch.FloatTensor(imgs)
+tensor_path = f'{stack_path}.{stack_num:03}.pt'
+torch.save(bt, tensor_path)  # persist the 16-frame stack next to the video
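A caveat on generate_matrix_sets.py above: the loop steps frame_num by step_frame but calls cap.read() only once per iteration, so it actually gathers the first 16 consecutive frames rather than every fifth one. A sketch of the same stacking as a generator that really strides; the name frame_stacks and its defaults are ours, not the repo's:

import cv2
import numpy as np
import torch

def frame_stacks(video_path, step=5, stack_size=16):
    """Yield (stack_index, float tensor of shape [stack_size, C, H, W]),
    keeping every step-th frame of the video."""
    cap = cv2.VideoCapture(video_path)
    frames, stack_num, frame_idx = [], 0, 0
    while True:
        ok, img = cap.read()
        if not ok:
            break
        if frame_idx % step == 0:
            frames.append(img)
            if len(frames) == stack_size:
                # HWC uint8 -> NCHW float, same layout as np.moveaxis(..., 3, 1) above
                batch = np.moveaxis(np.stack(frames), 3, 1)
                yield stack_num, torch.from_numpy(batch).float()
                stack_num += 1
                frames.clear()
        frame_idx += 1
    cap.release()

Usage would be along the lines of: for n, bt in frame_stacks(vpath): torch.save(bt, f'{stack_path}.{n:03}.pt').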
diff --git a/identify_behavior.py b/identify_behavior.py
new file mode 100644
index 0000000..aeb412e
--- /dev/null
+++ b/identify_behavior.py
@@ -0,0 +1,65 @@
+import os
+import numpy as np
+
+rtdir = '/srv/ftp/hummingbird/2021/07/21'
+#rtdir = os.getcwd()
+
+
+files_read = list()
+for cdir, _, files in os.walk(rtdir):
+    for f in files:
+        if f.endswith('.json'):
+            files_read.append(os.path.join(cdir, f))
+
+
+
+import json
+
+fcontents = dict()
+
+for f in files_read:
+    with open(f) as ff:
+        fcontents[f] = json.load(ff)
+
+def compute_2d_scores(cbbs):
+    locs = list()
+    scores = list()
+    n_scores = 0
+    for cbb in cbbs:
+        if len(cbb['scores']) > 0:
+            n_scores += 1
+            first_box = cbb['boxes'][0]
+            first_score = cbb['scores'][0]
+
+            x_cent = np.mean(first_box[0::2])
+            y_cent = np.mean(first_box[1::2])
+            locs.append([x_cent, y_cent])
+            scores.append(first_score)
+
+    center = np.average(locs, weights=scores, axis=0)
+    stddev = np.average(np.linalg.norm(center - locs, axis=1), weights=scores)
+
+    return stddev, scores
+
+
+
+
+
+data = {'file': list(), 'stddev': list(), 'weights': list()}
+for fname, contents in fcontents.items():
+    if len(contents) > 0:
+        data['file'].append(fname)
+        sdev, sc = compute_2d_scores(contents)
+        data['stddev'].append(sdev)
+        data['weights'].append(np.average(sc))
+
+
+import pandas as pd
+
+
+df = pd.DataFrame(data)
+fil = df.loc[(df['weights'] > 0.4) & (df['stddev'] > 400)]
+
+
+
+
diff --git a/purge_originals.py b/purge_originals.py
new file mode 100644
index 0000000..f747699
--- /dev/null
+++ b/purge_originals.py
@@ -0,0 +1,58 @@
+import os
+import random
+from multiprocessing import Pool
+import sys
+sys.path.append('/home/thebears/Seafile/Designs/ML')
+
+#rtpath = '/srv/ftp/hummingbird/2021'
+rtpath = os.path.abspath(sys.argv[1])
+#rtpath = os.path.abspath('/srv/ftp/hummingbird/2021/07/')
+
+didir = list()
+for di, dnames, fns in os.walk(rtpath):
+    numbers = di.split('/')[-3:]
+    if all([n.isnumeric() for n in numbers]):
+        didir.append(di)
+
+
+
+
+def list_files(path):
+    for file in os.listdir(path):
+        if os.path.isfile(os.path.join(path, file)):
+            yield file
+
+import shutil
+import os
+for cdr in didir:
+
+    files_origin = list()
+    for di, dnames, fns in os.walk(cdr):
+        if di == cdr:
+            pass
+        else:
+            files_origin.extend([os.path.join(di, f) for f in fns])
+
+
+    for src_file in files_origin:
+        fname = os.path.basename(src_file)
+        targ_file = os.path.join(cdr, fname)
+        os.rename(src_file, targ_file)
+
+
+    dirs_purge = list()
+    for di, dnames, fns in os.walk(cdr):
+        for d in dnames:
+            cpath = os.path.join(di, d)
+            fna = [x for x in list_files(cpath)]
+            if len(fna) == 0:
+                dirs_purge.append(cpath)
+
+    for d in dirs_purge:
+        if os.path.exists(d):
+            shutil.rmtree(d)
+
+
+
+
+
diff --git a/purge_videos (SFConflict ispatel@live.com 2021-08-11-09-24-52).py b/purge_videos (SFConflict ispatel@live.com 2021-08-11-09-24-52).py
new file mode 100644
index 0000000..1136efd
--- /dev/null
+++ b/purge_videos (SFConflict ispatel@live.com 2021-08-11-09-24-52).py
@@ -0,0 +1,59 @@
+import os
+
+#rtdir = '/srv/ftp/hummingbird/2021/07/21'
+rtdir = os.getcwd()
+
+
+files_read = list()
+for cdir, _, files in os.walk(rtdir):
+    for f in files:
+        if f.endswith('.json'):
+            files_read.append(os.path.join(cdir, f))
+
+
+
+import json
+
+fcontents = dict()
+
+for f in files_read:
+    with open(f) as ff:
+        fcontents[f] = json.load(ff)
+
+
+
+to_purge = dict()
+for fname, contents in fcontents.items():
+    if len(contents) > 0:
+        nscores = 0
+        for x in contents:
+            if len(x['scores']) > 0:
+                nscores += max(x['scores']) >= 0.6
+#        nscores = sum([max(x['scores']) for x in contents])
+#        nscores = sum([len(x['scores']) for x in contents])
+        if nscores == 0:
+            to_purge[fname] = nscores
+
+
+
+delete_list = set()
+for f_json, n_scores in to_purge.items():
+    vid_path = f_json.rstrip('.json') + '.mp4'
+    img_path = vid_path.rstrip('.mp4') + '.jpg'
+    if not os.path.exists(img_path):
+        img_path_sp = img_path.split('_')
+        if 'trimmed' in img_path:
+            img_path_sp[-2] = str(int(img_path_sp[-2].rstrip('.jpg')) + 1)
+
+        else:
+            img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg')) + 1) + '.jpg'
+        img_path = '_'.join(img_path_sp)
+
+    delete_list.update({vid_path, img_path, f_json})
+
+
+print(f'Deleting {len(delete_list)} files')
+for f in delete_list:
+    if os.path.exists(f):
+#        pass
+        os.remove(f)
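A note on the f_json.rstrip('.json') + '.mp4' pattern used by the purge and score scripts here and below: str.rstrip strips a trailing character set ('.', 'j', 's', 'o', 'n'), not a suffix, so it only behaves as intended because these camera filenames end in digits. A safer spelling, as an illustrative helper:

import os

def swap_ext(path, new_ext):
    """Swap the real extension, e.g.
    swap_ext('Hummingbird_01_20210701105440.json', '.mp4')
    -> 'Hummingbird_01_20210701105440.mp4'.
    Unlike rstrip, this cannot eat trailing j/s/o/n characters from the stem."""
    root, _ = os.path.splitext(path)
    return root + new_ext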
diff --git a/purge_videos (SFConflict ispatel@live.com 2021-09-27-14-27-04).py b/purge_videos (SFConflict ispatel@live.com 2021-09-27-14-27-04).py new file mode 100644 index 0000000..7b4d5fa --- /dev/null +++ b/purge_videos (SFConflict ispatel@live.com 2021-09-27-14-27-04).py @@ -0,0 +1,49 @@ +import os + +#rtdir = '/srv/ftp/hummingbird/2021/07/21' +rtdir = os.getcwd() + + +files_read = list() +for cdir, _, files in os.walk(rtdir): + for f in files: + if f.endswith('.json'): + files_read.append(os.path.join(cdir, f)) + + + +import json + +fcontents = dict() + +for f in files_read: + with open(f) as ff: + fcontents[f] = json.load(ff) + + + +to_purge = dict() +for fname, contents in fcontents.items(): + if len(contents) > 0: + nscores = sum([len(x['scores']) for x in contents]) + if nscores == 0: + to_purge[fname] = nscores + + + +delete_list = set() +for f_json, n_scores in to_purge.items(): + vid_path = f_json.rstrip('.json') + '.mp4' + img_path = vid_path.rstrip('.mp4')+'.jpg' + if not os.path.exists(img_path): + img_path_sp = img_path.split('_') + img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg' + img_path = '_'.join(img_path_sp) + + delete_list.update({vid_path,img_path, f_json}) + + +print(delete_list) +for f in delete_list: + if os.path.exists(f): + os.remove(f) diff --git a/purge_videos.py b/purge_videos.py new file mode 100644 index 0000000..712bf99 --- /dev/null +++ b/purge_videos.py @@ -0,0 +1,54 @@ +import os + +#rtdir = '/srv/ftp/hummingbird/2021/07/21' +rtdir = os.getcwd() + + +files_read = list() +for cdir, _, files in os.walk(rtdir): + for f in files: + if f.endswith('.json'):# and 'trimmed' not in f: + files_read.append(os.path.join(cdir, f)) + + + +import json + +fcontents = dict() + +for f in files_read: + with open(f) as ff: + try: + fcontents[f] = json.load(ff) + except: + print(f, ' Failed') + + + +to_purge = list() +for fname, contents in fcontents.items(): + if len(contents) > 0: + nscores = list() + + for x in contents: + if len(x['scores']) > 0: + nscores.append(max(x['scores'])) + if len(nscores) == 0 or max(nscores) < 0.60: + to_purge.append(fname) + +delete_list = set() +for f_json in to_purge: + vid_path = f_json.rstrip('.json') + '.mp4' + img_path = vid_path.rstrip('.mp4')+'.jpg' + if not os.path.exists(img_path) and 'trimmed' not in img_path: + img_path_sp = img_path.split('_') + img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg' + img_path = '_'.join(img_path_sp) + + delete_list.update({vid_path,img_path, f_json}) + + +print(delete_list) +for f in delete_list: + if os.path.exists(f): + os.remove(f) diff --git a/quantize_model.py b/quantize_model.py new file mode 100644 index 0000000..b9707fe --- /dev/null +++ b/quantize_model.py @@ -0,0 +1,155 @@ + + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import numpy as np +import os +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +import json +import cv2 +import random + +from model import Model +import socket +from torchvision.utils import draw_bounding_boxes +import torch as t +import matplotlib.pyplot as plt +import matplotlib + + + + +no_cuda = socket.gethostname() == 'tree' +device='cpu' +model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json +with open(model_path + '.json','r') as nmj: + model_json = json.load(nmj) + +cats = 
model_json['categories'] +cats.sort(key=lambda x: x['new_id']) +num_cat = len(cats) + 1 +model_type = model_json['model_type'] +model = Model(num_cat, model_type) +labels = [x['name'] for x in cats] +model.load_state_dict( + torch.load(model_path + '.pth', map_location = torch.device(device)) +) +model.eval() +# %% +backend = "fbgemm" +model.qconfig = torch.quantization.get_default_qconfig(backend) +torch.backends.quantized.engine = backend +model_static_quantized = torch.quantization.prepare(model, inplace=False) +model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False) + +def print_model_size(mdl): + torch.save(mdl.state_dict(), "tmp.pt") + print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6)) + os.remove('tmp.pt') + +print_model_size(model_static_quantized) +# %% + + + + +results = list() +vid_path = '/srv/ftp/hummingbird/2021/07/28/Hummingbird_01_20210728063745.mp4' +cap = cv2.VideoCapture(vid_path) +frame_num = 0 + + +total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + +step_frame = 15 + +import time +idces = 0 +st = time.time() +for frame_num in range(0, total_frames, step_frame): + srcimg = cap.read()[1] + print(frame_num) + if srcimg is None: + break + + + image = srcimg[:, :, ::-1].copy() + o = T.ToTensor()(image) + img = o[None, :, :, :] + with torch.no_grad(): + ou = model(img) + + + print(ou) + + + + + + for i in range(step_frame): + + img = cap.read()[1]; + if img is None: + break +# %% + + + + + + +et = time.time() + +model(img) +st = time.time() +print(st-et) +# %% + + +img_use = img + + +st = time.time() +features = model.backbone(img_use) +print(time.time() - st) + +st = time.time() +proposals = model.rpn(img_use, features) +print(time.time() - st) + +st = time.time() +head = model.head(features, proposals) +print(time.time() - st) + +# %% +# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4' +# import time +# import cv2 +# video = cv2.VideoCapture(vid_path) + +# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) +# # %% +# st = time.time() + +# while True: +# ret, read = video.read() +# if not ret: +# break + +# et = time.time() + +# print(et-st) + +# st = time.time() +# frs = list() +# for i in range(0,total_frames, 150): +# video.set(cv2.CAP_PROP_POS_FRAMES, i) +# ret, frame = video.read() +# frs.append(frame) +# et = time.time() +# print(et-st) diff --git a/reset_trim (SFConflict ispatel@live.com 2021-08-11-09-24-52).py b/reset_trim (SFConflict ispatel@live.com 2021-08-11-09-24-52).py new file mode 100644 index 0000000..929282c --- /dev/null +++ b/reset_trim (SFConflict ispatel@live.com 2021-08-11-09-24-52).py @@ -0,0 +1,46 @@ +import os +import random +from multiprocessing import Pool +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') + +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = os.path.abspath(sys.argv[1]) + +didir = list() +for di,dnames, fns in os.walk(rtpath): + numbers = di.split('/')[-3:] + if all([n.isnumeric() for n in numbers]): + didir.append(di) + + +def list_files(path): + for file in os.listdir(path): + if os.path.isfile(os.path.join(path, file)): + yield file + + +for cdir in didir: + + + + + + files = [x for x in list_files(cdir)] + + + todelete = list() + for y in files: + if 'trimmed' not in y: + ff = os.path.splitext(y) + fcheck = os.path.join(cdir, ''.join([ff[0],'_trimmed',ff[1]])) + if os.path.exists(fcheck): + todelete.append(fcheck) + + to_purge = set() + for de in todelete: + if os.path.exists(de.replace('_trimmed','')): + to_purge.add(de) + + for y in 
to_purge: + os.remove(y) diff --git a/reset_trim.py b/reset_trim.py new file mode 100644 index 0000000..f9fa92c --- /dev/null +++ b/reset_trim.py @@ -0,0 +1,47 @@ +import os +import random +from multiprocessing import Pool +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') + +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = os.path.abspath(sys.argv[1]) +rtpath = os.path.abspath('/srv/ftp/hummingbird/2021/') + +didir = list() +for di,dnames, fns in os.walk(rtpath): + numbers = di.split('/')[-3:] + if all([n.isnumeric() for n in numbers]): + didir.append(di) + + +def list_files(path): + for file in os.listdir(path): + if os.path.isfile(os.path.join(path, file)): + yield file + + +for cdir in didir: + + + + + + files = [x for x in list_files(cdir)] + + + todelete = list() + for y in files: + if 'trimmed' not in y: + ff = os.path.splitext(y) + fcheck = os.path.join(cdir, ''.join([ff[0],'_trimmed',ff[1]])) + if os.path.exists(fcheck): + todelete.append(fcheck) + + to_purge = set() + for de in todelete: + if os.path.exists(de.replace('_trimmed','')): + to_purge.add(de) + + for y in to_purge: + os.remove(y) diff --git a/score_in_directory (SFConflict ispatel@live.com 2021-09-27-14-27-04).py b/score_in_directory (SFConflict ispatel@live.com 2021-09-27-14-27-04).py new file mode 100644 index 0000000..795567f --- /dev/null +++ b/score_in_directory (SFConflict ispatel@live.com 2021-09-27-14-27-04).py @@ -0,0 +1,38 @@ +import os +import random +from multiprocessing import Pool +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +from score_video import score_video +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = sys.argv[1] +cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}' +have_json = set() +fnames = set() +for di,_, fns in os.walk(rtpath): + for fn in fns: + if fn.endswith('.mp4'): + fnames.add(os.path.join(di,fn)) + elif fn.endswith('.json'): + have_json.add(os.path.join(di,fn.replace('.json','.mp4'))) + +files_to_score = list(fnames - have_json) +random.shuffle(files_to_score) + + +def try_catch_chunk(vids): + try: + score_video(vids) +# score_image(vids) + except Exception as e: + print(e) + +lst = files_to_score +n = 25 +chunks = [lst[i:i + n] for i in range(0, len(lst), n)] +# %% +if __name__ == '__main__': + with Pool(2) as p: + output = p.map(try_catch_chunk,chunks) + # output = p.map(score_video,chunks) + diff --git a/score_in_directory.py b/score_in_directory.py index cbce72b..37a4684 100644 --- a/score_in_directory.py +++ b/score_in_directory.py @@ -4,8 +4,8 @@ from multiprocessing import Pool import sys sys.path.append('/home/thebears/Seafile/Designs/ML') from score_video import score_video - -rtpath = '/srv/ftp/hummingbird/2021' +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = sys.argv[1] cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}' have_json = set() fnames = set() @@ -17,12 +17,15 @@ for di,_, fns in os.walk(rtpath): have_json.add(os.path.join(di,fn.replace('.json','.mp4'))) files_to_score = list(fnames - have_json) -random.shuffle(files_to_score) +files_to_score = sorted(files_to_score) +print(files_to_score) +#random.shuffle(files_to_score) def try_catch_chunk(vids): try: score_video(vids) +# score_image(vids) except Exception as e: print(e) @@ -31,5 +34,7 @@ n = 25 chunks = [lst[i:i + n] for i in range(0, len(lst), n)] # %% if __name__ == '__main__': - with Pool(4) as p: - output = p.map(score_video,chunks) + with Pool(2) as p: + output = 
p.map(try_catch_chunk,chunks) + # output = p.map(score_video,chunks) + diff --git a/score_video (SFConflict ispatel@live.com 2021-07-07-14-37-14).py b/score_video (SFConflict ispatel@live.com 2021-07-07-14-37-14).py new file mode 100644 index 0000000..6acad59 --- /dev/null +++ b/score_video (SFConflict ispatel@live.com 2021-07-07-14-37-14).py @@ -0,0 +1,131 @@ + + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import numpy as np +import os +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +import json +import cv2 +import random + +from model import Model +import socket + + + +#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4' + +def score_video(vid_in_list): + + + no_cuda = socket.gethostname() == 'tree' + device='cpu' + model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json + newest_model = os.path.join(model_rt_path, max(os.listdir(model_rt_path)).replace('.pth','')) + with open(newest_model + '.json','r') as nmj: + model_json = json.load(nmj) + + cats = model_json['categories'] + cats.sort(key=lambda x: x['new_id']) + num_cat = len(cats) + 1 + model_type = model_json['model_type'] + model = Model(num_cat, model_type) + labels = [x['name'] for x in cats] + model.load_state_dict( + torch.load(newest_model + '.pth', map_location = torch.device(device)) + ) + model.eval() + + if isinstance(vid_in_list, str): + vid_in_list = [vid_in_list] + + for idx_vid, vid_in in enumerate(vid_in_list): + try: + vid_path = os.path.abspath(vid_in) + scores_json = vid_path.rsplit('.')[0]+'.json' + print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path) + if os.path.exists(scores_json): + print(f"JSON {scores_json} already exists") + exit() + vid_dir = os.path.dirname(vid_path) + os.system(f'sudo chmod 777 {vid_dir}') + + + + cap = cv2.VideoCapture(vid_path) + + from torchvision.utils import draw_bounding_boxes + import torch as t + import matplotlib.pyplot as plt + import matplotlib + cap = cv2.VideoCapture(vid_path) + frame_num = 0 + + results = list() + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + + for frame_num in range(0, total_frames, 30): + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) + img = cap.read()[1] + image = img[:, :, ::-1].copy() + o = T.ToTensor()(image) + img = o[None, :, :, :] + + with torch.no_grad(): + ou = model(img) + + if len(ou) > 0: + ofscore = ou[0] + + for k in ofscore: + ofscore[k] = ofscore[k].numpy().tolist() + + ofscore['names'] = [labels[x-1] for x in ofscore['labels']] + ofscore['frame_number'] = frame_num + + results.append(ofscore) + + with open(scores_json,'w') as jj: + json.dump(results, jj, indent=4) + except Exception as e: + print(e) + + +if __name__ == '__main__': + score_video(sys.argv[1]) + + +# %% +# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4' +# import time +# import cv2 +# video = cv2.VideoCapture(vid_path) + +# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) +# # %% +# st = time.time() + +# while True: +# ret, read = video.read() +# if not ret: +# break + +# et = time.time() + +# print(et-st) + +# st = time.time() +# frs = list() +# for i in range(0,total_frames, 150): +# video.set(cv2.CAP_PROP_POS_FRAMES, i) +# ret, frame = video.read() +# frs.append(frame) +# et = time.time() +# print(et-st) diff --git a/score_video (SFConflict 
ispatel@live.com 2021-09-27-14-27-04).py b/score_video (SFConflict ispatel@live.com 2021-09-27-14-27-04).py new file mode 100644 index 0000000..d5926a4 --- /dev/null +++ b/score_video (SFConflict ispatel@live.com 2021-09-27-14-27-04).py @@ -0,0 +1,157 @@ + + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import numpy as np +import os +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +import json +import cv2 +import random + +from model import Model +import socket +from torchvision.utils import draw_bounding_boxes +import torch as t +import matplotlib.pyplot as plt +import matplotlib + + + +#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4' + + + + +def score_video(vid_in_list): + + + no_cuda = socket.gethostname() == 'tree' + device='cpu' + model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json +# newest_model = os.path.join(model_rt_path, min(os.listdir(model_rt_path)).replace('.pth','')) + with open(model_path + '.json','r') as nmj: + model_json = json.load(nmj) + + cats = model_json['categories'] + cats.sort(key=lambda x: x['new_id']) + num_cat = len(cats) + 1 + model_type = model_json['model_type'] + model = Model(num_cat, model_type) + labels = [x['name'] for x in cats] + model.load_state_dict( + torch.load(model_path + '.pth', map_location = torch.device(device)) + ) + model.eval() + + if isinstance(vid_in_list, str): + vid_in_list = [vid_in_list] + + for idx_vid, vid_in in enumerate(vid_in_list): + vid_path = os.path.abspath(vid_in) + img_path = vid_path.rstrip('.mp4')+'.jpg' + if not os.path.exists(img_path): + img_path_sp = img_path.split('_') + img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg' + img_path = '_'.join(img_path_sp) + scores_json = vid_path.rsplit('.')[0]+'.json' + print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path) + if os.path.exists(scores_json): + print(f"JSON {scores_json} already exists") + exit() + vid_dir = os.path.dirname(vid_path) + os.system(f'sudo chmod 777 {vid_dir}') + + + def score_image(img): + image = img[:, :, ::-1].copy() + o = T.ToTensor()(image) + img = o[None, :, :, :] + + with torch.no_grad(): + ou = model(img) + + ofscore = None + if len(ou) > 0: + ofscore = ou[0] + + for k in ofscore: + ofscore[k] = ofscore[k].numpy().tolist() + + ofscore['names'] = [labels[x-1] for x in ofscore['labels']] + + + return ofscore + + + results = list() + + do_video = True + if os.path.exists(img_path): + print(f'Found thumbnail and scoring {img_path}') + img = cv2.imread(img_path) + outsc = score_image(img) + if outsc is not None: + if len(outsc['boxes']) > 0: + print('Found bird in picture, skipping video scoring') + do_video = False + outsc['frame_number'] = 'thumbnail' + results.append(outsc) + + if do_video: + cap = cv2.VideoCapture(vid_path) + frame_num = 0 + + + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + + for frame_num in range(0, total_frames, 15): + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) + img = cap.read()[1] + outsc = score_image(img) + if outsc is not None: + outsc['frame_number'] = frame_num + results.append(outsc) + + with open(scores_json,'w') as jj: + json.dump(results, jj, indent=4) + + + +if __name__ == '__main__': + score_video(sys.argv[1]) + + +# %% +# vid_path = 
'/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4' +# import time +# import cv2 +# video = cv2.VideoCapture(vid_path) + +# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) +# # %% +# st = time.time() + +# while True: +# ret, read = video.read() +# if not ret: +# break + +# et = time.time() + +# print(et-st) + +# st = time.time() +# frs = list() +# for i in range(0,total_frames, 150): +# video.set(cv2.CAP_PROP_POS_FRAMES, i) +# ret, frame = video.read() +# frs.append(frame) +# et = time.time() +# print(et-st) diff --git a/score_video.py b/score_video.py index fad41e8..e06a3fe 100644 --- a/score_video.py +++ b/score_video.py @@ -16,19 +16,19 @@ import random from model import Model import socket - - - -#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4' +from torchvision.utils import draw_bounding_boxes +import torch as t +import matplotlib.pyplot as plt +import matplotlib +step_frame = 15 def score_video(vid_in_list): - no_cuda = socket.gethostname() == 'tree' device='cpu' - model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json - newest_model = os.path.join(model_rt_path, max(os.listdir(model_rt_path)).replace('.pth','')) - with open(newest_model + '.json','r') as nmj: + model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json +# newest_model = os.path.join(model_rt_path, min(os.listdir(model_rt_path)).replace('.pth','')) + with open(model_path + '.json','r') as nmj: model_json = json.load(nmj) cats = model_json['categories'] @@ -38,7 +38,7 @@ def score_video(vid_in_list): model = Model(num_cat, model_type) labels = [x['name'] for x in cats] model.load_state_dict( - torch.load(newest_model + '.pth', map_location = torch.device(device)) + torch.load(model_path + '.pth', map_location = torch.device(device)) ) model.eval() @@ -47,6 +47,11 @@ def score_video(vid_in_list): for idx_vid, vid_in in enumerate(vid_in_list): vid_path = os.path.abspath(vid_in) + img_path = vid_path.rstrip('.mp4')+'.jpg' + if not os.path.exists(img_path): + img_path_sp = img_path.split('_') + img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg' + img_path = '_'.join(img_path_sp) scores_json = vid_path.rsplit('.')[0]+'.json' print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path) if os.path.exists(scores_json): @@ -55,24 +60,8 @@ def score_video(vid_in_list): vid_dir = os.path.dirname(vid_path) os.system(f'sudo chmod 777 {vid_dir}') - - - cap = cv2.VideoCapture(vid_path) - - from torchvision.utils import draw_bounding_boxes - import torch as t - import matplotlib.pyplot as plt - import matplotlib - cap = cv2.VideoCapture(vid_path) - frame_num = 0 - - results = list() - total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - - for frame_num in range(0, total_frames, 30): - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) - img = cap.read()[1] + + def score_image(img): image = img[:, :, ::-1].copy() o = T.ToTensor()(image) img = o[None, :, :, :] @@ -80,6 +69,7 @@ def score_video(vid_in_list): with torch.no_grad(): ou = model(img) + ofscore = None if len(ou) > 0: ofscore = ou[0] @@ -87,9 +77,49 @@ def score_video(vid_in_list): ofscore[k] = ofscore[k].numpy().tolist() ofscore['names'] = [labels[x-1] for x in ofscore['labels']] - ofscore['frame_number'] = frame_num - results.append(ofscore) + + return ofscore + + + results = list() + + do_video = True + if os.path.exists(img_path): + print(f'Found thumbnail and scoring {img_path}') + img = 
cv2.imread(img_path) + outsc = score_image(img) + if outsc is not None: + if len(outsc['boxes']) > 0: + print('Found bird in picture, (still doing video scoring)') + outsc['frame_number'] = 'thumbnail' + results.append(outsc) + + do_video = True + if do_video: + cap = cv2.VideoCapture(vid_path) + frame_num = 0 + + + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + + for frame_num in range(0, total_frames, step_frame): + img = cap.read()[1] + if img is None: + break + outsc = score_image(img) + + if outsc is not None: + outsc['frame_number'] = frame_num + results.append(outsc) + + for i in range(step_frame): + + img = cap.read()[1]; + if img is None: + break + with open(scores_json,'w') as jj: json.dump(results, jj, indent=4) diff --git a/score_video_birds.py b/score_video_birds.py new file mode 100644 index 0000000..29e5e21 --- /dev/null +++ b/score_video_birds.py @@ -0,0 +1,170 @@ + + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import numpy as np +import os +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +import json +import cv2 +import random + +from model import Model +import socket +from torchvision.utils import draw_bounding_boxes +import torch as t +import matplotlib.pyplot as plt +import matplotlib +step_frame = 15 + + + + + +no_cuda = socket.gethostname() == 'tree' +device='cpu' +model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/birds_only'#0210701_202822.json + +with open(model_path + '.json','r') as nmj: + model_json = json.load(nmj) + +cats = model_json['categories'] +cats.sort(key=lambda x: x['new_id']) +num_cat = len(cats) + 1 +model_type = model_json['model_type'] +num_cat = 319 +model = Model(num_cat, model_type) +labels = [x['name'] for x in cats] +# %% +model.load_state_dict( + torch.load(model_path + '.pth', map_location = torch.device(device)) +) +model.eval() + + + +idx_vid = 0 +#vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4' +vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4' + + +vid_path = os.path.abspath(vid_in) +img_path = vid_path.rstrip('.mp4')+'.jpg' +if not os.path.exists(img_path): + img_path_sp = img_path.split('_') + img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg' + img_path = '_'.join(img_path_sp) +scores_json = vid_path.rsplit('.')[0]+'.json' + +if os.path.exists(scores_json): + print(f"JSON {scores_json} already exists") + exit() +vid_dir = os.path.dirname(vid_path) +os.system(f'sudo chmod 777 {vid_dir}') + + +def score_image(img): + image = img[:, :, ::-1].copy() + o = T.ToTensor()(image) + img = o[None, :, :, :] + + with torch.no_grad(): + ou = model(img) + + ofscore = None + if len(ou) > 0: + ofscore = ou[0] + + for k in ofscore: + ofscore[k] = ofscore[k].numpy().tolist() + + ofscore['names'] = [labels[x-1] for x in ofscore['labels']] + + + return ofscore + + +results = list() + +do_video = True +if os.path.exists(img_path): + print(f'Found thumbnail and scoring {img_path}') + img = cv2.imread(img_path) + outsc = score_image(img) + if outsc is not None: + if len(outsc['boxes']) > 0: + print('Found bird in picture, (still doing video scoring)') + outsc['frame_number'] = 'thumbnail' + print(outsc) + results.append(outsc) +# %% +from pprint import pprint +do_video = True +if do_video: + cap = cv2.VideoCapture(vid_path) + frame_num = 0 + + + total_frames = 
int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+#    for i in range(5808):
+#        cap.read();
+
+#    cap.set(cv2.CAP_PROP_POS_FRAMES, 5808)
+    for frame_num in range(0, total_frames, step_frame):
+        img = cap.read()[1]
+#        img = cv2.resize(img, [960,450])
+        if img is None:
+            break
+        outsc = score_image(img)
+
+
+        if outsc is not None:
+            outsc['frame_number'] = frame_num
+            pprint(outsc)
+            results.append(outsc)
+
+        for i in range(step_frame):
+            # skip ahead without scoring
+            img = cap.read()[1]
+            if img is None:
+                break
+
+# %%
+with open(scores_json, 'w') as jj:
+    json.dump(results, jj, indent=4)
+
+
+
+
+# %%
+# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
+# import time
+# import cv2
+# video = cv2.VideoCapture(vid_path)
+
+# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+# # %%
+# st = time.time()
+
+# while True:
+#     ret, read = video.read()
+#     if not ret:
+#         break
+
+# et = time.time()
+
+# print(et-st)
+
+# st = time.time()
+# frs = list()
+# for i in range(0,total_frames, 150):
+#     video.set(cv2.CAP_PROP_POS_FRAMES, i)
+#     ret, frame = video.read()
+#     frs.append(frame)
+# et = time.time()
+# print(et-st)
diff --git a/score_video_birds_remote.py b/score_video_birds_remote.py
new file mode 100644
index 0000000..7bc01df
--- /dev/null
+++ b/score_video_birds_remote.py
@@ -0,0 +1,201 @@
+# %%
+import torchvision
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+from collections import defaultdict as ddict
+import json
+import torch
+from torchvision import datasets, transforms as T
+import numpy as np
+import os
+import sys
+
+sys.path.append(r'K:\Designs\ML\inaturalist_models')
+import json
+import cv2
+import random
+
+from model import Model
+import socket
+from torchvision.utils import draw_bounding_boxes
+import torch as t
+import matplotlib.pyplot as plt
+import matplotlib
+
+step_frame = 15
+
+no_cuda = socket.gethostname() == 'tree'
+device = 'cpu'
+model_path = r'K:\Designs\ML\inaturalist_models\models\birds_only' #0210701_202822.json
+
+with open(model_path + '.json', 'r') as nmj:
+    model_json = json.load(nmj)
+
+cats = model_json['categories']
+cats.sort(key=lambda x: x['new_id'])
+num_cat = len(cats) + 1
+model_type = model_json['model_type']
+num_cat = 319
+model = Model(num_cat, model_type)
+labels = [x['name'] for x in cats]
+# %%
+model.load_state_dict(
+    torch.load(model_path + '.pth', map_location=torch.device(device))
+)
+model.eval()
+model.cuda()
+
+idx_vid = 0
+#vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4'
+#vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4'
+# vid_in = r'L:\railing\2021\09\21\Railing_00_20210921095155.mp4'
+vid_in = r'L:\pond\2021\09\20\Pond_00_20210920134828.mp4'
+
+vid_path = os.path.abspath(vid_in)
+img_path = vid_path.rstrip('.mp4') + '.jpg'
+if not os.path.exists(img_path):
+    img_path_sp = img_path.split('_')
+    img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg')) + 1) + '.jpg'
+    img_path = '_'.join(img_path_sp)
+scores_json = vid_path.rsplit('.')[0] + '.json'
+
+# %%
+ttran = T.ToTensor()
+results = list()
+
+do_video = True
+images = list()
+if os.path.exists(img_path):
+    print(f'Found thumbnail and scoring {img_path}')
+    tnail_img = cv2.imread(img_path)[:, :, ::-1].copy()
+
+    images.append(ttran(tnail_img))
+
+step_frame = 5
+if do_video:
+    cap = cv2.VideoCapture(vid_path)
+    frame_num = 0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    for frame_num in range(0, total_frames, step_frame):
+        img = cap.read()[1][:, :,
::-1].copy() + + if img is None: + break + + images.append(ttran(img)) + + if len(images) == 16: + imgtensor = t.stack(images).cuda() + with torch.no_grad(): + print('Starting to score') + output = model(imgtensor) + print('Finished score') + + detached = list() + for f in output: + newd = dict() + for k in ['boxes', 'labels', 'scores']: + newd[k] = f[k].detach().cpu() + + detached.append(newd) + print(newd) + results.append(detached) + + images.clear() + +# %% +imgs = np.moveaxis(np.stack(images), 3, 1) +imgtensor = torch.cuda.FloatTensor(imgs) +with torch.no_grad(): + output = model(imgtensor) +# %% +image = img[:, :, ::-1].copy() +o = T.ToTensor()(image) +img = o[None, :, :, :] +# %% + +# %% +cap = cv2.VideoCapture(vid_path) +frame_num = 0 +step_frame = 5 +total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) +images = list() +for frame_num in range(0, total_frames, step_frame): + img = cap.read()[1].copy() + + images.append(img) + if len(images) == 16: + break +# %% +imgs = np.moveaxis(np.stack(images), 3, 1) +bt = torch.FloatTensor(imgs) + +# %% +from pprint import pprint + +step_frame = 5 + +images = list() +if do_video: + cap = cv2.VideoCapture(vid_path) + frame_num = 0 + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + for frame_num in range(0, total_frames, step_frame): + img = cap.read()[1] + if img is None: + break + + images.append(img) + +# %% + +# %% + +# %% +with open(scores_json, 'w') as jj: + json.dump(results, jj, indent=4) + +# def score_image(img): +# image = img[:, :, ::-1].copy() +# o = T.ToTensor()(image) +# img = o[None, :, :, :] + +# with torch.no_grad(): +# ou = model(img) + +# ofscore = None +# if len(ou) > 0: +# ofscore = ou[0] + +# for k in ofscore: +# ofscore[k] = ofscore[k].numpy().tolist() + +# ofscore['names'] = [labels[x - 1] for x in ofscore['labels']] + +# return ofscore +# %% +# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4' +# import time +# import cv2 +# video = cv2.VideoCapture(vid_path) + +# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) +# # %% +# st = time.time() + +# while True: +# ret, read = video.read() +# if not ret: +# break + +# et = time.time() + +# print(et-st) + +# st = time.time() +# frs = list() +# for i in range(0,total_frames, 150): +# video.set(cv2.CAP_PROP_POS_FRAMES, i) +# ret, frame = video.read() +# frs.append(frame) +# et = time.time() +# print(et-st) diff --git a/score_video_trimmed.py b/score_video_trimmed.py new file mode 100644 index 0000000..6b087f9 --- /dev/null +++ b/score_video_trimmed.py @@ -0,0 +1,143 @@ + + +import torchvision +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor +from collections import defaultdict as ddict +import json +import torch +from torchvision import datasets, transforms as T +import numpy as np +import os +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +import json +import cv2 +import random + +from model import Model +import socket +from torchvision.utils import draw_bounding_boxes +import torch as t +import matplotlib.pyplot as plt +import matplotlib + + +#vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4' +vid_path = '/srv/ftp/hummingbird/2021/07/25/Hummingbird_01_20210725065610_trimmed.mp4' + +no_cuda = socket.gethostname() == 'tree' +device='cpu' +model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json + +with open(model_path + '.json','r') as nmj: + model_json = json.load(nmj) + +cats = model_json['categories'] 
+cats.sort(key=lambda x: x['new_id']) +num_cat = len(cats) + 1 +model_type = model_json['model_type'] +model = Model(num_cat, model_type) +labels = [x['name'] for x in cats] +model.load_state_dict( + torch.load(model_path + '.pth', map_location = torch.device(device)) +) +model.eval() + + + + + + + + + + +vid_dir = os.path.dirname(vid_path) +os.system(f'sudo chmod 777 {vid_dir}') + + +def score_image(img): + use_this = None + if isinstance(img, list): + use_this = list() + for y in img: + image = y[:, :, ::-1].copy() + o = T.ToTensor()(image) + y = o[:, :, :] + use_this.append(y) + + else: + image = img[:, :, ::-1].copy() + o = T.ToTensor()(image) + img = o[None, :, :, :] + use_this = img + + with torch.no_grad(): + ou = model(use_this) + + ofscore = None + if len(ou) > 0: + ofscore = ou[0] + + for k in ofscore: + ofscore[k] = ofscore[k].numpy().tolist() + + ofscore['names'] = [labels[x-1] for x in ofscore['labels']] + + + return ofscore + + + +cap = cv2.VideoCapture(vid_path) +frame_num = 0 + + +total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) +# %% + +imgs = list() +results = list() +for frame_num in range(0, total_frames, 15): + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) + img = cap.read()[1] + imgs.append(img) +# outsc = score_image(img) +# if outsc is not None: +# outsc['frame_number'] = frame_num +# results.append(outsc) + + + + + + + + +# %% + +# import time +# import cv2 +# video = cv2.VideoCapture(vid_path) + +# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) +# # %% +# st = time.time() + +# while True: +# ret, read = video.read() +# if not ret: +# break + +# et = time.time() + +# print(et-st) + +# st = time.time() +# frs = list() +# for i in range(0,total_frames, 150): +# video.set(cv2.CAP_PROP_POS_FRAMES, i) +# ret, frame = video.read() +# frs.append(frame) +# et = time.time() +# print(et-st) diff --git a/train (SFConflict ispatel@live.com 2021-08-11-09-24-52).py b/train (SFConflict ispatel@live.com 2021-08-11-09-24-52).py new file mode 100644 index 0000000..105ca8c --- /dev/null +++ b/train (SFConflict ispatel@live.com 2021-08-11-09-24-52).py @@ -0,0 +1,126 @@ +# %% +from engine import train_one_epoch, evaluate +from model import Model +from data import iNaturalistDataset +import torch +import os +import datetime as dt +import json +import utils +import pandas as pd +import sys +if not os.path.exists("models/"): + os.mkdir("models") + +if torch.cuda.is_available(): + device = torch.device("cuda") +else: + device = torch.device("cpu") + +default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S") +default_model_path = default_model_root + ".pth" +default_model_info = default_model_root + ".json" +default_state_path = default_model_root + ".oth" + + +#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"]) + +csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv' +df = pd.read_csv(csv_path) +species_list = set(list(df[df['count']>1000]['species'])) +# %% +#model_type = "fasterrcnn_mobilenet_v3_large_fpn" +#batch_size = 16 + +model_type = 'fasterrcnn_resnet50_fpn' +batch_size = 8 + + + +def run(model_name = None, epoch_start = 0): + val_dataset = iNaturalistDataset( + validation=True, + species=species_list, + ) + train_dataset = iNaturalistDataset( + train=True, + species=species_list, + ) + + + if model_name is None: + fresh_start = True + model_info = default_model_info + model_path = default_model_path + state_path = default_state_path + else: + fresh_start = False + model_info = 
model_name.rstrip('.pth').rstrip('.json')+'.json'
+        model_path = model_info.rstrip('.json')+'.pth'
+        state_path = model_info.rstrip('.json')+'.oth'
+        # NB: str.rstrip strips a trailing *character set*, not a suffix
+        # (e.g. 'depth.pth'.rstrip('.pth') == 'de'); it only works here
+        # because the timestamped model names never end in those characters.
+        # os.path.splitext(model_name)[0] would be the safer derivation.
+
+    if fresh_start:
+        with open(model_info, "w") as js_p:
+            json.dump(
+                {"categories": train_dataset.categories, "model_type": model_type},
+                js_p,
+                default=str,
+                indent=4,
+            )
+
+    train_data_loader = torch.utils.data.DataLoader(
+        train_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=10,
+        collate_fn=utils.collate_fn,
+    )
+
+    val_data_loader = torch.utils.data.DataLoader(
+        val_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=10,
+        collate_fn=utils.collate_fn,
+    )
+
+    num_classes = train_dataset.num_classes
+    model = Model(num_classes, model_type)
+    model.to(device)
+
+    if not fresh_start:
+        model.load_state_dict(
+            torch.load(model_path, map_location = torch.device(device))
+        )
+
+    params = [p for p in model.parameters() if p.requires_grad]
+    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
+
+    if os.path.exists(state_path):
+        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))
+
+    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
+
+    # note: when resuming, epoch numbering restarts at 0; the 2021-09-27
+    # copy of this script iterates range(epoch_start, num_epochs) instead.
+    num_epochs = 10 - epoch_start
+
+    for epoch in range(num_epochs):
+        train_one_epoch(
+            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
+        lr_scheduler.step()
+        torch.save(model.state_dict(), model_path)
+        torch.save(optimizer.state_dict(), state_path)
+        evaluate(model, val_data_loader, device=device)
+
+
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3:
+        model_name = sys.argv[1]
+        epoch_start = int(sys.argv[2])
+        run(model_name = model_name, epoch_start = epoch_start)
+    else:
+        run()
+
+# run()
diff --git a/train (SFConflict ispatel@live.com 2021-09-27-14-27-04).py b/train (SFConflict ispatel@live.com 2021-09-27-14-27-04).py
new file mode 100644
index 0000000..670c36c
--- /dev/null
+++ b/train (SFConflict ispatel@live.com 2021-09-27-14-27-04).py
@@ -0,0 +1,139 @@
+# %%
+from engine import train_one_epoch, evaluate
+from model import Model
+from data import iNaturalistDataset
+import torch
+import os
+import datetime as dt
+import json
+import utils
+import pandas as pd
+import sys
+if not os.path.exists("models/"):
+    os.mkdir("models")
+
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+
+default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
+default_model_path = default_model_root + ".pth"
+default_model_info = default_model_root + ".json"
+default_state_path = default_model_root + ".oth"
+default_sched_path = default_model_root + ".sth"
+
+
+#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
+
+csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
+df = pd.read_csv(csv_path)
+species_list = set(list(df[df['count']>1000]['species']))
+
+#model_type = "fasterrcnn_mobilenet_v3_large_fpn"
+#batch_size = 16
+
+model_type = 'fasterrcnn_resnet50_fpn'
+batch_size = 8
+num_epochs = 10
+
+
+def run(model_name = None, epoch_start = 0):
+    val_dataset = iNaturalistDataset(
+        validation=True,
+        species=species_list,
+    )
+    train_dataset = iNaturalistDataset(
+        train=True,
+        species=species_list,
+    )
+
+    print(len(val_dataset.categories))
+    print(len(train_dataset.categories))
+    if model_name is None:
+        fresh_start = True
+        model_info = default_model_info
+        model_path = default_model_path
+        state_path = 
default_state_path + sched_path = default_sched_path + else: + fresh_start = False + model_info = model_name.rstrip('.pth').rstrip('.json')+'.json' + model_path = model_info.rstrip('.json')+'.pth' + state_path = model_info.rstrip('.json')+'.oth' + sched_path = model_info.rstrip('.json')+'.sth' + + + + if fresh_start: + with open(model_info, "w") as js_p: + json.dump( + {"categories": train_dataset.categories, "model_type": model_type}, + js_p, + default=str, + indent=4, + ) + else: + print('Continuing run') + + train_data_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=10, + collate_fn=utils.collate_fn, + ) + + val_data_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=10, + collate_fn=utils.collate_fn, + ) + + num_classes = len(train_dataset.categories) + 1 + model = Model(num_classes, model_type) + model.to(device) + + if not fresh_start: + print('Loading state dict') + model.load_state_dict( + torch.load(model_path, map_location = torch.device(device)) + ) + + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) + + if os.path.exists(state_path): + print('Loading optimizer') + optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device))) + + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) + + if os.path.exists(sched_path): + print('Loading scheduler') + lr_scheduler.load_state_dict(torch.load(sched_path, map_location = torch.device(device))) + + for epoch in range(epoch_start, num_epochs): + print('Epoch '+str(epoch)) + train_one_epoch( + model, optimizer, train_data_loader, device, epoch, print_freq=10 ) + lr_scheduler.step() + torch.save(model.state_dict(), model_path) + torch.save(optimizer.state_dict(), state_path) + torch.save(lr_scheduler.state_dict(), sched_path) + evaluate(model, val_data_loader, device=device) + + + + +if __name__ == "__main__": + if len(sys.argv) == 3: + model_name = sys.argv[1] + epoch_start = int(sys.argv[2]) + run(model_name = model_name, epoch_start = epoch_start) + else: + run() + +# run() diff --git a/train.py b/train.py index 9207eb4..225ccdc 100644 --- a/train.py +++ b/train.py @@ -7,7 +7,8 @@ import os import datetime as dt import json import utils - +import pandas as pd +import sys if not os.path.exists("models/"): os.mkdir("models") @@ -16,16 +17,27 @@ if torch.cuda.is_available(): else: device = torch.device("cpu") -model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S") -model_path = model_root + ".pth" -model_info = model_root + ".json" +default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S") +default_model_path = default_model_root + ".pth" +default_model_info = default_model_root + ".json" +default_state_path = default_model_root + ".oth" -species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"]) -model_type = "fasterrcnn_mobilenet_v3_large_fpn" +#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"]) + +csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv' +df = pd.read_csv(csv_path) +species_list = set(list(df[df['count']>1000]['species'])) + +#model_type = "fasterrcnn_mobilenet_v3_large_fpn" +#batch_size = 16 + +model_type = 'fasterrcnn_resnet50_fpn' +batch_size = 8 -def run(): + +def run(model_name = None, epoch_start = 0): val_dataset = 
iNaturalistDataset( validation=True, species=species_list, @@ -35,27 +47,41 @@ def run(): species=species_list, ) - with open(model_info, "w") as js_p: - json.dump( - {"categories": train_dataset.categories, "model_type": model_type}, - js_p, - default=str, - indent=4, - ) + + if model_name is None: + fresh_start = True + model_info = default_model_info + model_path = default_model_path + state_path = default_state_path + else: + fresh_start = False + model_info = model_name.rstrip('.pth').rstrip('.json')+'.json' + model_path = model_info.rstrip('.json')+'.pth' + state_path = model_info.rstrip('.json')+'.oth' + + + if fresh_start: + with open(model_info, "w") as js_p: + json.dump( + {"categories": train_dataset.categories, "model_type": model_type}, + js_p, + default=str, + indent=4, + ) train_data_loader = torch.utils.data.DataLoader( train_dataset, - batch_size=16, + batch_size=batch_size, shuffle=True, - num_workers=4, + num_workers=10, collate_fn=utils.collate_fn, ) val_data_loader = torch.utils.data.DataLoader( val_dataset, - batch_size=16, + batch_size=batch_size, shuffle=True, - num_workers=4, + num_workers=10, collate_fn=utils.collate_fn, ) @@ -63,20 +89,38 @@ def run(): model = Model(num_classes, model_type) model.to(device) + if not fresh_start: + model.load_state_dict( + torch.load(model_path, map_location = torch.device(device)) + ) + params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) + if os.path.exists(state_path): + optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device))) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) - + num_epochs = 10 - for epoch in range(num_epochs): + + for epoch in range(epoch_start, num_epochs): train_one_epoch( - model, optimizer, train_data_loader, device, epoch, print_freq=10 - ) + model, optimizer, train_data_loader, device, epoch, print_freq=10 ) lr_scheduler.step() torch.save(model.state_dict(), model_path) + torch.save(optimizer.state_dict(), state_path) evaluate(model, val_data_loader, device=device) + + if __name__ == "__main__": - run() + if len(sys.argv) == 3: + model_name = sys.argv[1] + epoch_start = int(sys.argv[2]) + run(model_name = model_name, epoch_start = epoch_start) + else: + run() + +# run() diff --git a/trim_in_directory.py b/trim_in_directory.py new file mode 100644 index 0000000..af78f63 --- /dev/null +++ b/trim_in_directory.py @@ -0,0 +1,42 @@ +import os +import random +from multiprocessing import Pool +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') +from trim_video import trim_video +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = sys.argv[1] + +have_json = set() +fnames = set() +for di,_, fns in os.walk(rtpath): + for fn in fns: + if fn.endswith('.json') and 'trimmed' not in fn: + have_json.add(os.path.join(di,fn)) + + +files_to_score = have_json + + + +def try_catch_chunk(jsons): + try: + if not isinstance(jsons, list): + jsons = [jsons] + + for x in jsons: + trim_video(x) + + except Exception as e: + print(e) + +lst = list(files_to_score) +n = 25 +chunks = [lst[i:i + n] for i in range(0, len(lst), n)] +# %% +if __name__ == '__main__': + with Pool(8) as p: + output = p.map(try_catch_chunk,chunks) + # output = p.map(score_video,chunks) + + diff --git a/trim_video (SFConflict ispatel@live.com 2021-08-11-09-24-52).py b/trim_video (SFConflict ispatel@live.com 2021-08-11-09-24-52).py new file mode 100644 index 0000000..ecf973c --- /dev/null +++ 
b/trim_video (SFConflict ispatel@live.com 2021-08-11-09-24-52).py @@ -0,0 +1,113 @@ + +from ffprobe import FFProbe +import ffmpeg +import shutil +import json +import math +import os + + +def execute_trim_video(js_path, start_frame, end_frame, empty_video = False): + with open(js_path,'r') as jj: + data = json.load(jj) + + + movie_dir = os.path.dirname(js_path) + archive_dir_p = movie_dir + archive_dir_p = archive_dir_p.split('/') + archive_dir_p.insert(-3,'originals') + archive_dir = '/'.join(archive_dir_p) + '/' + + + if not os.path.exists(archive_dir): + os.makedirs(archive_dir) + + rt_name = os.path.splitext(js_path)[0] + movie_path = rt_name+'.mp4' + movie_new_path = rt_name+'_trimmed.mp4' + js_new_path = rt_name + '_trimmed.json' + + frame_pad = 5 #seconds + vid_info = FFProbe(movie_path) + vid_stream = vid_info.video[0] + + framerate = vid_stream.framerate + duration = float(vid_stream.duration) + max_frames = duration * framerate + + start_frame = max(0, start_frame - frame_pad * framerate) + end_frame = min( max_frames, end_frame + frame_pad * framerate) + + start_time = start_frame / framerate + end_time = end_frame / framerate + + + + to_keep = list() + for x in data: + json_frame_num = x['frame_number'] + if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail': + to_keep.append(x) + elif json_frame_num >=start_frame and json_frame_num <= end_frame: + x['frame_number_original'] = x['frame_number'] + x['frame_number'] -= start_frame + + to_keep.append(x) + + + trim_duration = end_time - start_time + + if os.path.exists(movie_new_path): + os.remove(movie_new_path) + + if not empty_video: + cmd = f'ffmpeg -ss {start_time} -i {movie_path} -ss 0 -t {trim_duration} -c copy -map 0 {movie_new_path}' + return_code = os.system(cmd) + + with open(js_new_path,'w') as jnp: + json.dump(to_keep, jnp, indent=4) + + if empty_video or return_code == 0: + os.remove(movie_path) + os.remove(js_path) + #shutil.move(movie_path, archive_dir) + #shutil.move(js_path ,archive_dir) + + + + + + + + +def trim_video(jspath): + with open(jspath,'r') as jj: + data = json.load(jj) + + start_frame = math.inf + end_frame = -math.inf + + + skip_trimming = False + empty_video = True + for x in data: + if len(x['boxes']) > 0: + json_frame_num = x['frame_number'] + + if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail': + skip_trimming = True + elif max(x['scores']) > 0.25: + start_frame = min(start_frame, json_frame_num) + end_frame = max(end_frame, json_frame_num) + empty_video = False + + + + if not skip_trimming: + execute_trim_video(jspath, start_frame, end_frame, empty_video = empty_video) + + + + + + diff --git a/trim_video.py b/trim_video.py new file mode 100644 index 0000000..10d31b8 --- /dev/null +++ b/trim_video.py @@ -0,0 +1,106 @@ + +from ffprobe import FFProbe +import ffmpeg +import shutil +import json +import math +import os +def execute_trim_video(js_path, start_frame, end_frame, empty_video = False): + with open(js_path,'r') as jj: + data = json.load(jj) + + movie_dir = os.path.dirname(js_path) + archive_dir = os.path.join(movie_dir, 'original') + '/' + + + if not os.path.exists(archive_dir): + os.mkdir(archive_dir) + + rt_name = os.path.splitext(js_path)[0] + movie_path = rt_name+'.mp4' + movie_new_path = rt_name+'_trimmed.mp4' + js_new_path = rt_name + '_trimmed.json' + + frame_pad = 5 #seconds + vid_info = FFProbe(movie_path) + vid_stream = vid_info.video[0] + + framerate = vid_stream.framerate + duration = float(vid_stream.duration) + max_frames = duration * framerate + 
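+    # Hedged aside: the ffmpeg command assembled further below interpolates
+    # paths into a shell string unquoted, and filenames in this repo can
+    # contain spaces. A minimal sketch of a safer construction (shlex is in
+    # the standard library; this helper is illustrative only and is not
+    # wired into the original command):
+    def _quoted_trim_cmd(src, dst, start, dur):
+        import shlex
+        return (f'ffmpeg -ss {start} -i {shlex.quote(src)} '
+                f'-ss 0 -t {dur} -c copy -map 0 {shlex.quote(dst)}')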
+ start_frame = max(0, start_frame - frame_pad * framerate) + end_frame = min( max_frames, end_frame + frame_pad * framerate) + + start_time = start_frame / framerate + end_time = end_frame / framerate + + + + to_keep = list() + for x in data: + json_frame_num = x['frame_number'] + if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail': + to_keep.append(x) + elif json_frame_num >=start_frame and json_frame_num <= end_frame: + x['frame_number_original'] = x['frame_number'] + x['frame_number'] -= start_frame + + to_keep.append(x) + + + trim_duration = end_time - start_time + + if os.path.exists(movie_new_path): + os.remove(movie_new_path) + + if not empty_video: + cmd = f'ffmpeg -ss {start_time} -i {movie_path} -ss 0 -t {trim_duration} -c copy -map 0 {movie_new_path}' + return_code = os.system(cmd) + + with open(js_new_path,'w') as jnp: + json.dump(to_keep, jnp, indent=4) + + if empty_video or return_code == 0: + + shutil.move(movie_path, archive_dir) + shutil.move(js_path ,archive_dir) + + + + + + + + +def trim_video(jspath): + with open(jspath,'r') as jj: + data = json.load(jj) + + start_frame = math.inf + end_frame = -math.inf + + + skip_trimming = False + empty_video = True + for x in data: + if len(x['boxes']) > 0: + json_frame_num = x['frame_number'] + + if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail': + skip_trimming = True + elif max(x['scores']) > 0.05: + start_frame = min(start_frame, json_frame_num) + end_frame = max(end_frame, json_frame_num) + empty_video = False + + + + if not skip_trimming: + execute_trim_video(jspath, start_frame, end_frame, empty_video = empty_video) + + + + + + diff --git a/undo_move_originals.py b/undo_move_originals.py new file mode 100644 index 0000000..f747699 --- /dev/null +++ b/undo_move_originals.py @@ -0,0 +1,58 @@ +import os +import random +from multiprocessing import Pool +import sys +sys.path.append('/home/thebears/Seafile/Designs/ML') + +#rtpath = '/srv/ftp/hummingbird/2021' +rtpath = os.path.abspath(sys.argv[1]) +#rtpath = os.path.abspath('/srv/ftp/hummingbird/2021/07/') + +didir = list() +for di,dnames, fns in os.walk(rtpath): + numbers = di.split('/')[-3:] + if all([n.isnumeric() for n in numbers]): + didir.append(di) + + + + +def list_files(path): + for file in os.listdir(path): + if os.path.isfile(os.path.join(path, file)): + yield file + +import shutil +import os +for cdr in didir: + + files_origin = list() + for di, dnames, fns in os.walk(cdr): + if di == cdr: + pass + else: + files_origin.extend([os.path.join(di,f) for f in fns]) + + + for src_file in files_origin: + fname = os.path.basename(src_file) + targ_file = os.path.join(cdr, fname) + os.rename(src_file, targ_file) + + + dirs_purge = list() + for di, dnames, fns in os.walk(cdr): + for d in dnames: + cpath = os.path.join(di, d) + fna = [x for x in list_files(cpath)] + if len(fna) == 0: + dirs_purge.append(cpath) + + for d in dirs_purge: + if os.path.exists(d): + shutil.rmtree(d) + + + + + diff --git a/utils (SFConflict ispatel@live.com 2021-09-27-14-27-04).py b/utils (SFConflict ispatel@live.com 2021-09-27-14-27-04).py new file mode 100644 index 0000000..72728d4 --- /dev/null +++ b/utils (SFConflict ispatel@live.com 2021-09-27-14-27-04).py @@ -0,0 +1,322 @@ +from collections import defaultdict, deque +import datetime +import errno +import os +import sys +import time + +import torch +import torch.distributed as dist + + + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the 
global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value, + ) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + data_list = [None] * world_size + dist.all_gather_object(data_list, data) + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError( + "'{}' object has no attribute '{}'".format(type(self).__name__, attr) + ) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append("{}: {}".format(name, str(meter))) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + ] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print( + "{} Total time: {} ({:.4f} s / it)".format( + header, total_time_str, total_time / len(iterable) + ) + ) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + + 
builtin_print = __builtin__.print
+
+    def print(*args, **kwargs):
+        force = kwargs.pop("force", False)
+        # 'or True' forces printing on every rank
+        if is_master or force or True:
+            builtin_print(*args, **kwargs)
+
+    __builtin__.print = print
+
+
+def is_dist_avail_and_initialized():
+    if not dist.is_available():
+        return False
+    if not dist.is_initialized():
+        return False
+    return True
+
+
+def get_world_size():
+    if not is_dist_avail_and_initialized():
+        return 1
+    return dist.get_world_size()
+
+
+def get_rank():
+    if not is_dist_avail_and_initialized():
+        return 0
+    return dist.get_rank()
+
+
+def is_main_process():
+    return get_rank() == 0
+
+
+def save_on_master(*args, **kwargs):
+    if is_main_process():
+        torch.save(*args, **kwargs)
+
+
+def init_distributed_mode(args):
+    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
+        args.rank = int(os.environ["RANK"])
+        args.world_size = int(os.environ["WORLD_SIZE"])
+        args.gpu = int(os.environ["LOCAL_RANK"])
+    elif "SLURM_PROCID" in os.environ:
+        args.rank = int(os.environ["SLURM_PROCID"])
+        args.gpu = args.rank % torch.cuda.device_count()
+    else:
+        print("Not using distributed mode")
+        args.distributed = False
+        return
+
+    args.distributed = True
+
+    torch.cuda.set_device(args.gpu)
+    args.dist_backend = "nccl"
+    print(
+        "| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True
+    )
+    torch.distributed.init_process_group(
+        backend=args.dist_backend,
+        init_method=args.dist_url,
+        world_size=args.world_size,
+        rank=args.rank,
+    )
+    torch.distributed.barrier()
+    setup_for_distributed(args.rank == 0)
diff --git a/utils.py b/utils.py
index 88baf61..c0db43e 100644
--- a/utils.py
+++ b/utils.py
@@ -256,7 +256,9 @@ def setup_for_distributed(is_master):
 
     def print(*args, **kwargs):
         force = kwargs.pop("force", False)
-        if is_master or force:
+        # 'or True' deliberately forces printing on every rank; the call
+        # below must remain builtin_print or this would recurse forever.
+        if is_master or force or True:
             builtin_print(*args, **kwargs)
 
     __builtin__.print = print