yawc

2021-09-27 16:02:11 -04:00
parent 90edf9bd45
commit e18232df84
35 changed files with 3037 additions and 78 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,23 @@
 models/20210720_155640.oth filter=lfs diff=lfs merge=lfs -text
 models/20210921_190116.json filter=lfs diff=lfs merge=lfs -text
 models/20210921_190419.json filter=lfs diff=lfs merge=lfs -text
 models/birds_only.json filter=lfs diff=lfs merge=lfs -text
 models/hummingbird.json filter=lfs diff=lfs merge=lfs -text
 models/20210701_202822.json filter=lfs diff=lfs merge=lfs -text
 models/20210720_155640.json filter=lfs diff=lfs merge=lfs -text
 models/hummingbird.pth filter=lfs diff=lfs merge=lfs -text
 models/20210921_190436.oth filter=lfs diff=lfs merge=lfs -text
 models/20210921_190436.pth filter=lfs diff=lfs merge=lfs -text
 models/20210720_155640.pth filter=lfs diff=lfs merge=lfs -text
 models/20210810_104206.json filter=lfs diff=lfs merge=lfs -text
 models/20210921_190257.json filter=lfs diff=lfs merge=lfs -text
 models/20210921_190436.json filter=lfs diff=lfs merge=lfs -text
 models/20210921_190436.sth filter=lfs diff=lfs merge=lfs -text
 models/20210923_153848.json filter=lfs diff=lfs merge=lfs -text
 models/20210701_202822.pth filter=lfs diff=lfs merge=lfs -text
 models/20210720_155509.json filter=lfs diff=lfs merge=lfs -text
 models/20210923_174607.json filter=lfs diff=lfs merge=lfs -text
 models/birds_only.pth filter=lfs diff=lfs merge=lfs -text
 models/birds_only.oth filter=lfs diff=lfs merge=lfs -text
 models/20210923_153808.json filter=lfs diff=lfs merge=lfs -text
 models/20210923_174546.json filter=lfs diff=lfs merge=lfs -text
--- a/anaylze_visits.py
+++ b/anaylze_visits.py
@@ -0,0 +1,106 @@
 # %%
 folder = r'\\192.168.1.242\ftp\hummingbird\2021\08\\08'
 import os
 dest_path = r'C:\\Users\\TheBears\\Desktop\\'
 files = os.listdir(folder)
 import numpy as np
 f_jsons = list()
 for f in files:
    if f.endswith('.json'):
        f_jsons.append(os.path.join(folder,f))
 import json
 import datetime as dt
 def get_obs_in_json(curr_js):
    """Collect the detections stored in one detection JSON file.

    The recording start time is parsed from the last ``_``-separated token of
    the file name (``%Y%m%d%H%M%S``), ignoring a ``_trimmed`` marker.

    Returns a tuple ``(all_obs, summary)``:
      * ``all_obs`` - one ``[datetime, best_score]`` pair per entry that has
        at least one box.
      * ``summary`` - ``[mean_time, best_score]`` over ``all_obs`` (the mean
        timestamp is truncated to whole seconds), or ``[]`` when nothing was
        detected.
    """
    stem = os.path.basename(curr_js).split('.')[0]
    stamp = stem.replace('_trimmed', '').split('_')[-1]
    start_time = dt.datetime.strptime(stamp, '%Y%m%d%H%M%S')
    with open(curr_js, 'r') as handle:
        frames = json.load(handle)
    all_obs = []
    for frame in frames:
        if 'frame_number_original' in frame:
            # NOTE(review): the division by 30 looks like a frames-per-second
            # conversion - confirm against the writer of these files.
            sec_offset = frame['frame_number_original'] / 30
        else:
            sec_offset = frame['frame_number']
            # Entries flagged as the thumbnail carry no frame offset.
            if sec_offset == 'thumbnail':
                continue
        if len(frame['boxes']) == 0:
            continue
        # NOTE(review): the offset is halved before being applied; the reason
        # is not visible here.
        when = start_time + dt.timedelta(seconds=sec_offset / 2)
        all_obs.append([when, max(frame['scores'])])
    if len(all_obs) == 0:
        return all_obs, []
    mean_ts = np.average(np.asarray([obs[0].timestamp() for obs in all_obs]))
    best_score = np.max([obs[1] for obs in all_obs])
    return all_obs, [dt.datetime.fromtimestamp(int(mean_ts)), best_score]
 entire_obs = list()
 obs_sep = list()
 obs_fnames = list()
 for curr_js in f_jsons:
    cc, avgg = get_obs_in_json(curr_js)
    if len(avgg) > 0:
        obs_sep.append(avgg)
        obs_fnames.append(curr_js.replace('.json','.mp4'))
    entire_obs.extend(cc)
 def scatter_dt(entire_obs, fname, hover_data = None):
    """Write a scatter plot of observations to an HTML file.

    entire_obs -- sequence of ``[time, score]`` pairs; column 0 is converted
                  to ``numpy.datetime64`` for the x axis, column 1 is the y axis.
    fname      -- output file name, joined onto the module-level ``dest_path``.
    hover_data -- optional per-point hover text (one entry per observation).
    """
    import numpy as np
    import plotly.graph_objects as go
    obs_array = np.asarray(entire_obs)
    x = np.asarray(obs_array[:,0], dtype=np.datetime64)
    y = obs_array[:,1]
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        hovertext=hover_data,
        # Only the supplied hover text is shown on hover.
        hoverinfo="text",
        marker=dict(
            color="green"
        ),
        mode='markers',
        showlegend=False
    ))
    fig.write_html(os.path.join(dest_path,fname))
 scatter_dt(entire_obs, 'hbirds.html')
 scatter_dt(obs_sep, 'indiv.html', hover_data = obs_fnames)
 import plotly.express as px
 scores = np.asarray([x[1] for x in obs_sep])
 scors= np.sort(scores)
 cumu_total = len(scores) - np.arange(0,len(scores))
 fig = px.scatter( x= scors, y = cumu_total)
 fig.write_html(os.path.join(dest_path, 'cumul.html'))
 # %%
--- a/convert_to_onnx.py
+++ b/convert_to_onnx.py
@@ -0,0 +1,40 @@
 # Export the most recent detection model to ONNX and check that OpenCV can
 # read the result.
 import json
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 from model import Model
 import torch
 device = 'cpu'
 model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json
 # Pick the lexicographically largest file name and drop its extension,
 # whatever it is, so the '.json' / '.pth' siblings can be addressed below.
 newest_model = os.path.join(model_rt_path, os.path.splitext(max(os.listdir(model_rt_path)))[0])
 with open(newest_model + '.json','r') as nmj:
    model_json = json.load(nmj)
 cats = model_json['categories']
 cats.sort(key=lambda x: x['new_id'])
 # One extra class on top of the listed categories
 # (presumably the detector's background class - TODO confirm).
 num_cat = len(cats) + 1
 model_type = model_json['model_type']
 model = Model(num_cat, model_type)
 labels = [x['name'] for x in cats]
 model.load_state_dict(
    torch.load(newest_model + '.pth', map_location = torch.device(device))
 )
 model.eval()
 # %%
 onnx_model_path = "models"
 onnx_model_name = "hbirds.onnx"
 os.makedirs(onnx_model_path, exist_ok=True)
 full_model_path = os.path.join(onnx_model_path, onnx_model_name)
 # model export into ONNX format
 x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
 torch.onnx.export(model, x, full_model_path, opset_version = 12)
 # %%
 import cv2
 opencv_net = cv2.dnn.readNetFromONNX(full_model_path)
 print("OpenCV model was successfully read. Layer IDs: \n", opencv_net.getLayerNames())
--- a/2021-09-27-14-36-56).py
+++ b/2021-09-27-14-36-56).py
@@ -0,0 +1,192 @@
 # %%
 import os
 from unicodedata import category
 import torch
 from PIL import Image
 import sys
 import json
 import torch
 import transforms as T
 import os
 import numpy as np
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 if sys.platform == "win32":
    PATH_ROOT = r"D:\ishan\ml\inaturalist\\"
 else:
    PATH_ROOT = '/home/thebears/data/ml/inaturalist'
 def get_transform(train):
    """Return the composed transform pipeline.

    Images are always converted with ``T.ToTensor()``; when ``train`` is truthy
    a ``T.RandomHorizontalFlip(0.5)`` step is appended.
    """
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)
 def create_map(list_in, from_key, to_key):
    """Map ``entry[from_key]`` to ``entry[to_key]`` for every entry of ``list_in``.

    When several entries share the same ``from_key`` value the last one wins.
    """
    return {entry[from_key]: entry[to_key] for entry in list_in}
 class iNaturalistDataset(torch.utils.data.Dataset):
    """Detection dataset built from the ``*_2017_bboxes.json`` files under PATH_ROOT.

    Each sample is an RGB image plus a target dict holding a single bounding
    box, its remapped label, the image id, the annotation area and an
    ``iscrowd`` flag.
    """
    def __init__(self, validation=False, train=False, species=None):
        """Load the annotation JSON and index one annotation per image.

        validation / train -- exactly one of the two must be truthy; it selects
                              which JSON file is read.
        species            -- optional collection of category names. When given,
                              categories named in it are kept, and so is every
                              category whose supercategory is 'Mammalia'. When
                              None, all categories are kept.

        Raises Exception when neither or both of train/validation are set.
        """
        self.validation = validation
        self.train = train
        if (not self.train and not self.validation) or (self.train and self.validation):
            raise Exception("Need to do either train or validation")
        # The random flip is only enabled for the training split.
        self.transform = get_transform(self.train)
        if validation:
            json_path = os.path.join(
                PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json"
            )
        elif train:
            json_path = os.path.join(
                PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json"
            )
        with open(json_path, "r") as rj:
            f = json.load(rj)
        self.raw_data = f
        categories = list()
        image_info = dict()
        orig_id_to_name = dict()
        # NOTE(review): the loop variable `category` shadows the name imported
        # from `unicodedata` at the top of the file.
        for idx, category in enumerate(f["categories"]):
            do_add = False
            # Every category is recorded here, whether or not it is kept below.
            orig_id_to_name[category["id"]] = category
            if species is None:
                do_add = True
            elif category["name"] in species:
                do_add = True
            elif category['supercategory'] == 'Mammalia':
                do_add = True
            if do_add:
                print('Adding '+str(category))
                categories.append(category)
        # Kept categories get contiguous ids starting at 1, in name order.
        categories = sorted(categories, key=lambda k: k["name"])
        for idx, cat in enumerate(categories):
            cat["new_id"] = idx + 1
        orig_to_new_id = create_map(categories, "id", "new_id")
        for annot in f["annotations"]:
            if annot["category_id"] in orig_to_new_id:
                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
                # NOTE(review): `id` shadows the builtin of the same name.
                id = annot["image_id"]
                if id not in image_info:
                    image_info[id] = dict()
                # In-place conversion of the box: elements 2 and 3 become
                # element 0 + element 2 and element 1 + element 3
                # (presumably [x, y, w, h] -> [x1, y1, x2, y2] - TODO confirm).
                annot["bbox"][2] += annot["bbox"][0]
                annot["bbox"][3] += annot["bbox"][1]
                # A later annotation for the same image replaces the earlier
                # one, so only one annotation per image is retained.
                image_info[id]["annotation"] = annot
        for img in f["images"]:
            id = img["id"]
            path = os.path.join(PATH_ROOT, img["file_name"])
            height = img["height"]
            width = img["width"]
            # Only images that have a retained annotation get path/size info.
            if id in image_info:
                image_info[id].update({"path": path, "height": height, "width": width})
        for idx, (id, im_in) in enumerate(image_info.items()):
            im_in["idx"] = idx
        self.images = image_info
        self.categories = categories
        self.orig_id_to_name = orig_id_to_name
        # Positional index -> image id, in image_info insertion order.
        self.idx_to_id = [x for x in self.images]
        # One more than the number of kept categories
        # (presumably label 0 is reserved for background - TODO confirm).
        self.num_classes = len(self.categories) + 1
        self.num_samples = len(self.images)
    def __len__(self):
        """Return the number of images that have a retained annotation."""
        return self.num_samples
    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at position ``idx``.

        The image is opened from disk and converted to RGB; ``target`` holds
        the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
        Both are passed through ``self.transform`` when it is set.
        """
        idd = self.idx_to_id[idx]
        c_image = self.images[idd]
        img_path = c_image["path"]
        img = Image.open(img_path).convert("RGB")
        annot = c_image["annotation"]
        bbox = annot["bbox"]
        boxes = bbox
        target = dict()
        # The single box is wrapped in a list, giving a one-row tensor.
        target["boxes"] = torch.as_tensor([boxes])
        target["labels"] = torch.as_tensor(
            [annot["new_category_id"]], dtype=torch.int64
        )
        target["image_id"] = torch.tensor([annot["image_id"]])
        target["area"] = torch.as_tensor([annot["area"]])
        target["iscrowd"] = torch.zeros((1,), dtype=torch.int64)
        if self.transform is not None:
            img, target = self.transform(img, target)
        return img, target
 if False:
    train_dataset = iNaturalistDataset(train=True)
    loc_path = os.path.join(PATH_ROOT, "inat2017_locations", "train2017_locations.json")
    with open(loc_path, "r") as lfile:
        locs = json.load(lfile)
    from bear_utils import get_distance_from_home
    # %%
    category_distances = dict()
    inserts = 0
    for loc in locs:
        lat = loc["lat"]
        lon = loc["lon"]
        im_id = loc["id"]
        if lat is None or lon is None:
            continue
        ff = get_distance_from_home(lat, lon)
        if im_id in train_dataset.images:
            inserts += 1
            train_dataset.images[im_id]["distance"] = ff
            category_id = train_dataset.images[im_id]["annotation"]["category_id"]
            if category_id not in category_distances:
                category_distances[category_id] = list()
            category_distances[category_id].append(ff)
    # %%
    from EcoNameTranslator import to_common
    for k, v in category_distances.items():
        name = train_dataset.orig_id_to_name[k]
        if np.average(np.asarray(v) < 250) > 0.1:
            if name["supercategory"] == "Aves":
                print(len(v), to_common([name["name"]]))
    # %%
    fc = sorted(
        category_distances, key=lambda x: len(category_distances[x]), reverse=True
    )
    for x in fc:
        cc = train_dataset.orig_id_to_name[x]
        if cc["supercategory"] == "Aves":
            ou = to_common([cc["name"]])
            print(ou, len(category_distances[x]))
 # %%
--- a/data.py
+++ b/data.py
@@ -68,7 +68,6 @@ class iNaturalistDataset(torch.utils.data.Dataset):
            if species is None:
                do_add = True
            elif category["name"] in species:
                print(category["name"])
                do_add = True
            if do_add:
--- a/2021-09-27-14-36-56).py
+++ b/2021-09-27-14-36-56).py
@@ -0,0 +1,110 @@
 import math
 import sys
 import time
 import torch
 import torchvision.models.detection.mask_rcnn
 from coco_utils import get_coco_api_from_dataset
 from coco_eval import CocoEvaluator
 import utils
 def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one training pass over ``data_loader`` and return the metric logger.

    During epoch 0 a warm-up learning-rate scheduler is stepped after every
    optimizer step. The process exits with status 1 as soon as the reduced
    loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    warmup_scheduler = None
    if epoch == 0:
        warmup_scheduler = utils.warmup_lr_scheduler(
            optimizer, min(1000, len(data_loader) - 1), 1. / 1000
        )
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        if warmup_scheduler is not None:
            warmup_scheduler.step()
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    return metric_logger
 def _get_iou_types(model):
    """Return the IoU types to evaluate for ``model``.

    Always contains "bbox"; "segm" is added for MaskRCNN models and
    "keypoints" for KeypointRCNN models. A DistributedDataParallel wrapper is
    unwrapped before the checks.
    """
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    detection = torchvision.models.detection
    optional_types = (
        (detection.MaskRCNN, "segm"),
        (detection.KeypointRCNN, "keypoints"),
    )
    iou_types = ["bbox"]
    for model_cls, iou_name in optional_types:
        if isinstance(model, model_cls):
            iou_types.append(iou_name)
    return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` and return the CocoEvaluator.

    The torch thread count is set to 1 for the duration of the evaluation and
    restored afterwards, even when evaluation raises.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = 'Test:'
        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)
        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)
            # Wait for pending CUDA work so model_time measures the forward pass.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time
            # Key each prediction by the image id of its matching target.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()
        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Restore the caller's thread setting on success and on failure alike.
        torch.set_num_threads(n_threads)
    return coco_evaluator
--- a/engine.py
+++ b/engine.py
@@ -20,15 +20,15 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
-
+        print('Hey I''m here')
        losses = sum(loss for loss in loss_dict.values())
        # reduce losses over all GPUs for logging purposes
--- a/filter_species.py
+++ b/filter_species.py
@@ -0,0 +1,45 @@
 # Count how often each species appears in an eBird export and write the
 # counts, most frequent first, to a CSV file.
 import csv
 csv_path = '/home/thebears/data/ebirddata/output_mi/ebd_US-MI_relMay-2021.txt'
 fields = ['COMMON NAME','SCIENTIFIC NAME','LATITUDE', 'LONGITUDE','OBSERVATION DATE']
 tokeep = list()
 idx = 0
 # newline='' lets the csv module handle line endings itself.
 with open(csv_path, newline='') as csvfile:
    data = csv.DictReader(csvfile, delimiter='\t')
    for idx, row in enumerate(data):
        # Progress indicator.
        if idx % 1000 == 0:
            print(idx)
        # Keep only the columns listed in `fields`; without this the tally
        # below would have nothing to count.
        tokeep.append({field: row[field] for field in fields})
 # %%
 # scientific name -> [common name, number of observations]
 sc = dict()
 for x in tokeep:
    scientific_name = x['SCIENTIFIC NAME']
    if scientific_name not in sc:
        sc[scientific_name] = [x['COMMON NAME'], 0]
    sc[scientific_name][1]+=1
 # %%
 sor = {x:sc[x] for x in sorted(sc, key = lambda x : sc[x][1], reverse=True)}
 with open('/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv','w', newline='') as csvfile:
    finames = ['species','name','count']
    writer = csv.DictWriter(csvfile, fieldnames=finames)
    writer.writeheader()
    for key,values in sor.items():
        fn = {'species':key, 'name':values[0], 'count':values[1]}
        writer.writerow(fn)
--- a/flag_videos_to_keep.py
+++ b/flag_videos_to_keep.py
@@ -1,11 +1,15 @@
 import json
 import shutil
 import os
 import numpy as np
 source_path = '/srv/ftp/hummingbird/2021'
 #target_path = '/home/thebears/Videos/ftp'
-target_path = '/home/thebears/ftp_links'
+target_path = '/home/thebears/data/ftplinks'
-
+import scipy.stats
 target_mean = 0.4
 target_std = 1
 gauss = scipy.stats.norm(target_mean, target_std)
 have_json = set()
 for di, _, fns in os.walk(source_path):
@@ -16,38 +20,69 @@ for di, _, fns in os.walk(source_path):
 def box_area(box):
   return  (box[3]-box[1]) * (box[2] - box[0]) / 100000
 do_stop = False
 fracs = dict()
 saveo = None
 saveb = None
 def gaussian(x, mu = target_mean, sig = target_std):
    return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
 scc = list()
 for c_js in have_json:
    hits = 0
    total = 0
    o = json.load(open(c_js,'r'))
    if c_js.endswith('Hummingbird_01_20210701105440.json'):
        saveo = o
    if c_js.endswith('Hummingbird_01_20210627111405.json'):
        saveb = o
    avg = 0
    max_sc = 0
    for i in o:
-        total += 1
+#        for x,b in zip(i['scores'], i['boxes']):
-#        if len([x for x in i['scores'] if x > 0.1]) > 0:
+#            scc.append((x,box_area(b)))
        if len(i['boxes']) > 0:
            hits += 1
-    fracs[c_js] = [hits, total]
+        if len(i['scores']) > 0:
            css = max(i['scores']) 
            mf = gaussian(box_area(i['boxes'][0]))
            avg += css * mf / len(o)
-    if do_stop:
+    fracs[c_js] = avg
-        break
+
 ratios = dict()
 for x,y in fracs.items():
-    ratios[x] = y[0]/y[1]
+    ratios[x] = y
-# %%
+
 sorted_ratios = {x:ratios[x] for x in sorted(ratios, key=lambda x: ratios[x])}
 import shutil
 for d in os.listdir(target_path):
    shutil.rmtree(target_path + '/' + d)
 import math
 dir_created = set()
-for fname, ratio in ratios.items():
+for idx, (fname, ratio) in enumerate(sorted_ratios.items()):
-    cr = math.floor(ratio * 10)/10
+    cr = math.floor(100*ratio)
-    target_dir = os.path.join(target_path, str(cr))
+    target_dir = os.path.join(target_path, '{0:02g}'.format(cr))
    if not os.path.exists(target_dir) and target_dir not in dir_created:
        os.mkdir(target_dir)
        dir_created.add(target_dir)
@@ -59,4 +94,30 @@ for fname, ratio in ratios.items():
    source_file = fname.replace('.json','.mp4')
    target_file = os.path.join(target_dir, os.path.basename(source_file))
    os.symlink(source_file, target_file)
    source_file = fname.replace('.json','.json')
    target_file = os.path.join(target_dir, os.path.basename(source_file))
    os.symlink(source_file, target_file)
 # %%
 if False:
    plt.close('all')
    inp = saveo
    sco = list()
    for x in inp:
        if len(x['scores']) > 0:
            cscore = max(x['scores'])
            ar = box_area(x['boxes'][0])/ 100000
        else:
            cscore = 0
            ar = 0
        sco.append((cscore, ar))
    plt.plot(sco)
 # %%
    distr = np.asarray(scc)
--- a/generate_matrix_sets.py
+++ b/generate_matrix_sets.py
@@ -0,0 +1,25 @@
 # Read a stack of 16 frames from one video and turn it into a float tensor.
 import os
 vpath = '/srv/ftp/railing/2021/09/04/Railing_00_20210904070617.mp4'
 stack_path = os.path.splitext(vpath)[0]
 import cv2
 import numpy
 import numpy as np
 import torch
 cap = cv2.VideoCapture(vpath)
 frame_num = 0
 step_frame = 5
 total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 # %%
 images = list()
 stack_num = 0
 # NOTE(review): cap.read() returns consecutive frames; step_frame only limits
 # the number of iterations, it does not skip frames. Confirm this is intended.
 for frame_num in range(0, total_frames, step_frame):
    ok, img = cap.read()
    # Stop cleanly when the capture runs out of frames or fails to decode.
    if not ok:
        break
    images.append(img.copy())
    if len(images) == 16:
        stack_num +=1
        break
 # %%
 # Stack to (frames, H, W, C) and move the channel axis to position 1.
 imgs = np.moveaxis(np.stack(images), 3, 1)
 bt = torch.FloatTensor(imgs)
 tensor_path = f'{stack_path}.{stack_num:03}.pt'
--- a/identify_behavior.py
+++ b/identify_behavior.py
@@ -0,0 +1,64 @@
 # Measure, per detection file, how far the detected box centres spread, and
 # select the files with a high average score and a large spread.
 import os
 import numpy as np
 rtdir = '/srv/ftp/hummingbird/2021/07/21'
 #rtdir = os.getcwd()
 files_read = list()
 for cdir, _,  files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))
 import json
 fcontents = dict()
 for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)
 def compute_2d_scores(cbbs):
    """Return ``(stddev, scores)`` for the first detection of each entry.

    ``scores`` lists the first score of every entry that has any score.
    ``stddev`` is the score-weighted mean distance of the first boxes' centres
    from their score-weighted mean centre. When no entry has a score the
    result is ``(nan, [])``.
    """
    locs = list()
    scores = list()
    for cbb in cbbs:
        if len(cbb['scores']) > 0:
            first_box = cbb['boxes'][0]
            first_score = cbb['scores'][0]
            # Centre = mean of elements 0 and 2 (x) and of elements 1 and 3 (y).
            x_cent = np.mean(first_box[0::2])
            y_cent = np.mean(first_box[1::2])
            locs.append([x_cent, y_cent])
            scores.append(first_score)
    if not scores:
        # np.average cannot take a weighted mean of nothing.
        return float('nan'), scores
    center = np.average(locs, weights=scores, axis=0)
    stddev = np.average(np.linalg.norm(center - locs,axis=1),weights=scores)
    return stddev, scores
 data = {'file':list(), 'stddev':list(),'weights':list()}
 for fname, contents in fcontents.items():
    if len(contents) > 0:
        sdev, sc = compute_2d_scores(contents)
        # Files without any detection carry no position information; skipping
        # them keeps the three columns the same length.
        if not sc:
            continue
        data['file'].append(fname)
        data['stddev'].append(sdev)
        data['weights'].append(np.average(sc))
 import pandas as pd
 df = pd.DataFrame(data)
 fil = df.loc[(df['weights'] > 0.4) & (df['stddev'] > 400) ]
--- a/purge_originals.py
+++ b/purge_originals.py
@@ -0,0 +1,58 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 #rtpath = '/srv/ftp/hummingbird/2021'
 rtpath = os.path.abspath(sys.argv[1])
 #rtpath = os.path.abspath('/srv/ftp/hummingbird/2021/07/')
 didir = list()
 for di,dnames, fns in os.walk(rtpath):
    numbers = di.split('/')[-3:]
    if all([n.isnumeric() for n in numbers]):
        didir.append(di)
 def list_files(path):
    """Yield the names of the entries directly inside ``path`` that are files."""
    yield from (
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )
 import shutil
 import os
 for cdr in  didir:
    files_origin = list()
    for di, dnames, fns in os.walk(cdr):
        if di == cdr:
            pass
        else:
            files_origin.extend([os.path.join(di,f) for f in fns])
    for src_file in files_origin:
        fname = os.path.basename(src_file)
        targ_file = os.path.join(cdr, fname)
        os.rename(src_file, targ_file)
    dirs_purge = list()
    for di, dnames, fns in os.walk(cdr):
        for d in dnames:
            cpath = os.path.join(di, d)
            fna = [x for x in list_files(cpath)]
            if len(fna) == 0:
                dirs_purge.append(cpath)
    for d in dirs_purge:
        if os.path.exists(d):
            shutil.rmtree(d)
--- a/2021-08-11-09-24-52).py
+++ b/2021-08-11-09-24-52).py
@@ -0,0 +1,59 @@
 import os
 #rtdir = '/srv/ftp/hummingbird/2021/07/21'
 rtdir = os.getcwd()
 files_read = list()
 for cdir, _,  files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))
 import json
 fcontents = dict()
 for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)
 to_purge = dict()
 for fname, contents in fcontents.items():
    if len(contents) > 0:
        nscores = 0
        for x in contents:
            if len(x['scores']) > 0:
                nscores+= max(x['scores']) >= 0.6
 #        nscores = sum([max(x['scores']) for x in contents])
 #        nscores = sum([len(x['scores']) for x in contents])
        if nscores == 0:
            to_purge[fname] = nscores
 # Build the set of files (video, thumbnail, json) to remove for every purged
 # detection file, then delete the ones that exist.
 delete_list = set()
 for f_json, n_scores in to_purge.items():
    # splitext drops exactly the extension; str.rstrip('.json') would strip any
    # trailing '.', 'j', 's', 'o', 'n' characters and can eat into the stem.
    stem = os.path.splitext(f_json)[0]
    vid_path = stem + '.mp4'
    img_path = stem + '.jpg'
    if not os.path.exists(img_path):
        # Fall back to the thumbnail whose numeric name component is one
        # higher (presumably saved a second after the video - TODO confirm).
        stem_parts = stem.split('_')
        # For '..._<number>_trimmed' the number is the second-to-last part.
        ts_idx = -2 if 'trimmed' in img_path else -1
        stem_parts[ts_idx] = str(int(stem_parts[ts_idx]) + 1)
        img_path = '_'.join(stem_parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})
 print(f'Deleting {len(delete_list)} files')
 for f in delete_list:
    if os.path.exists(f):
        os.remove(f)
--- a/2021-09-27-14-27-04).py
+++ b/2021-09-27-14-27-04).py
@@ -0,0 +1,49 @@
 import os
 #rtdir = '/srv/ftp/hummingbird/2021/07/21'
 rtdir = os.getcwd()
 files_read = list()
 for cdir, _,  files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))
 import json
 fcontents = dict()
 for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)
 to_purge = dict()
 for fname, contents in fcontents.items():
    if len(contents) > 0:
        nscores = sum([len(x['scores']) for x in contents])
        if nscores == 0:
            to_purge[fname] = nscores
 # Build the set of files (video, thumbnail, json) to remove for every purged
 # detection file, then delete the ones that exist.
 delete_list = set()
 for f_json, n_scores in to_purge.items():
    # splitext drops exactly the extension; str.rstrip('.json') would strip any
    # trailing '.', 'j', 's', 'o', 'n' characters and can eat into the stem.
    stem = os.path.splitext(f_json)[0]
    vid_path = stem + '.mp4'
    img_path = stem + '.jpg'
    if not os.path.exists(img_path):
        # Fall back to the thumbnail whose trailing number is one higher
        # (presumably saved a second after the video - TODO confirm).
        stem_parts = stem.split('_')
        stem_parts[-1] = str(int(stem_parts[-1]) + 1)
        img_path = '_'.join(stem_parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})
 print(delete_list)
 for f in delete_list:
    if os.path.exists(f):
        os.remove(f)
--- a/purge_videos.py
+++ b/purge_videos.py
@@ -0,0 +1,54 @@
 # Delete videos (and their thumbnail / json) under the current directory whose
 # detection file never reaches a best score of 0.60.
 import os
 #rtdir = '/srv/ftp/hummingbird/2021/07/21'
 rtdir = os.getcwd()
 files_read = list()
 for cdir, _,  files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):# and 'trimmed' not in f:
            files_read.append(os.path.join(cdir, f))
 import json
 fcontents = dict()
 for f in files_read:
    with open(f) as ff:
        try:
            fcontents[f] = json.load(ff)
        except ValueError:
            # Malformed JSON or undecodable bytes: report and leave the file alone.
            print(f, ' Failed')
 to_purge = list()
 for fname, contents in fcontents.items():
    if len(contents) > 0:
        # Best score of every entry that has at least one score.
        nscores = list()
        for x in contents:
            if len(x['scores']) > 0:
                nscores.append(max(x['scores']))
        if len(nscores) == 0 or max(nscores) < 0.60:
            to_purge.append(fname)
 delete_list = set()
 for f_json in to_purge:
    # splitext drops exactly the extension; str.rstrip('.json') would strip any
    # trailing '.', 'j', 's', 'o', 'n' characters and can eat into the stem.
    stem = os.path.splitext(f_json)[0]
    vid_path = stem + '.mp4'
    img_path = stem + '.jpg'
    if not os.path.exists(img_path) and 'trimmed' not in img_path:
        # Fall back to the thumbnail whose trailing number is one higher
        # (presumably saved a second after the video - TODO confirm).
        stem_parts = stem.split('_')
        stem_parts[-1] = str(int(stem_parts[-1]) + 1)
        img_path = '_'.join(stem_parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})
 print(delete_list)
 for f in delete_list:
    if os.path.exists(f):
        os.remove(f)
--- a/quantize_model.py
+++ b/quantize_model.py
@@ -0,0 +1,155 @@
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 import json
 import cv2
 import random
 from model import Model
 import socket
 from torchvision.utils import draw_bounding_boxes
 import torch as t
 import matplotlib.pyplot as plt
 import matplotlib
 no_cuda =  socket.gethostname() == 'tree'
 device='cpu'
 model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
 with open(model_path + '.json','r') as nmj:
    model_json = json.load(nmj)
 cats = model_json['categories']
 cats.sort(key=lambda x: x['new_id'])
 num_cat = len(cats) + 1
 model_type = model_json['model_type']
 model = Model(num_cat, model_type)
 labels = [x['name'] for x in cats]
 model.load_state_dict(
    torch.load(model_path + '.pth', map_location = torch.device(device))
 )
 model.eval()
 # %%
 backend = "fbgemm"
 model.qconfig = torch.quantization.get_default_qconfig(backend)
 torch.backends.quantized.engine = backend
 model_static_quantized = torch.quantization.prepare(model, inplace=False)
 model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False)
 def print_model_size(mdl):
    """Print the size, in MB, of ``mdl``'s state dict when saved to disk.

    The state dict is written to ``tmp.pt`` in the current directory; the file
    is removed again even when saving or measuring fails.
    """
    try:
        torch.save(mdl.state_dict(), "tmp.pt")
        print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    finally:
        # A failed save can still leave a partial file behind.
        if os.path.exists("tmp.pt"):
            os.remove("tmp.pt")
 print_model_size(model_static_quantized) 
 # %%
 results = list()
 vid_path = '/srv/ftp/hummingbird/2021/07/28/Hummingbird_01_20210728063745.mp4'
 cap = cv2.VideoCapture(vid_path)
 frame_num = 0
 total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 step_frame = 15
 import time
 idces = 0
 st = time.time()
 for frame_num in range(0, total_frames, step_frame):
    srcimg = cap.read()[1]
    print(frame_num)
    if srcimg is None:
        break
    image = srcimg[:, :, ::-1].copy()
    o = T.ToTensor()(image)
    img = o[None, :, :, :]
    with torch.no_grad():
        ou = model(img)
    print(ou)
    for i in range(step_frame):
        img = cap.read()[1];
        if img is None:
            break
 # %%
 et = time.time()
 model(img)
 st = time.time()
 print(st-et)
 # %%
 img_use = img
 st = time.time()
 features = model.backbone(img_use)
 print(time.time() - st)
 st = time.time()
 proposals = model.rpn(img_use, features)
 print(time.time() - st)
 st = time.time()
 head = model.head(features, proposals)
 print(time.time() - st)
 # %%
 # vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/2021-08-11-09-24-52).py
+++ b/2021-08-11-09-24-52).py
@@ -0,0 +1,46 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 #rtpath = '/srv/ftp/hummingbird/2021'
 rtpath = os.path.abspath(sys.argv[1])
 didir = list()
 for di,dnames, fns in os.walk(rtpath):
    numbers = di.split('/')[-3:]
    if all([n.isnumeric() for n in numbers]):
        didir.append(di)
 def list_files(path):
    for file in os.listdir(path):
        if os.path.isfile(os.path.join(path, file)):
            yield file
 for cdir in didir:
    files = [x for x in list_files(cdir)]
    todelete = list()
    for y in files:
        if 'trimmed' not in y:
            ff = os.path.splitext(y)
            fcheck = os.path.join(cdir, ''.join([ff[0],'_trimmed',ff[1]]))
            if os.path.exists(fcheck):
                todelete.append(fcheck)
    to_purge = set()
    for de in todelete:
        if os.path.exists(de.replace('_trimmed','')):
            to_purge.add(de)
    for y in to_purge:
        os.remove(y)
--- a/reset_trim.py
+++ b/reset_trim.py
@@ -0,0 +1,47 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 # Root to scan: the first command-line argument when one is given,
 # otherwise the default archive location.  Previously the argument was
 # mandatory (IndexError when missing) yet immediately overwritten by the
 # hard-coded path, so it never had any effect.
 rtpath = os.path.abspath(
    sys.argv[1] if len(sys.argv) > 1 else '/srv/ftp/hummingbird/2021/'
 )
 # Collect every directory whose last three path components are all
 # numeric.
 didir = list()
 for di, dnames, fns in os.walk(rtpath):
    numbers = di.split(os.sep)[-3:]
    if all(n.isnumeric() for n in numbers):
        didir.append(di)
 def list_files(path):
    """Generate the names of the plain files that sit directly in `path`.

    Anything for which `os.path.isfile` is false is left out.  Only the
    bare entry name is produced, not the joined path.
    """
    names = os.listdir(path)
    yield from filter(
        lambda name: os.path.isfile(os.path.join(path, name)),
        names,
    )
 # Walk the collected directories and remove each "<name>_trimmed<ext>"
 # file that has a matching "<name><ext>" file next to it.
 for cdir in didir:
    trimmed_found = []
    for original_name in list(list_files(cdir)):
        if 'trimmed' in original_name:
            continue
        base, suffix = os.path.splitext(original_name)
        trimmed_file = os.path.join(cdir, base + '_trimmed' + suffix)
        if os.path.exists(trimmed_file):
            trimmed_found.append(trimmed_file)
    # A trimmed file is purged only if the same path without '_trimmed'
    # exists at this point.
    to_purge = {
        trimmed_file
        for trimmed_file in trimmed_found
        if os.path.exists(trimmed_file.replace('_trimmed', ''))
    }
    for victim in to_purge:
        os.remove(victim)
--- a/2021-09-27-14-27-04).py
+++ b/2021-09-27-14-27-04).py
@@ -0,0 +1,38 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 from score_video import score_video
 #rtpath = '/srv/ftp/hummingbird/2021'
 # Directory tree to scan, taken from the first command-line argument.
 rtpath = sys.argv[1]
 cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}'
 # fnames: every .mp4 found.  have_json: the .mp4 path implied by every
 # .json found (its name with '.json' replaced by '.mp4').
 fnames = set()
 have_json = set()
 for dirpath, _, filenames in os.walk(rtpath):
    for filename in filenames:
        if filename.endswith('.mp4'):
            fnames.add(os.path.join(dirpath, filename))
            continue
        if filename.endswith('.json'):
            implied_video = filename.replace('.json', '.mp4')
            have_json.add(os.path.join(dirpath, implied_video))
 # Videos that have no JSON yet, in random order.
 files_to_score = list(fnames.difference(have_json))
 random.shuffle(files_to_score)
 def try_catch_chunk(vids):
    """Score one chunk of videos without letting any exception escape.

    Used as the worker function for `Pool.map`, so a failure in one chunk
    must not propagate and abort the whole map call.

    Args:
        vids: The chunk (list of video paths) handed to `score_video`.

    Returns:
        None.  On failure the worker pid, the chunk and the full traceback
        are printed instead of only the bare exception message.
    """
    try:
        score_video(vids)
    except Exception:
        # Local import: only needed on the failure path.
        import traceback

        print(f'{os.getpid()}: scoring failed for chunk {vids!r}')
        print(traceback.format_exc())
 # Split the list of videos to score into consecutive chunks of at most
 # `n` paths; each chunk becomes one unit of work for the pool.
 lst = files_to_score
 n = 25
 chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
 # %%
 # Entry point: map the chunks over a pool of two worker processes, using
 # the exception-catching wrapper as the worker function.
 if __name__ == '__main__':
    with Pool(2) as p:
       output = p.map(try_catch_chunk,chunks)
      #  output = p.map(score_video,chunks)
--- a/score_in_directory.py
+++ b/score_in_directory.py
@@ -4,8 +4,8 @@ from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 from score_video import score_video
-
+#rtpath = '/srv/ftp/hummingbird/2021'
-rtpath = '/srv/ftp/hummingbird/2021'
+rtpath = sys.argv[1]
 cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}'
 have_json = set()
 fnames = set()
@@ -17,12 +17,15 @@ for di,_, fns in os.walk(rtpath):
            have_json.add(os.path.join(di,fn.replace('.json','.mp4')))
 files_to_score = list(fnames - have_json)
-random.shuffle(files_to_score)
+files_to_score = sorted(files_to_score)
 print(files_to_score)
 #random.shuffle(files_to_score)
 def try_catch_chunk(vids):
    try:
        score_video(vids)
 #        score_image(vids)
    except Exception as e:
        print(e)
@@ -31,5 +34,7 @@ n = 25
 chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
 # %%
 if __name__ == '__main__':
-    with Pool(4) as p:
+    with Pool(2) as p:
-        output = p.map(score_video,chunks)
+       output = p.map(try_catch_chunk,chunks)
      #  output = p.map(score_video,chunks)
--- a/2021-07-07-14-37-14).py
+++ b/2021-07-07-14-37-14).py
@@ -0,0 +1,131 @@
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 import json
 import cv2
 import random
 from model import Model
 import socket
 #vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4'
 def score_video(vid_in_list):
    """Score sampled frames of one or more videos and save the results as JSON.

    The model files are taken from the hard-coded models directory: the
    lexicographically greatest directory entry, with its extension removed,
    names the `<stem>.json` description and the `<stem>.pth` weights.
    Every 30th frame of each video is passed through the model and the
    detections are written to `<video stem>.json` next to the video.

    Args:
        vid_in_list: A single video path (str) or a sized sequence of paths.

    A video whose JSON file already exists is skipped.  An exception raised
    while handling one video is reported and the next video is processed.
    """
    # Local imports: these names are not imported at the top of this file.
    import subprocess
    import traceback

    device='cpu'
    model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json
    # splitext removes whatever extension the selected entry has, so the
    # '.json' / '.pth' suffixes appended below are always applied to a bare
    # stem (the old `.replace('.pth','')` only handled '.pth').
    newest_entry = max(os.listdir(model_rt_path))
    newest_model = os.path.join(model_rt_path, os.path.splitext(newest_entry)[0])
    with open(newest_model + '.json','r') as nmj:
        model_json = json.load(nmj)
    cats = model_json['categories']
    cats.sort(key=lambda x: x['new_id'])
    # One more class than there are categories.
    num_cat = len(cats) + 1
    model_type = model_json['model_type']
    model = Model(num_cat, model_type)
    labels = [x['name'] for x in cats]
    model.load_state_dict(
        torch.load(newest_model + '.pth', map_location = torch.device(device))
    )
    model.eval()
    if isinstance(vid_in_list, str):
        vid_in_list = [vid_in_list]
    for idx_vid, vid_in in enumerate(vid_in_list):
        cap = None
        try:
            vid_path = os.path.abspath(vid_in)
            # splitext only removes the final extension; rsplit('.')[0]
            # cut the path at the first dot found anywhere in it.
            scores_json = os.path.splitext(vid_path)[0] + '.json'
            print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path)
            if os.path.exists(scores_json):
                print(f"JSON {scores_json} already exists")
                # Skip just this video; exit() here terminated the whole
                # process and with it the rest of the batch.
                continue
            vid_dir = os.path.dirname(vid_path)
            # Argument list instead of a shell string, so directory names
            # with spaces or shell metacharacters are passed through intact.
            subprocess.run(['sudo', 'chmod', '777', vid_dir], check=False)
            cap = cv2.VideoCapture(vid_path)
            results = list()
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            for frame_num in range(0, total_frames, 30):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ok, img = cap.read()
                if not ok or img is None:
                    # Stop at the first unreadable frame and keep what has
                    # been scored so far, rather than failing the video.
                    break
                # Reverse the last (channel) axis before tensor conversion.
                image = img[:, :, ::-1].copy()
                o = T.ToTensor()(image)
                # Add a leading batch axis.
                img = o[None, :, :, :]
                with torch.no_grad():
                    ou = model(img)
                if len(ou) > 0:
                    ofscore = ou[0]
                    # Convert every tensor to a plain list for json.dump.
                    for k in ofscore:
                        ofscore[k] = ofscore[k].numpy().tolist()
                    ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
                    ofscore['frame_number'] = frame_num
                    results.append(ofscore)
            with open(scores_json,'w') as jj:
                json.dump(results, jj, indent=4)
        except Exception as e:
            print(f'Failed to score {vid_in}: {e!r}')
            print(traceback.format_exc())
        finally:
            if cap is not None:
                cap.release()
 if __name__ == '__main__':
    score_video(sys.argv[1])
 # %%
 # vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/2021-09-27-14-27-04).py
+++ b/2021-09-27-14-27-04).py
@@ -0,0 +1,157 @@
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 import json
 import cv2
 import random
 from model import Model
 import socket
 from torchvision.utils import draw_bounding_boxes
 import torch as t
 import matplotlib.pyplot as plt
 import matplotlib
 #vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4'
 def _thumbnail_path(vid_path):
    """Return the .jpg thumbnail path to try for the video at `vid_path`.

    The first choice is the video's own stem plus '.jpg'.  When that file
    does not exist and the stem ends in '_<digits>', the same name with the
    number incremented by one is returned instead; the caller must still
    check that the returned path exists.
    """
    # splitext removes exactly the extension.  rstrip('.mp4') strips any
    # trailing run of the characters '.', 'm', 'p', '4', which corrupted
    # stems ending in one of those characters.
    stem = os.path.splitext(vid_path)[0]
    img_path = stem + '.jpg'
    if os.path.exists(img_path):
        return img_path
    head, sep, tail = stem.rpartition('_')
    if sep and tail.isdigit():
        return head + sep + str(int(tail) + 1) + '.jpg'
    # Non-numeric suffix: no alternative name can be derived.
    return img_path


 def score_video(vid_in_list):
    """Score each video (thumbnail first, then sampled frames) and save JSON.

    The 'hummingbird' model description and weights are loaded from the
    hard-coded models directory.  For every video the thumbnail image, if
    found, is scored first; when it yields at least one box the video
    itself is not scored.  Otherwise every 15th frame is scored.  Results
    are written to `<video stem>.json` next to the video.

    Args:
        vid_in_list: A single video path (str) or a sized sequence of paths.

    A video whose JSON file already exists is skipped.  Exceptions raised
    while scoring propagate to the caller.
    """
    # Local import: not imported at the top of this file.
    import subprocess

    device='cpu'
    model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
    with open(model_path + '.json','r') as nmj:
        model_json = json.load(nmj)
    cats = model_json['categories']
    cats.sort(key=lambda x: x['new_id'])
    # One more class than there are categories.
    num_cat = len(cats) + 1
    model_type = model_json['model_type']
    model = Model(num_cat, model_type)
    labels = [x['name'] for x in cats]
    model.load_state_dict(
        torch.load(model_path + '.pth', map_location = torch.device(device))
    )
    model.eval()

    def score_image(img):
        """Run the model on one image array; return its first output dict
        with values converted to lists and 'names' added, or None."""
        # Reverse the last (channel) axis before tensor conversion.
        image = img[:, :, ::-1].copy()
        o = T.ToTensor()(image)
        # Add a leading batch axis.
        img = o[None, :, :, :]
        with torch.no_grad():
            ou = model(img)
        ofscore = None
        if len(ou) > 0:
            ofscore = ou[0]
            for k in ofscore:
                ofscore[k] = ofscore[k].numpy().tolist()
            ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
        return ofscore

    if isinstance(vid_in_list, str):
        vid_in_list = [vid_in_list]
    for idx_vid, vid_in in enumerate(vid_in_list):
        vid_path = os.path.abspath(vid_in)
        img_path = _thumbnail_path(vid_path)
        # splitext only removes the final extension; rsplit('.')[0] cut the
        # path at the first dot found anywhere in it.
        scores_json = os.path.splitext(vid_path)[0] + '.json'
        print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path)
        if os.path.exists(scores_json):
            print(f"JSON {scores_json} already exists")
            # Skip just this video; exit() here terminated the whole
            # process and with it the rest of the batch.
            continue
        vid_dir = os.path.dirname(vid_path)
        # Argument list instead of a shell string, so directory names with
        # spaces or shell metacharacters are passed through intact.
        subprocess.run(['sudo', 'chmod', '777', vid_dir], check=False)
        results = list()
        do_video = True
        if os.path.exists(img_path):
            print(f'Found thumbnail  and scoring {img_path}')
            img = cv2.imread(img_path)
            # imread yields None for an unreadable image; treat that like
            # a missing thumbnail instead of crashing in score_image.
            outsc = score_image(img) if img is not None else None
            if outsc is not None:
                if len(outsc['boxes']) > 0:
                    print('Found bird in picture, skipping video scoring')
                    do_video = False
                outsc['frame_number'] = 'thumbnail'
                results.append(outsc)
        if do_video:
            cap = cv2.VideoCapture(vid_path)
            try:
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                for frame_num in range(0, total_frames, 15):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                    ok, img = cap.read()
                    if not ok or img is None:
                        # Stop at the first unreadable frame and keep what
                        # has been scored so far.
                        break
                    outsc = score_image(img)
                    if outsc is not None:
                        outsc['frame_number'] = frame_num
                        results.append(outsc)
            finally:
                cap.release()
        with open(scores_json,'w') as jj:
            json.dump(results, jj, indent=4)
 if __name__ == '__main__':
    score_video(sys.argv[1])
 # %%
 # vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/score_video.py
+++ b/score_video.py
@@ -16,19 +16,19 @@ import random
 from model import Model
 import socket
-
+from torchvision.utils import draw_bounding_boxes
-
+import torch as t
-
+import matplotlib.pyplot as plt
-#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4'
+import matplotlib
 step_frame = 15
 def score_video(vid_in_list):
    no_cuda =  socket.gethostname() == 'tree'
    device='cpu'
-    model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json
+    model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
-    newest_model = os.path.join(model_rt_path, max(os.listdir(model_rt_path)).replace('.pth',''))
+#    newest_model = os.path.join(model_rt_path, min(os.listdir(model_rt_path)).replace('.pth',''))
-    with open(newest_model + '.json','r') as nmj:
+    with open(model_path + '.json','r') as nmj:
        model_json = json.load(nmj)
    cats = model_json['categories']
@@ -38,7 +38,7 @@ def score_video(vid_in_list):
    model = Model(num_cat, model_type)
    labels = [x['name'] for x in cats]
    model.load_state_dict(
-        torch.load(newest_model + '.pth', map_location = torch.device(device))
+        torch.load(model_path + '.pth', map_location = torch.device(device))
    )
    model.eval()
@@ -47,6 +47,11 @@ def score_video(vid_in_list):
    for idx_vid, vid_in in enumerate(vid_in_list):
        vid_path = os.path.abspath(vid_in)
        img_path = vid_path.rstrip('.mp4')+'.jpg'
        if not os.path.exists(img_path):
            img_path_sp = img_path.split('_')
            img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg'
            img_path = '_'.join(img_path_sp)
        scores_json = vid_path.rsplit('.')[0]+'.json'
        print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path)
        if os.path.exists(scores_json):
@@ -56,23 +61,7 @@ def score_video(vid_in_list):
        os.system(f'sudo chmod 777 {vid_dir}')
-
+        def score_image(img):
        cap = cv2.VideoCapture(vid_path)
        from torchvision.utils import draw_bounding_boxes
        import torch as t
        import matplotlib.pyplot as plt
        import matplotlib
        cap = cv2.VideoCapture(vid_path)
        frame_num = 0
        results = list()
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for frame_num in range(0, total_frames, 30):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            img = cap.read()[1]
            image = img[:, :, ::-1].copy()
            o = T.ToTensor()(image)
            img = o[None, :, :, :]
@@ -80,6 +69,7 @@ def score_video(vid_in_list):
            with torch.no_grad():
                ou = model(img)
            ofscore = None
            if len(ou) > 0:
                ofscore = ou[0]
@@ -87,9 +77,49 @@ def score_video(vid_in_list):
                    ofscore[k] = ofscore[k].numpy().tolist() 
                ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
                ofscore['frame_number'] = frame_num
-                results.append(ofscore)
+
            return ofscore
        results = list()
        do_video = True
        if os.path.exists(img_path):
            print(f'Found thumbnail  and scoring {img_path}')
            img = cv2.imread(img_path)
            outsc = score_image(img)
            if outsc is not None:
                if len(outsc['boxes']) > 0:
                    print('Found bird in picture, (still doing video scoring)')
                outsc['frame_number'] = 'thumbnail'
                results.append(outsc)
        do_video = True
        if do_video:
            cap = cv2.VideoCapture(vid_path)
            frame_num = 0
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            for frame_num in range(0, total_frames, step_frame):
                img = cap.read()[1]
                if img is None:
                    break
                outsc = score_image(img)
                if outsc is not None:
                    outsc['frame_number'] = frame_num
                    results.append(outsc)
                for i in range(step_frame):
                    img = cap.read()[1];
                    if img is None:
                        break
        with open(scores_json,'w') as jj:
            json.dump(results, jj, indent=4)
--- a/score_video_birds.py
+++ b/score_video_birds.py
@@ -0,0 +1,170 @@
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 import json
 import cv2
 import random
 from model import Model
 import socket
 from torchvision.utils import draw_bounding_boxes
 import torch as t
 import matplotlib.pyplot as plt
 import matplotlib
 # Module-level setup: load the 'birds_only' model description and weights
 # and put the model in eval mode.
 step_frame = 15
 # NOTE(review): no_cuda is computed here but not read anywhere in the
 # visible part of this script.
 no_cuda =  socket.gethostname() == 'tree'
 device='cpu'
 model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/birds_only'#0210701_202822.json
 with open(model_path + '.json','r') as nmj:
    model_json = json.load(nmj)
 # Categories ordered by their 'new_id' field.
 cats = model_json['categories']
 cats.sort(key=lambda x: x['new_id'])
 num_cat = len(cats) + 1
 model_type = model_json['model_type']
 # NOTE(review): this hard-coded value overrides the count computed from the
 # JSON two lines above; presumably it matches the saved weights -- confirm.
 num_cat = 319
 model = Model(num_cat, model_type)
 # Category names in the same order as `cats`.
 labels = [x['name'] for x in cats]
 # %%
 model.load_state_dict(
    torch.load(model_path + '.pth', map_location = torch.device(device))
 )
 model.eval()
 idx_vid = 0
 #vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4'
 vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4'
 vid_path = os.path.abspath(vid_in)
 # Derive sibling paths with splitext, which removes exactly the final
 # extension.  rstrip('.mp4') strips any trailing run of the characters
 # '.', 'm', 'p', '4', and rsplit('.')[0] cuts at the first dot anywhere in
 # the path (as in the dotted sample name above).
 vid_stem = os.path.splitext(vid_path)[0]
 img_path = vid_stem + '.jpg'
 if not os.path.exists(img_path):
    # Fall back to the thumbnail whose trailing '_<digits>' number is one
    # higher than the video's, when the stem has such a numeric suffix.
    stem_head, stem_sep, stem_tail = vid_stem.rpartition('_')
    if stem_sep and stem_tail.isdigit():
        img_path = stem_head + stem_sep + str(int(stem_tail) + 1) + '.jpg'
 scores_json = vid_stem + '.json'
 if os.path.exists(scores_json):
    print(f"JSON {scores_json} already exists")
    exit()
 vid_dir = os.path.dirname(vid_path)
 os.system(f'sudo chmod 777 {vid_dir}')
 def score_image(img):
    """Score one image array with the module-level `model`.

    The last axis of `img` is reversed, the copy is converted with
    `T.ToTensor()` and given a leading batch axis, and the model is called
    under `torch.no_grad()`.

    Returns the first output dict, with every value converted to a plain
    list and a 'names' entry added (looked up in `labels` at `label - 1`),
    or None when the model returns an empty result.
    """
    flipped = img[:, :, ::-1].copy()
    batch = T.ToTensor()(flipped)[None, :, :, :]
    with torch.no_grad():
        outputs = model(batch)
    if not len(outputs) > 0:
        return None
    detections = outputs[0]
    for key, value in detections.items():
        detections[key] = value.numpy().tolist()
    detections['names'] = [labels[label_id - 1] for label_id in detections['labels']]
    return detections
 results = list()
 do_video = True
 if os.path.exists(img_path):
    print(f'Found thumbnail  and scoring {img_path}')
    img = cv2.imread(img_path)
    outsc = score_image(img)
    if outsc is not None:
        if len(outsc['boxes']) > 0:
            print('Found bird in picture, (still doing video scoring)')
        outsc['frame_number'] = 'thumbnail'
        print(outsc)
        results.append(outsc)
 # %%
 from pprint import pprint
 do_video = True
 if do_video:
    # Score every `step_frame`-th frame of the video, reading sequentially.
    cap = cv2.VideoCapture(vid_path)
    frame_num = 0
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for frame_num in range(0, total_frames, step_frame):
            ok, img = cap.read()
            if not ok or img is None:
                break
            outsc = score_image(img)
            if outsc is not None:
                outsc['frame_number'] = frame_num
                pprint(outsc)
                results.append(outsc)
            # One frame was just decoded, so only step_frame - 1 more have
            # to be skipped to land on the next sampled frame.  Skipping
            # step_frame frames made the real stride step_frame + 1 and
            # let the recorded frame_number drift from the true position.
            # grab() advances the stream without decoding the frame.
            if not all(cap.grab() for _ in range(step_frame - 1)):
                break
    finally:
        cap.release()
 # %%
 with open(scores_json,'w') as jj:
    json.dump(results, jj, indent=4)
 # %%
 # vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/score_video_birds_remote.py
+++ b/score_video_birds_remote.py
@@ -0,0 +1,202 @@
 # %%
 from torch.autograd.grad_mode import F
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('K:\Designs\ML\inaturalist_models')
 import json
 import cv2
 import random
 from model import Model
 import socket
 from torchvision.utils import draw_bounding_boxes
 import torch as t
 import matplotlib.pyplot as plt
 import matplotlib
 step_frame = 15
 no_cuda = socket.gethostname() == 'tree'
 device = 'cpu'
 model_path = r'K:\Designs\ML\inaturalist_models\models\birds_only'  #0210701_202822.json
 with open(model_path + '.json', 'r') as nmj:
    model_json = json.load(nmj)
 cats = model_json['categories']
 cats.sort(key=lambda x: x['new_id'])
 num_cat = len(cats) + 1
 model_type = model_json['model_type']
 num_cat = 319
 model = Model(num_cat, model_type)
 labels = [x['name'] for x in cats]
 # %%
 model.load_state_dict(
    torch.load(model_path + '.pth', map_location=torch.device(device))
 )
 model.eval()
 model.cuda()
 idx_vid = 0
 #vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4'
 #vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4'
 # vid_in = r'L:\railing\2021\09\21\Railing_00_20210921095155.mp4'
 vid_in = r'L:\pond\2021\09\20\Pond_00_20210920134828.mp4'
 vid_path = os.path.abspath(vid_in)
 img_path = vid_path.rstrip('.mp4') + '.jpg'
 if not os.path.exists(img_path):
    img_path_sp = img_path.split('_')
    img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg')) + 1) + '.jpg'
    img_path = '_'.join(img_path_sp)
 scores_json = vid_path.rsplit('.')[0] + '.json'
 # %%
 ttran = T.ToTensor()
 results = list()
 do_video = True
 images = list()
 if os.path.exists(img_path):
    print(f'Found thumbnail  and scoring {img_path}')
    tnail_img = cv2.imread(img_path)[:, :, ::-1].copy()
    images.append(ttran(tnail_img))
 step_frame = 5
 if do_video:
    # Read frames, convert each to a tensor and score them on the GPU in
    # batches of 16.  Frames left over when the video ends (fewer than 16)
    # stay in `images` and are not scored by this cell.
    cap = cv2.VideoCapture(vid_path)
    frame_num = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # NOTE(review): frames are read consecutively, so `step_frame` only
    # limits how many frames are read; it does not skip frames -- confirm
    # whether that is intended.
    for frame_num in range(0, total_frames, step_frame):
        ok, frame = cap.read()
        # Check the read result before indexing: the old None test ran
        # after the frame had already been subscripted, so a failed read
        # raised TypeError instead of ending the loop.
        if not ok or frame is None:
            break
        # Reverse the last (channel) axis before tensor conversion.
        img = frame[:, :, ::-1].copy()
        images.append(ttran(img))
        if len(images) == 16:
            imgtensor = t.stack(images).cuda()
            with torch.no_grad():
                print('Starting to score')
                output = model(imgtensor)
                print('Finished score')
            detached = list()
            for f in output:
                newd = dict()
                for k in ['boxes', 'labels', 'scores']:
                    # Plain lists, so that `results` can be written with
                    # json.dump later in this script (tensors cannot).
                    newd[k] = f[k].detach().cpu().tolist()
                detached.append(newd)
                print(newd)
            results.append(detached)
            images.clear()
    cap.release()
 # %%
 imgs = np.moveaxis(np.stack(images), 3, 1)
 imgtensor = torch.cuda.FloatTensor(imgs)
 with torch.no_grad():
    output = model(imgtensor)
 # %%
 image = img[:, :, ::-1].copy()
 o = T.ToTensor()(image)
 img = o[None, :, :, :]
 # %%
 # %%
 cap = cv2.VideoCapture(vid_path)
 frame_num = 0
 step_frame = 5
 total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 images = list()
 for frame_num in range(0, total_frames, step_frame):
    img = cap.read()[1].copy()
    images.append(img)
    if len(images) == 16:
        break
 # %%
 imgs = np.moveaxis(np.stack(images), 3, 1)
 bt = torch.FloatTensor(imgs)
 # %%
 from pprint import pprint
 step_frame = 5
 images = list()
 if do_video:
    cap = cv2.VideoCapture(vid_path)
    frame_num = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    for frame_num in range(0, total_frames, step_frame):
        img = cap.read()[1]
        if img is None:
            break
        images.append(img)
 # %%
 # %%
 # %%
 with open(scores_json, 'w') as jj:
    json.dump(results, jj, indent=4)
 # def score_image(img):
 #     image = img[:, :, ::-1].copy()
 #     o = T.ToTensor()(image)
 #     img = o[None, :, :, :]
 #     with torch.no_grad():
 #         ou = model(img)
 #     ofscore = None
 #     if len(ou) > 0:
 #         ofscore = ou[0]
 #         for k in ofscore:
 #             ofscore[k] = ofscore[k].numpy().tolist()
 #         ofscore['names'] = [labels[x - 1] for x in ofscore['labels']]
 #     return ofscore
 # %%
 # vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/score_video_trimmed.py
+++ b/score_video_trimmed.py
@@ -0,0 +1,143 @@
 import torchvision
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from collections import defaultdict as ddict
 import json
 import torch
 from torchvision import datasets, transforms as T
 import numpy as np
 import os
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 import json
 import cv2
 import random
 from model import Model
 import socket
 from torchvision.utils import draw_bounding_boxes
 import torch as t
 import matplotlib.pyplot as plt
 import matplotlib
 #vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
 vid_path = '/srv/ftp/hummingbird/2021/07/25/Hummingbird_01_20210725065610_trimmed.mp4'
 no_cuda =  socket.gethostname() == 'tree'
 device='cpu'
 model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
 with open(model_path + '.json','r') as nmj:
    model_json = json.load(nmj)
 cats = model_json['categories']
 cats.sort(key=lambda x: x['new_id'])
 num_cat = len(cats) + 1
 model_type = model_json['model_type']
 model = Model(num_cat, model_type)
 labels = [x['name'] for x in cats]
 model.load_state_dict(
    torch.load(model_path + '.pth', map_location = torch.device(device))
 )
 model.eval()
 vid_dir = os.path.dirname(vid_path)
 os.system(f'sudo chmod 777 {vid_dir}')
 def score_image(img):
    """Score one image array, or a list of them, with the module-level `model`.

    Each array has its last axis reversed and is converted with
    `T.ToTensor()`.  A list input is passed to the model as a list of
    tensors; a single array is passed as one tensor with a leading batch
    axis.

    Returns only the first output dict (also for list input), with every
    value converted to a plain list and a 'names' entry added (looked up in
    `labels` at `label - 1`), or None when the model returns nothing.
    """
    to_tensor = T.ToTensor()
    if isinstance(img, list):
        model_input = [to_tensor(frame[:, :, ::-1].copy()) for frame in img]
    else:
        model_input = to_tensor(img[:, :, ::-1].copy())[None, :, :, :]
    with torch.no_grad():
        outputs = model(model_input)
    if not len(outputs) > 0:
        return None
    first = outputs[0]
    for key, value in first.items():
        first[key] = value.numpy().tolist()
    first['names'] = [labels[label_id - 1] for label_id in first['labels']]
    return first
 cap = cv2.VideoCapture(vid_path)
 frame_num = 0
 total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 # %%
 imgs = list()
 results = list()
 for frame_num in range(0, total_frames, 15):
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    img = cap.read()[1]
    imgs.append(img)
 #    outsc = score_image(img)
 #    if outsc is not None:
 #        outsc['frame_number'] = frame_num
 #        results.append(outsc)
 # %%
 # import time
 # import cv2
 # video = cv2.VideoCapture(vid_path)
 # total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 # # %%
 # st = time.time()
 # while True:
 #     ret, read = video.read()
 #     if not ret:
 #         break
 # et = time.time()
 # print(et-st)
 # st = time.time()
 # frs = list()
 # for i in range(0,total_frames, 150):
 #     video.set(cv2.CAP_PROP_POS_FRAMES, i)
 #     ret, frame = video.read()
 #     frs.append(frame)
 # et = time.time()
 # print(et-st)
--- a/2021-08-11-09-24-52).py
+++ b/2021-08-11-09-24-52).py
@@ -0,0 +1,126 @@
 # %%
 from engine import train_one_epoch, evaluate
 from model import Model
 from data import iNaturalistDataset
 import torch
 import os
 import datetime as dt
 import json
 import utils
 import pandas as pd
 import sys
 if not os.path.exists("models/"):
    os.mkdir("models")
 if torch.cuda.is_available():
    device = torch.device("cuda")
 else:
    device = torch.device("cpu")
 default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
 default_model_path = default_model_root + ".pth"
 default_model_info = default_model_root + ".json"
 default_state_path = default_model_root + ".oth"
 #species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
 csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
 df = pd.read_csv(csv_path)
 species_list = set(list(df[df['count']>1000]['species']))
 # %%
 #model_type = "fasterrcnn_mobilenet_v3_large_fpn"
 #batch_size = 16
 model_type = 'fasterrcnn_resnet50_fpn'
 batch_size = 8
 def run(model_name = None, epoch_start = 0):
    """Train the detector, or resume training, saving state after each epoch.

    Args:
        model_name: Path of an existing model to resume, given with or
            without a '.pth' / '.json' extension.  None starts a new model
            under the timestamped default paths and writes its JSON
            description first.
        epoch_start: Number of epochs to subtract from the total of 10;
            `10 - epoch_start` epochs are run.

    After every epoch the model weights are saved to the '.pth' path and
    the optimizer state to the '.oth' path, then the model is evaluated on
    the validation loader.
    """
    val_dataset = iNaturalistDataset(
        validation=True,
        species=species_list,
    )
    train_dataset = iNaturalistDataset(
        train=True,
        species=species_list,
    )
    if model_name is None:
        fresh_start = True
        model_info = default_model_info
        model_path = default_model_path
        state_path = default_state_path
    else:
        fresh_start = False
        # Remove a trailing '.pth' or '.json' exactly.  rstrip('.pth')
        # strips any trailing run of the characters '.', 'p', 't', 'h',
        # which can eat into the model name itself.
        root, ext = os.path.splitext(model_name)
        model_root = root if ext in ('.pth', '.json') else model_name
        model_info = model_root + '.json'
        model_path = model_root + '.pth'
        state_path = model_root + '.oth'
    # A leftover breakpoint() call used to sit here; it dropped every run
    # into the debugger and has been removed.
    if fresh_start:
        with open(model_info, "w") as js_p:
            json.dump(
                {"categories": train_dataset.categories, "model_type": model_type},
                js_p,
                default=str,
                indent=4,
            )
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    num_classes = train_dataset.num_classes
    model = Model(num_classes, model_type)
    model.to(device)
    if not fresh_start:
        model.load_state_dict(
            torch.load(model_path, map_location = torch.device(device))
            )
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    # Restore the optimizer state when a saved one exists for this model.
    if os.path.exists(state_path):
        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))
    # NOTE(review): the scheduler is always created fresh and the epoch
    # counter below restarts at 0, so a resumed run does not continue the
    # earlier learning-rate schedule -- confirm whether that is intended.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    num_epochs = 10 - epoch_start
    for epoch in range(num_epochs):
        train_one_epoch(
            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
        lr_scheduler.step()
        torch.save(model.state_dict(), model_path)
        torch.save(optimizer.state_dict(), state_path)
        evaluate(model, val_data_loader, device=device)
 if __name__ == "__main__":
    # Two arguments (model name, epoch start) resume a run; anything else
    # starts a fresh one.
    if len(sys.argv) == 3:
        model_name = sys.argv[1]
        epoch_start = int(sys.argv[2])
        run(model_name  = model_name, epoch_start = epoch_start)
    else:
        run()
 #    run()
--- a/2021-09-27-14-27-04).py
+++ b/2021-09-27-14-27-04).py
@@ -0,0 +1,139 @@
 # %%
 from engine import train_one_epoch, evaluate
 from model import Model
 from data import iNaturalistDataset
 import torch
 import os
 import datetime as dt
 import json
 import utils
 import pandas as pd
 import sys
 # Checkpoints and their metadata are written under models/.
 if not os.path.exists("models/"):
    os.mkdir("models")
 # Use the GPU when torch can see one, otherwise fall back to the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # A fresh run is named after its launch timestamp; all of its files share
 # that root and differ only in extension.
 default_model_root = f"models/{dt.datetime.now():%Y%m%d_%H%M%S}"
 default_model_path = f"{default_model_root}.pth"
 default_model_info = f"{default_model_root}.json"
 default_state_path = f"{default_model_root}.oth"
 default_sched_path = f"{default_model_root}.sth"
 # Earlier hand-picked selection:
 # species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
 csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
 df = pd.read_csv(csv_path)
 # Keep every species whose 'count' column is above 1000.
 species_list = set(df.loc[df['count'] > 1000, 'species'])
 # Alternative configuration that was used before:
 # model_type = "fasterrcnn_mobilenet_v3_large_fpn" with batch_size = 16
 model_type = 'fasterrcnn_resnet50_fpn'
 batch_size = 8
 num_epochs = 10
 def _strip_model_suffix(name):
    """Return ``name`` without one trailing checkpoint extension.

    Accepts the '.pth', '.json', '.oth' and '.sth' extensions written by
    ``run`` as well as a bare trailing '.', and removes exactly that suffix.
    ``str.rstrip`` is not used because it strips a *set of characters*, which
    also eats legitimate trailing letters of the name itself
    (e.g. 'moth.pth' would become 'mo').
    """
    for suffix in ('.pth', '.json', '.oth', '.sth', '.'):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name
 def run(model_name = None, epoch_start = 0):
    """Train the detection model, either from scratch or resuming saved files.

    Args:
        model_name: ``None`` starts a fresh run that writes to the
            timestamped ``default_*`` paths. Otherwise the path of an
            earlier run, with or without a checkpoint extension; weights are
            loaded from ``<root>.pth`` and, when present, optimizer state
            from ``<root>.oth`` and scheduler state from ``<root>.sth``.
        epoch_start: first epoch index; training covers
            ``range(epoch_start, num_epochs)``.

    After every epoch the model, optimizer and scheduler state dicts are
    saved and the model is evaluated on the validation loader.
    """
    val_dataset = iNaturalistDataset(
        validation=True,
        species=species_list,
    )
    train_dataset = iNaturalistDataset(
        train=True,
        species=species_list,
    )
    print(len(val_dataset.categories))
    print(len(train_dataset.categories))
    if model_name is None:
        fresh_start = True
        model_info = default_model_info
        model_path = default_model_path
        state_path = default_state_path
        sched_path = default_sched_path
    else:
        fresh_start = False
        # All four files of a run share one root and differ only in extension.
        model_root = _strip_model_suffix(model_name)
        model_info = model_root + '.json'
        model_path = model_root + '.pth'
        state_path = model_root + '.oth'
        sched_path = model_root + '.sth'
    if fresh_start:
        # Record the category list and architecture next to the weights.
        with open(model_info, "w") as js_p:
            json.dump(
                {"categories": train_dataset.categories, "model_type": model_type},
                js_p,
                default=str,
                indent=4,
            )
    else:
        print('Continuing run')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    # One more class than there are categories; presumably the extra slot is
    # the detector's background class -- TODO confirm against Model.
    num_classes = len(train_dataset.categories) + 1
    model = Model(num_classes, model_type)
    model.to(device)
    if not fresh_start:
        print('Loading state dict')
        model.load_state_dict(
            torch.load(model_path, map_location = torch.device(device))
            )
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    if os.path.exists(state_path):
        print('Loading optimizer')
        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    if os.path.exists(sched_path):
        print('Loading scheduler')
        lr_scheduler.load_state_dict(torch.load(sched_path, map_location = torch.device(device)))
    for epoch in range(epoch_start, num_epochs):
        print('Epoch '+str(epoch))
        train_one_epoch(
            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
        lr_scheduler.step()
        # Checkpoint before evaluating so a crash in evaluation loses nothing.
        torch.save(model.state_dict(), model_path)
        torch.save(optimizer.state_dict(), state_path)
        torch.save(lr_scheduler.state_dict(), sched_path)
        evaluate(model, val_data_loader, device=device)
 if __name__ == "__main__":
    # Exactly two CLI arguments ("<model_name> <epoch_start>") resume a saved
    # run; any other argument count starts a fresh one.
    resume_requested = len(sys.argv) == 3
    if not resume_requested:
        run()
    else:
        model_name = sys.argv[1]
        epoch_start = int(sys.argv[2])
        run(model_name=model_name, epoch_start=epoch_start)
--- a/train.py
+++ b/train.py
@@ -7,7 +7,8 @@ import os
 import datetime as dt
 import json
 import utils
-
+import pandas as pd
 import sys
 if not os.path.exists("models/"):
    os.mkdir("models")
@@ -16,16 +17,27 @@ if torch.cuda.is_available():
 else:
    device = torch.device("cpu")
-model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
+default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
-model_path = model_root + ".pth"
+default_model_path = default_model_root + ".pth"
-model_info = model_root + ".json"
+default_model_info = default_model_root + ".json"
 default_state_path = default_model_root + ".oth"
-species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
+#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
-model_type = "fasterrcnn_mobilenet_v3_large_fpn"
+
 csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
 df = pd.read_csv(csv_path)
 species_list = set(list(df[df['count']>1000]['species']))
 #model_type = "fasterrcnn_mobilenet_v3_large_fpn"
 #batch_size = 16
 model_type = 'fasterrcnn_resnet50_fpn'
 batch_size = 8
-def run():
+
 def run(model_name = None, epoch_start = 0):
    val_dataset = iNaturalistDataset(
        validation=True,
        species=species_list,
@@ -35,27 +47,41 @@ def run():
        species=species_list,
    )
-    with open(model_info, "w") as js_p:
+
-        json.dump(
+    if model_name is None:
-            {"categories": train_dataset.categories, "model_type": model_type},
+        fresh_start = True
-            js_p,
+        model_info = default_model_info
-            default=str,
+        model_path = default_model_path
-            indent=4,
+        state_path = default_state_path
-        )
+    else:
        fresh_start = False
        model_info = model_name.rstrip('.pth').rstrip('.json')+'.json'
        model_path = model_info.rstrip('.json')+'.pth'
        state_path = model_info.rstrip('.json')+'.oth'
    if fresh_start:
        with open(model_info, "w") as js_p:
            json.dump(
                {"categories": train_dataset.categories, "model_type": model_type},
                js_p,
                default=str,
                indent=4,
            )
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
-        batch_size=16,
+        batch_size=batch_size,
        shuffle=True,
-        num_workers=4,
+        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
-        batch_size=16,
+        batch_size=batch_size,
        shuffle=True,
-        num_workers=4,
+        num_workers=10,
        collate_fn=utils.collate_fn,
    )
@@ -63,20 +89,38 @@ def run():
    model = Model(num_classes, model_type)
    model.to(device)
    if not fresh_start:
        model.load_state_dict(
            torch.load(model_path, map_location = torch.device(device))
            )
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    if os.path.exists(state_path):
        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    num_epochs = 10
-    for epoch in range(num_epochs):
+     
    for epoch in range(epoch_start, num_epochs):
        train_one_epoch(
-            model, optimizer, train_data_loader, device, epoch, print_freq=10
+            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
        )
        lr_scheduler.step()
        torch.save(model.state_dict(), model_path)
        torch.save(optimizer.state_dict(), state_path)
        evaluate(model, val_data_loader, device=device)
 if __name__ == "__main__":
-    run()
+    if len(sys.argv) == 3:
        model_name = sys.argv[1]
        epoch_start = int(sys.argv[2])
        run(model_name  = model_name, epoch_start = epoch_start)
    else:
        run()
 #    run()
--- a/trim_in_directory.py
+++ b/trim_in_directory.py
@@ -0,0 +1,42 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 from trim_video import trim_video
 # Root folder to scan, taken from the first command-line argument
 # (for example '/srv/ftp/hummingbird/2021').
 rtpath = sys.argv[1]
 fnames = set()
 # Every .json file below rtpath whose name does not contain 'trimmed'.
 have_json = {
    os.path.join(folder, leaf)
    for folder, _, leaves in os.walk(rtpath)
    for leaf in leaves
    if leaf.endswith('.json') and 'trimmed' not in leaf
 }
 files_to_score = have_json
 def try_catch_chunk(jsons):
    """Run ``trim_video`` on every path in ``jsons`` without ever raising.

    Args:
        jsons: a single JSON path or a list of JSON paths.

    Each path is handled independently: when ``trim_video`` fails for one
    path, the error is printed together with that path and the remaining
    paths are still processed, so one bad file does not discard the rest of
    its chunk.
    """
    if not isinstance(jsons, list):
        jsons = [jsons]
    for js_path in jsons:
        try:
            trim_video(js_path)
        except Exception as e:  # best effort: keep the pool worker alive
            print(f'{js_path}: {e}')
 # Work is handed to the pool in batches of at most n paths.
 lst = list(files_to_score)
 n = 25
 batch_starts = range(0, len(lst), n)
 chunks = [lst[start:start + n] for start in batch_starts]
 # %%
 if __name__ == '__main__':
    # Eight worker processes; each call receives one batch of paths.
    with Pool(8) as p:
        output = p.map(try_catch_chunk, chunks)
--- a/2021-08-11-09-24-52).py
+++ b/2021-08-11-09-24-52).py
@@ -0,0 +1,113 @@
 from ffprobe import FFProbe
 import ffmpeg
 import shutil
 import json
 import math
 import os
 def execute_trim_video(js_path, start_frame, end_frame, empty_video = False):
    """Trim the movie that belongs to ``js_path`` and delete the originals.

    The movie is expected next to ``js_path`` with the same root name and an
    ``.mp4`` extension. The requested frame range is widened by 5 seconds on
    each side, clamped to ``[0, duration * framerate]``, and cut out with
    ffmpeg into ``<root>_trimmed.mp4``; the matching JSON entries are written
    to ``<root>_trimmed.json``.

    Args:
        js_path: path of the JSON file holding the per-frame entries.
        start_frame: first frame number of interest.
        end_frame: last frame number of interest.
        empty_video: when True, ffmpeg is not run and nothing is written; the
            original movie and JSON are simply removed.

    The originals are removed only when ``empty_video`` is True or ffmpeg
    exited with status 0.
    """
    import subprocess  # function-scope import: the module does not import it
    with open(js_path,'r') as jj:
        data = json.load(jj)
    movie_dir = os.path.dirname(js_path)
    # Archive folder: 'originals' inserted three components from the end of
    # the movie directory. It is still created, although the moves into it
    # at the bottom of this function are commented out.
    archive_dir_p = movie_dir.split('/')
    archive_dir_p.insert(-3,'originals')
    archive_dir =  '/'.join(archive_dir_p) + '/'
    if not os.path.exists(archive_dir):
        os.makedirs(archive_dir)
    rt_name = os.path.splitext(js_path)[0]
    movie_path = rt_name+'.mp4'
    movie_new_path = rt_name+'_trimmed.mp4'
    js_new_path = rt_name + '_trimmed.json'
    frame_pad = 5 #seconds
    vid_info = FFProbe(movie_path)
    vid_stream = vid_info.video[0]
    framerate = vid_stream.framerate
    duration = float(vid_stream.duration)
    max_frames = duration * framerate
    # Pad the range on both sides and keep it inside the movie.
    start_frame = max(0, start_frame - frame_pad * framerate)
    end_frame = min( max_frames, end_frame + frame_pad * framerate)
    start_time = start_frame / framerate
    end_time = end_frame / framerate
    to_keep = list()
    for x in data:
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            # Thumbnail entries are always carried over unchanged.
            to_keep.append(x)
        elif json_frame_num >=start_frame and json_frame_num <= end_frame:
            # Renumber relative to the new start, remembering the original.
            x['frame_number_original'] = x['frame_number']
            x['frame_number'] -= start_frame
            to_keep.append(x)
    trim_duration = end_time - start_time
    if os.path.exists(movie_new_path):
        os.remove(movie_new_path)
    if not empty_video:
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to ffmpeg verbatim.
        cmd = [
            'ffmpeg', '-ss', str(start_time), '-i', movie_path,
            '-ss', '0', '-t', str(trim_duration),
            '-c', 'copy', '-map', '0', movie_new_path,
        ]
        try:
            return_code = subprocess.run(cmd).returncode
        except OSError:
            # ffmpeg could not be started; treat it like a failed run so the
            # originals are kept.
            return_code = -1
        with open(js_new_path,'w') as jnp:
            json.dump(to_keep, jnp, indent=4)
    if empty_video or return_code == 0:
        os.remove(movie_path)
        os.remove(js_path)
        #shutil.move(movie_path, archive_dir)
        #shutil.move(js_path ,archive_dir)
 def trim_video(jspath):
    """Work out which frame range of a recording is worth keeping and trim it.

    Reads the entries in ``jspath``. Entries without boxes are ignored. Among
    the rest, the smallest and largest frame number whose best score exceeds
    0.25 bound the range passed to ``execute_trim_video``; when no entry
    qualifies the recording is reported as empty. Nothing is done at all if a
    'thumbnail' entry with boxes is present.
    """
    with open(jspath, 'r') as handle:
        records = json.load(handle)
    first_hit, last_hit = math.inf, -math.inf
    saw_thumbnail = False
    any_hit = False
    for record in records:
        if len(record['boxes']) == 0:
            continue
        frame_id = record['frame_number']
        if isinstance(frame_id, str) and frame_id == 'thumbnail':
            saw_thumbnail = True
            continue
        if max(record['scores']) > 0.25:
            first_hit = min(first_hit, frame_id)
            last_hit = max(last_hit, frame_id)
            any_hit = True
    if saw_thumbnail:
        return
    execute_trim_video(jspath, first_hit, last_hit, empty_video = not any_hit)
--- a/trim_video.py
+++ b/trim_video.py
@@ -0,0 +1,106 @@
 from ffprobe import FFProbe
 import ffmpeg
 import shutil
 import json
 import math
 import os
 def execute_trim_video(js_path, start_frame, end_frame, empty_video = False):
    """Trim the movie that belongs to ``js_path`` and archive the originals.

    The movie is expected next to ``js_path`` with the same root name and an
    ``.mp4`` extension. The requested frame range is widened by 5 seconds on
    each side, clamped to ``[0, duration * framerate]``, and cut out with
    ffmpeg into ``<root>_trimmed.mp4``; the matching JSON entries are written
    to ``<root>_trimmed.json``.

    Args:
        js_path: path of the JSON file holding the per-frame entries.
        start_frame: first frame number of interest.
        end_frame: last frame number of interest.
        empty_video: when True, ffmpeg is not run and nothing is written; the
            original movie and JSON are only archived.

    The originals are moved into an ``original/`` folder beside them only
    when ``empty_video`` is True or ffmpeg exited with status 0.
    """
    import subprocess  # function-scope import: the module does not import it
    with open(js_path,'r') as jj:
        data = json.load(jj)
    movie_dir = os.path.dirname(js_path)
    archive_dir = os.path.join(movie_dir, 'original') + '/'
    if not os.path.exists(archive_dir):
        os.mkdir(archive_dir)
    rt_name = os.path.splitext(js_path)[0]
    movie_path = rt_name+'.mp4'
    movie_new_path = rt_name+'_trimmed.mp4'
    js_new_path = rt_name + '_trimmed.json'
    frame_pad = 5 #seconds
    vid_info = FFProbe(movie_path)
    vid_stream = vid_info.video[0]
    framerate = vid_stream.framerate
    duration = float(vid_stream.duration)
    max_frames = duration * framerate
    # Pad the range on both sides and keep it inside the movie.
    start_frame = max(0, start_frame - frame_pad * framerate)
    end_frame = min( max_frames, end_frame + frame_pad * framerate)
    start_time = start_frame / framerate
    end_time = end_frame / framerate
    to_keep = list()
    for x in data:
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            # Thumbnail entries are always carried over unchanged.
            to_keep.append(x)
        elif json_frame_num >=start_frame and json_frame_num <= end_frame:
            # Renumber relative to the new start, remembering the original.
            x['frame_number_original'] = x['frame_number']
            x['frame_number'] -= start_frame
            to_keep.append(x)
    trim_duration = end_time - start_time
    if os.path.exists(movie_new_path):
        os.remove(movie_new_path)
    if not empty_video:
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to ffmpeg verbatim.
        cmd = [
            'ffmpeg', '-ss', str(start_time), '-i', movie_path,
            '-ss', '0', '-t', str(trim_duration),
            '-c', 'copy', '-map', '0', movie_new_path,
        ]
        try:
            return_code = subprocess.run(cmd).returncode
        except OSError:
            # ffmpeg could not be started; treat it like a failed run so the
            # originals stay where they are.
            return_code = -1
        with open(js_new_path,'w') as jnp:
            json.dump(to_keep, jnp, indent=4)
    if empty_video or return_code == 0:
        shutil.move(movie_path, archive_dir)
        shutil.move(js_path ,archive_dir)
 def trim_video(jspath):
    """Work out which frame range of a recording is worth keeping and trim it.

    Reads the entries in ``jspath``. Entries without boxes are ignored. Among
    the rest, the smallest and largest frame number whose best score exceeds
    0.05 bound the range passed to ``execute_trim_video``; when no entry
    qualifies the recording is reported as empty. Nothing is done at all if a
    'thumbnail' entry with boxes is present.
    """
    with open(jspath, 'r') as handle:
        records = json.load(handle)
    first_hit, last_hit = math.inf, -math.inf
    saw_thumbnail = False
    any_hit = False
    for record in records:
        if len(record['boxes']) == 0:
            continue
        frame_id = record['frame_number']
        if isinstance(frame_id, str) and frame_id == 'thumbnail':
            saw_thumbnail = True
            continue
        if max(record['scores']) > 0.05:
            first_hit = min(first_hit, frame_id)
            last_hit = max(last_hit, frame_id)
            any_hit = True
    if saw_thumbnail:
        return
    execute_trim_video(jspath, first_hit, last_hit, empty_video = not any_hit)
--- a/undo_move_originals.py
+++ b/undo_move_originals.py
@@ -0,0 +1,58 @@
 import os
 import random
 from multiprocessing import Pool
 import sys
 sys.path.append('/home/thebears/Seafile/Designs/ML')
 # Root folder to restore, taken from the first command-line argument
 # (for example '/srv/ftp/hummingbird/2021' or '/srv/ftp/hummingbird/2021/07/').
 rtpath = os.path.abspath(sys.argv[1])
 # Directories whose last three path components are all numeric.
 didir = []
 for di, dnames, fns in os.walk(rtpath):
    tail = di.split('/')[-3:]
    if not all(part.isnumeric() for part in tail):
        continue
    didir.append(di)
 def list_files(path):
    """Yield the name of every entry directly inside ``path`` that is a file.

    Sub-directories are skipped and not descended into.
    """
    yield from (
        entry
        for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry))
    )
 import shutil
 import os
 for cdr in didir:
    # 1) Collect every file that sits below, but not directly in, cdr.
    files_origin = []
    for di, dnames, fns in os.walk(cdr):
        if di != cdr:
            files_origin += [os.path.join(di, f) for f in fns]
    # 2) Move each of them up into cdr itself, keeping the base name.
    for src_file in files_origin:
        os.rename(src_file, os.path.join(cdr, os.path.basename(src_file)))
    # 3) Note every sub-folder that has no file directly inside it; the list
    #    is completed before anything is deleted.
    dirs_purge = [
        os.path.join(di, d)
        for di, dnames, fns in os.walk(cdr)
        for d in dnames
        if next(list_files(os.path.join(di, d)), None) is None
    ]
    # 4) Remove them; a folder may already be gone with its parent.
    for stale in dirs_purge:
        if os.path.exists(stale):
            shutil.rmtree(stale)
--- a/2021-09-27-14-27-04).py
+++ b/2021-09-27-14-27-04).py
@@ -0,0 +1,322 @@
 from collections import defaultdict, deque
 import datetime
 import errno
 import os
 import sys
 import time
 import torch
 import torch.distributed as dist
 class SmoothedValue(object):
    """Remember the most recent values of a series alongside running totals.

    The last ``window_size`` values feed the windowed statistics (``median``,
    ``avg``, ``max``, ``value``); ``count`` and ``total`` cover the whole
    series and feed ``global_avg``.
    """
    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default rendering: windowed median followed by the global average.
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
    def update(self, value, n=1):
        """Append ``value`` to the window and add it ``n`` times to the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n
    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.
        Warning: the window (deque) is left untouched!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor(
            [self.count, self.total], dtype=torch.float64, device="cuda"
        )
        dist.barrier()
        dist.all_reduce(packed)
        summed_count, summed_total = packed.tolist()
        self.count = int(summed_count)
        self.total = summed_total
    @property
    def median(self):
        return torch.tensor(list(self.deque)).median().item()
    @property
    def avg(self):
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()
    @property
    def global_avg(self):
        return self.total / self.count
    @property
    def max(self):
        return max(self.deque)
    @property
    def value(self):
        return self.deque[-1]
    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)
 def all_gather(data):
    """
    Collect one picklable object from every process.
    Args:
        data: any picklable object (it does not have to be a tensor)
    Returns:
        list[data]: one entry per rank; just ``[data]`` when the world size is 1
    """
    n_procs = get_world_size()
    if n_procs != 1:
        gathered = [None for _ in range(n_procs)]
        dist.all_gather_object(gathered, data)
        return gathered
    return [data]
 def reduce_dict(input_dict, average=True):
    """
    Combine the values of ``input_dict`` across all processes.
    Args:
        input_dict (dict): every value takes part in the reduction
        average (bool): divide the summed values by the world size when True,
            keep the plain sum otherwise
    Returns a dict with the same keys holding the reduced values. With fewer
    than two processes ``input_dict`` itself is returned.
    """
    n_procs = get_world_size()
    if n_procs < 2:
        return input_dict
    with torch.no_grad():
        # Sorted keys give every process the same stacking order.
        names = sorted(input_dict)
        values = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= n_procs
        reduced_dict = dict(zip(names, values))
    return reduced_dict
 class MetricLogger(object):
    """Hold named ``SmoothedValue`` meters and print periodic progress lines.

    Meters are reachable as attributes (``logger.loss``) through
    ``__getattr__`` and are joined with ``delimiter`` when the logger is
    converted to a string.
    """
    def __init__(self, delimiter="\t"):
        # A meter is created on first use of a name.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter
    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; any other value must be a
        float or int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)
    def __getattr__(self, attr):
        # Only called when normal lookup fails: try the meters first, then
        # the instance dict, otherwise raise the usual AttributeError.
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError(
            "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
        )
    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append("{}: {}".format(name, str(meter)))
        return self.delimiter.join(loss_str)
    def synchronize_between_processes(self):
        """Synchronize every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()
    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter
    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress.

        Args:
            iterable: sized iterable (``len()`` is required).
            print_freq: a line is printed every ``print_freq`` items and for
                the last item.
            header: optional prefix for every printed line.

        Each line shows the position, an ETA, all meters, the time per
        iteration and the time spent waiting for data, plus the peak CUDA
        memory in MB when CUDA is available. A total-time summary is printed
        once the iterable is exhausted; an empty iterable reports
        0 s / it instead of raising ZeroDivisionError.
        """
        if not header:
            header = ""
        total = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")
        # Pad the running index to the width of the total count.
        space_fmt = ":" + str(len(str(total))) + "d"
        fields = [
            header,
            "[{0" + space_fmt + "}/{1}]",
            "eta: {eta}",
            "{meters}",
            "time: {time}",
            "data: {data}",
        ]
        if use_cuda:
            fields.append("max mem: {memory:.0f}")
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time between finishing the previous item and receiving this one.
            data_time.update(time.time() - end)
            yield obj
            # Includes the caller's work done while the generator was paused.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == total - 1:
                eta_seconds = iter_time.global_avg * (total - i)
                stats = {
                    "eta": str(datetime.timedelta(seconds=int(eta_seconds))),
                    "meters": str(self),
                    "time": str(iter_time),
                    "data": str(data_time),
                }
                if use_cuda:
                    stats["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, total, **stats))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        seconds_per_iter = total_time / total if total else 0.0
        print(
            "{} Total time: {} ({:.4f} s / it)".format(
                header, total_time_str, seconds_per_iter
            )
        )
 def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field."""
    columns = zip(*batch)
    return tuple(columns)
 def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a ``LambdaLR`` that ramps the learning-rate multiplier up to 1.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly and
    stays at 1 from step ``warmup_iters`` onwards.
    """
    def lr_multiplier(step):
        if step < warmup_iters:
            alpha = float(step) / warmup_iters
            return warmup_factor * (1 - alpha) + alpha
        return 1
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_multiplier)
 def mkdir(path):
    """Create ``path`` with all missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except FileExistsError:
        # OSError with errno EEXIST; every other OSError still propagates.
        pass
 def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` with a wrapper that accepts a ``force`` keyword.

    The wrapper's gate is ``is_master or force or True``, which is always
    true, so as written every process keeps printing; ``force`` is merely
    removed from the keyword arguments before delegating to the real print.
    """
    import builtins as __builtin__
    original_print = __builtin__.print
    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        should_print = is_master or force or True
        if should_print:
            original_print(*args, **kwargs)
    __builtin__.print = print
 def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is both available and initialized."""
    return dist.is_available() and dist.is_initialized()
 def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
 def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
 def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0
 def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only on the rank-0 process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
 def init_distributed_mode(args):
    """Fill ``args`` from the environment and start the process group.

    With ``RANK`` and ``WORLD_SIZE`` set, rank, world size and GPU index
    (``LOCAL_RANK``) are read from the environment. With only
    ``SLURM_PROCID`` set, the rank comes from it and the GPU index is the
    rank modulo the CUDA device count. With neither,
    ``args.distributed`` is set to False and the function returns.
    Otherwise the NCCL process group is initialised from ``args.dist_url``
    and ``args.world_size``, all processes meet at a barrier, and the print
    wrapper is installed via ``setup_for_distributed``.
    """
    env = os.environ
    has_rank_env = "RANK" in env and "WORLD_SIZE" in env
    has_slurm_env = "SLURM_PROCID" in env
    if not has_rank_env and not has_slurm_env:
        print("Not using distributed mode")
        args.distributed = False
        return
    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env["WORLD_SIZE"])
        args.gpu = int(env["LOCAL_RANK"])
    else:
        args.rank = int(env["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(
        "| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True
    )
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
--- a/utils.py
+++ b/utils.py
@@ -256,8 +256,8 @@ def setup_for_distributed(is_master):
    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
-        if is_master or force:
+        if is_master or force or True:
-            builtin_print(*args, **kwargs)
+            print(*args, **kwargs)
    __builtin__.print = print