This commit is contained in:
2021-09-27 16:02:11 -04:00
parent 90edf9bd45
commit e18232df84
35 changed files with 3037 additions and 78 deletions

23
.gitattributes vendored Normal file
View File

@@ -0,0 +1,23 @@
models/20210720_155640.oth filter=lfs diff=lfs merge=lfs -text
models/20210921_190116.json filter=lfs diff=lfs merge=lfs -text
models/20210921_190419.json filter=lfs diff=lfs merge=lfs -text
models/birds_only.json filter=lfs diff=lfs merge=lfs -text
models/hummingbird.json filter=lfs diff=lfs merge=lfs -text
models/20210701_202822.json filter=lfs diff=lfs merge=lfs -text
models/20210720_155640.json filter=lfs diff=lfs merge=lfs -text
models/hummingbird.pth filter=lfs diff=lfs merge=lfs -text
models/20210921_190436.oth filter=lfs diff=lfs merge=lfs -text
models/20210921_190436.pth filter=lfs diff=lfs merge=lfs -text
models/20210720_155640.pth filter=lfs diff=lfs merge=lfs -text
models/20210810_104206.json filter=lfs diff=lfs merge=lfs -text
models/20210921_190257.json filter=lfs diff=lfs merge=lfs -text
models/20210921_190436.json filter=lfs diff=lfs merge=lfs -text
models/20210921_190436.sth filter=lfs diff=lfs merge=lfs -text
models/20210923_153848.json filter=lfs diff=lfs merge=lfs -text
models/20210701_202822.pth filter=lfs diff=lfs merge=lfs -text
models/20210720_155509.json filter=lfs diff=lfs merge=lfs -text
models/20210923_174607.json filter=lfs diff=lfs merge=lfs -text
models/birds_only.pth filter=lfs diff=lfs merge=lfs -text
models/birds_only.oth filter=lfs diff=lfs merge=lfs -text
models/20210923_153808.json filter=lfs diff=lfs merge=lfs -text
models/20210923_174546.json filter=lfs diff=lfs merge=lfs -text

106
anaylze_visits.py Normal file
View File

@@ -0,0 +1,106 @@
# %%
import datetime as dt
import json
import os

import numpy as np

# Folder that is scanned (non-recursively) for JSON result files.
folder = r'\\192.168.1.242\ftp\hummingbird\2021\08\\08'
# Directory that receives the HTML plots written later in this script.
dest_path = r'C:\\Users\\TheBears\\Desktop\\'

files = os.listdir(folder)
# Full paths of every entry in ``folder`` whose name ends in ``.json``.
f_jsons = [os.path.join(folder, name) for name in files if name.endswith('.json')]
def get_obs_in_json(curr_js):
    """Collect the per-entry observations stored in one JSON result file.

    The start time is parsed from the last ``_``-separated token of the file
    name (format ``%Y%m%d%H%M%S``) after dropping the extension and any
    ``_trimmed`` marker.

    Args:
        curr_js: Path to a JSON file holding a list of dicts. Each dict has
            ``boxes``, ``scores`` and either ``frame_number_original`` or
            ``frame_number``.

    Returns:
        ``(all_obs, summary)``. ``all_obs`` is a list of
        ``[datetime, max_score]`` pairs, one per entry with at least one box.
        ``summary`` is ``[mean_time, best_score]`` over those pairs, or ``[]``
        when there are none.
    """
    stem = os.path.basename(curr_js).split('.')[0]
    stamp = stem.replace('_trimmed', '').split('_')[-1]
    dt_obj = dt.datetime.strptime(stamp, '%Y%m%d%H%M%S')

    with open(curr_js, 'r') as ff:
        data = json.load(ff)

    all_obs = []
    for entry in data:
        has_original = 'frame_number_original' in entry
        raw = entry['frame_number_original'] if has_original else entry['frame_number']
        # Test the sentinel before any arithmetic so a string value in either
        # field is skipped instead of raising TypeError on the division.
        if raw == 'thumbnail':
            continue
        sec_offset = raw / 30 if has_original else raw
        if len(entry['boxes']) > 0:
            when = dt_obj + dt.timedelta(seconds=sec_offset / 2)
            all_obs.append([when, max(entry['scores'])])

    ret_this = []
    if all_obs:
        stamps = np.asarray([obs[0].timestamp() for obs in all_obs])
        # Mean time is truncated to whole seconds.
        tavg = dt.datetime.fromtimestamp(int(np.average(stamps)))
        scavg = np.max([obs[1] for obs in all_obs])
        ret_this = [tavg, scavg]
    return all_obs, ret_this
# entire_obs: every observation from every file.
# obs_sep: one summary per file that had observations.
# obs_fnames: the matching ``.mp4`` name for each entry of obs_sep.
entire_obs, obs_sep, obs_fnames = [], [], []
for curr_js in f_jsons:
    cc, avgg = get_obs_in_json(curr_js)
    entire_obs += cc
    if not avgg:
        continue
    obs_sep.append(avgg)
    obs_fnames.append(curr_js.replace('.json', '.mp4'))
def scatter_dt(entire_obs, fname, hover_data = None):
    """Write a green marker scatter plot of score versus time as HTML.

    Args:
        entire_obs: Sequence of ``[datetime, score]`` pairs. May be empty, in
            which case a plot with no points is written.
        fname: File name of the HTML output, created inside ``dest_path``.
        hover_data: Optional per-point hover text passed to plotly.
    """
    import numpy as np
    import plotly.graph_objects as go

    # Split the columns before building arrays so an empty input does not
    # fail on 2-D indexing of a 1-D array.
    times = [obs[0] for obs in entire_obs]
    scores = [obs[1] for obs in entire_obs]
    x = np.asarray(times, dtype=np.datetime64)
    y = np.asarray(scores)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        hovertext=hover_data,
        hoverinfo="text",
        marker=dict(
            color="green"
        ),
        mode='markers',
        showlegend=False
    ))
    fig.write_html(os.path.join(dest_path, fname))
import plotly.express as px

scatter_dt(entire_obs, 'hbirds.html')
scatter_dt(obs_sep, 'indiv.html', hover_data = obs_fnames)

# Plot, for each sorted score, how many per-file scores are at or after it
# in sorted order.
scores = np.asarray([summary[1] for summary in obs_sep])
scors = np.sort(scores)
cumu_total = np.arange(len(scores), 0, -1)
fig = px.scatter(x=scors, y=cumu_total)
fig.write_html(os.path.join(dest_path, 'cumul.html'))
# %%

40
convert_to_onnx.py Normal file
View File

@@ -0,0 +1,40 @@
import json
import os
import sys

# The project-local ``model`` module is resolved through this extra path.
sys.path.append('/home/thebears/Seafile/Designs/ML')
from model import Model
import torch

device = 'cpu'
model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json
# Pick the lexicographically greatest directory entry and drop its extension.
# splitext (rather than replacing '.pth') keeps the stem correct when that
# entry is a '.json' or other sibling file.
newest_model = os.path.join(
    model_rt_path, os.path.splitext(max(os.listdir(model_rt_path)))[0]
)
with open(newest_model + '.json','r') as nmj:
    model_json = json.load(nmj)

cats = model_json['categories']
cats.sort(key=lambda x: x['new_id'])
# One extra class on top of the listed categories.
num_cat = len(cats) + 1
model_type = model_json['model_type']
model = Model(num_cat, model_type)
labels = [x['name'] for x in cats]
model.load_state_dict(
    torch.load(newest_model + '.pth', map_location = torch.device(device))
)
model.eval()
# %%
onnx_model_path = "models"
onnx_model_name = "hbirds.onnx"
os.makedirs(onnx_model_path, exist_ok=True)
full_model_path = os.path.join(onnx_model_path, onnx_model_name)
# model export into ONNX format
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
torch.onnx.export(model, x, full_model_path, opset_version = 12)
# %%
import cv2
# Read the exported file back with OpenCV's DNN module as a smoke test.
opencv_net = cv2.dnn.readNetFromONNX(full_model_path)
print("OpenCV model was successfully read. Layer IDs: \n", opencv_net.getLayerNames())

View File

@@ -0,0 +1,192 @@
# %%
import os
from unicodedata import category
import torch
from PIL import Image
import sys
import json
import torch
import transforms as T
import os
import numpy as np
# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Dataset root directory, chosen by platform.
PATH_ROOT = (
    r"D:\ishan\ml\inaturalist\\"
    if sys.platform == "win32"
    else '/home/thebears/data/ml/inaturalist'
)
def get_transform(train):
    """Return the composed transform: ``ToTensor``, plus a 0.5-probability
    horizontal flip when ``train`` is truthy."""
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)
def create_map(list_in, from_key, to_key):
    """Map ``item[from_key]`` to ``item[to_key]`` for every item of ``list_in``.

    When several items share the same ``from_key`` value, the last one wins.
    An empty input yields an empty dict.
    """
    return {item[from_key]: item[to_key] for item in list_in}
class iNaturalistDataset(torch.utils.data.Dataset):
    """Detection dataset built from the ``*_2017_bboxes.json`` files under
    ``PATH_ROOT``.

    Each sample is ``(image, target)`` with exactly one box per image. When an
    image has several matching annotations, only the last one read is kept.
    """

    def __init__(self, validation=False, train=False, species=None):
        """Load and index the annotation JSON.

        Args:
            validation: Load the validation split.
            train: Load the training split (also enables the random flip).
            species: Optional container of category names to keep. ``None``
                keeps every category. When given, categories whose
                ``supercategory`` is ``'Mammalia'`` are kept as well.

        Raises:
            ValueError: Unless exactly one of ``train``/``validation`` is set.
        """
        self.validation = validation
        self.train = train
        # Exactly one of the two modes must be selected.
        if bool(self.train) == bool(self.validation):
            raise ValueError("Need to do either train or validation")
        self.transform = get_transform(self.train)

        if validation:
            json_path = os.path.join(
                PATH_ROOT, "val_2017_bboxes", "val_2017_bboxes.json"
            )
        elif train:
            json_path = os.path.join(
                PATH_ROOT, "train_2017_bboxes", "train_2017_bboxes.json"
            )
        with open(json_path, "r") as rj:
            f = json.load(rj)
        self.raw_data = f

        categories = list()
        image_info = dict()
        orig_id_to_name = dict()
        for cat_info in f["categories"]:
            # Every category is indexed by its original id, kept or not.
            orig_id_to_name[cat_info["id"]] = cat_info
            do_add = (
                species is None
                or cat_info["name"] in species
                or cat_info['supercategory'] == 'Mammalia'
            )
            if do_add:
                print('Adding '+str(cat_info))
                categories.append(cat_info)

        # New ids are 1-based and follow alphabetical order of the name.
        categories = sorted(categories, key=lambda k: k["name"])
        for new_id, cat in enumerate(categories, start=1):
            cat["new_id"] = new_id
        orig_to_new_id = create_map(categories, "id", "new_id")

        for annot in f["annotations"]:
            if annot["category_id"] in orig_to_new_id:
                annot["new_category_id"] = orig_to_new_id[annot["category_id"]]
                image_id = annot["image_id"]
                if image_id not in image_info:
                    image_info[image_id] = dict()
                # In-place: adds bbox[0] to bbox[2] and bbox[1] to bbox[3]
                # (presumably [x, y, w, h] -> corner format; confirm against
                # the dataset). This also mutates ``self.raw_data``.
                annot["bbox"][2] += annot["bbox"][0]
                annot["bbox"][3] += annot["bbox"][1]
                image_info[image_id]["annotation"] = annot

        for img in f["images"]:
            image_id = img["id"]
            if image_id in image_info:
                image_info[image_id].update({
                    "path": os.path.join(PATH_ROOT, img["file_name"]),
                    "height": img["height"],
                    "width": img["width"],
                })

        for idx, im_in in enumerate(image_info.values()):
            im_in["idx"] = idx

        self.images = image_info
        self.categories = categories
        self.orig_id_to_name = orig_id_to_name
        # Position -> image id, in insertion order of ``self.images``.
        self.idx_to_id = list(self.images)
        # One extra class on top of the kept categories.
        self.num_classes = len(self.categories) + 1
        self.num_samples = len(self.images)

    def __len__(self):
        """Return the number of images that have a kept annotation."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(image, target)`` for position ``idx``.

        ``target`` holds ``boxes``, ``labels``, ``image_id``, ``area`` and
        ``iscrowd`` tensors, each describing the single stored annotation.
        """
        idd = self.idx_to_id[idx]
        c_image = self.images[idd]
        img = Image.open(c_image["path"]).convert("RGB")
        annot = c_image["annotation"]

        target = dict()
        target["boxes"] = torch.as_tensor([annot["bbox"]])
        target["labels"] = torch.as_tensor(
            [annot["new_category_id"]], dtype=torch.int64
        )
        target["image_id"] = torch.tensor([annot["image_id"]])
        target["area"] = torch.as_tensor([annot["area"]])
        target["iscrowd"] = torch.zeros((1,), dtype=torch.int64)

        if self.transform is not None:
            img, target = self.transform(img, target)
        return img, target
# Exploratory analysis guarded by ``if False:``.
# NOTE(review): indentation is not visible in this view, so it is unclear how
# many of the statements below belong to the disabled block - confirm before
# enabling or moving any of it.
if False:
train_dataset = iNaturalistDataset(train=True)
loc_path = os.path.join(PATH_ROOT, "inat2017_locations", "train2017_locations.json")
with open(loc_path, "r") as lfile:
locs = json.load(lfile)
from bear_utils import get_distance_from_home
# %%
# Attach a "distance" to every known image that has coordinates, and group
# the distances by original category id.
category_distances = dict()
inserts = 0
for loc in locs:
lat = loc["lat"]
lon = loc["lon"]
im_id = loc["id"]
# Entries without coordinates are skipped.
if lat is None or lon is None:
continue
ff = get_distance_from_home(lat, lon)
if im_id in train_dataset.images:
inserts += 1
train_dataset.images[im_id]["distance"] = ff
category_id = train_dataset.images[im_id]["annotation"]["category_id"]
if category_id not in category_distances:
category_distances[category_id] = list()
category_distances[category_id].append(ff)
# %%
from EcoNameTranslator import to_common
# Print "Aves" categories where more than 10% of the distances are below 250
# (units depend on get_distance_from_home - TODO confirm).
for k, v in category_distances.items():
name = train_dataset.orig_id_to_name[k]
if np.average(np.asarray(v) < 250) > 0.1:
if name["supercategory"] == "Aves":
print(len(v), to_common([name["name"]]))
# %%
# List "Aves" categories ordered by number of recorded distances, most first.
fc = sorted(
category_distances, key=lambda x: len(category_distances[x]), reverse=True
)
for x in fc:
cc = train_dataset.orig_id_to_name[x]
if cc["supercategory"] == "Aves":
ou = to_common([cc["name"]])
print(ou, len(category_distances[x]))
# %%

View File

@@ -68,7 +68,6 @@ class iNaturalistDataset(torch.utils.data.Dataset):
if species is None: if species is None:
do_add = True do_add = True
elif category["name"] in species: elif category["name"] in species:
print(category["name"])
do_add = True do_add = True
if do_add: if do_add:

View File

@@ -0,0 +1,110 @@
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Train ``model`` for one pass over ``data_loader``.

    A warm-up LR scheduler is stepped after every batch during epoch 0 only.
    The process exits with status 1 if the reduced loss is not finite.

    Returns:
        The ``utils.MetricLogger`` holding the loss and learning-rate meters.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = f'Epoch: [{epoch}]'

    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
    else:
        lr_scheduler = None

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
def _get_iou_types(model):
    """Return the IoU types to evaluate for ``model``.

    Always contains ``"bbox"``. ``"segm"`` is added for a ``MaskRCNN`` and
    ``"keypoints"`` for a ``KeypointRCNN``. A ``DistributedDataParallel``
    wrapper is unwrapped before the type checks.
    """
    is_ddp = isinstance(model, torch.nn.parallel.DistributedDataParallel)
    core = model.module if is_ddp else model

    detection = torchvision.models.detection
    iou_types = ["bbox"]
    for model_cls, tag in (
        (detection.MaskRCNN, "segm"),
        (detection.KeypointRCNN, "keypoints"),
    ):
        if isinstance(core, model_cls):
            iou_types.append(tag)
    return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Run COCO-style evaluation of ``model`` over ``data_loader``.

    Torch is limited to a single thread while evaluating. The previous thread
    count is restored afterwards, including when evaluation raises.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)

            # Wait for pending GPU work so model_time measures the model only.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Key each prediction by the image id of its target.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        torch.set_num_threads(n_threads)
    return coco_evaluator

View File

@@ -20,15 +20,15 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
if epoch == 0: if epoch == 0:
warmup_factor = 1. / 1000 warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1) warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header): for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images) images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets] targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets) loss_dict = model(images, targets)
print('Hey I''m here')
losses = sum(loss for loss in loss_dict.values()) losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes # reduce losses over all GPUs for logging purposes

45
filter_species.py Normal file
View File

@@ -0,0 +1,45 @@
import csv
csv_path = '/home/thebears/data/ebirddata/output_mi/ebd_US-MI_relMay-2021.txt'
fields = ['COMMON NAME','SCIENTIFIC NAME','LATITUDE', 'LONGITUDE','OBSERVATION DATE']
tokeep = list()
idx = 0
# newline='' is what the csv module documents for files passed to a reader.
with open(csv_path, newline='') as csvfile:
    data = csv.DictReader(csvfile, delimiter='\t')
    for idx, row in enumerate(data):
        if idx % 1000 == 0:
            print(idx)
        # Keep only the columns named in ``fields``. Previously nothing was
        # ever appended, so the counting below always saw an empty list.
        # NOTE(review): assumes every row should be kept - confirm no extra
        # row filter was intended here.
        tokeep.append({name: row[name] for name in fields})
# %%
# scientific name -> [common name of first occurrence, number of rows]
sc = dict()
for x in tokeep:
    scientific_name = x['SCIENTIFIC NAME']
    if scientific_name not in sc:
        sc[scientific_name] = [x['COMMON NAME'], 0]
    sc[scientific_name][1]+=1
# %%
# Same entries, ordered by descending row count.
sor = {x:sc[x] for x in sorted(sc, key = lambda x : sc[x][1], reverse=True)}
with open('/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv','w', newline='') as csvfile:
    finames = ['species','name','count']
    writer = csv.DictWriter(csvfile, fieldnames=finames)
    writer.writeheader()
    for key,values in sor.items():
        fn = {'species':key, 'name':values[0], 'count':values[1]}
        writer.writerow(fn)

View File

@@ -1,11 +1,15 @@
import json import json
import shutil import shutil
import os import os
import numpy as np
source_path = '/srv/ftp/hummingbird/2021' source_path = '/srv/ftp/hummingbird/2021'
#target_path = '/home/thebears/Videos/ftp' #target_path = '/home/thebears/Videos/ftp'
target_path = '/home/thebears/ftp_links' target_path = '/home/thebears/data/ftplinks'
import scipy.stats
target_mean = 0.4
target_std = 1
gauss = scipy.stats.norm(target_mean, target_std)
have_json = set() have_json = set()
for di, _, fns in os.walk(source_path): for di, _, fns in os.walk(source_path):
@@ -16,38 +20,69 @@ for di, _, fns in os.walk(source_path):
def box_area(box):
return (box[3]-box[1]) * (box[2] - box[0]) / 100000
do_stop = False do_stop = False
fracs = dict() fracs = dict()
saveo = None
saveb = None
def gaussian(x, mu = target_mean, sig = target_std):
return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
scc = list()
for c_js in have_json: for c_js in have_json:
hits = 0 hits = 0
total = 0 total = 0
o = json.load(open(c_js,'r')) o = json.load(open(c_js,'r'))
if c_js.endswith('Hummingbird_01_20210701105440.json'):
saveo = o
if c_js.endswith('Hummingbird_01_20210627111405.json'):
saveb = o
avg = 0
max_sc = 0
for i in o: for i in o:
total += 1 # for x,b in zip(i['scores'], i['boxes']):
# if len([x for x in i['scores'] if x > 0.1]) > 0: # scc.append((x,box_area(b)))
if len(i['boxes']) > 0:
hits += 1
fracs[c_js] = [hits, total] if len(i['scores']) > 0:
css = max(i['scores'])
mf = gaussian(box_area(i['boxes'][0]))
avg += css * mf / len(o)
if do_stop: fracs[c_js] = avg
break
ratios = dict() ratios = dict()
for x,y in fracs.items(): for x,y in fracs.items():
ratios[x] = y[0]/y[1] ratios[x] = y
# %%
sorted_ratios = {x:ratios[x] for x in sorted(ratios, key=lambda x: ratios[x])}
import shutil
for d in os.listdir(target_path):
shutil.rmtree(target_path + '/' + d)
import math import math
dir_created = set() dir_created = set()
for fname, ratio in ratios.items(): for idx, (fname, ratio) in enumerate(sorted_ratios.items()):
cr = math.floor(ratio * 10)/10 cr = math.floor(100*ratio)
target_dir = os.path.join(target_path, str(cr)) target_dir = os.path.join(target_path, '{0:02g}'.format(cr))
if not os.path.exists(target_dir) and target_dir not in dir_created: if not os.path.exists(target_dir) and target_dir not in dir_created:
os.mkdir(target_dir) os.mkdir(target_dir)
dir_created.add(target_dir) dir_created.add(target_dir)
@@ -59,4 +94,30 @@ for fname, ratio in ratios.items():
source_file = fname.replace('.json','.mp4') source_file = fname.replace('.json','.mp4')
target_file = os.path.join(target_dir, os.path.basename(source_file)) target_file = os.path.join(target_dir, os.path.basename(source_file))
os.symlink(source_file, target_file) os.symlink(source_file, target_file)
source_file = fname.replace('.json','.json')
target_file = os.path.join(target_dir, os.path.basename(source_file))
os.symlink(source_file, target_file)
# %%
if False:
plt.close('all')
inp = saveo
sco = list()
for x in inp:
if len(x['scores']) > 0:
cscore = max(x['scores'])
ar = box_area(x['boxes'][0])/ 100000
else:
cscore = 0
ar = 0
sco.append((cscore, ar))
plt.plot(sco)
# %%
distr = np.asarray(scc)

25
generate_matrix_sets.py Normal file
View File

@@ -0,0 +1,25 @@
import os

import cv2
import numpy
import numpy as np
import torch

vpath = '/srv/ftp/railing/2021/09/04/Railing_00_20210904070617.mp4'
# Video path without its extension; used as the prefix of tensor_path below.
stack_path = os.path.splitext(vpath)[0]
cap = cv2.VideoCapture(vpath)
frame_num = 0
step_frame = 5
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# %%
# Collect up to 16 frames.
# NOTE(review): cap.read() returns consecutive frames, so step_frame only
# limits the number of iterations and does not skip frames - confirm intent.
images = list()
stack_num = 0
for frame_num in range(0, total_frames, step_frame):
    ok, frame = cap.read()
    # Stop cleanly when the capture has no more frames to give.
    if not ok or frame is None:
        break
    img = frame.copy()
    images.append(img)
    if len(images) == 16:
        stack_num +=1
        break
# %%
# Move axis 3 of the stacked frames to position 1.
imgs = np.moveaxis(np.stack(images), 3, 1)
bt = torch.FloatTensor(imgs)
# NOTE(review): tensor_path is computed but ``bt`` is never written to it.
tensor_path = f'{stack_path}.{stack_num:03}.pt'

64
identify_behavior.py Normal file
View File

@@ -0,0 +1,64 @@
import os
import json

rtdir = '/srv/ftp/hummingbird/2021/07/21'
#rtdir = os.getcwd()

# Every ``.json`` file found anywhere below rtdir.
files_read = list()
for cdir, _, files in os.walk(rtdir):
    files_read.extend(os.path.join(cdir, f) for f in files if f.endswith('.json'))

# path -> parsed JSON content
fcontents = dict()
for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)
def compute_2d_scores(cbbs):
    """Measure how far the first box of each entry sits from the weighted
    centre of all of them.

    Only entries with at least one score are used. Each contributes the centre
    of its first box and its first score.

    Args:
        cbbs: Iterable of dicts with ``boxes`` and ``scores`` lists.

    Returns:
        ``(spread, scores)``. ``spread`` is the score-weighted mean distance
        of the box centres from their score-weighted mean position, or NaN
        when no entry has a score. ``scores`` lists the first scores used.
    """
    # Imported here because this file has no module-level numpy import.
    import numpy as np

    locs = []
    scores = []
    for cbb in cbbs:
        if len(cbb['scores']) > 0:
            first_box = cbb['boxes'][0]
            # Elements 0 and 2 give one centre coordinate, 1 and 3 the other.
            x_cent = np.mean(first_box[0::2])
            y_cent = np.mean(first_box[1::2])
            locs.append([x_cent, y_cent])
            scores.append(cbb['scores'][0])

    # np.average raises ZeroDivisionError on empty weights.
    if not scores:
        return float('nan'), scores

    center = np.average(locs, weights=scores, axis=0)
    stddev = np.average(np.linalg.norm(center - locs, axis=1), weights=scores)
    return stddev, scores
# numpy is used below but was never imported in this file.
import numpy as np
import pandas as pd

# One row per non-empty JSON file: its spread and mean first-box score.
data = {'file':list(), 'stddev':list(),'weights':list()}
for fname, contents in fcontents.items():
    if len(contents) > 0:
        sdev, sc = compute_2d_scores(contents)
        data['file'].append(fname)
        data['stddev'].append(sdev)
        # Files without any score get NaN instead of averaging an empty list.
        data['weights'].append(np.average(sc) if len(sc) > 0 else float('nan'))

df = pd.DataFrame(data)
# Keep files with a mean score above 0.4 and a spread above 400.
fil = df.loc[(df['weights'] > 0.4) & (df['stddev'] > 400) ]

58
purge_originals.py Normal file
View File

@@ -0,0 +1,58 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
#rtpath = '/srv/ftp/hummingbird/2021'
# The root directory must be given as the first command-line argument.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} ROOT_DIR')
rtpath = os.path.abspath(sys.argv[1])
#rtpath = os.path.abspath('/srv/ftp/hummingbird/2021/07/')

# Every directory below rtpath whose last three path components are all
# numeric (presumably year/month/day folders - confirm).
didir = list()
for di,dnames, fns in os.walk(rtpath):
    numbers = di.split('/')[-3:]
    if all(n.isnumeric() for n in numbers):
        didir.append(di)
def list_files(path):
    """Yield the names of the files directly inside ``path`` (not recursive).

    Symbolic links that point to files are included. Directories are skipped.
    """
    # scandir avoids building a joined path and a separate stat per entry.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                yield entry.name
import shutil
import os

for cdr in didir:
    # Gather every file that lives in a sub-directory of cdr.
    files_origin = list()
    for di, dnames, fns in os.walk(cdr):
        if di != cdr:
            files_origin.extend([os.path.join(di,f) for f in fns])

    # Move them up into cdr itself.
    for src_file in files_origin:
        fname = os.path.basename(src_file)
        targ_file = os.path.join(cdr, fname)
        # os.rename would silently replace an existing file of the same name,
        # so a clash is reported and the source is left where it is.
        if os.path.exists(targ_file):
            print(f'Skipping {src_file}: {targ_file} already exists')
            continue
        os.rename(src_file, targ_file)

    # Remove the sub-directories that are now empty, deepest first. rmdir
    # refuses non-empty directories, so skipped files are never deleted.
    for di, dnames, fns in os.walk(cdr, topdown=False):
        if di == cdr:
            continue
        try:
            os.rmdir(di)
        except OSError:
            # Still holds something; leave it alone.
            pass

View File

@@ -0,0 +1,59 @@
import os
import json

#rtdir = '/srv/ftp/hummingbird/2021/07/21'
rtdir = os.getcwd()

# Every ``.json`` file found anywhere below the current directory.
files_read = list()
for cdir, _, files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))

fcontents = dict()
for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)

# A non-empty file is purged when none of its entries has a best score >= 0.6.
to_purge = dict()
for fname, contents in fcontents.items():
    if len(contents) > 0:
        nscores = 0
        for x in contents:
            if len(x['scores']) > 0:
                nscores+= max(x['scores']) >= 0.6
        if nscores == 0:
            to_purge[fname] = nscores

delete_list = set()
for f_json, n_scores in to_purge.items():
    # splitext drops exactly the extension. str.rstrip('.mp4') strips a set of
    # characters and also ate trailing '4', 'm', 'p' of the name itself.
    base = os.path.splitext(f_json)[0]
    vid_path = base + '.mp4'
    img_path = base + '.jpg'
    if not os.path.exists(img_path):
        # Retry with the numeric name token increased by one; for "trimmed"
        # names that token is second to last.
        parts = base.split('_')
        pos = -2 if 'trimmed' in img_path else -1
        parts[pos] = str(int(parts[pos]) + 1)
        img_path = '_'.join(parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})

print(f'Deleting {len(delete_list)} files')
for f in delete_list:
    if os.path.exists(f):
        os.remove(f)

View File

@@ -0,0 +1,49 @@
import os
import json

#rtdir = '/srv/ftp/hummingbird/2021/07/21'
rtdir = os.getcwd()

# Every ``.json`` file found anywhere below the current directory.
files_read = list()
for cdir, _, files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))

fcontents = dict()
for f in files_read:
    with open(f) as ff:
        fcontents[f] = json.load(ff)

# A non-empty file is purged when no entry has any score at all.
to_purge = dict()
for fname, contents in fcontents.items():
    if len(contents) > 0:
        nscores = sum(len(x['scores']) for x in contents)
        if nscores == 0:
            to_purge[fname] = nscores

delete_list = set()
for f_json, n_scores in to_purge.items():
    # splitext drops exactly the extension. str.rstrip('.mp4') strips a set of
    # characters and also ate trailing '4', 'm', 'p' of the name itself.
    base = os.path.splitext(f_json)[0]
    vid_path = base + '.mp4'
    img_path = base + '.jpg'
    if not os.path.exists(img_path):
        # Retry with the last numeric name token increased by one.
        parts = base.split('_')
        parts[-1] = str(int(parts[-1]) + 1)
        img_path = '_'.join(parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})

print(delete_list)
for f in delete_list:
    if os.path.exists(f):
        os.remove(f)

54
purge_videos.py Normal file
View File

@@ -0,0 +1,54 @@
import os
import json

#rtdir = '/srv/ftp/hummingbird/2021/07/21'
rtdir = os.getcwd()

# Every ``.json`` file found anywhere below the current directory.
files_read = list()
for cdir, _, files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):# and 'trimmed' not in f:
            files_read.append(os.path.join(cdir, f))

fcontents = dict()
for f in files_read:
    with open(f) as ff:
        try:
            fcontents[f] = json.load(ff)
        except ValueError:
            # Covers malformed JSON and undecodable text; such files are
            # reported and left alone.
            print(f, ' Failed')

# A non-empty file is purged when its best score over all entries is < 0.60
# or it has no scores at all.
to_purge = list()
for fname, contents in fcontents.items():
    if len(contents) > 0:
        nscores = list()
        for x in contents:
            if len(x['scores']) > 0:
                nscores.append(max(x['scores']))
        if len(nscores) == 0 or max(nscores) < 0.60:
            to_purge.append(fname)

delete_list = set()
for f_json in to_purge:
    # splitext drops exactly the extension. str.rstrip('.mp4') strips a set of
    # characters and also ate trailing '4', 'm', 'p' of the name itself.
    base = os.path.splitext(f_json)[0]
    vid_path = base + '.mp4'
    img_path = base + '.jpg'
    if not os.path.exists(img_path) and 'trimmed' not in img_path:
        # Retry with the last numeric name token increased by one.
        parts = base.split('_')
        parts[-1] = str(int(parts[-1]) + 1)
        img_path = '_'.join(parts) + '.jpg'
    delete_list.update({vid_path,img_path, f_json})

print(delete_list)
for f in delete_list:
    if os.path.exists(f):
        os.remove(f)

155
quantize_model.py Normal file
View File

@@ -0,0 +1,155 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
import json
import cv2
import random
from model import Model
import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
no_cuda = socket.gethostname() == 'tree'
device = 'cpu'
model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json

# The JSON next to the weights describes the categories and model type.
with open(f'{model_path}.json', 'r') as nmj:
    model_json = json.load(nmj)
cats = model_json['categories']
cats.sort(key=lambda entry: entry['new_id'])
# One extra class on top of the listed categories.
num_cat = 1 + len(cats)
model_type = model_json['model_type']
model = Model(num_cat, model_type)
labels = [entry['name'] for entry in cats]
model.load_state_dict(
    torch.load(f'{model_path}.pth', map_location=torch.device(device))
)
model.eval()
# %%
# Quantization with the default "fbgemm" qconfig.
# NOTE(review): nothing is run through the model between prepare and convert.
backend = "fbgemm"
model.qconfig = torch.quantization.get_default_qconfig(backend)
torch.backends.quantized.engine = backend
model_static_quantized = torch.quantization.prepare(model, inplace=False)
model_static_quantized = torch.quantization.convert(
    model_static_quantized, inplace=False
)
def print_model_size(mdl):
    """Print the on-disk size of ``mdl``'s saved ``state_dict`` in MB
    (1 MB = 1e6 bytes).

    The state dict is written to a private temporary directory, so nothing in
    the working directory is overwritten and the file is removed even when
    saving fails.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file = os.path.join(tmp_dir, "tmp.pt")
        torch.save(mdl.state_dict(), tmp_file)
        print("%.2f MB" %(os.path.getsize(tmp_file)/1e6))
# Report the serialized size of the quantized model.
print_model_size(model_static_quantized)
# %%
# Run the (non-quantized) ``model`` on frames of one video and print its output.
# NOTE(review): indentation is not visible in this view, so the exact loop
# nesting below cannot be confirmed.
results = list()
vid_path = '/srv/ftp/hummingbird/2021/07/28/Hummingbird_01_20210728063745.mp4'
cap = cv2.VideoCapture(vid_path)
frame_num = 0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
step_frame = 15
import time
idces = 0
st = time.time()
for frame_num in range(0, total_frames, step_frame):
srcimg = cap.read()[1]
print(frame_num)
# A failed read yields None; stop there.
if srcimg is None:
break
# Reverse the last axis (channel order) and copy to get a contiguous array.
image = srcimg[:, :, ::-1].copy()
o = T.ToTensor()(image)
# Add a leading batch axis.
img = o[None, :, :, :]
with torch.no_grad():
ou = model(img)
print(ou)
# Read and discard step_frame frames (presumably to skip ahead - confirm).
# This rebinds ``img`` to a raw frame or None.
for i in range(step_frame):
img = cap.read()[1];
if img is None:
break
# %%
# Time a single forward pass.
# NOTE(review): ``img`` here is whatever the loop above left behind, which may
# be a raw frame or None rather than a tensor - confirm.
et = time.time()
model(img)
st = time.time()
print(st-et)
# %%
# Time three stages separately: backbone, rpn and head.
img_use = img
st = time.time()
features = model.backbone(img_use)
print(time.time() - st)
st = time.time()
proposals = model.rpn(img_use, features)
print(time.time() - st)
st = time.time()
head = model.head(features, proposals)
print(time.time() - st)
# %%
# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

View File

@@ -0,0 +1,46 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
#rtpath = '/srv/ftp/hummingbird/2021'
# The root directory must be given as the first command-line argument.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} ROOT_DIR')
rtpath = os.path.abspath(sys.argv[1])

# Every directory below rtpath whose last three path components are all
# numeric (presumably year/month/day folders - confirm).
didir = list()
for di,dnames, fns in os.walk(rtpath):
    numbers = di.split('/')[-3:]
    if all(n.isnumeric() for n in numbers):
        didir.append(di)
def list_files(path):
    """Yield the names of the files directly inside ``path`` (not recursive).

    Symbolic links that point to files are included. Directories are skipped.
    """
    # scandir avoids building a joined path and a separate stat per entry.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                yield entry.name
# In each directory, delete ``<name>_trimmed<ext>`` whenever the plain
# ``<name><ext>`` file is present next to it.
for cdir in didir:
    files = list(list_files(cdir))
    todelete = []
    for y in files:
        if 'trimmed' in y:
            continue
        ff = os.path.splitext(y)
        fcheck = os.path.join(cdir, f'{ff[0]}_trimmed{ff[1]}')
        if os.path.exists(fcheck):
            todelete.append(fcheck)

    # Only purge a trimmed copy while its un-trimmed counterpart still exists.
    to_purge = {de for de in todelete if os.path.exists(de.replace('_trimmed', ''))}
    for y in to_purge:
        os.remove(y)

47
reset_trim.py Normal file
View File

@@ -0,0 +1,47 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
#rtpath = '/srv/ftp/hummingbird/2021'
# Use the directory given on the command line, falling back to the default
# tree. Previously the argument was required but then overwritten by the
# hard-coded path, so it was never honoured.
rtpath = os.path.abspath(
    sys.argv[1] if len(sys.argv) > 1 else '/srv/ftp/hummingbird/2021/'
)

# Every directory below rtpath whose last three path components are all
# numeric (presumably year/month/day folders - confirm).
didir = list()
for di,dnames, fns in os.walk(rtpath):
    numbers = di.split('/')[-3:]
    if all(n.isnumeric() for n in numbers):
        didir.append(di)
def list_files(path):
    """Yield the names of the files directly inside ``path`` (not recursive).

    Symbolic links that point to files are included. Directories are skipped.
    """
    # scandir avoids building a joined path and a separate stat per entry.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                yield entry.name
# In each directory, delete ``<name>_trimmed<ext>`` whenever the plain
# ``<name><ext>`` file is present next to it.
for cdir in didir:
    files = list(list_files(cdir))
    todelete = []
    for y in files:
        if 'trimmed' in y:
            continue
        ff = os.path.splitext(y)
        fcheck = os.path.join(cdir, f'{ff[0]}_trimmed{ff[1]}')
        if os.path.exists(fcheck):
            todelete.append(fcheck)

    # Only purge a trimmed copy while its un-trimmed counterpart still exists.
    to_purge = {de for de in todelete if os.path.exists(de.replace('_trimmed', ''))}
    for y in to_purge:
        os.remove(y)

View File

@@ -0,0 +1,38 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
from score_video import score_video
#rtpath = '/srv/ftp/hummingbird/2021'
rtpath = sys.argv[1]
cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}'

# fnames: every .mp4 below rtpath.
# have_json: the .mp4 path implied by every .json found below rtpath.
have_json = set()
fnames = set()
for di, _, fns in os.walk(rtpath):
    for fn in fns:
        full = os.path.join(di, fn)
        if fn.endswith('.mp4'):
            fnames.add(full)
        elif fn.endswith('.json'):
            have_json.add(os.path.join(di, fn.replace('.json', '.mp4')))

# Videos without a matching JSON still need scoring; the order is randomised.
files_to_score = list(fnames.difference(have_json))
random.shuffle(files_to_score)
def try_catch_chunk(vids):
    """Score one chunk of videos without letting an exception reach the pool.

    Any ``Exception`` raised by ``score_video`` is reported with its message,
    the size of the failing chunk and the full traceback. The function then
    returns ``None`` so the remaining chunks keep running.
    """
    try:
        score_video(vids)
        # score_image(vids)
    except Exception as e:
        import traceback

        print(e)
        # The bare message rarely identifies the failure inside a worker.
        print(f'Chunk of {len(vids)} item(s) failed')
        traceback.print_exc()
lst = files_to_score
n = 25
# Consecutive slices of at most n videos each.
chunks = []
for i in range(0, len(lst), n):
    chunks.append(lst[i:i + n])
# %%
if __name__ == '__main__':
    # Two worker processes, each handling whole chunks.
    with Pool(2) as p:
        output = p.map(try_catch_chunk, chunks)
        # output = p.map(score_video,chunks)

View File

@@ -4,8 +4,8 @@ from multiprocessing import Pool
import sys import sys
sys.path.append('/home/thebears/Seafile/Designs/ML') sys.path.append('/home/thebears/Seafile/Designs/ML')
from score_video import score_video from score_video import score_video
#rtpath = '/srv/ftp/hummingbird/2021'
rtpath = '/srv/ftp/hummingbird/2021' rtpath = sys.argv[1]
cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}' cmd = '/usr/bin/python3 /home/thebears/Seafile/Designs/ML/inaturalist_models/score_video.py {mp4name}'
have_json = set() have_json = set()
fnames = set() fnames = set()
@@ -17,12 +17,15 @@ for di,_, fns in os.walk(rtpath):
have_json.add(os.path.join(di,fn.replace('.json','.mp4'))) have_json.add(os.path.join(di,fn.replace('.json','.mp4')))
files_to_score = list(fnames - have_json) files_to_score = list(fnames - have_json)
random.shuffle(files_to_score) files_to_score = sorted(files_to_score)
print(files_to_score)
#random.shuffle(files_to_score)
def try_catch_chunk(vids): def try_catch_chunk(vids):
try: try:
score_video(vids) score_video(vids)
# score_image(vids)
except Exception as e: except Exception as e:
print(e) print(e)
@@ -31,5 +34,7 @@ n = 25
chunks = [lst[i:i + n] for i in range(0, len(lst), n)] chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
# %% # %%
if __name__ == '__main__': if __name__ == '__main__':
with Pool(4) as p: with Pool(2) as p:
output = p.map(score_video,chunks) output = p.map(try_catch_chunk,chunks)
# output = p.map(score_video,chunks)

View File

@@ -0,0 +1,131 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
import json
import cv2
import random
from model import Model
import socket
#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4'
def score_video(vid_in_list):
    """Run the detection model over videos and write one JSON file per video.

    The model is loaded once from the models directory: the weights file
    whose name sorts last is used, together with its sibling ``.json``
    metadata (``categories`` and ``model_type``).

    Every 30th frame of each video is scored. Each scored frame contributes
    one dict holding the model outputs converted to plain lists, plus
    ``names`` (label ids mapped through the category list) and
    ``frame_number``. The result list is written next to the video as
    ``<video stem>.json``.

    Args:
        vid_in_list: a single video path or a list of video paths.

    Videos that already have a JSON file are skipped. Any exception while
    processing one video is printed and the next video is processed.
    """
    import shlex  # local import: used only to quote the directory below

    device = 'cpu'
    model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'
    # Only weight files are candidates: the directory also holds .json/.oth/.sth
    # siblings, and picking one of those would yield a name like "x.json.json".
    weight_files = [f for f in os.listdir(model_rt_path) if f.endswith('.pth')]
    newest_model = os.path.join(model_rt_path, os.path.splitext(max(weight_files))[0])

    with open(newest_model + '.json', 'r') as nmj:
        model_json = json.load(nmj)

    cats = model_json['categories']
    cats.sort(key=lambda x: x['new_id'])
    num_cat = len(cats) + 1
    model_type = model_json['model_type']
    model = Model(num_cat, model_type)
    labels = [x['name'] for x in cats]
    model.load_state_dict(
        torch.load(newest_model + '.pth', map_location=torch.device(device))
    )
    model.eval()

    if isinstance(vid_in_list, str):
        vid_in_list = [vid_in_list]

    for idx_vid, vid_in in enumerate(vid_in_list):
        cap = None
        try:
            vid_path = os.path.abspath(vid_in)
            # splitext removes only the final extension; rsplit('.')[0] cut the
            # path at its FIRST dot (e.g. inside a dotted directory name).
            scores_json = os.path.splitext(vid_path)[0] + '.json'
            print(os.getpid(), ':', str(idx_vid), '/', str(len(vid_in_list)), vid_path)
            if os.path.exists(scores_json):
                print(f"JSON {scores_json} already exists")
                # Skip just this video; exit() here aborted the whole chunk
                # (and the worker process when run under a Pool).
                continue

            vid_dir = os.path.dirname(vid_path)
            # Presumably opens up the directory so the JSON can be written
            # beside the video; quoted so odd directory names stay one argument.
            os.system(f'sudo chmod 777 {shlex.quote(vid_dir)}')

            cap = cv2.VideoCapture(vid_path)
            results = list()
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            for frame_num in range(0, total_frames, 30):
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                img = cap.read()[1]
                if img is None:
                    # Frame could not be decoded; keep what was scored so far
                    # instead of losing the whole video to a TypeError.
                    break

                image = img[:, :, ::-1].copy()  # reverse the channel axis
                o = T.ToTensor()(image)
                img = o[None, :, :, :]  # add a leading batch dimension

                with torch.no_grad():
                    ou = model(img)

                if len(ou) > 0:
                    ofscore = ou[0]
                    for k in ofscore:
                        ofscore[k] = ofscore[k].numpy().tolist()
                    ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
                    ofscore['frame_number'] = frame_num
                    results.append(ofscore)

            with open(scores_json, 'w') as jj:
                json.dump(results, jj, indent=4)
        except Exception as e:
            print(e)
        finally:
            if cap is not None:
                cap.release()
if __name__ == '__main__':
score_video(sys.argv[1])
# %%
# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

View File

@@ -0,0 +1,157 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
import json
import cv2
import random
from model import Model
import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4'
def score_video(vid_in_list):
    """Score videos with the ``hummingbird`` model, trying the thumbnail first.

    For each video the sibling ``.jpg`` thumbnail is scored first (falling
    back to the name whose trailing number is one higher). If the thumbnail
    yields at least one box, the video itself is not scored. Otherwise every
    15th frame is scored. Results are written next to the video as
    ``<video stem>.json``.

    Args:
        vid_in_list: a single video path or a list of video paths.

    Videos that already have a JSON file are skipped.
    """
    import shlex  # local import: used only to quote the directory below

    device = 'cpu'
    model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'

    with open(model_path + '.json', 'r') as nmj:
        model_json = json.load(nmj)

    cats = model_json['categories']
    cats.sort(key=lambda x: x['new_id'])
    num_cat = len(cats) + 1
    model_type = model_json['model_type']
    model = Model(num_cat, model_type)
    labels = [x['name'] for x in cats]
    model.load_state_dict(
        torch.load(model_path + '.pth', map_location=torch.device(device))
    )
    model.eval()

    def score_image(img):
        """Score one H x W x C image array; return the first output dict or None."""
        image = img[:, :, ::-1].copy()  # reverse the channel axis
        o = T.ToTensor()(image)
        img = o[None, :, :, :]  # add a leading batch dimension

        with torch.no_grad():
            ou = model(img)

        ofscore = None
        if len(ou) > 0:
            ofscore = ou[0]
            for k in ofscore:
                ofscore[k] = ofscore[k].numpy().tolist()
            ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
        return ofscore

    if isinstance(vid_in_list, str):
        vid_in_list = [vid_in_list]

    for idx_vid, vid_in in enumerate(vid_in_list):
        vid_path = os.path.abspath(vid_in)
        # splitext removes exactly the extension. rstrip('.mp4') strips any
        # trailing run of the characters '.', 'm', 'p', '4', so a timestamp
        # ending in 4 lost its last digit; rsplit('.')[0] cut at the first dot.
        vid_stem = os.path.splitext(vid_path)[0]
        img_path = vid_stem + '.jpg'
        if not os.path.exists(img_path):
            # Fall back to the thumbnail whose trailing number is one higher.
            prefix, sep, last = vid_stem.rpartition('_')
            if sep and last.isdigit():
                img_path = f'{prefix}_{int(last) + 1}.jpg'

        scores_json = vid_stem + '.json'
        print(os.getpid(), ':', str(idx_vid), '/', str(len(vid_in_list)), vid_path)
        if os.path.exists(scores_json):
            print(f"JSON {scores_json} already exists")
            # Skip only this video; exit() here aborted the whole chunk.
            continue

        vid_dir = os.path.dirname(vid_path)
        # Quoted so directory names with spaces/metacharacters stay one argument.
        os.system(f'sudo chmod 777 {shlex.quote(vid_dir)}')

        results = list()
        do_video = True
        if os.path.exists(img_path):
            print(f'Found thumbnail and scoring {img_path}')
            img = cv2.imread(img_path)
            # imread returns None for an unreadable file; treat that as "no score".
            outsc = score_image(img) if img is not None else None
            if outsc is not None:
                if len(outsc['boxes']) > 0:
                    print('Found bird in picture, skipping video scoring')
                    do_video = False
                outsc['frame_number'] = 'thumbnail'
                results.append(outsc)

        if do_video:
            cap = cv2.VideoCapture(vid_path)
            try:
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                for frame_num in range(0, total_frames, 15):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                    img = cap.read()[1]
                    if img is None:
                        # Undecodable frame: keep what has been scored so far.
                        break
                    outsc = score_image(img)
                    if outsc is not None:
                        outsc['frame_number'] = frame_num
                        results.append(outsc)
            finally:
                cap.release()

        with open(scores_json, 'w') as jj:
            json.dump(results, jj, indent=4)
if __name__ == '__main__':
score_video(sys.argv[1])
# %%
# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

View File

@@ -16,19 +16,19 @@ import random
from model import Model from model import Model
import socket import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
#vid_path = '/home/thebears/data/hummingbird_videos/Hummingbird_01_20210601055009.mp4' import matplotlib
step_frame = 15
def score_video(vid_in_list): def score_video(vid_in_list):
no_cuda = socket.gethostname() == 'tree' no_cuda = socket.gethostname() == 'tree'
device='cpu' device='cpu'
model_rt_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/'#0210701_202822.json model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
newest_model = os.path.join(model_rt_path, max(os.listdir(model_rt_path)).replace('.pth','')) # newest_model = os.path.join(model_rt_path, min(os.listdir(model_rt_path)).replace('.pth',''))
with open(newest_model + '.json','r') as nmj: with open(model_path + '.json','r') as nmj:
model_json = json.load(nmj) model_json = json.load(nmj)
cats = model_json['categories'] cats = model_json['categories']
@@ -38,7 +38,7 @@ def score_video(vid_in_list):
model = Model(num_cat, model_type) model = Model(num_cat, model_type)
labels = [x['name'] for x in cats] labels = [x['name'] for x in cats]
model.load_state_dict( model.load_state_dict(
torch.load(newest_model + '.pth', map_location = torch.device(device)) torch.load(model_path + '.pth', map_location = torch.device(device))
) )
model.eval() model.eval()
@@ -47,6 +47,11 @@ def score_video(vid_in_list):
for idx_vid, vid_in in enumerate(vid_in_list): for idx_vid, vid_in in enumerate(vid_in_list):
vid_path = os.path.abspath(vid_in) vid_path = os.path.abspath(vid_in)
img_path = vid_path.rstrip('.mp4')+'.jpg'
if not os.path.exists(img_path):
img_path_sp = img_path.split('_')
img_path_sp[-1] = str(int(img_path_sp[-1].rstrip('.jpg'))+1) + '.jpg'
img_path = '_'.join(img_path_sp)
scores_json = vid_path.rsplit('.')[0]+'.json' scores_json = vid_path.rsplit('.')[0]+'.json'
print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path) print(os.getpid(),':',str(idx_vid),'/',str(len(vid_in_list)),vid_path)
if os.path.exists(scores_json): if os.path.exists(scores_json):
@@ -56,23 +61,7 @@ def score_video(vid_in_list):
os.system(f'sudo chmod 777 {vid_dir}') os.system(f'sudo chmod 777 {vid_dir}')
def score_image(img):
cap = cv2.VideoCapture(vid_path)
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
cap = cv2.VideoCapture(vid_path)
frame_num = 0
results = list()
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
for frame_num in range(0, total_frames, 30):
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
img = cap.read()[1]
image = img[:, :, ::-1].copy() image = img[:, :, ::-1].copy()
o = T.ToTensor()(image) o = T.ToTensor()(image)
img = o[None, :, :, :] img = o[None, :, :, :]
@@ -80,6 +69,7 @@ def score_video(vid_in_list):
with torch.no_grad(): with torch.no_grad():
ou = model(img) ou = model(img)
ofscore = None
if len(ou) > 0: if len(ou) > 0:
ofscore = ou[0] ofscore = ou[0]
@@ -87,9 +77,49 @@ def score_video(vid_in_list):
ofscore[k] = ofscore[k].numpy().tolist() ofscore[k] = ofscore[k].numpy().tolist()
ofscore['names'] = [labels[x-1] for x in ofscore['labels']] ofscore['names'] = [labels[x-1] for x in ofscore['labels']]
ofscore['frame_number'] = frame_num
results.append(ofscore)
return ofscore
results = list()
do_video = True
if os.path.exists(img_path):
print(f'Found thumbnail and scoring {img_path}')
img = cv2.imread(img_path)
outsc = score_image(img)
if outsc is not None:
if len(outsc['boxes']) > 0:
print('Found bird in picture, (still doing video scoring)')
outsc['frame_number'] = 'thumbnail'
results.append(outsc)
do_video = True
if do_video:
cap = cv2.VideoCapture(vid_path)
frame_num = 0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
for frame_num in range(0, total_frames, step_frame):
img = cap.read()[1]
if img is None:
break
outsc = score_image(img)
if outsc is not None:
outsc['frame_number'] = frame_num
results.append(outsc)
for i in range(step_frame):
img = cap.read()[1];
if img is None:
break
with open(scores_json,'w') as jj: with open(scores_json,'w') as jj:
json.dump(results, jj, indent=4) json.dump(results, jj, indent=4)

170
score_video_birds.py Normal file
View File

@@ -0,0 +1,170 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
import json
import cv2
import random
from model import Model
import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
step_frame = 15
no_cuda = socket.gethostname() == 'tree'
device='cpu'
model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/birds_only'#0210701_202822.json
with open(model_path + '.json','r') as nmj:
model_json = json.load(nmj)
cats = model_json['categories']
cats.sort(key=lambda x: x['new_id'])
num_cat = len(cats) + 1
model_type = model_json['model_type']
num_cat = 319
model = Model(num_cat, model_type)
labels = [x['name'] for x in cats]
# %%
model.load_state_dict(
torch.load(model_path + '.pth', map_location = torch.device(device))
)
model.eval()
idx_vid = 0
#vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4'
vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4'
vid_path = os.path.abspath(vid_in)
# splitext removes exactly the extension. rstrip('.mp4') strips any trailing
# run of the characters '.', 'm', 'p', '4' (so a timestamp ending in 4 lost a
# digit) and rsplit('.')[0] cut the path at its first dot.
vid_stem = os.path.splitext(vid_path)[0]
img_path = vid_stem + '.jpg'
if not os.path.exists(img_path):
    # Fall back to the thumbnail whose trailing number is one higher;
    # non-numeric suffixes are left alone instead of crashing in int().
    _prefix, _sep, _last = vid_stem.rpartition('_')
    if _sep and _last.isdigit():
        img_path = f'{_prefix}_{int(_last) + 1}.jpg'
scores_json = vid_stem + '.json'
if os.path.exists(scores_json):
    print(f"JSON {scores_json} already exists")
    exit()
vid_dir = os.path.dirname(vid_path)
os.system(f'sudo chmod 777 {vid_dir}')
def score_image(img):
    """Score a single image array with the module-level ``model``.

    The array is indexed as (rows, cols, channels); the channel axis is
    reversed before conversion to a tensor (callers pass OpenCV frames,
    presumably BGR).

    Returns:
        The first output dict with its values converted to plain lists and
        a ``names`` entry added (label ids mapped through ``labels``), or
        ``None`` when the model returns nothing.
    """
    rgb = img[:, :, ::-1].copy()
    batch = T.ToTensor()(rgb).unsqueeze(0)  # add a leading batch dimension

    with torch.no_grad():
        detections = model(batch)

    if len(detections) == 0:
        return None

    scored = detections[0]
    for key in scored:
        scored[key] = scored[key].numpy().tolist()
    scored['names'] = [labels[label_id - 1] for label_id in scored['labels']]
    return scored
results = list()
do_video = True
if os.path.exists(img_path):
print(f'Found thumbnail and scoring {img_path}')
img = cv2.imread(img_path)
outsc = score_image(img)
if outsc is not None:
if len(outsc['boxes']) > 0:
print('Found bird in picture, (still doing video scoring)')
outsc['frame_number'] = 'thumbnail'
print(outsc)
results.append(outsc)
# %%
from pprint import pprint
do_video = True
if do_video:
    # Score every step_frame-th frame by reading sequentially (no seeking).
    cap = cv2.VideoCapture(vid_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    for frame_num in range(0, total_frames, step_frame):
        img = cap.read()[1]
        if img is None:
            break

        outsc = score_image(img)
        if outsc is not None:
            outsc['frame_number'] = frame_num
            pprint(outsc)
            results.append(outsc)

        # One frame was just read, so only step_frame - 1 more must be skipped
        # for the next read to land on frame_num + step_frame. Skipping
        # step_frame frames made the recorded frame_number drift by one per
        # iteration. grab() advances without decoding the frame.
        for _ in range(step_frame - 1):
            if not cap.grab():
                break
    cap.release()
# %%
with open(scores_json,'w') as jj:
json.dump(results, jj, indent=4)
# %%
# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

202
score_video_birds_remote.py Normal file
View File

@@ -0,0 +1,202 @@
# %%
from torch.autograd.grad_mode import F
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
# Raw string: '\D', '\M' and '\i' are invalid escape sequences in a normal literal.
sys.path.append(r'K:\Designs\ML\inaturalist_models')
import json
import cv2
import random
from model import Model
import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
step_frame = 15
no_cuda = socket.gethostname() == 'tree'
device = 'cpu'
model_path = r'K:\Designs\ML\inaturalist_models\models\birds_only' #0210701_202822.json
with open(model_path + '.json', 'r') as nmj:
model_json = json.load(nmj)
cats = model_json['categories']
cats.sort(key=lambda x: x['new_id'])
num_cat = len(cats) + 1
model_type = model_json['model_type']
num_cat = 319
model = Model(num_cat, model_type)
labels = [x['name'] for x in cats]
# %%
model.load_state_dict(
torch.load(model_path + '.pth', map_location=torch.device(device))
)
model.eval()
model.cuda()
idx_vid = 0
#vid_in = '/home/thebears/Videos/Goodstuff/Pond_Oriole_Robin_2021.07.20_11.36.53_6.mp4'
#vid_in = '/srv/ftp/railing/2021/08/09/Railing_00_20210809083418.mp4'
# vid_in = r'L:\railing\2021\09\21\Railing_00_20210921095155.mp4'
vid_in = r'L:\pond\2021\09\20\Pond_00_20210920134828.mp4'
vid_path = os.path.abspath(vid_in)
# splitext removes exactly the extension. rstrip('.mp4') strips any trailing
# run of the characters '.', 'm', 'p', '4' (so a timestamp ending in 4 lost a
# digit) and rsplit('.')[0] cut the path at its first dot.
vid_stem = os.path.splitext(vid_path)[0]
img_path = vid_stem + '.jpg'
if not os.path.exists(img_path):
    # Fall back to the thumbnail whose trailing number is one higher;
    # non-numeric suffixes are left alone instead of crashing in int().
    _prefix, _sep, _last = vid_stem.rpartition('_')
    if _sep and _last.isdigit():
        img_path = f'{_prefix}_{int(_last) + 1}.jpg'
scores_json = vid_stem + '.json'
# %%
ttran = T.ToTensor()
results = list()
do_video = True
images = list()
if os.path.exists(img_path):
print(f'Found thumbnail and scoring {img_path}')
tnail_img = cv2.imread(img_path)[:, :, ::-1].copy()
images.append(ttran(tnail_img))
step_frame = 5
if do_video:
    # Read frames, score them in batches of 16 on the GPU, and collect
    # JSON-serializable results (one list of per-image dicts per batch).
    # NOTE(review): frames are read consecutively; step_frame only bounds the
    # number of iterations, it does not skip frames. Frames left over after
    # the last full batch are not scored here.
    cap = cv2.VideoCapture(vid_path)
    frame_num = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    for frame_num in range(0, total_frames, step_frame):
        frame = cap.read()[1]
        if frame is None:
            # Check BEFORE subscripting: indexing None raised TypeError at
            # the end of the video, so the old check could never trigger.
            break
        img = frame[:, :, ::-1].copy()  # reverse the channel axis
        images.append(ttran(img))
        if len(images) == 16:
            imgtensor = t.stack(images).cuda()
            with torch.no_grad():
                print('Starting to score')
                output = model(imgtensor)
                print('Finished score')
            detached = list()
            for f in output:
                newd = dict()
                for k in ['boxes', 'labels', 'scores']:
                    # Plain lists, not tensors: results is later passed to
                    # json.dump, which cannot serialize torch tensors.
                    newd[k] = f[k].detach().cpu().tolist()
                detached.append(newd)
                print(newd)
            results.append(detached)
            images.clear()
# %%
imgs = np.moveaxis(np.stack(images), 3, 1)
imgtensor = torch.cuda.FloatTensor(imgs)
with torch.no_grad():
output = model(imgtensor)
# %%
image = img[:, :, ::-1].copy()
o = T.ToTensor()(image)
img = o[None, :, :, :]
# %%
# %%
cap = cv2.VideoCapture(vid_path)
frame_num = 0
step_frame = 5
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
images = list()
for frame_num in range(0, total_frames, step_frame):
img = cap.read()[1].copy()
images.append(img)
if len(images) == 16:
break
# %%
imgs = np.moveaxis(np.stack(images), 3, 1)
bt = torch.FloatTensor(imgs)
# %%
from pprint import pprint
step_frame = 5
images = list()
if do_video:
cap = cv2.VideoCapture(vid_path)
frame_num = 0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
for frame_num in range(0, total_frames, step_frame):
img = cap.read()[1]
if img is None:
break
images.append(img)
# %%
# %%
# %%
with open(scores_json, 'w') as jj:
json.dump(results, jj, indent=4)
# def score_image(img):
# image = img[:, :, ::-1].copy()
# o = T.ToTensor()(image)
# img = o[None, :, :, :]
# with torch.no_grad():
# ou = model(img)
# ofscore = None
# if len(ou) > 0:
# ofscore = ou[0]
# for k in ofscore:
# ofscore[k] = ofscore[k].numpy().tolist()
# ofscore['names'] = [labels[x - 1] for x in ofscore['labels']]
# return ofscore
# %%
# vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

143
score_video_trimmed.py Normal file
View File

@@ -0,0 +1,143 @@
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from collections import defaultdict as ddict
import json
import torch
from torchvision import datasets, transforms as T
import numpy as np
import os
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
import json
import cv2
import random
from model import Model
import socket
from torchvision.utils import draw_bounding_boxes
import torch as t
import matplotlib.pyplot as plt
import matplotlib
#vid_path = '/srv/ftp/hummingbird/2021/06/27/Hummingbird_01_20210627101803.mp4'
vid_path = '/srv/ftp/hummingbird/2021/07/25/Hummingbird_01_20210725065610_trimmed.mp4'
no_cuda = socket.gethostname() == 'tree'
device='cpu'
model_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/models/hummingbird'#0210701_202822.json
with open(model_path + '.json','r') as nmj:
model_json = json.load(nmj)
cats = model_json['categories']
cats.sort(key=lambda x: x['new_id'])
num_cat = len(cats) + 1
model_type = model_json['model_type']
model = Model(num_cat, model_type)
labels = [x['name'] for x in cats]
model.load_state_dict(
torch.load(model_path + '.pth', map_location = torch.device(device))
)
model.eval()
vid_dir = os.path.dirname(vid_path)
os.system(f'sudo chmod 777 {vid_dir}')
def score_image(img):
    """Score one image array, or a list of image arrays, with ``model``.

    A list is converted to a list of per-image tensors; a single array is
    converted to one tensor with a leading batch dimension. In both cases
    the channel axis is reversed first.

    Returns:
        The FIRST output dict only (values converted to plain lists, plus a
        ``names`` entry mapped through ``labels``), or ``None`` when the
        model returns nothing. NOTE(review): for list input the outputs for
        the remaining images are discarded — confirm this is intended.
    """
    def as_tensor(frame):
        # Reverse the channel axis, then convert to a tensor.
        return T.ToTensor()(frame[:, :, ::-1].copy())

    if isinstance(img, list):
        use_this = [as_tensor(frame)[:, :, :] for frame in img]
    else:
        use_this = as_tensor(img)[None, :, :, :]

    with torch.no_grad():
        detections = model(use_this)

    if len(detections) == 0:
        return None

    scored = detections[0]
    for key in scored:
        scored[key] = scored[key].numpy().tolist()
    scored['names'] = [labels[label_id - 1] for label_id in scored['labels']]
    return scored
cap = cv2.VideoCapture(vid_path)
frame_num = 0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# %%
imgs = list()
results = list()
for frame_num in range(0, total_frames, 15):
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
img = cap.read()[1]
imgs.append(img)
# outsc = score_image(img)
# if outsc is not None:
# outsc['frame_number'] = frame_num
# results.append(outsc)
# %%
# import time
# import cv2
# video = cv2.VideoCapture(vid_path)
# total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# # %%
# st = time.time()
# while True:
# ret, read = video.read()
# if not ret:
# break
# et = time.time()
# print(et-st)
# st = time.time()
# frs = list()
# for i in range(0,total_frames, 150):
# video.set(cv2.CAP_PROP_POS_FRAMES, i)
# ret, frame = video.read()
# frs.append(frame)
# et = time.time()
# print(et-st)

View File

@@ -0,0 +1,126 @@
# %%
from engine import train_one_epoch, evaluate
from model import Model
from data import iNaturalistDataset
import torch
import os
import datetime as dt
import json
import utils
import pandas as pd
import sys
if not os.path.exists("models/"):
os.mkdir("models")
if torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
default_model_path = default_model_root + ".pth"
default_model_info = default_model_root + ".json"
default_state_path = default_model_root + ".oth"
#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
df = pd.read_csv(csv_path)
species_list = set(list(df[df['count']>1000]['species']))
# %%
#model_type = "fasterrcnn_mobilenet_v3_large_fpn"
#batch_size = 16
model_type = 'fasterrcnn_resnet50_fpn'
batch_size = 8
def run(model_name = None, epoch_start = 0):
    """Train the detection model, either from scratch or from a checkpoint.

    Args:
        model_name: path of an existing checkpoint, with or without a
            ``.pth``/``.json`` suffix. ``None`` starts a fresh run under the
            timestamped default paths and writes the category/model-type
            metadata JSON.
        epoch_start: index of the first epoch to run; training continues up
            to (not including) epoch 10.

    After every epoch the model and optimizer state dicts are saved and the
    model is evaluated on the validation set.
    """
    val_dataset = iNaturalistDataset(
        validation=True,
        species=species_list,
    )
    train_dataset = iNaturalistDataset(
        train=True,
        species=species_list,
    )

    if model_name is None:
        fresh_start = True
        model_info = default_model_info
        model_path = default_model_path
        state_path = default_state_path
    else:
        fresh_start = False
        # Remove a real suffix only. str.rstrip('.pth') strips any trailing
        # run of the characters '.', 'p', 't', 'h', which corrupts names that
        # happen to end in those letters.
        model_root = model_name
        for suffix in ('.pth', '.json'):
            if model_root.endswith(suffix):
                model_root = model_root[:-len(suffix)]
        model_info = model_root + '.json'
        model_path = model_root + '.pth'
        state_path = model_root + '.oth'
        # (a leftover breakpoint() here halted every resumed run)

    if fresh_start:
        with open(model_info, "w") as js_p:
            json.dump(
                {"categories": train_dataset.categories, "model_type": model_type},
                js_p,
                default=str,
                indent=4,
            )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )

    num_classes = train_dataset.num_classes
    model = Model(num_classes, model_type)
    model.to(device)

    if not fresh_start:
        model.load_state_dict(
            torch.load(model_path, map_location = torch.device(device))
        )

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    if os.path.exists(state_path):
        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    num_epochs = 10
    # Pass the true epoch index on resume rather than restarting at 0.
    # NOTE(review): engines derived from torchvision's reference code apply
    # LR warm-up when epoch == 0 — confirm against the local engine module.
    for epoch in range(epoch_start, num_epochs):
        train_one_epoch(
            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
        lr_scheduler.step()
        torch.save(model.state_dict(), model_path)
        torch.save(optimizer.state_dict(), state_path)
        evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
if len(sys.argv) == 3:
model_name = sys.argv[1]
epoch_start = int(sys.argv[2])
run(model_name = model_name, epoch_start = epoch_start)
else:
run()
# run()

View File

@@ -0,0 +1,139 @@
# %%
from engine import train_one_epoch, evaluate
from model import Model
from data import iNaturalistDataset
import torch
import os
import datetime as dt
import json
import utils
import pandas as pd
import sys
if not os.path.exists("models/"):
os.mkdir("models")
if torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
default_model_path = default_model_root + ".pth"
default_model_info = default_model_root + ".json"
default_state_path = default_model_root + ".oth"
default_sched_path = default_model_root + ".sth"
#species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
df = pd.read_csv(csv_path)
species_list = set(list(df[df['count']>1000]['species']))
#model_type = "fasterrcnn_mobilenet_v3_large_fpn"
#batch_size = 16
model_type = 'fasterrcnn_resnet50_fpn'
batch_size = 8
num_epochs = 10
def run(model_name = None, epoch_start = 0):
    """Train the detection model, either from scratch or from a checkpoint.

    Args:
        model_name: path of an existing checkpoint, with or without a
            ``.pth``/``.json`` suffix. ``None`` starts a fresh run under the
            timestamped default paths and writes the category/model-type
            metadata JSON.
        epoch_start: index of the first epoch to run; training continues up
            to (not including) the module-level ``num_epochs``.

    After every epoch the model, optimizer and LR-scheduler state dicts are
    saved and the model is evaluated on the validation set.
    """
    val_dataset = iNaturalistDataset(
        validation=True,
        species=species_list,
    )
    train_dataset = iNaturalistDataset(
        train=True,
        species=species_list,
    )
    print(len(val_dataset.categories))
    print(len(train_dataset.categories))

    if model_name is None:
        fresh_start = True
        model_info = default_model_info
        model_path = default_model_path
        state_path = default_state_path
        sched_path = default_sched_path
    else:
        fresh_start = False
        # Remove a real suffix only. str.rstrip('.pth') strips any trailing
        # run of the characters '.', 'p', 't', 'h', which corrupts names that
        # happen to end in those letters.
        model_root = model_name
        for suffix in ('.pth', '.json'):
            if model_root.endswith(suffix):
                model_root = model_root[:-len(suffix)]
        model_info = model_root + '.json'
        model_path = model_root + '.pth'
        state_path = model_root + '.oth'
        sched_path = model_root + '.sth'

    if fresh_start:
        with open(model_info, "w") as js_p:
            json.dump(
                {"categories": train_dataset.categories, "model_type": model_type},
                js_p,
                default=str,
                indent=4,
            )
    else:
        print('Continuing run')

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=10,
        collate_fn=utils.collate_fn,
    )

    # One extra class on top of the dataset categories.
    num_classes = len(train_dataset.categories) + 1
    model = Model(num_classes, model_type)
    model.to(device)

    if not fresh_start:
        print('Loading state dict')
        model.load_state_dict(
            torch.load(model_path, map_location = torch.device(device))
        )

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    if os.path.exists(state_path):
        print('Loading optimizer')
        optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    if os.path.exists(sched_path):
        print('Loading scheduler')
        lr_scheduler.load_state_dict(torch.load(sched_path, map_location = torch.device(device)))

    for epoch in range(epoch_start, num_epochs):
        print('Epoch '+str(epoch))
        train_one_epoch(
            model, optimizer, train_data_loader, device, epoch, print_freq=10 )
        lr_scheduler.step()
        torch.save(model.state_dict(), model_path)
        torch.save(optimizer.state_dict(), state_path)
        torch.save(lr_scheduler.state_dict(), sched_path)
        evaluate(model, val_data_loader, device=device)
if __name__ == "__main__":
if len(sys.argv) == 3:
model_name = sys.argv[1]
epoch_start = int(sys.argv[2])
run(model_name = model_name, epoch_start = epoch_start)
else:
run()
# run()

View File

@@ -7,7 +7,8 @@ import os
import datetime as dt import datetime as dt
import json import json
import utils import utils
import pandas as pd
import sys
if not os.path.exists("models/"): if not os.path.exists("models/"):
os.mkdir("models") os.mkdir("models")
@@ -16,16 +17,27 @@ if torch.cuda.is_available():
else: else:
device = torch.device("cpu") device = torch.device("cpu")
model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S") default_model_root = "models/" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = model_root + ".pth" default_model_path = default_model_root + ".pth"
model_info = model_root + ".json" default_model_info = default_model_root + ".json"
default_state_path = default_model_root + ".oth"
species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"]) #species_list = set(["Poecile atricapillus", "Archilochus colubris", "Icterus galbula"])
model_type = "fasterrcnn_mobilenet_v3_large_fpn"
csv_path = '/home/thebears/Seafile/Designs/ML/inaturalist_models/species_occurence.csv'
df = pd.read_csv(csv_path)
species_list = set(list(df[df['count']>1000]['species']))
#model_type = "fasterrcnn_mobilenet_v3_large_fpn"
#batch_size = 16
model_type = 'fasterrcnn_resnet50_fpn'
batch_size = 8
def run():
def run(model_name = None, epoch_start = 0):
val_dataset = iNaturalistDataset( val_dataset = iNaturalistDataset(
validation=True, validation=True,
species=species_list, species=species_list,
@@ -35,27 +47,41 @@ def run():
species=species_list, species=species_list,
) )
with open(model_info, "w") as js_p:
json.dump( if model_name is None:
{"categories": train_dataset.categories, "model_type": model_type}, fresh_start = True
js_p, model_info = default_model_info
default=str, model_path = default_model_path
indent=4, state_path = default_state_path
) else:
fresh_start = False
model_info = model_name.rstrip('.pth').rstrip('.json')+'.json'
model_path = model_info.rstrip('.json')+'.pth'
state_path = model_info.rstrip('.json')+'.oth'
if fresh_start:
with open(model_info, "w") as js_p:
json.dump(
{"categories": train_dataset.categories, "model_type": model_type},
js_p,
default=str,
indent=4,
)
train_data_loader = torch.utils.data.DataLoader( train_data_loader = torch.utils.data.DataLoader(
train_dataset, train_dataset,
batch_size=16, batch_size=batch_size,
shuffle=True, shuffle=True,
num_workers=4, num_workers=10,
collate_fn=utils.collate_fn, collate_fn=utils.collate_fn,
) )
val_data_loader = torch.utils.data.DataLoader( val_data_loader = torch.utils.data.DataLoader(
val_dataset, val_dataset,
batch_size=16, batch_size=batch_size,
shuffle=True, shuffle=True,
num_workers=4, num_workers=10,
collate_fn=utils.collate_fn, collate_fn=utils.collate_fn,
) )
@@ -63,20 +89,38 @@ def run():
model = Model(num_classes, model_type) model = Model(num_classes, model_type)
model.to(device) model.to(device)
if not fresh_start:
model.load_state_dict(
torch.load(model_path, map_location = torch.device(device))
)
params = [p for p in model.parameters() if p.requires_grad] params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
if os.path.exists(state_path):
optimizer.load_state_dict(torch.load(state_path, map_location = torch.device(device)))
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
num_epochs = 10 num_epochs = 10
for epoch in range(num_epochs):
for epoch in range(epoch_start, num_epochs):
train_one_epoch( train_one_epoch(
model, optimizer, train_data_loader, device, epoch, print_freq=10 model, optimizer, train_data_loader, device, epoch, print_freq=10 )
)
lr_scheduler.step() lr_scheduler.step()
torch.save(model.state_dict(), model_path) torch.save(model.state_dict(), model_path)
torch.save(optimizer.state_dict(), state_path)
evaluate(model, val_data_loader, device=device) evaluate(model, val_data_loader, device=device)
if __name__ == "__main__": if __name__ == "__main__":
run() if len(sys.argv) == 3:
model_name = sys.argv[1]
epoch_start = int(sys.argv[2])
run(model_name = model_name, epoch_start = epoch_start)
else:
run()
# run()

42
trim_in_directory.py Normal file
View File

@@ -0,0 +1,42 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
from trim_video import trim_video
# Root directory to scan, taken from the first command-line argument
# (for example '/srv/ftp/hummingbird/2021').
rtpath = sys.argv[1]

fnames = set()

# Every '.json' file below rtpath whose name does not contain 'trimmed',
# stored as a full path.
have_json = {
    os.path.join(parent, name)
    for parent, _, names in os.walk(rtpath)
    for name in names
    if name.endswith('.json') and 'trimmed' not in name
}
files_to_score = have_json
def try_catch_chunk(jsons):
    """Run ``trim_video`` on each path in ``jsons`` without ever raising.

    Args:
        jsons: A single JSON path, or a list of JSON paths.

    Each path is handled in its own ``try`` block, so one failing video is
    reported and skipped instead of aborting the remaining paths of the
    chunk.  Nothing is returned.
    """
    if not isinstance(jsons, list):
        jsons = [jsons]
    for x in jsons:
        try:
            trim_video(x)
        except Exception as e:
            # Best effort: say which file failed, then carry on with the next.
            print(f'{x}: {e}')
# The collected paths are handed to the pool in batches of n.
n = 25
lst = list(files_to_score)
chunks = []
for start in range(0, len(lst), n):
    chunks.append(lst[start:start + n])

# %%
if __name__ == '__main__':
    # Eight worker processes; every call receives one batch of paths.
    with Pool(8) as p:
        output = p.map(try_catch_chunk, chunks)

View File

@@ -0,0 +1,113 @@
from ffprobe import FFProbe
import ffmpeg
import shutil
import json
import math
import os
def execute_trim_video(js_path, start_frame, end_frame, empty_video = False):
    """Trim the movie belonging to ``js_path`` to a padded frame range.

    The movie is looked up next to the JSON (same base name, ``.mp4``).
    A ``*_trimmed.json`` is written beside it and, unless ``empty_video`` is
    true, ffmpeg produces a ``*_trimmed.mp4`` by stream copy.  The original
    movie and JSON are deleted when the video is empty or ffmpeg exits with
    status 0.

    Args:
        js_path: Path of the JSON file; it holds a list of dicts that each
            carry a ``frame_number`` (a number, or the string 'thumbnail').
        start_frame: First frame of interest, before padding.
        end_frame: Last frame of interest, before padding.
        empty_video: When true, no trimmed movie is produced.
    """
    # Function-scope import: subprocess is not among this file's imports.
    import subprocess

    with open(js_path, 'r') as jj:
        data = json.load(jj)

    # Archive location: the movie directory with an 'originals' component
    # inserted three components from the end.
    archive_dir_p = os.path.dirname(js_path).split('/')
    archive_dir_p.insert(-3, 'originals')
    archive_dir = '/'.join(archive_dir_p) + '/'
    # exist_ok removes the check-then-create race that occurs when several
    # processes trim files from the same directory at once.
    # NOTE(review): the directory is created although the originals are
    # deleted below rather than moved into it - confirm it is still wanted.
    os.makedirs(archive_dir, exist_ok=True)

    rt_name = os.path.splitext(js_path)[0]
    movie_path = rt_name + '.mp4'
    movie_new_path = rt_name + '_trimmed.mp4'
    js_new_path = rt_name + '_trimmed.json'

    frame_pad = 5 #seconds

    # Frame rate and duration come from the first video stream.
    vid_stream = FFProbe(movie_path).video[0]
    framerate = vid_stream.framerate
    duration = float(vid_stream.duration)
    max_frames = duration * framerate

    # Widen the range by the padding and clamp it to the movie.
    start_frame = max(0, start_frame - frame_pad * framerate)
    end_frame = min(max_frames, end_frame + frame_pad * framerate)
    start_time = start_frame / framerate
    end_time = end_frame / framerate
    trim_duration = end_time - start_time

    # Keep thumbnail entries untouched; keep in-range frames re-based so that
    # the new frame 0 is the start of the trimmed movie.
    to_keep = list()
    for x in data:
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            to_keep.append(x)
        elif start_frame <= json_frame_num <= end_frame:
            x['frame_number_original'] = x['frame_number']
            x['frame_number'] -= start_frame
            to_keep.append(x)

    if os.path.exists(movie_new_path):
        os.remove(movie_new_path)

    return_code = None
    if not empty_video:
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters reach ffmpeg intact.
        cmd = [
            'ffmpeg',
            '-ss', str(start_time),
            '-i', movie_path,
            '-ss', '0',
            '-t', str(trim_duration),
            '-c', 'copy',
            '-map', '0',
            movie_new_path,
        ]
        try:
            return_code = subprocess.run(cmd).returncode
        except OSError as err:
            # ffmpeg could not be started at all: treat it as a failed trim so
            # the originals are kept, as a non-zero exit status would.
            print(f'ffmpeg could not be started: {err}')
            return_code = -1

    # NOTE(review): assumed to run for empty videos as well (statement order
    # suggests function level) - confirm against the intended nesting.
    with open(js_new_path, 'w') as jnp:
        json.dump(to_keep, jnp, indent=4)

    if empty_video or return_code == 0:
        os.remove(movie_path)
        os.remove(js_path)
def trim_video(jspath, score_threshold=0.25):
    """Find the frame span with detections in ``jspath`` and trim to it.

    Args:
        jspath: Path of a JSON file holding a list of dicts with ``boxes``,
            ``scores`` and ``frame_number`` keys.
        score_threshold: An entry counts as a detection when its best score
            is strictly greater than this value.  Defaults to 0.25.

    Entries without boxes are ignored.  If an entry with boxes has the frame
    number 'thumbnail', nothing is trimmed at all.  Otherwise
    ``execute_trim_video`` is called with the smallest and largest detected
    frame numbers; when no entry qualifies the span stays (inf, -inf) and
    ``empty_video`` is passed as true.
    """
    with open(jspath, 'r') as jj:
        data = json.load(jj)

    start_frame = math.inf
    end_frame = -math.inf
    empty_video = True

    for x in data:
        if len(x['boxes']) == 0:
            continue
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            # A thumbnail entry that carries boxes disables trimming.
            return
        if max(x['scores']) > score_threshold:
            start_frame = min(start_frame, json_frame_num)
            end_frame = max(end_frame, json_frame_num)
            empty_video = False

    execute_trim_video(jspath, start_frame, end_frame, empty_video = empty_video)

106
trim_video.py Normal file
View File

@@ -0,0 +1,106 @@
from ffprobe import FFProbe
import ffmpeg
import shutil
import json
import math
import os
def execute_trim_video(js_path, start_frame, end_frame, empty_video = False):
    """Trim the movie belonging to ``js_path`` and archive the originals.

    The movie is looked up next to the JSON (same base name, ``.mp4``).
    A ``*_trimmed.json`` is written beside it and, unless ``empty_video`` is
    true, ffmpeg produces a ``*_trimmed.mp4`` by stream copy.  The original
    movie and JSON are moved into an ``original`` sub-directory when the
    video is empty or ffmpeg exits with status 0.

    Args:
        js_path: Path of the JSON file; it holds a list of dicts that each
            carry a ``frame_number`` (a number, or the string 'thumbnail').
        start_frame: First frame of interest, before padding.
        end_frame: Last frame of interest, before padding.
        empty_video: When true, no trimmed movie is produced.
    """
    # Function-scope import: subprocess is not among this file's imports.
    import subprocess

    with open(js_path, 'r') as jj:
        data = json.load(jj)

    movie_dir = os.path.dirname(js_path)
    archive_dir = os.path.join(movie_dir, 'original') + '/'
    # exist_ok removes the check-then-create race that occurs when several
    # processes trim files from the same directory at once.
    os.makedirs(archive_dir, exist_ok=True)

    rt_name = os.path.splitext(js_path)[0]
    movie_path = rt_name + '.mp4'
    movie_new_path = rt_name + '_trimmed.mp4'
    js_new_path = rt_name + '_trimmed.json'

    frame_pad = 5 #seconds

    # Frame rate and duration come from the first video stream.
    vid_stream = FFProbe(movie_path).video[0]
    framerate = vid_stream.framerate
    duration = float(vid_stream.duration)
    max_frames = duration * framerate

    # Widen the range by the padding and clamp it to the movie.
    start_frame = max(0, start_frame - frame_pad * framerate)
    end_frame = min(max_frames, end_frame + frame_pad * framerate)
    start_time = start_frame / framerate
    end_time = end_frame / framerate
    trim_duration = end_time - start_time

    # Keep thumbnail entries untouched; keep in-range frames re-based so that
    # the new frame 0 is the start of the trimmed movie.
    to_keep = list()
    for x in data:
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            to_keep.append(x)
        elif start_frame <= json_frame_num <= end_frame:
            x['frame_number_original'] = x['frame_number']
            x['frame_number'] -= start_frame
            to_keep.append(x)

    if os.path.exists(movie_new_path):
        os.remove(movie_new_path)

    return_code = None
    if not empty_video:
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters reach ffmpeg intact.
        cmd = [
            'ffmpeg',
            '-ss', str(start_time),
            '-i', movie_path,
            '-ss', '0',
            '-t', str(trim_duration),
            '-c', 'copy',
            '-map', '0',
            movie_new_path,
        ]
        try:
            return_code = subprocess.run(cmd).returncode
        except OSError as err:
            # ffmpeg could not be started at all: treat it as a failed trim so
            # the originals stay in place, as a non-zero exit status would.
            print(f'ffmpeg could not be started: {err}')
            return_code = -1

    # NOTE(review): assumed to run for empty videos as well (statement order
    # suggests function level) - confirm against the intended nesting.
    with open(js_new_path, 'w') as jnp:
        json.dump(to_keep, jnp, indent=4)

    if empty_video or return_code == 0:
        shutil.move(movie_path, archive_dir)
        shutil.move(js_path, archive_dir)
def trim_video(jspath, score_threshold=0.05):
    """Find the frame span with detections in ``jspath`` and trim to it.

    Args:
        jspath: Path of a JSON file holding a list of dicts with ``boxes``,
            ``scores`` and ``frame_number`` keys.
        score_threshold: An entry counts as a detection when its best score
            is strictly greater than this value.  Defaults to 0.05.

    Entries without boxes are ignored.  If an entry with boxes has the frame
    number 'thumbnail', nothing is trimmed at all.  Otherwise
    ``execute_trim_video`` is called with the smallest and largest detected
    frame numbers; when no entry qualifies the span stays (inf, -inf) and
    ``empty_video`` is passed as true.
    """
    with open(jspath, 'r') as jj:
        data = json.load(jj)

    start_frame = math.inf
    end_frame = -math.inf
    empty_video = True

    for x in data:
        if len(x['boxes']) == 0:
            continue
        json_frame_num = x['frame_number']
        if isinstance(json_frame_num, str) and json_frame_num == 'thumbnail':
            # A thumbnail entry that carries boxes disables trimming.
            return
        if max(x['scores']) > score_threshold:
            start_frame = min(start_frame, json_frame_num)
            end_frame = max(end_frame, json_frame_num)
            empty_video = False

    execute_trim_video(jspath, start_frame, end_frame, empty_video = empty_video)

58
undo_move_originals.py Normal file
View File

@@ -0,0 +1,58 @@
import os
import random
from multiprocessing import Pool
import sys
sys.path.append('/home/thebears/Seafile/Designs/ML')
#rtpath = '/srv/ftp/hummingbird/2021'
# Absolute root of the tree to process, from the first command-line argument
# (for example '/srv/ftp/hummingbird/2021/07/').
rtpath = os.path.abspath(sys.argv[1])

# Directories whose last three path components are all numeric
# (presumably year/month/day folders - TODO confirm).
didir = [
    di
    for di, dnames, fns in os.walk(rtpath)
    if all(part.isnumeric() for part in di.split('/')[-3:])
]
def list_files(path):
    """Yield the names of the entries directly inside ``path`` that are files."""
    yield from (
        entry
        for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry))
    )
import shutil
import os
for cdr in didir:
    # Every file that lives somewhere below cdr but not directly in it.
    files_origin = [
        os.path.join(di, f)
        for di, dnames, fns in os.walk(cdr)
        if di != cdr
        for f in fns
    ]

    # Move each of them up into cdr itself, keeping only its base name.
    for src_file in files_origin:
        targ_file = os.path.join(cdr, os.path.basename(src_file))
        os.rename(src_file, targ_file)

    # Sub-directories that hold no files directly are scheduled for removal.
    dirs_purge = []
    for di, dnames, fns in os.walk(cdr):
        for d in dnames:
            cpath = os.path.join(di, d)
            if not list(list_files(cpath)):
                dirs_purge.append(cpath)

    # A parent may already have been removed together with its children,
    # hence the existence check.
    for d in dirs_purge:
        if os.path.exists(d):
            shutil.rmtree(d)

View File

@@ -0,0 +1,322 @@
from collections import defaultdict, deque
import datetime
import errno
import os
import sys
import time
import torch
import torch.distributed as dist
class SmoothedValue(object):
    """Keep a bounded window of recent values plus running totals.

    The window (``deque``) feeds ``median``, ``avg``, ``max`` and ``value``;
    the running ``total`` and ``count`` feed ``global_avg``.
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Append ``value`` to the window and add it ``n`` times to the totals."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor(
            [self.count, self.total], dtype=torch.float64, device="cuda"
        )
        dist.barrier()
        dist.all_reduce(packed)
        count, total = packed.tolist()
        self.count = int(count)
        self.total = total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (computed in float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Running total divided by running count."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently appended value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)
def all_gather(data):
    """
    Collect one picklable object from every rank.

    Args:
        data: any picklable object
    Returns:
        list[data]: the objects gathered from each rank; with a world size
        of 1 this is simply ``[data]``
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    gathered = [None for _ in range(world_size)]
    dist.all_gather_object(gathered, data)
    return gathered
def reduce_dict(input_dict, average=True):
    """
    Reduce the values of ``input_dict`` across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): divide the reduced values by the world size when
            true, otherwise leave them as reduced
    Returns:
        A dict with the same keys holding the reduced values.  With fewer
        than two processes ``input_dict`` itself is returned unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        # Sorted keys give every process the same stacking order.
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = dict(zip(names, values))
    return reduced_dict
class MetricLogger(object):
    """Named collection of meters with periodic progress printing."""

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed each keyword's value (number or tensor) to the meter of that name."""
        for name, raw in kwargs.items():
            value = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(value, (float, int))
            self.meters[name].update(value)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: expose meters as
        # attributes, then fall back to the instance dict.
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        message = "'{}' object has no attribute '{}'".format(
            type(self).__name__, attr
        )
        raise AttributeError(message)

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Ask every meter to synchronize itself between processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``, replacing any existing one."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress lines.

        A line is printed every ``print_freq`` items and for the last item,
        and a total-time summary is printed once the iterable is exhausted.
        ``iterable`` must support ``len()``.
        """
        header = header if header else ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")
        total = len(iterable)
        space_fmt = ":" + str(len(str(total))) + "d"

        fields = [
            header,
            "[{0" + space_fmt + "}/{1}]",
            "eta: {eta}",
            "{meters}",
            "time: {time}",
            "data: {data}",
        ]
        if torch.cuda.is_available():
            fields.append("max mem: {memory:.0f}")
        log_msg = self.delimiter.join(fields)

        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for this item.
            data_time.update(time.time() - end)
            yield obj
            # Time for the whole iteration, including the consumer's work.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == total - 1:
                eta_seconds = iter_time.global_avg * (total - i)
                details = {
                    "eta": str(datetime.timedelta(seconds=int(eta_seconds))),
                    "meters": str(self),
                    "time": str(iter_time),
                    "data": str(data_time),
                }
                if torch.cuda.is_available():
                    details["memory"] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, total, **details))
            end = time.time()

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print(
            "{} Total time: {} ({:.4f} s / it)".format(
                header, total_time_str, total_time / total
            )
        )
def collate_fn(batch):
    """Transpose ``batch``: a sequence of samples becomes a tuple of per-field tuples."""
    columns = zip(*batch)
    return tuple(columns)
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a ``LambdaLR`` whose multiplier ramps linearly up to 1.

    The multiplier is ``warmup_factor`` at step 0, rises linearly, and is 1
    from step ``warmup_iters`` onwards.
    """

    def lr_multiplier(step):
        if step < warmup_iters:
            alpha = float(step) / warmup_iters
            return warmup_factor * (1 - alpha) + alpha
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_multiplier)
def mkdir(path):
    """Create ``path`` with any missing parents; an already existing path is not an error."""
    try:
        os.makedirs(path)
    except FileExistsError:
        # FileExistsError is the OSError subclass for errno EEXIST; every
        # other OSError still propagates to the caller.
        pass
def setup_for_distributed(is_master):
    """
    Replace the built-in ``print`` with a wrapper that accepts ``force``.

    The wrapper removes the ``force`` keyword before delegating to the
    original ``print``.  Because the printing condition is hard-wired to be
    true, output is currently produced in every process, whatever
    ``is_master`` and ``force`` are.
    """
    import builtins

    original_print = builtins.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        # The trailing True keeps printing enabled unconditionally.
        should_print = is_master or force or True
        if should_print:
            original_print(*args, **kwargs)

    builtins.print = print
def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialized."""
    if dist.is_available() and dist.is_initialized():
        return True
    return False
def get_world_size():
    """Return the distributed world size, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
def get_rank():
    """Return this process's distributed rank, or 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
def is_main_process():
    """Return True when this process has rank 0."""
    rank = get_rank()
    return rank == 0
def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only in the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
def init_distributed_mode(args):
    """Fill in distributed settings on ``args`` and start the process group.

    Rank information is read from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` when
    the first two are set, otherwise from ``SLURM_PROCID``.  With neither
    present, ``args.distributed`` is set to False and the function returns
    without touching torch.distributed.  ``args.dist_url`` (and, on the
    SLURM path, ``args.world_size``) must already be set by the caller.
    """
    env = os.environ
    launched_with_rank = "RANK" in env and "WORLD_SIZE" in env

    if not launched_with_rank and "SLURM_PROCID" not in env:
        print("Not using distributed mode")
        args.distributed = False
        return

    if launched_with_rank:
        args.rank = int(env["RANK"])
        args.world_size = int(env["WORLD_SIZE"])
        args.gpu = int(env["LOCAL_RANK"])
    else:
        args.rank = int(env["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()

    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"

    banner = "| distributed init (rank {}): {}".format(args.rank, args.dist_url)
    print(banner, flush=True)

    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

View File

@@ -256,8 +256,8 @@ def setup_for_distributed(is_master):
def print(*args, **kwargs): def print(*args, **kwargs):
force = kwargs.pop("force", False) force = kwargs.pop("force", False)
if is_master or force: if is_master or force or True:
builtin_print(*args, **kwargs) print(*args, **kwargs)
__builtin__.print = print __builtin__.print = print