Files
inaturalist_pytorch_model/purge_videos (SFConflict ispatel@live.com 2021-08-11-09-24-52).py
2021-09-27 16:02:11 -04:00

60 lines
1.4 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Purge videos that contain no confident detection.
#
# Every ``*.json`` file under the root directory is read as a list of
# detection records, each carrying a ``'scores'`` list.  When a non-empty
# JSON file has no record whose best score reaches the threshold, the JSON
# file, the ``.mp4`` with the same stem and the matching ``.jpg`` thumbnail
# are deleted.
import json
import os

# The root used to be hard-coded; the script now works on the current
# working directory by default.
#rtdir = '/srv/ftp/hummingbird/2021/07/21'

# A record counts as a confident detection when its best score is >= this.
SCORE_THRESHOLD = 0.6

JSON_EXT = '.json'


def _find_json_files(rtdir):
    """Return the paths of all ``.json`` files found anywhere below *rtdir*."""
    return [
        os.path.join(cdir, name)
        for cdir, _, files in os.walk(rtdir)
        for name in files
        if name.endswith(JSON_EXT)
    ]


def _has_confident_detection(contents, threshold):
    """Return True if any record's highest score is at least *threshold*.

    Records with an empty ``'scores'`` list are ignored.
    NOTE(review): assumes *contents* is an iterable of dicts that each have a
    ``'scores'`` key; a record without that key raises ``KeyError``.
    """
    return any(
        record['scores'] and max(record['scores']) >= threshold
        for record in contents
    )


def _next_image_path(img_path):
    """Return *img_path* with its numeric name component increased by one.

    The file name is split on ``'_'``.  The number is the last component, or
    the second-to-last one when the name contains ``'trimmed'`` (for example
    ``clip_30_trimmed.jpg`` -> ``clip_31_trimmed.jpg``).  Only the file name
    is inspected, so underscores or "trimmed" in directory names are
    harmless.  If the component is missing or not an integer, *img_path* is
    returned unchanged.
    """
    head, name = os.path.split(img_path)
    stem, ext = os.path.splitext(name)
    parts = stem.split('_')
    idx = -2 if 'trimmed' in stem else -1
    try:
        parts[idx] = str(int(parts[idx]) + 1)
    except (IndexError, ValueError):
        # No usable number in the name: leave the path alone so the caller
        # simply finds nothing to delete for the thumbnail.
        return img_path
    return os.path.join(head, '_'.join(parts) + ext)


def _related_files(json_path):
    """Return the set {video, thumbnail, json} of paths tied to *json_path*.

    The thumbnail normally shares the JSON file's stem; when that file does
    not exist, the name with the next-higher number is used instead.
    """
    # Slice the suffix off.  str.rstrip('.json') would strip any trailing
    # run of the characters '.', 'j', 's', 'o', 'n' rather than the suffix,
    # and rstrip('.mp4') would also eat a trailing digit '4' of the stem.
    stem = json_path[:-len(JSON_EXT)]
    vid_path = stem + '.mp4'
    img_path = stem + '.jpg'
    if not os.path.exists(img_path):
        img_path = _next_image_path(img_path)
    return {vid_path, img_path, json_path}


def main(rtdir=None, threshold=SCORE_THRESHOLD):
    """Delete videos, thumbnails and JSON files lacking a confident detection.

    Args:
        rtdir: Directory to scan recursively; defaults to the current
            working directory.
        threshold: Minimum best score for a record to count as a detection.

    Returns:
        The set of paths that were selected for deletion (some of them may
        not have existed on disk).

    Raises:
        json.JSONDecodeError: If a JSON file cannot be parsed.  All files are
            parsed before anything is removed, so nothing is deleted then.
    """
    if rtdir is None:
        rtdir = os.getcwd()

    delete_list = set()
    for json_path in _find_json_files(rtdir):
        with open(json_path, encoding='utf-8') as fh:
            contents = json.load(fh)
        # Files with no records at all are deliberately left untouched.
        if contents and not _has_confident_detection(contents, threshold):
            delete_list.update(_related_files(json_path))

    print(f'Deleting {len(delete_list)} files')
    for path in delete_list:
        try:
            os.remove(path)
        except FileNotFoundError:
            # The video or thumbnail may legitimately be missing.
            pass
    return delete_list


if __name__ == '__main__':
    main()