Files
inaturalist_pytorch_model/purge_videos (SFConflict ispatel@live.com 2021-08-11-09-24-52).py
2021-09-27 16:02:11 -04:00

60 lines
1.4 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Purge videos, thumbnails and JSON files that have no confident score.

Walks the current working directory for ``*.json`` files.  Each file is
expected to hold a list of entries, each carrying a ``'scores'`` list.  When a
non-empty file has no entry whose best score reaches ``SCORE_THRESHOLD``, the
JSON file is deleted together with the ``.mp4`` and ``.jpg`` that share its
base name.

The deletion only runs when the file is executed as a script, so importing it
(for example to test ``collect_purge_targets``) never removes anything.
"""
import json
import os

# Alternative fixed root: '/srv/ftp/hummingbird/2021/07/21'
SCORE_THRESHOLD = 0.6


def _strip_suffix(text, suffix):
    """Return ``text`` without a trailing ``suffix`` (exact match only).

    ``str.rstrip`` must not be used for this: it strips any trailing run of
    the given *characters*, so ``'videos.json'.rstrip('.json')`` is ``'video'``.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _find_json_files(root):
    """Return the paths of all ``*.json`` files found below ``root``."""
    found = []
    for cdir, _, files in os.walk(root):
        found.extend(os.path.join(cdir, f) for f in files if f.endswith('.json'))
    return found


def _has_confident_score(contents, threshold=SCORE_THRESHOLD):
    """Return True if any entry's highest score is at least ``threshold``."""
    return any(
        len(x['scores']) > 0 and max(x['scores']) >= threshold
        for x in contents
    )


def _bump_image_index(img_path):
    """Return ``img_path`` with its ``_``-separated number incremented by one.

    For paths containing ``'trimmed'`` the number is the second-to-last
    ``_``-separated segment; otherwise it is the last segment (before
    ``.jpg``).  Returns ``None`` when that segment is missing or not an
    integer, so the caller can fall back instead of aborting the whole run.
    """
    parts = img_path.split('_')
    try:
        if 'trimmed' in img_path:
            parts[-2] = str(int(_strip_suffix(parts[-2], '.jpg')) + 1)
        else:
            parts[-1] = str(int(_strip_suffix(parts[-1], '.jpg')) + 1) + '.jpg'
    except (ValueError, IndexError):
        return None
    return '_'.join(parts)


def sidecar_paths(f_json):
    """Return the set of paths to delete for one JSON file.

    The set holds the JSON file itself, the ``.mp4`` with the same base name
    and a ``.jpg``.  If the same-named ``.jpg`` does not exist, the image
    whose index is one higher is targeted instead (when such a name can be
    derived).
    """
    base = _strip_suffix(f_json, '.json')
    vid_path = base + '.mp4'
    img_path = base + '.jpg'
    if not os.path.exists(img_path):
        bumped = _bump_image_index(img_path)
        if bumped is not None:
            img_path = bumped
    return {vid_path, img_path, f_json}


def collect_purge_targets(root):
    """Return the set of file paths that should be deleted below ``root``.

    Nothing is deleted here.  JSON files with empty contents are left alone;
    only files with at least one entry and no confident score are selected.
    Paths in the result are not guaranteed to exist.
    """
    delete_list = set()
    for f_json in _find_json_files(root):
        with open(f_json) as ff:
            contents = json.load(ff)
        if len(contents) > 0 and not _has_confident_score(contents):
            delete_list.update(sidecar_paths(f_json))
    return delete_list


def main():
    """Delete every purge target found below the current working directory."""
    rtdir = os.getcwd()
    delete_list = collect_purge_targets(rtdir)
    print(f'Deleting {len(delete_list)} files')
    for f in delete_list:
        # Candidate paths are derived from names, so some may not exist.
        if os.path.exists(f):
            os.remove(f)


if __name__ == '__main__':
    main()