#!/usr/bin/python
"""Consume scored-detection messages and remove video file sets without detections.

For every message read from the ``videos_scored_detection`` topic the file set
belonging to ``value['filepath']`` is evaluated by ``exec_file_remove_logic``
and the original message is forwarded to a follow-up topic chosen from the
outcome (with nuggets / without nuggets / no json).
"""
import glob
import argparse
import os
import json
from CommonCode.util import exit_if_not_ipython, get_cset_for_file_matching, is_ipython
from CommonCode import kwq
from CommonCode.settings import get_logger, LogColorize
from kafka import TopicPartition
from kafka.structs import OffsetAndMetadata
from pprint import pprint

# Formatter applied to file paths in some of the log messages below.
pfm = LogColorize.remove_without_nuggets
logger = get_logger(__name__,'/var/log/ml_vision_logs/02_remove_without_nuggets', stdout=True, systemd=False)

input_topic = kwq.TOPICS.videos_scored_detection
client_id='obj_detector_remove_without_nuggets'
group_id = client_id

consumer = kwq.create_consumer(input_topic, group_id = group_id, client_id = client_id)
# The consumer is pinned explicitly to partition 0 of the input topic.
c_part = TopicPartition(input_topic, 0)
consumer.assign([c_part])

producer = kwq.producer


class RESULT_TYPE():
    """Outcome codes returned by ``exec_file_remove_logic``."""
    NOTHING = 0   # no decision was reached
    HAS_OBJS = 1  # skipped: threshold disables removal, or a '.has_objs' entry exists
    KEPT = 2      # detections exceeded the configured ratio; marker file written
    REMOVED = 3   # detections did not exceed the ratio; files were deleted
    NO_JSON = 4   # no detection json available, nothing was touched


def get_ok_to_delete(file_path):
    """Collect removal settings from ``settings.json`` files above ``file_path``.

    Walks up to five parent directories of ``file_path`` and merges every
    ``settings.json`` found into the defaults.

    Args:
        file_path: Path of the file whose parent directories are searched.

    Returns:
        dict with at least ``class_threshold`` and
        ``frames_with_dets_threshold``. The defaults (10000 / -10000) apply
        when no settings file overrides them.
    """
    max_look = 5
    c_path = file_path
    settings_default = {'class_threshold':10000, 'frames_with_dets_threshold': -10000}

    for _ in range(max_look):
        c_path = os.path.abspath(os.path.join(c_path, '..'))
        test_path = os.path.join(c_path, 'settings.json')
        if os.path.exists(test_path):
            with open(test_path, 'r') as rr:
                settings = json.load(rr)
            # NOTE(review): files are merged in walk order, so a settings.json
            # farther up the tree overrides one closer to the file - confirm
            # that this precedence is intended.
            settings_default.update(settings)

    return settings_default


def exec_file_remove_logic(file_path):
    """Decide whether the file set of ``file_path`` is kept or deleted.

    The detection json (``.json.orin``) is read, the best score per frame is
    compared with ``class_threshold``, and the share of frames above it is
    compared with ``frames_with_dets_threshold``. If the share is large enough
    a ``.has_objs.orin`` marker is written next to the video; otherwise every
    file of the set except the detection json and ``.jpg`` files is removed.

    Args:
        file_path: Path used to look up the matching file set.

    Returns:
        One of the ``RESULT_TYPE`` codes.
    """
    cset = get_cset_for_file_matching(file_path)
    settings = get_ok_to_delete(cset['.mp4'])
    logger.info(f"EVALUATING LOGIC WITH SETTINGS: {settings}")

    if settings['class_threshold'] > 1:
        # A threshold above 1 switches the removal logic off (presumably scores
        # are in [0, 1] - TODO confirm). The result must actually be returned:
        # a bare ``return`` hands None to the caller, which then silently drops
        # the message instead of forwarding it.
        logger.info(f"THRESHOLD SET TO ABOVE 1, SKIPPING: {file_path}")
        return RESULT_TYPE.HAS_OBJS

    logger.info(f"EXECUTING_LOGIC :{file_path}")
    json_check = '.json.orin'

    # NOTE(review): the marker written below ends in '.has_objs.orin' while
    # this lookup uses '.has_objs' - verify against the keys produced by
    # get_cset_for_file_matching.
    if '.has_objs' in cset:
        logger.info(f"HAS_OBJS :{file_path}")
        return RESULT_TYPE.HAS_OBJS

    if not (json_check in cset and os.path.exists(cset[json_check])):
        result = RESULT_TYPE.NO_JSON
        logger.info(f"NO_JSON_DONT_REMOVE :{pfm(file_path)}")
        return result

    logger.info(f"HAS_JSON :{file_path}")
    if os.path.getsize(cset[json_check]) == 0:
        # An empty json file is treated as "no frames scored".
        det_data = {'scores':[]}
    else:
        with open(cset[json_check],'r') as jj:
            det_data = json.load(jj)

    thresh = settings['class_threshold']
    frames_with_dets_thresh = settings['frames_with_dets_threshold']

    # Best detection score per frame; frames without detections count as 0.
    all_scores = [
        max([det['score'] for det in frame['detections']] + [0])
        for frame in det_data['scores']
    ]
    frames_above_thresh = sum(score > thresh for score in all_scores)
    num_frames = len(det_data['scores'])
    # The +1 keeps the denominator non-zero when there are no frames.
    ratio_with_frames = frames_above_thresh / (num_frames + 1)

    if ratio_with_frames > frames_with_dets_thresh:
        # Leave an empty marker file next to the video.
        cpath = os.path.splitext(cset['.mp4'])[0] + '.has_objs.orin'
        with open(cpath,'w'):
            pass
        logger.info(f"HAS_OBJECTS_DETECTED_WILL_NOT_REMOVE :{pfm(file_path)}")
        return RESULT_TYPE.KEPT

    logger.info(f"OBJECTS_NOT_DETECTED_WILL_REMOVE :{pfm(file_path)}")
    for cf in cset.values():
        # The detection json and jpg files survive the purge.
        if cf.endswith((json_check, '.jpg')):
            continue
        logger.info(f"REMOVING :{cf}")
        os.remove(cf)
    return RESULT_TYPE.REMOVED


# Main loop: evaluate every incoming message and forward it to the topic that
# matches the outcome. Results without a branch (e.g. NOTHING) are not forwarded.
for msg in consumer:
    key = msg.key
    value = msg.value

    purge_reason = exec_file_remove_logic(value['filepath'])
    d_send = {'value':value, 'key':key}

    if purge_reason in [RESULT_TYPE.KEPT, RESULT_TYPE.HAS_OBJS]:
        producer.send(kwq.TOPICS.videos_with_nuggets, **d_send)
    elif purge_reason == RESULT_TYPE.REMOVED:
        producer.send(kwq.TOPICS.videos_without_nuggets, **d_send)
    elif purge_reason == RESULT_TYPE.NO_JSON:
        producer.send(kwq.TOPICS.videos_no_json, **d_send)

# %%