YACWC

2025-10-04 11:45:45 -04:00
parent 028149b786
commit 36b7784b4e
11 changed files with 281 additions and 73 deletions
--- a/orin/01_do_obj_det
+++ b/orin/01_do_obj_det
--- a/orin/02_remove_without_nuggets/remove_without_nuggets.py
+++ b/orin/02_remove_without_nuggets/remove_without_nuggets.py
@@ -0,0 +1,132 @@
+#!/usr/bin/python
+import glob
+import argparse
+import os
+import json
+from CommonCode.util import exit_if_not_ipython, get_cset_for_file_matching, is_ipython
+from CommonCode import kwq
+from CommonCode.settings import get_logger, LogColorize
+from kafka import TopicPartition
+from kafka.structs import OffsetAndMetadata
+from pprint import pprint
+
+# Formatter applied to file paths in the log messages emitted below.
+pfm = LogColorize.remove_without_nuggets
+# Module logger; the second argument is the log location under /var/log.
+logger = get_logger(__name__,'/var/log/ml_vision_logs/02_remove_without_nuggets', stdout=True, systemd=False)
+
+# Topic this script consumes from.
+input_topic = kwq.TOPICS.videos_scored_detection
+
+# The same identifier is used as both the Kafka client id and the group id.
+client_id='obj_detector_remove_without_nuggets'
+group_id = client_id
+
+# Create the consumer, then explicitly assign it partition 0 of the input topic.
+consumer = kwq.create_consumer(input_topic, group_id = group_id, client_id = client_id)
+c_part = TopicPartition(input_topic, 0)
+consumer.assign([c_part])
+
+# Shared producer object exposed by kwq; used to forward messages downstream.
+producer = kwq.producer
+
+
+class RESULT_TYPE:
+    """Integer outcome codes produced by ``exec_file_remove_logic``."""
+
+    # Codes are consecutive integers starting at 0, in this order:
+    #   NOTHING  - initial value, no decision taken
+    #   HAS_OBJS - the file set is treated as containing objects
+    #   KEPT     - scores were evaluated and the files were kept
+    #   REMOVED  - scores were evaluated and the files were deleted
+    #   NO_JSON  - no detection JSON was available
+    NOTHING, HAS_OBJS, KEPT, REMOVED, NO_JSON = range(5)
+
+
+def get_ok_to_delete(file_path):
+    """Collect removal settings from ``settings.json`` files above a path.
+
+    Starting at the parent directory of ``file_path`` and climbing five
+    levels in total, every ``settings.json`` that exists is loaded and merged
+    over the built-in defaults.
+
+    Args:
+        file_path: Path whose ancestor directories are searched.
+
+    Returns:
+        dict with at least ``class_threshold`` and
+        ``frames_with_dets_threshold``, plus any other keys found in the
+        settings files.
+    """
+    levels_to_climb = 5
+    merged = {'class_threshold':10000, 'frames_with_dets_threshold': -10000}
+    directory = file_path
+    for _ in range(levels_to_climb):
+        directory = os.path.abspath(os.path.join(directory, '..'))
+        candidate = os.path.join(directory, 'settings.json')
+        if not os.path.exists(candidate):
+            continue
+        with open(candidate, 'r') as handle:
+            overrides = json.load(handle)
+        # NOTE(review): directories are visited nearest-first, so values from
+        # a farther ancestor overwrite those from a nearer one - confirm this
+        # precedence is intended.
+        merged.update(overrides)
+
+    return merged
+
+def exec_file_remove_logic(file_path):
+    """Decide whether the files that belong to one video are kept or deleted.
+
+    The file set is looked up with ``get_cset_for_file_matching`` and the
+    thresholds come from ``get_ok_to_delete`` (searched from the set's
+    ``.mp4`` path).  Detection results are read from the set's ``.json.orin``
+    file: for every entry in ``scores`` the highest detection score is taken
+    (never below 0), and the share of entries whose best score exceeds
+    ``class_threshold`` is compared with ``frames_with_dets_threshold``.
+
+    Args:
+        file_path: Path of a file belonging to the set to evaluate.
+
+    Returns:
+        A ``RESULT_TYPE`` code:
+          * ``HAS_OBJS`` - ``class_threshold`` is above 1 (evaluation is
+            skipped) or the set already has a ``.has_objs`` entry;
+          * ``KEPT`` - enough frames scored above the threshold; an empty
+            ``.has_objs.orin`` marker is written next to the video;
+          * ``REMOVED`` - every file of the set except the detection JSON and
+            ``.jpg`` files was deleted;
+          * ``NO_JSON`` - the set has no detection JSON on disk.
+    """
+    json_check = '.json.orin'
+    cset = get_cset_for_file_matching(file_path)
+
+    settings = get_ok_to_delete(cset['.mp4'])
+    logger.info(f"EVALUATING LOGIC WITH SETTINGS: {settings}")
+    thresh = settings['class_threshold']
+    if thresh > 1:
+        logger.info(f"THRESHOLD SET TO ABOVE 1, SKIPPING: {file_path}")
+        # Return the code explicitly.  A bare ``return`` here used to hand
+        # None to the caller, so skipped videos were never forwarded to any
+        # topic even though the result had been set to HAS_OBJS.
+        return RESULT_TYPE.HAS_OBJS
+
+    logger.info(f"EXECUTING_LOGIC :{file_path}")
+    # NOTE(review): the marker written below is named '.has_objs.orin';
+    # confirm that get_cset_for_file_matching exposes it under '.has_objs'.
+    if '.has_objs' in cset:
+        logger.info(f"HAS_OBJS :{file_path}")
+        return RESULT_TYPE.HAS_OBJS
+
+    if json_check not in cset or not os.path.exists(cset[json_check]):
+        logger.info(f"NO_JSON_DONT_REMOVE :{pfm(file_path)}")
+        return RESULT_TYPE.NO_JSON
+
+    logger.info(f"HAS_JSON :{file_path}")
+    json_path = cset[json_check]
+    if os.path.getsize(json_path) == 0:
+        # An empty file is treated as "no frames scored".
+        det_data = {'scores': []}
+    else:
+        with open(json_path, 'r') as jj:
+            det_data = json.load(jj)
+
+    frames_with_dets_thresh = settings['frames_with_dets_threshold']
+
+    # Best score per frame; the leading 0 covers frames without detections.
+    all_scores = [
+        max([0] + [det['score'] for det in frame['detections']])
+        for frame in det_data['scores']
+    ]
+    frames_above_thresh = sum(score > thresh for score in all_scores)
+    num_frames = len(det_data['scores'])
+
+    # The +1 keeps the division defined when there are no frames at all.
+    ratio_with_frames = frames_above_thresh / (num_frames + 1)
+    if ratio_with_frames > frames_with_dets_thresh:
+        # Leave an empty marker file next to the video.
+        cpath = os.path.splitext(cset['.mp4'])[0] + '.has_objs.orin'
+        with open(cpath, 'w'):
+            pass
+
+        logger.info(f"HAS_OBJECTS_DETECTED_WILL_NOT_REMOVE :{pfm(file_path)}")
+        return RESULT_TYPE.KEPT
+
+    logger.info(f"OBJECTS_NOT_DETECTED_WILL_REMOVE :{pfm(file_path)}")
+    for cf in cset.values():
+        # The detection JSON and any .jpg files are kept.
+        if cf.endswith((json_check, '.jpg')):
+            continue
+        logger.info(f"REMOVING :{cf}")
+        os.remove(cf)
+
+    return RESULT_TYPE.REMOVED
+
+
+# Process every incoming message and forward it to the topic matching the
+# outcome of the removal logic; any other outcome is not forwarded.
+for msg in consumer:
+    key = msg.key
+    value = msg.value
+
+    purge_reason = exec_file_remove_logic(value['filepath'])
+    d_send = dict(value=msg.value, key=msg.key)
+    if purge_reason == RESULT_TYPE.REMOVED:
+        producer.send(kwq.TOPICS.videos_without_nuggets, **d_send)
+    elif purge_reason == RESULT_TYPE.NO_JSON:
+        producer.send(kwq.TOPICS.videos_no_json, **d_send)
+    elif purge_reason in (RESULT_TYPE.KEPT, RESULT_TYPE.HAS_OBJS):
+        producer.send(kwq.TOPICS.videos_with_nuggets, **d_send)
+    
+# %%
+ 
--- a/orin/03_put_into_vectordb/put_into_vectordb.py
+++ b/orin/03_put_into_vectordb/put_into_vectordb.py
@@ -0,0 +1,115 @@
+import glob
+from pymilvus import MilvusClient
+from pymilvus.client.types import LoadState
+
+import argparse
+import os
+import json
+from CommonCode.util import exit_if_not_ipython, get_cset_for_file_matching, is_ipython
+from CommonCode import kwq
+from CommonCode.settings import get_logger, LogColorize
+from kafka import TopicPartition
+from kafka.structs import OffsetAndMetadata
+from pprint import pprint
+import numpy as np
+# Formatter applied to file paths in the log messages emitted below.
+pfm = LogColorize.embeds_in_db
+# Module logger; the second argument is the log location under /var/log.
+logger = get_logger(__name__,'/var/log/ml_vision_logs/03_put_into_vectordb', stdout=True, systemd=False)
+
+# Topic this script consumes from.
+input_topic = kwq.TOPICS.videos_with_nuggets
+
+# The same identifier is used as both the Kafka client id and the group id.
+client_id='embedding_place_in_db_1'
+group_id = client_id
+
+# Create the consumer, then explicitly assign it partition 0 of the input topic.
+consumer = kwq.create_consumer(input_topic, group_id = group_id, client_id = client_id)
+c_part = TopicPartition(input_topic, 0)
+consumer.assign([c_part])
+
+
+# Shared producer object exposed by kwq; used to publish the upload outcome.
+producer = kwq.producer
+# NOTE(review): not referenced anywhere else in the visible part of this file.
+model_type = 'ViT-L-16-SigLIP2-512'
+
+def get_db_embed_done_path(vpath):
+    """Return the marker-file path for ``vpath``.
+
+    The final extension of ``vpath`` is replaced by ``.oclip.orin.indb``.
+    """
+    stem, _ext = os.path.splitext(vpath)
+    return f'{stem}.oclip.orin.indb'
+
+
+#video_file_to_upload='/srv/ftp/ptz/2025/04/14/PTZBackRight_00_20250414063817.mp4'
+
+
+def get_vec_path(vpath):
+    """Return the embedding-file path for ``vpath``.
+
+    The final extension of ``vpath`` is replaced by ``.oclip.orin``.
+    """
+    stem, _ext = os.path.splitext(vpath)
+    return f'{stem}.oclip.orin'
+
+def get_date(vpath):
+    """Concatenate the three path components that precede the file name.
+
+    For a path ending in ``.../2025/04/14/<file>`` this yields ``'20250414'``.
+    """
+    stem, _ext = os.path.splitext(vpath)
+    components = stem.split('/')
+    # The three directories directly above the file name.
+    date_parts = components[-4:-1]
+    return ''.join(date_parts)
+
+def get_camera_name(vpath):
+    """Return the path component that directly follows ``ftp`` in ``vpath``.
+
+    Raises:
+        ValueError: if no component of the path is exactly ``ftp``.
+    """
+    stem, _ext = os.path.splitext(vpath)
+    components = stem.split('/')
+    ftp_position = components.index('ftp')
+    return components[ftp_position + 1]
+
+def upload_vector_file(video_file_to_upload):
+    """Insert the per-frame embeddings of one video into Milvus.
+
+    The embeddings are read from the video's ``.oclip.orin`` file, a JSON
+    document whose ``scores`` list holds entries with a ``frame`` number and
+    a ``score`` vector.  One row per entry is inserted into the collection
+    ``nuggets_<camera>_so400m_siglip2``; afterwards an empty
+    ``.oclip.orin.indb`` marker file is written next to the video.
+
+    Args:
+        video_file_to_upload: Path of the video whose embeddings are uploaded.
+
+    Raises:
+        OSError: if the embedding file cannot be read or the marker written.
+        ValueError: if the path has no ``ftp`` component, the JSON is
+            invalid, or a row id cannot be built from the file name and frame.
+        Exception: any error raised by the Milvus client is propagated.
+    """
+    db_done_path = get_db_embed_done_path(video_file_to_upload)
+    if os.path.exists(db_done_path):
+        # NOTE(review): the early return was already disabled in the original
+        # code, so the upload proceeds despite what this message says.
+        print('Already exists in DB, skipping upload')
+
+    vec_path = get_vec_path(video_file_to_upload)
+    with open(vec_path, 'r') as jj:
+        vecs = json.load(jj)
+
+    embeds = [x['score'] for x in vecs['scores']]
+    fr_nums = [x['frame'] for x in vecs['scores']]
+
+    # The text after the last '_' of the file name becomes the id prefix.
+    fname_root = vec_path.rsplit('/', 1)[-1].split('.')[0]
+    fc = fname_root.split('_')[-1]
+
+    # NOTE(review): the second prefix has no trailing slash, unlike the first,
+    # so '/mergedfs/ftp/...' paths keep a leading '/' - confirm intended.
+    filepath = vec_path.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]
+
+    date = get_date(filepath)
+    data_v2 = list()
+    for embed, frame_num in zip(embeds, fr_nums):
+        # NOTE(review): the 'g' format switches to exponent notation for
+        # values of 1,000,000 and above, which int() below would reject.
+        fg = '{0:05g}'.format(frame_num)
+        id_num = int(fc + fg)
+        embeds_as_np = np.asarray(embed, dtype=np.float16)
+        to_put_2 = dict(primary_id=id_num, filepath=filepath, frame_number=int(frame_num), so400m=embeds_as_np, date=str(date))
+        data_v2.append(to_put_2)
+
+    cam_name = get_camera_name(vec_path)
+
+    # Open the connection only once the rows are ready, and always close it,
+    # so a failed read or insert no longer leaks the client.
+    client = MilvusClient(
+        uri="http://localhost:19530"
+    )
+    try:
+        client.insert(collection_name=f'nuggets_{cam_name}_so400m_siglip2', data=data_v2)
+    finally:
+        client.close()
+
+    # Written only after a successful insert.
+    with open(db_done_path, 'w') as ff:
+        ff.write("")
+
+    print(f'Inserted into DB, {vec_path}')
+
+
+
+# Upload the embeddings for every incoming message and publish the message
+# on the success or failure topic depending on the outcome.
+for msg in consumer:
+    key, value = msg.key, msg.value
+    file_path = value['filepath']
+
+    success = False
+    try:
+        upload_vector_file(file_path)
+        success = True
+        logger.info(f"SUCCESS_UPLOADING :{pfm(file_path)}")
+    except Exception as e:
+        # Any failure is logged and reported downstream via the fail topic.
+        logger.info(f"ERROR_UPLOADING :{pfm(file_path)} + {e}")
+
+    d_send = dict(value=msg.value, key=msg.key)
+    send_topic = (
+        kwq.TOPICS.videos_embedding_in_db
+        if success
+        else kwq.TOPICS.videos_embedding_in_db_fail
+    )
+
+    producer.send(send_topic, **d_send)
--- a/orin/old_01_do_obj_det
+++ b/orin/old_01_do_obj_det