# %%
import sys, os
sys.path.append("/home/thebears/Web/Nuggets/SearchInterface/SearchUtil")
sys.path.append("/home/thebears/Web/Nuggets/SearchInterface/VectorService/util")
import embed_scores as ES

# %%
query = 'Cat and human'
c_dir = '/srv/ftp_tcc/leopards1/2025/09/08'
threshold = 0.10
results = ES.calculate_embedding_score_in_folder(c_dir, threshold, query)
print(len(results['videos']))

# %%
# Step-by-step version of calculate_embedding_score_in_folder for one directory
c_dir = '/srv/ftp_tcc/leopards1/2025/09/08'
query_vector = None
og_dir = c_dir

if query_vector is None:
    query_vector = ES.get_query_vector(query)

# Resolve the directory against the possible mount points
candidate_dirs = list()
candidate_dirs.append(og_dir)
candidate_dirs.append(og_dir.replace('/srv/ftp_tcc', '/mnt/hdd_24tb_1/videos/ftp'))
candidate_dirs.append(og_dir.replace('/srv/ftp', '/mnt/hdd_24tb_1/videos/ftp'))

c_dir = None
for candidate in candidate_dirs:
    if os.path.exists(candidate):
        c_dir = candidate
        break

if c_dir is None:
    # return []
    pass

from embed_scores import *
# Explicit imports for names used below, in case the star import does not provide them
import numpy as np
from hashlib import md5

redis_key = 'helllo'
vec_cache_str = md5(query_vector).hexdigest()
cache_file_loc = os.path.join(c_dir, 'embedding_scores@' + str(threshold) + '@' + vec_cache_str + '.pkl')

vec_rep = get_vector_representation(c_dir, redis_key=redis_key)
query_scores = get_scores_embedding_c_dir(c_dir, tuple(query_vector.tolist()[0]))

video_json_info = list()
idces_keep = np.where(query_scores > threshold)[0]
video_id = vec_rep['idces'][idces_keep]
videos_that_match = np.unique(video_id)

id_extract_video_level = np.where(np.isin(vec_rep['idces'], videos_that_match))[0]
idces_split = np.where(np.diff(vec_rep['idces'][id_extract_video_level]) != 0)[0] + 1
subset_timestampsF = np.split(vec_rep['timestamps'][id_extract_video_level], idces_split)

for idx, subset_t in enumerate(subset_timestampsF):
    if len(subset_t) == 0:
        continue
    min_t = min(subset_t)
    max_t = max(subset_t)
    print(idx, max_t - min_t)

    idces_curr = np.where(np.logical_and(vec_rep['timestamps'] > min_t, vec_rep['timestamps'] < max_t))[0]
    if len(idces_curr) == 0:
        continue

    unq_vids = np.unique(vec_rep['idces'][idces_curr])
    subset_idx = np.where(np.isin(vec_rep['idces'], unq_vids))[0]

    subset_idces = vec_rep['idces'][subset_idx]
    subset_timestamps = vec_rep['timestamps'][subset_idx]
    subset_scores = query_scores[subset_idx]

    # Split the flat arrays back into per-video segments
    idx_split = np.where(np.diff(vec_rep['idces'][subset_idx]) != 0)[0] + 1
    split_idces = np.split(subset_idces, idx_split)
    split_timestamps = np.split(subset_timestamps, idx_split)
    split_scores = np.split(subset_scores, idx_split)
    split_files = [vec_rep['source_files'][x[0]] for x in split_idces]

    for s_file, s_scores, s_tstamps, s_idces in zip(split_files, split_scores, split_timestamps, split_idces):
        start_time = float(min(s_tstamps))
        end_time = float(max(s_tstamps))
        frame_time = (s_tstamps - start_time).tolist()
        embed_scores = s_scores.tolist()
        c_data = {'file_name': str(s_file),
                  'start_time': start_time,
                  'end_time': end_time,
                  'embed_scores': {'time': frame_time, 'score': embed_scores}}
        video_json_info.append(c_data)

print(len(video_json_info))

# %%
query = 'A cat and a human'
c_dirs = ['/mnt/hdd_24tb_1/videos/ftp/leopards2/2025/08/26',
          '/srv/ftp_tcc/leopards1/2025/08/27',
          '/srv/ftp_tcc/leopards1/2025/08/28',
          '/srv/ftp_tcc/leopards1/2025/08/29']
threshold = 0.10
folder_scores = ES.calculate_embedding_score_in_folders(tuple(c_dirs), threshold=threshold, query=query)
folder_scores['breaks'] = ES.add_breaks_between_videos(folder_scores)

# %%
# Find the video whose time range contains the target timestamp
target_tstamp = 1756332686.5805347
matching_file = None
for video_file in folder_scores['videos']:
    start_time = video_file['start_time']
    end_time = video_file['end_time']
    if target_tstamp > start_time and target_tstamp < end_time:
        matching_file = video_file
        break

if matching_file is not None:
    fname = matching_file['file_name']
    offset = target_tstamp - matching_file['start_time']
else:
    fname = 'None Found'
    offset = -1

web_name = os.path.basename(fname)

# %%
import embed_scores as ES
result = ES.get_matching_file_for_tstamp(target_tstamp + 500, folder_scores)
print(result)

# %%
import requests
folder_scores = requests.get('http://192.168.1.242:5004/videos.json').json()
print(len(folder_scores['videos']))

# %%
folder_scores = requests.get('http://192.168.1.242:5004/videos.json', params={'threshold': 0.09}).json()
print(len(folder_scores['videos']))

# %%
# Downsample each video's score trace with largest-triangle-three-buckets
new_folder_scores = folder_scores.copy()
import lttb

min_rows = 15
factor = 0.1
for x in new_folder_scores['videos']:
    data = np.asarray([x['embed_scores']['time'], x['embed_scores']['score']])
    amt = max(min_rows, int(factor * data.shape[1]))
    if data.shape[1] > amt:
        sampled = lttb.downsample(data.T, amt)
    else:
        sampled = data.T
    time = sampled[:, 0].tolist()
    scores = sampled[:, 1].tolist()

# %%
import pickle
cache_file_loc = '/srv/ftp_tcc/leopards1/2025/09/09/embedding_scores@0.1@de376b3b6e90315477571ef6e82e841c.pkl'
c_dir = os.path.dirname(cache_file_loc)

# %%
# Compare the videos listed in the cache file against the embedding files on disk
with open(cache_file_loc, 'rb') as f:
    video_json_info = pickle.load(f)

files_in_cache = {os.path.splitext(os.path.basename(x['file_name']))[0] for x in video_json_info}
lsd_dir = os.listdir(c_dir)
files_on_disk = {x.split('.')[0] for x in lsd_dir if x.endswith('oclip_embeds.npz')}
print(len(files_on_disk), len(files_in_cache))

# %%
import embed_scores as ES
a_mov = '/srv/ftp_tcc/leopards1/2025/09/09/Leopards1_00_20250909045221.mp4'
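
# %%
# Minimal sketch: score a_mov's folder with the same helper used above and pull
# out the entries for a_mov itself. Assumes the returned entries store the full
# path in 'file_name' (matched here by basename); the a_mov_* names are just
# illustrative, not part of embed_scores.
a_mov_dir = os.path.dirname(a_mov)
a_mov_results = ES.calculate_embedding_score_in_folder(a_mov_dir, threshold, query)
a_mov_entries = [v for v in a_mov_results['videos']
                 if os.path.basename(v['file_name']) == os.path.basename(a_mov)]
print(len(a_mov_entries), 'matching segments for', os.path.basename(a_mov))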