# %%
import sys, os

sys.path.append("/home/thebears/Web/Nuggets/SearchInterface/SearchUtil")
sys.path.append("/home/thebears/Web/Nuggets/SearchInterface/VectorService/util")

import numpy as np

import embed_scores as ES
# %%
# Score every video in one day's folder against a text query.
query = 'Cat and human'
c_dir = '/srv/ftp_tcc/leopards1/2025/09/08'
threshold = 0.10

results = ES.calculate_embedding_score_in_folder(c_dir, threshold, query)
print(len(results['videos']))
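# %%
# Peek at the structure of one result entry. Judging by how entries are built
# later in this notebook, each item in results['videos'] should carry
# 'file_name', 'start_time', 'end_time', and an 'embed_scores' dict holding
# parallel 'time'/'score' lists.
if results['videos']:
    first = results['videos'][0]
    print(first['file_name'], first['start_time'], first['end_time'])
    print(len(first['embed_scores']['time']), 'scored frames')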
# %%
# Unrolled version of calculate_embedding_score_in_folder, for step-by-step
# debugging.
c_dir = '/srv/ftp_tcc/leopards1/2025/09/08'
query_vector = None
og_dir = c_dir

if query_vector is None:
    query_vector = ES.get_query_vector(query)

# The day folder may live under the FTP root or on the archive disk; probe the
# known mount points and take the first one that exists.
candidate_dirs = list()
candidate_dirs.append(og_dir)
candidate_dirs.append(og_dir.replace('/srv/ftp_tcc', '/mnt/hdd_24tb_1/videos/ftp'))
candidate_dirs.append(og_dir.replace('/srv/ftp', '/mnt/hdd_24tb_1/videos/ftp'))

c_dir = None
for candidate in candidate_dirs:
    if os.path.exists(candidate):
        c_dir = candidate
        break

if c_dir is None:
    # In the library function this is `return []`; in the notebook, fall through.
    pass

from embed_scores import *

redis_key = 'helllo'
vec_cache_str = md5(query_vector).hexdigest()
cache_file_loc = os.path.join(c_dir, 'embedding_scores@' + str(threshold) + '@' + vec_cache_str + '.pkl')
vec_rep = get_vector_representation(c_dir, redis_key=redis_key)
query_scores = get_scores_embedding_c_dir(c_dir, tuple(query_vector.tolist()[0]))

video_json_info = list()
idces_keep = np.where(query_scores > threshold)[0]

# Map the frames that clear the threshold back to their video ids.
video_id = vec_rep['idces'][idces_keep]
videos_that_match = np.unique(video_id)

# Take every frame belonging to a matching video, then split the timestamp
# array wherever the video id changes (one chunk per video).
id_extract_video_level = np.where(np.isin(vec_rep['idces'], videos_that_match))[0]

idces_split = np.where(np.diff(vec_rep['idces'][id_extract_video_level]) != 0)[0] + 1
subset_timestampsF = np.split(vec_rep['timestamps'][id_extract_video_level], idces_split)
for idx, subset_t in enumerate(subset_timestampsF):
    if len(subset_t) == 0:
        continue

    min_t = min(subset_t)
    max_t = max(subset_t)
    print(idx, max_t - min_t)

    # Widen to every frame (from any video) inside this time window.
    idces_curr = np.where(np.logical_and(vec_rep['timestamps'] > min_t, vec_rep['timestamps'] < max_t))[0]
    if len(idces_curr) == 0:
        continue

    unq_vids = np.unique(vec_rep['idces'][idces_curr])
    subset_idx = np.where(np.isin(vec_rep['idces'], unq_vids))[0]

    subset_idces = vec_rep['idces'][subset_idx]
    subset_timestamps = vec_rep['timestamps'][subset_idx]
    subset_scores = query_scores[subset_idx]
    idx_split = np.where(np.diff(vec_rep['idces'][subset_idx]) != 0)[0] + 1

    split_idces = np.split(subset_idces, idx_split)
    split_timestamps = np.split(subset_timestamps, idx_split)
    split_scores = np.split(subset_scores, idx_split)
    split_files = [vec_rep['source_files'][x[0]] for x in split_idces]

    for s_file, s_scores, s_tstamps, s_idces in zip(split_files, split_scores, split_timestamps, split_idces):
        start_time = float(min(s_tstamps))
        end_time = float(max(s_tstamps))

        # Times are stored relative to the clip start; scores stay per-frame.
        frame_time = (s_tstamps - start_time).tolist()
        embed_scores = s_scores.tolist()

        c_data = {'file_name': str(s_file), 'start_time': start_time,
                  'end_time': end_time,
                  'embed_scores': {'time': frame_time, 'score': embed_scores}}
        video_json_info.append(c_data)

print(len(video_json_info))
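# %%
# The cache_file_loc built above matches the .pkl files read near the end of
# this notebook, so the library presumably pickles video_json_info there. A
# minimal sketch of that write (assumption: plain pickle, no locking or
# atomic-rename handling):
import pickle

with open(cache_file_loc, 'wb') as f:
    pickle.dump(video_json_info, f)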
# %%
# Score several day folders in one call, then add break markers between videos.
query = 'A cat and a human'
c_dirs = ['/mnt/hdd_24tb_1/videos/ftp/leopards2/2025/08/26',
          '/srv/ftp_tcc/leopards1/2025/08/27',
          '/srv/ftp_tcc/leopards1/2025/08/28',
          '/srv/ftp_tcc/leopards1/2025/08/29']

threshold = 0.10
folder_scores = ES.calculate_embedding_score_in_folders(tuple(c_dirs), threshold=threshold, query=query)
folder_scores['breaks'] = ES.add_breaks_between_videos(folder_scores)
# %%
# Find the video file whose [start_time, end_time] span contains a given
# absolute timestamp, and the offset into that file.
target_tstamp = 1756332686.5805347

matching_file = None
for video_file in folder_scores['videos']:
    start_time = video_file['start_time']
    end_time = video_file['end_time']

    if target_tstamp > start_time and target_tstamp < end_time:
        matching_file = video_file
        break

if matching_file is not None:
    fname = matching_file['file_name']
    offset = target_tstamp - matching_file['start_time']
else:
    fname = 'None Found'
    offset = -1

web_name = os.path.basename(fname)
# %%
# The same lookup lives in the library; try it with a timestamp 500 s past the
# target.
import embed_scores as ES

result = ES.get_matching_file_for_tstamp(target_tstamp + 500, folder_scores)
print(result)
# %%
# Fetch precomputed scores from the web service instead of computing locally.
import requests

folder_scores = requests.get('http://192.168.1.242:5004/videos.json').json()
print(len(folder_scores['videos']))
# %%
folder_scores = requests.get('http://192.168.1.242:5004/videos.json', params={'threshold': 0.09}).json()
print(len(folder_scores['videos']))
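# %%
# Lower thresholds keep more frames, so counts should grow as the threshold
# drops. A quick sweep sketch, assuming the endpoint accepts arbitrary
# threshold values:
for th in (0.09, 0.10, 0.12):
    fs = requests.get('http://192.168.1.242:5004/videos.json', params={'threshold': th}).json()
    print(th, len(fs['videos']))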
# %%
# Thin each video's score curve with largest-triangle-three-buckets (LTTB) so
# web plots stay light: keep at least min_rows points, otherwise 10% of them.
import lttb

new_folder_scores = folder_scores.copy()
min_rows = 15
factor = 0.1

for x in new_folder_scores['videos']:
    data = np.asarray([x['embed_scores']['time'], x['embed_scores']['score']])
    amt = max(min_rows, int(factor * data.shape[1]))

    if data.shape[1] > amt:
        sampled = lttb.downsample(data.T, amt)  # expects (N, 2): time, score
    else:
        sampled = data.T

    time = sampled[:, 0].tolist()
    scores = sampled[:, 1].tolist()
    # Write the thinned curve back onto the entry. Note dict.copy() is shallow,
    # so this also rewrites the entries shared with folder_scores.
    x['embed_scores'] = {'time': time, 'score': scores}
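# %%
# Standalone check of the LTTB call on synthetic data, to confirm the shape
# conventions assumed above: input is (N, 2) with increasing x in column 0,
# output is (n_out, 2).
demo = np.column_stack([np.linspace(0, 60, 600), np.random.rand(600)])
demo_small = lttb.downsample(demo, 40)
print(demo.shape, '->', demo_small.shape)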
# %%
# Inspect one of the on-disk embedding-score caches directly.
import pickle

cache_file_loc = '/srv/ftp_tcc/leopards1/2025/09/09/embedding_scores@0.1@de376b3b6e90315477571ef6e82e841c.pkl'
c_dir = os.path.dirname(cache_file_loc)
# %%
with open(cache_file_loc, 'rb') as f:
    video_json_info = pickle.load(f)

# Compare the videos recorded in the cache against the embedding files that
# are actually present in the folder.
files_in_cache = {os.path.splitext(os.path.basename(x['file_name']))[0] for x in video_json_info}
lsd_dir = os.listdir(c_dir)
files_on_disk = {x.split('.')[0] for x in lsd_dir if x.endswith('oclip_embeds.npz')}
print(len(files_on_disk), len(files_in_cache))
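# %%
# If the counts disagree, the set differences name the mismatches: videos with
# embeddings on disk but missing from the cache, and cache entries whose
# embedding files have since disappeared.
print(sorted(files_on_disk - files_in_cache))
print(sorted(files_in_cache - files_on_disk))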
# %%
import embed_scores as ES

a_mov = '/srv/ftp_tcc/leopards1/2025/09/09/Leopards1_00_20250909045221.mp4'