"""Collect embedding scores for one time window from per-day pickle caches.

Each cache file is expected to live in a ``.../YYYY/MM/DD/`` directory and to
unpickle to a dict with a ``'videos'`` list.  Every video entry is read for
``'start_time'``, ``'end_time'`` and ``'embed_scores'`` (a mapping with
``'time'`` and ``'score'`` sequences); the ``'time'`` values are added to the
video's ``'start_time'`` to build the output time axis.

Running the file as a script (or pasting it into a ``__main__`` session)
leaves the result in ``out_array`` as a list of ``[time, score]`` pairs.
"""
import pickle
import time
from datetime import timedelta, datetime

import numpy as np

# Length of the window each dated cache folder is assumed to cover.
SECONDS_PER_DAY = 86400.0

cache_files = [
    '/mnt/hdd_24tb_1/videos/ftp/leopards1/2025/09/12/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/mnt/hdd_24tb_1/videos/ftp/leopards1/2025/09/13/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/14/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/15/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/16/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/17/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/18/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
    '/srv/ftp_tcc/leopards1/2025/09/19/embedding_scores@0.0@926895f71538e3683e9af0956af94cf4.pkl',
]

# Query window.  These values are compared against ``datetime.timestamp()``
# results below, i.e. they are treated as POSIX timestamps in seconds.
start_time = 1757892175.042
end_time = 1757894197.548


def check_if_overlap(start_1, end_1, start_2, end_2):
    """Return True when interval 1 and interval 2 overlap.

    The interval that starts first is compared against the start of the
    other one; on equal starts interval 1 counts as the first.  The
    comparison is strict, so intervals that merely touch (one ends exactly
    where the other begins) do not overlap.
    """
    if start_2 < start_1:
        return end_2 > start_1
    return end_1 > start_2


def folder_day_bounds(cache_path):
    """Return ``(day_start, day_end)`` timestamps for a dated cache path.

    The three path components before the file name are parsed as year,
    month and day.  The resulting naive ``datetime`` is converted with
    ``.timestamp()``, which interprets it in the machine's local timezone.
    """
    date_parts = [int(part) for part in cache_path.split('/')[-4:-1]]
    day_start = datetime(*date_parts).timestamp()
    return day_start, day_start + SECONDS_PER_DAY


def get_cache_data(start_time, end_time, cache_files):
    """Load every cache file whose folder day overlaps the given window.

    Args:
        start_time: Window start as a POSIX timestamp.
        end_time: Window end as a POSIX timestamp.
        cache_files: Iterable of ``.../YYYY/MM/DD/<name>.pkl`` paths.

    Returns:
        A new list with one unpickled object per overlapping file, in the
        order of ``cache_files``.  Files outside the window are never opened.

    Raises:
        OSError: If an overlapping file cannot be opened.
    """
    loaded = []
    for path in cache_files:
        day_start, day_end = folder_day_bounds(path)
        if not check_if_overlap(start_time, end_time, day_start, day_end):
            continue
        print(f'Loading {path}')
        with open(path, 'rb') as handle:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file; only point this at cache files from a trusted source.
            loaded.append(pickle.load(handle))
    return loaded


def filter_videos(videos, start_time, end_time):
    """Return the videos whose [start_time, end_time] overlaps the window."""
    return [
        video for video in videos
        if check_if_overlap(
            video['start_time'], video['end_time'], start_time, end_time)
    ]


def build_score_series(videos):
    """Merge per-video embedding scores into one time-sorted series.

    Each video's ``embed_scores['time']`` values are shifted by the video's
    ``start_time``.  When several samples share the same time, the score of
    the first one (in input order) is kept.

    Returns:
        A list of ``[time, score]`` pairs sorted by time; ``[]`` when
        ``videos`` is empty.
    """
    if not videos:
        # np.hstack raises ValueError on an empty sequence.
        return []
    time_vec = np.hstack([
        np.asarray(video['embed_scores']['time']) + video['start_time']
        for video in videos
    ])
    score_vec = np.hstack([video['embed_scores']['score'] for video in videos])
    # np.unique sorts the times and reports the first index of each value.
    unique_times, first_index = np.unique(time_vec, return_index=True)
    unique_scores = score_vec[first_index]
    return np.asarray([unique_times, unique_scores]).T.tolist()


if __name__ == "__main__":
    all_cach = get_cache_data(start_time, end_time, cache_files)

    vids = []
    for cache in all_cach:
        vids.extend(cache['videos'])

    data_filt = filter_videos(vids, start_time, end_time)
    out_array = build_score_series(data_filt)