Files
deepstream/compare.py
2025-06-30 14:19:58 -04:00

47 lines
1.0 KiB
Python

import json
import numpy as np
# Reference embeddings; rows are looked up by frame number later in this
# script.  An alternative dump, 'dump.npz.npy', can be loaded the same way.
datum = np.load('dump_so400m.npy')

# JSON is UTF-8 by specification, so do not depend on the locale's default
# encoding when reading the dump.
with open('dump.json', 'r', encoding='utf-8') as rr:
    js = json.load(rr)

# The second top-level JSON element holds the embedding records.  Key each
# vector by its frame number; a repeated frame number keeps the last record.
emb_dict = {
    embed['frame_number']: np.asarray(embed['vector'])
    for embed in js[1]
}
def cosine_sim(emb_in_1, emb_in_2):
    """Return the norms, dot product and cosine similarity of two embeddings.

    Both inputs are converted to ``float32`` arrays before any arithmetic, so
    plain lists and tuples are accepted as well as NumPy arrays.

    Args:
        emb_in_1: First embedding (array-like of numbers).
        emb_in_2: Second embedding, compatible with ``emb_in_1`` for
            ``np.dot``.

    Returns:
        A 4-tuple ``(norm_1, norm_2, dot_product, similarity)`` with every
        value rounded to 5 decimal places.  ``similarity`` is NaN when the
        product of the norms is zero, because the cosine is undefined there.
    """
    vec_1 = np.asarray(emb_in_1, dtype=np.float32)
    vec_2 = np.asarray(emb_in_2, dtype=np.float32)

    norm_1 = np.linalg.norm(vec_1)
    norm_2 = np.linalg.norm(vec_2)
    dot_prod = np.dot(vec_1, vec_2)

    # A zero-norm input makes this 0/0.  The NaN result is intentional, so
    # silence the floating-point warning instead of letting it leak out.
    with np.errstate(divide='ignore', invalid='ignore'):
        similarity = dot_prod / (norm_1 * norm_2)

    return (
        np.round(norm_1, 5),
        np.round(norm_2, 5),
        np.round(dot_prod, 5),
        np.round(similarity, 5),
    )
# Collect every vector stored in emb_dict into one array (insertion order).
arr_in_deepstream = np.asarray(list(emb_dict.values()))

# Divide each row of the reference dump by its own L2 norm.
normed = (datum.T / np.linalg.norm(datum, axis=1)).T

print("_________________________")
print(len(emb_dict))
print(len(datum))

# For every frame in emb_dict, print the tuple returned by cosine_sim for that
# frame's vector and the reference row with the same frame number.
for fr, emb in emb_dict.items():
    reference = datum[fr]
    # NOTE(review): emb1 / emb2 are not read anywhere in the visible part of
    # the file; cosine_sim already reports both norms.  Confirm they are
    # unused before removing them.
    emb1 = np.linalg.norm(emb)
    emb2 = np.linalg.norm(reference)
    print(cosine_sim(emb, reference))