This commit is contained in:
2024-02-10 21:27:11 -05:00
commit cc448ab44b
2 changed files with 191 additions and 0 deletions

92
search_me.py Normal file
View File

@@ -0,0 +1,92 @@
# Master switch for loading the CLIP model at start-up. When False, nothing
# below defines `model` / `txt_processors`, so text queries cannot be embedded.
do_load = True
from qdrant_client import QdrantClient
import numpy as np
from bottle import route, run, template, request, debug
# %%
if do_load:
    # Heavy dependencies are imported only when the model is actually wanted.
    from lavis.models import load_model_and_preprocess, model_zoo
    import torch

    device = 'cpu'
    model, vis_processors, txt_processors = load_model_and_preprocess(
        "clip_feature_extractor",
        model_type="ViT-B-16",
        is_eval=True,
        device=device,
    )
    model.eval()

# Qdrant collection searched by the endpoint; an '_averaged' sibling name is
# derived from it inside the request handler.
collection_name = "nuggets_clip"
# %%
# Client for the Qdrant instance on localhost, talking gRPC on port 6334.
client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
# %%
from bottle import route, run, template
@route('/get_text_match')
def get_matches():
    """Return stored hits that best match a free-text query.

    Query-string parameters (read from ``request.query``):
        query: Text to search for. Defaults to
            ``'A large bird eating corn'``.
        num_videos: Number of distinct videos (distinct ``filepath``
            payload values) the result set should cover. Defaults to 5.

    Returns:
        dict: ``{'query': ..., 'num_videos': ..., 'results': [...]}``.
        Each result is a copy of the hit's Qdrant payload with ``filepath``
        rewritten to the merged mount point and with ``score`` and
        ``winpath`` added. Bottle serialises a returned dict as JSON.

    Raises:
        RuntimeError: If the module runs with ``do_load = False``; no model
            is available to embed the query.
        ValueError: If ``num_videos`` cannot be parsed as an integer.
    """

    def linux_to_win_path(form):
        """Rewrite a '/srv' path into the file:// URL of the network share."""
        form = form.replace('/srv', 'file://192.168.1.242/thebears/Videos/merged/')
        return form

    def normalize_to_merged(path):
        """Map both known storage roots onto the '/mergedfs/ftp' mount."""
        path = path.replace('/srv/ftp', '/mergedfs/ftp')
        path = path.replace('/mnt/archive2/videos/ftp', '/mergedfs/ftp')
        return path

    query = request.query.get('query', 'A large bird eating corn')
    num_videos = int(request.query.get('num_videos', 5))
    # The 'averaged' query parameter is disabled for now, so the base
    # collection is always the one searched.
    averaged = False
    print(query, num_videos, averaged)

    if not do_load:
        # Without the model there is no way to build a query vector; fail
        # with a clear message instead of a NameError further down.
        raise RuntimeError(
            'CLIP model is not loaded (do_load is False); cannot embed query'
        )

    with torch.no_grad():
        text_input = txt_processors['eval'](query)
        vec = model.extract_features({'text_input': text_input})
        vec_search = vec.cpu().numpy().squeeze().tolist()

    col_name = collection_name + '_averaged' if averaged else collection_name

    # Several hits can belong to the same video, so widen the search limit in
    # steps of 10 until enough distinct filepaths are covered. The upper
    # bound guarantees at least one search even when num_videos >= 100, where
    # range(num_videos, 100, 10) would be empty and leave `results` unbound.
    results = []
    for limit in range(num_videos, max(num_videos + 1, 100), 10):
        results = client.search(
            collection_name=col_name,
            query_vector=vec_search,
            limit=limit,
        )
        num_video_got = len({x.payload['filepath'] for x in results})
        if num_video_got >= num_videos:
            break

    resul = []
    for x in results:
        pload = dict(x.payload)
        pload['filepath'] = normalize_to_merged(pload['filepath'])
        pload['score'] = x.score
        # NOTE(review): filepath was normalized just above, so a '/srv/ftp'
        # prefix is already gone by the time linux_to_win_path looks for
        # '/srv' - confirm this ordering is intended.
        pload['winpath'] = linux_to_win_path(pload['filepath'])
        resul.append(pload)

    return_this = {'query': query, 'num_videos': num_videos, 'results': resul}
    return return_this
# Switch Bottle into debug mode, then start serving the routes defined in
# this module on every network interface (0.0.0.0) at port 53003.
debug(True)
run(host='0.0.0.0', port=53003)

99
test_clip_scores.py Normal file
View File

@@ -0,0 +1,99 @@
print('hello')
# %%
file_path = '/srv/ftp/sidefeeder/2024/01/26/sidefeeder_01_20240126103234.mp4'
# The star import supplies the helpers used below (score_video, nv_video,
# get_vec_path, ...). NOTE(review): os, torch, model, vis_processors and
# sample_text are not defined in this file, so they presumably arrive through
# this import as well - confirm.
from score_videos_embedding import *
# %%
# Reference run through the regular pipeline.
score_video(file_path)
# %%
# Re-create the embedding of the first batch by hand.
file_path = os.path.normpath(file_path)
nv_vid = nv_video(file_path, 32, frame_interval=10)
vec_path = get_vec_path(nv_vid.vid_path)
transforms = vis_processors["eval"].transform.transforms
all_results = []

tensor_np, frame_num = nv_vid.get_next_batch()
# Move axis 3 to position 1 and rescale by 1/255
# (presumably NHWC uint8 frames -> NCHW floats in [0, 1]; TODO confirm).
tensor = tensor_np.permute([0, 3, 1, 2]).to(torch.float32) / 255
# Only entries 0, 1 and 4 of the eval transform list are applied; entries
# 2 and 3 are skipped.
tensor_mean = transforms[4](transforms[1](transforms[0](tensor)))
sample = {"image": tensor_mean}

with torch.no_grad():
    imp = model.extract_features(sample)

vector_to_store = imp.cpu().numpy()
all_results.append([vector_to_store, frame_num])

with torch.no_grad():
    imp_text = model.extract_features(sample_text)
# %%
# Cosine similarity between the image embeddings of the batch and row 3 of
# the text embeddings (so sample_text must produce at least four rows).
fwe = torch.nn.functional.cosine_similarity(imp, imp_text[3,:])
# %%
# Last text embedding, converted to a plain Python list for use as a query
# vector.
L = imp_text[-1].cpu().numpy().tolist()
client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
# %%
# Fetch the single nearest stored point, including its vector.
results = client.search(collection_name = collection_name,
query_vector = L, limit=1, with_vectors=True)
# Bare comparison: the value is only visible when the cell is run
# interactively; as a script statement the result is discarded.
results[0].payload['filepath'] == file_path
# Embedding at the batch position whose frame number is 389; .index() raises
# ValueError if 389 is not in frame_num.
vec_s = imp[frame_num.index(389)]
vpath = get_vec_path(file_path)
# %%
import numpy as np
# Load the saved vectors for this video; the object is indexed by the keys
# 'embeds' and 'frame_numbers' (presumably an .npz archive - TODO confirm).
out = np.load(vpath)
out['embeds']
idx_look = out['frame_numbers'].tolist().index(389)
# %%
# Re-score the video (rebinding `out`) and print the mean absolute difference
# between out[0][0] and the first hand-computed embedding.
out = score_video(file_path)
print(np.average(np.abs(out[0][0] - imp[0].detach().cpu().numpy())))
# %%
from qdrant_client import QdrantClient
client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
# %%
# Random 512-element vector used as a stand-in query for the filter test.
L = np.random.random(512).tolist()
# %%
from qdrant_client.models import Filter, FieldCondition, Range, MatchText
from datetime import datetime, timedelta
collection_name = 'nuggets_clip'
query = 'hello'
max_age = 1  # size of the look-back window, in days
max_date = datetime.now()
min_date = max_date - timedelta(days=max_age)
days_step = (max_date - min_date).days
# One entry per calendar day from min_date through max_date inclusive.
# range(days_step) alone stops a day short of max_date, which left today's
# recordings out of the filter.
date_arrays = [min_date + timedelta(days=i) for i in range(days_step + 1)]
# File names carry the recording date as '_YYYYMMDD', so each day becomes a
# text-match condition on the 'filepath' payload field.
string_filter = [cand_date.strftime('_%Y%m%d') for cand_date in date_arrays]
should_list = [
    FieldCondition(key='filepath', match=MatchText(text=str_filt))
    for str_filt in string_filter
]
# %%
# Hard-coded alternative filter; defined for experimentation but not passed
# to the search below.
must = [
    FieldCondition(key="filepath", match=MatchText(text="sidefeeder")),
    FieldCondition(key="filepath", match=MatchText(text="_20240207")),
]
# The date conditions are passed as `should`, i.e. alternatives.
results = client.search(
    collection_name=collection_name,
    query_vector=L,
    limit=10,
    query_filter=Filter(should=should_list),
)