commit cc448ab44babcaf0053dc0d64ca70d5c92f94423
Author: Ishan S. Patel
Date:   Sat Feb 10 21:27:11 2024 -0500

    YACWC

diff --git a/search_me.py b/search_me.py
new file mode 100644
index 0000000..90ced97
--- /dev/null
+++ b/search_me.py
@@ -0,0 +1,69 @@
+do_load = True
+from qdrant_client import QdrantClient
+import numpy as np
+from bottle import route, run, template, request, debug
+# %%
+if do_load:
+    from lavis.models import load_model_and_preprocess, model_zoo
+    import torch
+    device = 'cpu'
+    model, vis_processors, txt_processors = load_model_and_preprocess(
+        "clip_feature_extractor", model_type="ViT-B-16", is_eval=True, device=device)
+    model.eval()
+    collection_name = "nuggets_clip"
+
+# %%
+client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
+
+# %%
+@route('/get_text_match')
+def get_matches():
+    query = request.query.get('query', 'A large bird eating corn')
+    # averaged = request.query.get('averaged', False)
+    max_age = int(request.query.get('age', 5))  # parsed but not used yet
+    averaged = False
+    num_videos = int(request.query.get('num_videos', 5))
+    print(query, num_videos, averaged)
+
+    if do_load:
+        # Embed the text query with the CLIP text encoder.
+        with torch.no_grad():
+            text_input = txt_processors['eval'](query)
+            sample = {'text_input': text_input}
+            vec = model.extract_features(sample)
+            vec_search = vec.cpu().numpy().squeeze().tolist()
+
+    if averaged:
+        col_name = collection_name + '_averaged'
+    else:
+        col_name = collection_name
+
+    # Grow the search limit until the hits span at least num_videos distinct files.
+    for i in range(num_videos, 100, 10):
+        results = client.search(collection_name=col_name,
+                                query_vector=vec_search, limit=i)
+        num_video_got = len(set(x.payload['filepath'] for x in results))
+        if num_video_got >= num_videos:
+            break
+
+    def linux_to_win_path(form):
+        return form.replace('/srv', 'file://192.168.1.242/thebears/Videos/merged/')
+
+    def normalize_to_merged(path):
+        path = path.replace('/srv/ftp', '/mergedfs/ftp')
+        path = path.replace('/mnt/archive2/videos/ftp', '/mergedfs/ftp')
+        return path
+
+    resul = list()
+    for x in results:
+        pload = dict(x.payload)
+        pload['filepath'] = normalize_to_merged(pload['filepath'])
+        pload['score'] = x.score
+        pload['winpath'] = linux_to_win_path(pload['filepath'])
+        resul.append(pload)
+
+    return {'query': query, 'num_videos': num_videos, 'results': resul}
+
+
+debug(True)
+run(host='0.0.0.0', port=53003)
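For reference, the /get_text_match endpoint above can be exercised from any HTTP client. A minimal sketch, assuming the service is running on the host/port passed to run() and using the parameter names and payload keys defined in get_matches():

import requests

# Hit the Bottle endpoint defined in search_me.py; the query parameters match
# the request.query.get() calls above.
resp = requests.get('http://localhost:53003/get_text_match',
                    params={'query': 'A large bird eating corn', 'num_videos': 5})
for hit in resp.json()['results']:
    # Each hit carries the normalized filepath, the Windows share path,
    # and the similarity score returned by Qdrant.
    print(hit['score'], hit['filepath'], hit['winpath'])
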
diff --git a/test_clip_scores.py b/test_clip_scores.py
new file mode 100644
index 0000000..29e31fa
--- /dev/null
+++ b/test_clip_scores.py
@@ -0,0 +1,101 @@
+print('hello')
+
+# %%
+file_path = '/srv/ftp/sidefeeder/2024/01/26/sidefeeder_01_20240126103234.mp4'
+
+from score_videos_embedding import *
+
+# %%
+score_video(file_path)
+# %%
+file_path = os.path.normpath(file_path)
+nv_vid = nv_video(file_path, 32, frame_interval=10)
+vec_path = get_vec_path(nv_vid.vid_path)
+transforms = vis_processors["eval"].transform.transforms
+all_results = list()
+
+# Decode one batch of frames and preprocess the tensor directly, skipping the
+# PIL/ToTensor steps (the indices assume the usual CLIP transform order:
+# Resize, CenterCrop, ..., ToTensor, Normalize).
+tensor_np, frame_num = nv_vid.get_next_batch()
+tensor = tensor_np.permute([0, 3, 1, 2]).to(torch.float32) / 255
+tensor_pre1 = transforms[0](tensor)
+tensor_pre2 = transforms[1](tensor_pre1)
+tensor_mean = transforms[4](tensor_pre2)
+
+sample = {"image": tensor_mean}
+with torch.no_grad():
+    imp = model.extract_features(sample)
+    vector_to_store = imp.cpu().numpy()
+
+all_results.append([vector_to_store, frame_num])
+
+# sample_text was undefined in the original scratch; placeholder captions here,
+# built the same way search_me.py embeds its query.
+captions = ['a bird', 'a squirrel', 'an empty feeder', 'a large bird eating corn']
+sample_text = {'text_input': [txt_processors['eval'](c) for c in captions]}
+with torch.no_grad():
+    imp_text = model.extract_features(sample_text)
+# %%
+# Frame-vs-text cosine similarity for one caption.
+fwe = torch.nn.functional.cosine_similarity(imp, imp_text[3, :])
+# %%
+L = imp_text[-1].cpu().numpy().tolist()
+client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
+# %%
+results = client.search(collection_name=collection_name,
+                        query_vector=L, limit=1, with_vectors=True)
+
+# The nearest stored vector should come from the same video.
+print(results[0].payload['filepath'] == file_path)
+
+vec_s = imp[frame_num.index(389)]
+vpath = get_vec_path(file_path)
+# %%
+import numpy as np
+out = np.load(vpath)
+out['embeds']
+idx_look = out['frame_numbers'].tolist().index(389)
+# %%
+# Sanity check: re-scoring the video should reproduce the stored embedding.
+out = score_video(file_path)
+print(np.average(np.abs(out[0][0] - imp[0].detach().cpu().numpy())))
+# %%
+from qdrant_client import QdrantClient
+client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True)
+# %%
+L = np.random.random(512).tolist()
+# %%
+from qdrant_client.models import Filter, FieldCondition, Range, MatchText
+from datetime import datetime, timedelta

+collection_name = 'nuggets_clip'
+
+query = 'hello'
+max_age = 1
+max_date = datetime.now()
+min_date = max_date - timedelta(days=max_age)
+
+# One MatchText condition per calendar day in the window; the filenames embed
+# the date as _YYYYMMDD.  range(days + 1) so that today is included.
+days_step = (max_date - min_date).days
+date_arrays = list()
+for i in range(days_step + 1):
+    date_arrays.append(min_date + timedelta(days=i))
+
+string_filter = list()
+for cand_date in date_arrays:
+    string_filter.append(cand_date.strftime('_%Y%m%d'))
+
+should_list = list()
+for str_filt in string_filter:
+    ccond = FieldCondition(key='filepath', match=MatchText(text=str_filt))
+    should_list.append(ccond)
+# %%
+# A stricter must-filter (camera name plus a single date); built here but not
+# passed to the search below.
+must = [FieldCondition(key="filepath", match=MatchText(text="sidefeeder")),
+        FieldCondition(key="filepath", match=MatchText(text="_20240207"))]
+
+results = client.search(collection_name=collection_name, query_vector=L, limit=10,
+                        query_filter=Filter(should=should_list))
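
The scratch above builds both a per-day should list and a separate must list, but only passes the should filter to search. If the intent is "this camera, within the age window", the two combine in a single Filter: in Qdrant, every must condition has to hold, and at least one should condition has to match. A minimal sketch, reusing client, L, and should_list from above:

from qdrant_client.models import Filter, FieldCondition, MatchText

# Camera pinned by `must`; recency expressed as an OR over the per-day
# date substrings collected in should_list.
combined = Filter(
    must=[FieldCondition(key='filepath', match=MatchText(text='sidefeeder'))],
    should=should_list,
)
results = client.search(collection_name='nuggets_clip', query_vector=L,
                        limit=10, query_filter=combined)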