do_load = True

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchText
from datetime import datetime, timedelta
import numpy as np
import traceback
from bottle import route, run, request, debug
import open_clip
import torch

# %%
# Load the SigLIP text encoder once at startup; set do_load = False to skip the
# model and search with a random vector instead (useful for testing the API).
if do_load:
    model_name = 'ViT-SO400M-14-SigLIP-384'
    pretrained_name = 'webli'
    model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained=pretrained_name)
    device = 'cpu'
    model.eval()
    tokenizer = open_clip.get_tokenizer(model_name)

TIMEOUT = 2

# %%
collection_name = "nuggets_so400m"
client = QdrantClient(host="localhost", grpc_port=6334, prefer_grpc=True, timeout=TIMEOUT)


# %%
@route('/get_text_match')
def get_matches():
    # Parse and validate query parameters.
    valid_cameras = {'sidefeeder', 'ptz', 'railing', 'hummingbird', 'pond'}
    query = request.query.get('query', 'A large bird eating corn')
    cameras = request.query.get('cameras', 'sidefeeder')
    cams = set(cameras.split(',')).intersection(valid_cameras)
    max_age = int(request.query.get('age', 5))
    print({'Cameras': cams, 'Max Age': max_age, 'Query': query})

    # %%
    # Build the list of dates within the requested age window.
    max_date = datetime.now()
    min_date = max_date - timedelta(days=max_age)
    days_step = (max_date - min_date).days
    date_arrays = []
    for i in range(days_step):
        date_arrays.append(max_date - timedelta(days=i))

    # %%
    # Turn each (camera, date) pair into a MatchText condition on the filepath,
    # e.g. "sidefeeder/2024/05/01". With age=0 the date part is left empty so
    # only the camera name is matched.
    string_filter = [cand_date.strftime('%Y/%m/%d') for cand_date in date_arrays]
    if max_age == 0:
        string_filter = ['']
    should_list = []
    for str_filt in string_filter:
        for cam in cams:
            str_use = cam + '/' + str_filt
            print(str_use)
            should_list.append(FieldCondition(key='filepath', match=MatchText(text=str_use)))
    condition_dict = Filter(should=should_list)

    # No valid camera/date combinations: return an empty result set.
    if len(should_list) == 0:
        return {'query': query, 'num_videos': 0, 'results': [], 'error': ''}

    # Only the per-frame collection is used; the '_averaged' variant is never
    # selected by this endpoint.
    averaged = False
    num_videos = int(request.query.get('num_videos', 5))

    # Encode the text query with the SigLIP model, or fall back to a random
    # vector of the right size when the model was not loaded.
    if do_load:
        with torch.no_grad():
            text_tokenized = tokenizer(query)
            vec = model.encode_text(text_tokenized)
        vec_search = vec.cpu().numpy().squeeze().tolist()
    else:
        sz_vec = client.get_collection(collection_name).config.params.vectors.size
        vec_search = np.random.random(sz_vec).tolist()

    if averaged:
        col_name = collection_name + '_averaged'
    else:
        col_name = collection_name

    # %%
    # Hits are per-clip vectors, so several of them can point at the same video
    # file. Increase the search limit in steps until enough distinct filepaths
    # are covered (or the cap of 100 is reached).
    error = ''
    results = []
    try:
        for i in range(num_videos, 100, 10):
            results = client.search(collection_name=col_name, query_vector=vec_search,
                                    limit=i, query_filter=condition_dict, timeout=TIMEOUT)
            num_video_got = len({x.payload['filepath'] for x in results})
            if num_video_got >= num_videos:
                break
    except Exception:
        print(traceback.format_exc())
        error = traceback.format_exc()
        results = []

    def linux_to_win_path(form):
        # Map the server-side path to the share used by Windows clients.
        return form.replace('/srv', 'file://192.168.1.242/thebears/Videos/merged/')

    def normalize_to_merged(path):
        # Both archive locations are exposed through the merged filesystem.
        path = path.replace('/srv/ftp', '/mergedfs/ftp')
        path = path.replace('/mnt/archive2/videos/ftp', '/mergedfs/ftp')
        return path

    resul = []
    for x in results:
        pload = dict(x.payload)
        pload['filepath'] = normalize_to_merged(pload['filepath'])
        pload['score'] = x.score
        pload['winpath'] = linux_to_win_path(pload['filepath'])
        resul.append(pload)

    # %%
    return {'query': query, 'num_videos': num_videos, 'results': resul, 'error': error}


debug(True)
run(host='0.0.0.0', port=53003, server='bjoern')
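# Example request against the endpoint above (illustrative values only: the host
# and port come from the run() call, and the query text / camera list are just
# samples, not anything assumed to exist in the collection):
#
#   curl "http://localhost:53003/get_text_match?query=blue+jay+on+the+railing&cameras=railing,ptz&age=3&num_videos=5"
#
# Bottle serializes the returned dict as JSON, so the response carries the keys
# built in get_matches():
#   {"query": ..., "num_videos": 5,
#    "results": [{"filepath": ..., "score": ..., "winpath": ..., ...}, ...],
#    "error": ""}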