"""Upload precomputed embedding files into the 'nuggets_so400m' Milvus collection.

Walks ``root_path`` for files ending in ``oclip_embeds.npz``, inserts one row
per (embedding, frame number) pair, and writes a ``.db_has_oclip_embeds``
marker file next to each uploaded file so later runs skip it.
"""
import os
import time

import numpy as np
from pymilvus import MilvusClient, DataType
from pymilvus.client.types import LoadState

COLLECTION_NAME = 'nuggets_so400m'
EMBED_SUFFIX = '.oclip_embeds.npz'
DONE_SUFFIX = '.db_has_oclip_embeds'

client = MilvusClient(
    uri="http://localhost:19530"
)

# Make sure the collection is loaded before inserting.
res = client.get_load_state(collection_name=COLLECTION_NAME)
if res['state'] != LoadState.Loaded:
    client.load_collection(collection_name=COLLECTION_NAME)
    for _ in range(10):
        # Re-query on every attempt; the earlier response never changes.
        res = client.get_load_state(collection_name=COLLECTION_NAME)
        if res['state'] == LoadState.Loaded:
            break
        time.sleep(1)


def _path_stem(path):
    """Return *path* without its embedding suffix or, failing that, its extension.

    Lets the path helpers accept either the original media path or the
    ``.oclip_embeds.npz`` path derived from it and arrive at the same stem.
    """
    if path.endswith(EMBED_SUFFIX):
        return path[:-len(EMBED_SUFFIX)]
    return os.path.splitext(path)[0]


def get_vec_path(vpath):
    """Return the ``.oclip_embeds.npz`` path that belongs to *vpath*.

    A path that already ends in ``.oclip_embeds.npz`` is returned unchanged.
    """
    return _path_stem(vpath) + EMBED_SUFFIX


def get_db_embed_done_path(vpath):
    """Return the ``.db_has_oclip_embeds`` marker path that belongs to *vpath*."""
    return _path_stem(vpath) + DONE_SUFFIX


def upload_vector_file(vector_file_to_upload):
    """Insert every embedding stored in one vector file into the collection.

    Args:
        vector_file_to_upload: Path of the ``.oclip_embeds.npz`` file, or of
            the file it was derived from (same stem, different extension).

    Returns:
        None. Returns early, without touching the database, when the marker
        file for this path already exists.

    Raises:
        ValueError: If the last ``_``-separated token of the file name is not
            numeric, since it is used as the prefix of the integer primary id.
    """
    done_path = get_db_embed_done_path(vector_file_to_upload)
    if os.path.exists(done_path):
        print('Already exists in DB, skipping upload')
        return

    vec_path = get_vec_path(vector_file_to_upload)
    # NpzFile keeps the archive open until closed; pull the arrays out inside
    # the context manager so the handle is released promptly.
    with np.load(vec_path) as vf:
        embeds = vf['embeds']
        fr_nums = vf['frame_numbers']

    # The id prefix is the last '_'-separated token of the bare file name.
    fname_root = os.path.basename(vec_path).split('.')[0]
    fc = fname_root.split('_')[-1]

    # NOTE(review): '/srv/ftp/' is stripped with its trailing slash but
    # '/mergedfs/ftp' is not, so the two mounts yield differently shaped
    # keys. Left as-is because rows already stored may rely on it - confirm.
    filepath = (
        vec_path.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]
    )

    data = []
    for embed, frame_num in zip(embeds, fr_nums):
        frame_int = int(frame_num)
        # Zero-pad with 'd': the 'g' presentation type switches to exponent
        # notation from 1e6 upwards, which would make int() below fail.
        id_num = int(f'{fc}{frame_int:05d}')
        data.append(
            dict(
                primary_id=id_num,
                filepath=filepath,
                frame_number=frame_int,
                so400m=embed,
            )
        )

    print(f'Inserting into DB, {vec_path}')
    client.insert(collection_name=COLLECTION_NAME, data=data)

    # Record the upload time so later runs skip this file.
    with open(done_path, 'w') as ff:
        ff.write(str(time.time()))


root_path = '/srv/ftp/railing/2024'
to_put = [
    os.path.join(root, x)
    for root, dirs, files in os.walk(root_path)
    for x in files
    if x.endswith('oclip_embeds.npz')
]

for x in to_put:
    upload_vector_file(x)