76 lines
1.9 KiB
Python
76 lines
1.9 KiB
Python
import os
import time

import numpy as np
from pymilvus import MilvusClient, DataType
from pymilvus.client.types import LoadState
|
|
# Connect to the Milvus server on the local default port.
client = MilvusClient(
    uri="http://localhost:19530"
)

# Make sure the collection is loaded before anything below uses it.
res = client.get_load_state(
    collection_name="nuggets_so400m"
)
if res['state'] != LoadState.Loaded:
    client.load_collection(collection_name='nuggets_so400m')
    # Poll up to 10 times, one second apart. The load state must be
    # re-queried on every pass: the response fetched before load_collection()
    # is stale and would never report "Loaded".
    for _ in range(10):
        res = client.get_load_state(
            collection_name="nuggets_so400m"
        )
        if res['state'] == LoadState.Loaded:
            break
        time.sleep(1)
    else:
        # Best effort, as before: keep going, but say so.
        print('Collection nuggets_so400m not reported as loaded after 10 checks, continuing')
|
|
|
|
|
|
def get_vec_path(vpath):
    """Return the path of the embeddings archive that belongs to ``vpath``.

    The last extension of ``vpath`` is replaced with ``.oclip_embeds.npz``.
    A path that already ends in ``oclip_embeds.npz`` (the same test the
    directory scan in this file uses to pick archives) is returned unchanged,
    so the function can safely be applied to an archive path without
    producing ``….oclip_embeds.oclip_embeds.npz``.

    Args:
        vpath: Path of a source file or of an embeddings archive.

    Returns:
        The embeddings archive path as a string.
    """
    if vpath.endswith('oclip_embeds.npz'):
        return vpath
    return os.path.splitext(vpath)[0] + '.oclip_embeds.npz'
|
|
|
|
def get_db_embed_done_path(vpath):
    """Return the marker-file path derived from ``vpath``.

    The last extension of ``vpath`` is dropped and ``.db_has_oclip_embeds``
    is appended in its place.
    """
    marker_suffix = '.db_has_oclip_embeds'
    stem, _ext = os.path.splitext(vpath)
    return stem + marker_suffix
|
|
|
|
|
|
def upload_vector_file(vector_file_to_upload):
    """Insert the embeddings of one archive into the ``nuggets_so400m`` collection.

    The archive is read with ``np.load`` and must contain the arrays
    ``embeds`` and ``frame_numbers``; one row is inserted per pair. After a
    successful insert a marker file (see ``get_db_embed_done_path``) holding
    the current timestamp is written, and any later call for the same
    argument is skipped while that marker exists.

    Args:
        vector_file_to_upload: Either the embeddings archive itself (name
            ends in ``oclip_embeds.npz``) or a path from which the archive
            path is derived with ``get_vec_path``.

    Raises:
        ValueError: If the text after the last ``_`` in the archive's base
            name is not numeric; it is used as the leading digits of the
            integer primary id.
    """
    # Resolve the marker once, from the argument as given, so the
    # "already uploaded" check and the marker written below always agree.
    done_path = get_db_embed_done_path(vector_file_to_upload)
    if os.path.exists(done_path):
        print('Already exists in DB, skipping upload')
        return

    # Do not re-derive the archive name when we were handed the archive
    # already; that would double the ".oclip_embeds.npz" suffix.
    if vector_file_to_upload.endswith('oclip_embeds.npz'):
        vec_path = vector_file_to_upload
    else:
        vec_path = get_vec_path(vector_file_to_upload)

    # NpzFile keeps the archive open; read both arrays, then close it.
    with np.load(vec_path) as vf:
        embeds = vf['embeds']
        fr_nums = vf['frame_numbers']

    # Base name up to the first '.', then its last '_'-separated piece.
    fname_root = os.path.basename(vec_path).split('.')[0]
    fc = fname_root.split('_')[-1]

    # Path stored with each row: storage prefix removed, cut at the first '.'.
    # NOTE(review): '/mergedfs/ftp' is stripped without its trailing slash, so
    # such paths keep a leading '/' unlike '/srv/ftp/' ones - confirm intended.
    filepath = vec_path.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]

    # primary_id = <fc digits><frame number zero-padded to 5 digits>.
    # Integer formatting is used on purpose: the 'g' presentation type turns
    # values >= 1e6 into scientific notation, which int() cannot parse.
    data = [
        dict(
            primary_id=int(f'{fc}{int(frame_num):05d}'),
            filepath=filepath,
            frame_number=int(frame_num),
            so400m=embed,
        )
        for embed, frame_num in zip(embeds, fr_nums)
    ]

    client.insert(collection_name='nuggets_so400m', data=data)
    print(f'Inserting into DB, {vec_path}')

    # Record completion so the next run skips this file.
    with open(done_path, 'w') as ff:
        ff.write(str(time.time()))
|
|
|
|
|
|
|
|
|
|
|
|
# Gather every embeddings archive below root_path first, then upload them
# one after another.
root_path = '/srv/ftp/railing/2024'
to_put = []
for dirpath, _dirnames, filenames in os.walk(root_path):
    to_put.extend(
        os.path.join(dirpath, fname)
        for fname in filenames
        if fname.endswith('oclip_embeds.npz')
    )

for archive_path in to_put:
    upload_vector_file(archive_path)
|
|
|
|
|
|
|