YACWC
This commit is contained in:
75
milvus_migrate/upload_from_folder.py
Normal file
75
milvus_migrate/upload_from_folder.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import os
import time

import numpy as np
from pymilvus import MilvusClient, DataType
from pymilvus.client.types import LoadState
|
||||
# Client for the Milvus server at the local endpoint; shared by the rest of the script.
client = MilvusClient(
    uri="http://localhost:19530"
)

# Ask the server whether the target collection is already loaded.
res = client.get_load_state(
    collection_name="nuggets_so400m"
)
if res['state'] != LoadState.Loaded:
    client.load_collection(collection_name='nuggets_so400m')
    # Poll up to ten times, one second apart. The state is re-queried on
    # every attempt: testing the response fetched before load_collection()
    # could never observe the transition to Loaded.
    for _attempt in range(10):
        res = client.get_load_state(collection_name="nuggets_so400m")
        if res['state'] == LoadState.Loaded:
            break
        time.sleep(1)
    else:
        # Best effort: keep going as before, but make the timeout visible.
        print('Collection nuggets_so400m not reported as loaded after 10 checks, continuing')
|
||||
|
||||
|
||||
def get_vec_path(vpath):
    """Return the embeddings archive path that sits next to *vpath*.

    The final extension of *vpath* (if any) is dropped and
    '.oclip_embeds.npz' is appended in its place.
    """
    stem, _extension = os.path.splitext(vpath)
    return stem + '.oclip_embeds.npz'
|
||||
|
||||
def get_db_embed_done_path(vpath):
    """Return the marker-file path that sits next to *vpath*.

    The final extension of *vpath* (if any) is dropped and
    '.db_has_oclip_embeds' is appended in its place.
    """
    stem, _extension = os.path.splitext(vpath)
    return stem + '.db_has_oclip_embeds'
|
||||
|
||||
|
||||
def upload_vector_file(vector_file_to_upload):
    """Insert the embeddings stored for one file into 'nuggets_so400m'.

    *vector_file_to_upload* may be either the '.oclip_embeds.npz' archive
    itself (which is what the directory walk in this script passes) or a
    sibling path sharing the same stem. The archive is read for its
    'embeds' and 'frame_numbers' arrays, one row per pair is inserted
    through the module-level ``client``, and a '.db_has_oclip_embeds'
    marker holding the current timestamp is written afterwards. If that
    marker already exists the upload is skipped.

    Returns None.
    """
    # When the archive itself is passed, strip only its '.npz' so that the
    # os.path.splitext() inside the two path helpers lands on the shared
    # stem. Without this the archive suffix would be appended a second time
    # and the marker checked here would differ from the one written below.
    if vector_file_to_upload.endswith('.oclip_embeds.npz'):
        source_path = os.path.splitext(vector_file_to_upload)[0]
    else:
        source_path = vector_file_to_upload

    # Compute the marker once so the skip check and the final write agree.
    done_marker = get_db_embed_done_path(source_path)
    if os.path.exists(done_marker):
        print('Already exists in DB, skipping upload')
        return

    vector_path = get_vec_path(source_path)
    # NpzFile keeps the archive open until closed; pull both arrays out
    # inside the context manager so the handle is released promptly.
    with np.load(vector_path) as vf:
        embeds = vf['embeds']
        fr_nums = vf['frame_numbers']

    # Base file name up to its first '.', then the text after its last '_'.
    # That trailing token must be numeric because it prefixes the primary id.
    fname_root = vector_path.rsplit('/', 1)[-1].split('.')[0]
    fc = fname_root.split('_')[-1]

    # NOTE(review): '/srv/ftp/' is removed with its trailing slash while
    # '/mergedfs/ftp' is not, so the two mounts yield differently-rooted
    # values; kept as-is because existing rows may rely on it - confirm.
    filepath = vector_path.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]

    data = []
    for embed, frame_num in zip(embeds, fr_nums):
        frame_index = int(frame_num)
        # Zero-pad with an integer format: the 'g' presentation type turns
        # into scientific notation for large values, which int() rejects.
        fg = '{0:05d}'.format(frame_index)
        id_num = int(fc + fg)
        data.append(dict(primary_id=id_num, filepath=filepath, frame_number=frame_index, so400m=embed))

    print(f'Inserting into DB, {vector_path}')
    client.insert(collection_name='nuggets_so400m', data=data)

    # Record completion so later runs skip this file.
    with open(done_marker, 'w') as ff:
        ff.write(str(time.time()))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Directory tree that is scanned for embedding archives.
root_path = '/srv/ftp/railing/2024'

# Gather every file whose name ends in 'oclip_embeds.npz', walking the
# whole tree first so that uploads only start once the scan is complete.
to_put = list()
for root, _dirs, files in os.walk(root_path):
    to_put.extend(
        os.path.join(root, x) for x in files if x.endswith('oclip_embeds.npz')
    )

# Upload the collected archives one at a time, in discovery order.
for x in to_put:
    upload_vector_file(x)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user