YACWC
This commit is contained in:
38
ngt_migrate/create_index.py
Normal file
38
ngt_migrate/create_index.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import os

import ngtpy

# Number of dimensions per vector, passed to ngtpy.create below.
# NOTE(review): presumably the embedding width of the OpenCLIP SO400M model
# named in the index path -- confirm against the embedding producer.
dim = 1152
# The index location is given as bytes; it is used unchanged for the existence
# check, for creation and for opening.
index_path = b"/mnt/ssd_nvm/ngt/openclip_so400m"

# Create the on-disk index only when nothing exists at the path yet, so that
# re-running this cell re-opens the existing index instead of recreating it.
if not os.path.exists(index_path):
    ngtpy.create(index_path, dim)
    print(f'Created index at {index_path}')

index = ngtpy.Index(index_path)
|
||||
# %%
|
||||
|
||||
import os

# Recursively walk /mergedfs/ftp and collect the full path of every file whose
# name ends in '.oclip_embeds.npz'; these are the files fed to the index later.
to_add_to_index = [
    os.path.join(parent, filename)
    for parent, _subdirs, filenames in os.walk('/mergedfs/ftp')
    for filename in filenames
    if filename.endswith('.oclip_embeds.npz')
]
|
||||
|
||||
# %%
|
||||
import numpy as np
|
||||
import progressbar
|
||||
# %%
|
||||
import progressbar

# Number of vectors that were actually inserted and saved into the index.
total_vecs = 0
# (path, error) pairs for files that could not be loaded or inserted.
failed_files = []

bar = progressbar.ProgressBar(max_value=len(to_add_to_index))
for idx, to_add in enumerate(to_add_to_index):
    try:
        # np.load on an .npz returns an archive object holding an open file;
        # the context manager closes it once the array has been read.
        with np.load(to_add) as archive:
            emb_vec = archive['embeds']
        index.batch_insert(emb_vec)
        # Saved after every file, as in the original, so an interrupted run
        # keeps what has been inserted so far.
        index.save()
    except Exception as exc:
        # Best effort: one unreadable or malformed file must not abort the
        # whole run, but remember it so it can be reported and retried.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        failed_files.append((to_add, exc))
    else:
        # Count only after the insert and save succeeded.
        total_vecs += emb_vec.shape[0]

    # idx is 0-based; report the number of files processed so far so the bar
    # reaches max_value on the last file.
    bar.update(idx + 1)
bar.finish()

# Report failures after the bar is finished so the output is not interleaved.
for failed_path, failed_exc in failed_files:
    print(f'Skipped {failed_path}: {failed_exc!r}')
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user