# Viewer metadata: 39 lines, 833 B, Python
# Create the NGT index on first run, then open it for insertion.
# `os` is imported here because the existence check below needs it; the
# original script only imported it in a later cell, which raised NameError
# when the file was run top to bottom.
import os

import ngtpy

# Passed to ngtpy.create as the dimension of the index.
dim = 1152

# On-disk location of the index (a bytes path, as in the original script).
index_path = b"/mnt/ssd_nvm/ngt/openclip_so400m"

# Only create the index when nothing exists at the path yet, so an existing
# index is reopened instead of being recreated.
if not os.path.exists(index_path):
    ngtpy.create(index_path, dim)
    print(f'Created index at {index_path}')

index = ngtpy.Index(index_path)
|
|
# %%
|
|
|
|
import os

# Paths of every file under /mergedfs/ftp whose name ends in
# '.oclip_embeds.npz', in the order os.walk yields them.
to_add_to_index = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/mergedfs/ftp')
    for name in names
    if name.endswith('.oclip_embeds.npz')
]
|
|
|
|
# %%
|
|
import numpy as np
|
|
import progressbar
|
|
# %%
|
|
# Insert the 'embeds' array of every collected archive into the index.
import progressbar

# Number of vectors that were actually inserted (counted only on success).
total_vecs = 0
# (path, exception) for every archive that could not be loaded or inserted.
failed_files = []

bar = progressbar.ProgressBar(max_value=len(to_add_to_index))
# Count from 1 so the bar reaches max_value after the last archive.
for idx, to_add in enumerate(to_add_to_index, start=1):
    try:
        # np.load on an .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once the array has been read.
        with np.load(to_add) as archive:
            emb_vec = archive['embeds']
        index.batch_insert(emb_vec)
        # Saved after every archive, as in the original script.
        # NOTE(review): if saving is slow on a large index, consider saving
        # every N archives plus once at the end instead.
        index.save()
    except Exception as exc:
        # Best effort: skip the bad archive but remember why, instead of
        # silently discarding the error. KeyboardInterrupt/SystemExit are
        # no longer swallowed, so the run can be interrupted.
        failed_files.append((to_add, exc))
    else:
        total_vecs += emb_vec.shape[0]

    bar.update(idx)

if to_add_to_index:
    bar.finish()

# Report skipped archives after the bar is done so the output is not garbled.
for path, exc in failed_files:
    print(f'Skipped {path}: {exc!r}')
|
|
|
|
|
|
|
|
|