# Files
# vector_search/ngt_migrate/create_index.py
# 2025-04-17 15:55:54 -04:00
#
# 39 lines
# 833 B
# Python

# Create the NGT index on first run, then open it for the later cells.
# `os` must be imported here: os.path.exists is used below, before the
# file's later `import os` line would have run.
import os

import ngtpy

# Dimension handed to ngtpy.create when a new index is made.
# NOTE(review): presumably the width of the OpenCLIP so400m embeddings -
# confirm against the producer of the .oclip_embeds.npz files.
dim = 1152
# The index location is given to ngtpy as a bytes path.
index_path = b"/mnt/ssd_nvm/ngt/openclip_so400m"

# Create only when nothing exists at the path; otherwise reuse what is there.
if not os.path.exists(index_path):
    ngtpy.create(index_path, dim)
    print(f'Created index at {index_path}')

# Open the (new or pre-existing) index; later cells insert into `index`.
index = ngtpy.Index(index_path)
# %%
import os
# Collect the full path of every '*.oclip_embeds.npz' file found anywhere
# under /mergedfs/ftp, in the order os.walk visits them.
to_add_to_index = []
for dirpath, _subdirs, filenames in os.walk('/mergedfs/ftp'):
    to_add_to_index.extend(
        os.path.join(dirpath, filename)
        for filename in filenames
        if filename.endswith('.oclip_embeds.npz')
    )
# %%
import numpy as np
import progressbar
# %%
# Insert the 'embeds' array of every collected .npz file into the index.
# A file that cannot be loaded or inserted is skipped (best effort), but the
# failure is recorded and reported instead of being silently discarded.
import sys

import progressbar

total_vecs = 0  # vectors from files whose insert and save both succeeded
failed = []     # (path, exception) for every file that was skipped
bar = progressbar.ProgressBar(max_value=len(to_add_to_index))
for idx, to_add in enumerate(to_add_to_index):
    try:
        # Close the .npz archive as soon as the array has been read.
        with np.load(to_add) as archive:
            emb_vec = archive['embeds']
        index.batch_insert(emb_vec)
        # NOTE(review): saving after every file looks like a checkpoint so an
        # interrupted run keeps earlier inserts - confirm this is intended,
        # since it may be costly on a large index.
        index.save()
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C still stops the run.
        failed.append((to_add, exc))
    else:
        # Count only after the insert succeeded, so the total stays accurate.
        total_vecs += emb_vec.shape[0]
    # idx is zero-based; report the number of files processed so far.
    bar.update(idx + 1)
bar.finish()

# Report skipped files after the bar is finished so output is not interleaved.
for skipped_path, error in failed:
    print(f'Skipped {skipped_path}: {error!r}', file=sys.stderr)