This commit is contained in:
2025-04-17 15:55:54 -04:00
parent 5ca30d5e11
commit 79f9a1dbc2
12 changed files with 611 additions and 80 deletions

View File

@@ -0,0 +1,15 @@
from pymilvus import MilvusClient, DataType
import numpy as np
# Scratch script: connect to the local Milvus server and run one random-vector
# query against the collection as a smoke test.
client = MilvusClient(
    uri="http://localhost:19530"
)
cname = 'nuggets_so400m'

# Raises if the collection does not exist, which doubles as a connectivity check.
client.get_collection_stats(cname)

# Random float16 query vector with the same dimensionality (1152) as the query
# vectors used by the other scripts against this collection.
vec = np.random.random(1152).astype(np.float16)

# The original line was a bare ``client.search`` attribute access, which does
# nothing; actually issue the query.
hits = client.search(
    collection_name=cname,
    data=[vec],
    limit=10,
)

View File

@@ -0,0 +1,56 @@
from pymilvus import MilvusClient, DataType
# 1. Connect to the local Milvus server.
client = MilvusClient(uri="http://localhost:19530")
client.get_collection_stats('nuggets_so400m')

# %%
# 2. Describe the collection: an explicit (non auto-generated) primary key and
#    no dynamic fields.
schema = MilvusClient.create_schema(auto_id=False, enable_dynamic_field=False)
for field_kwargs in (
    dict(field_name="primary_id", datatype=DataType.INT64, is_primary=True),
    dict(field_name="filepath", datatype=DataType.VARCHAR, max_length=128),
    dict(field_name="frame_number", datatype=DataType.INT32),
    dict(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1152),
):
    schema.add_field(**field_kwargs)

# 3. One index per indexed field, registered in the same order as before:
#    primary key, file path, then the vector field.
index_params = client.prepare_index_params()
for index_kwargs in (
    dict(field_name="primary_id", index_type="STL_SORT"),
    dict(field_name="filepath", index_type="Trie"),
    dict(
        field_name="so400m",
        index_type="IVF_FLAT",
        metric_type="COSINE",
        params={"nlist": 128},
    ),
):
    index_params.add_index(**index_kwargs)

# 4. Create the collection from the schema and index definitions above.
client.create_collection(
    collection_name="nuggets_so400m",
    schema=schema,
    index_params=index_params,
)

# %%
# 5. Query the load state, then request a load; ``res`` ends up holding the
#    result of the load call, as in the original cell.
res = client.get_load_state(collection_name="nuggets_so400m")
res = client.load_collection(collection_name="nuggets_so400m")

View File

@@ -0,0 +1,66 @@
from pymilvus import MilvusClient, DataType
client = MilvusClient(
    uri="http://localhost:19530"
)

# DESTRUCTIVE: every existing collection is dropped before the per-camera
# collections are rebuilt below.
for x in client.list_collections():
    client.drop_collection(x)

# %%
import os

# One collection is created per entry found directly under the FTP root.
out = os.listdir('/mergedfs/ftp/')

# %%
created_collections = []
for cam in out:
    schema = MilvusClient.create_schema(
        auto_id=False,
        enable_dynamic_field=False,  # was misspelled ``nenable_dynamic_field``
    )
    schema.add_field(field_name="primary_id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(field_name="filepath", datatype=DataType.VARCHAR, max_length=128)
    schema.add_field(field_name="frame_number", datatype=DataType.INT32)
    # The partition key is sized for an 8-character date string such as '20241220'.
    schema.add_field(
        field_name="date",
        datatype=DataType.VARCHAR,
        max_length=len('20241220'),
        is_partition_key=True,
    )
    schema.add_field(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1152)

    index_params = client.prepare_index_params()
    index_params.add_index(
        field_name="primary_id",
        index_type="STL_SORT")
    index_params.add_index(
        field_name="filepath",  # was misspelled ``filepatph``, a field that does not exist
        index_type="Trie")
    index_params.add_index(
        field_name="so400m",
        index_type="IVF_SQ8",
        metric_type="COSINE",
        params={'nlist': 128})
    index_params.add_index(
        field_name='date',
        index_type='Trie')

    collection_name = f"nuggets_{cam}_so400m"
    client.create_collection(
        collection_name=collection_name,
        schema=schema,
        index_params=index_params
    )
    created_collections.append(collection_name)
    print(cam)

# %%
# The original cell loaded "nuggets_so400m", which was dropped at the top of
# this script and never recreated; load the collections created above instead.
for collection_name in created_collections:
    client.load_collection(collection_name=collection_name)
    res = client.get_load_state(collection_name=collection_name)

View File

@@ -0,0 +1,64 @@
from pymilvus import MilvusClient, DataType
client = MilvusClient(
    uri="http://localhost:19530"
)
# Dropping all existing collections is intentionally disabled in this script:
#for x in client.list_collections():
#    client.drop_collection(x)

# %%
import os

# One collection is created per entry found directly under the FTP root.
out = os.listdir('/mergedfs/ftp/')

# %%
for cam in out:
    schema = MilvusClient.create_schema(
        auto_id=False,
        enable_dynamic_field=False,  # was misspelled ``nenable_dynamic_field``
    )
    schema.add_field(field_name="primary_id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(field_name="filepath", datatype=DataType.VARCHAR, max_length=128)
    schema.add_field(field_name="frame_number", datatype=DataType.INT32)
    # The partition key is sized for an 8-character date string such as '20241220'.
    schema.add_field(
        field_name="date",
        datatype=DataType.VARCHAR,
        max_length=len('20241220'),
        is_partition_key=True,
    )
    # NOTE(review): dim is 1024 here but 1152 in the non-siglip2 collections;
    # confirm this matches the embedding size actually being inserted.
    schema.add_field(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1024)

    index_params = client.prepare_index_params()
    index_params.add_index(
        field_name="primary_id",
        index_type="STL_SORT")
    index_params.add_index(
        field_name="filepath",
        index_type="Trie")
    index_params.add_index(
        field_name="so400m",
        index_type="IVF_SQ8",
        metric_type="COSINE",
        params={'nlist': 128})
    index_params.add_index(
        field_name='date',
        index_type='Trie')

    client.create_collection(
        collection_name=f"nuggets_{cam}_so400m_siglip2",
        schema=schema,
        index_params=index_params
    )
    print(cam)

#res = client.get_load_state( collection_name="nuggets_ptz_so400m_siglip2")
#res = client.load_collection(collection_name="nuggets_so400m")

View File

@@ -0,0 +1,25 @@
from pymilvus import MilvusClient, DataType
import numpy as np
# 1. Set up a Milvus client
client = MilvusClient(uri="http://localhost:19530")
cname = "nuggets_so400m"
ou = client.get_collection_stats(cname)

import random

# Random 1152-element query vector; only useful for exercising the search path.
vec = [random.random() for _ in range(1152)]

# %%
from prettyprinter import cpprint

# (An unfinished ``vec = random.`` line used to sit here; it was a syntax error
# that prevented the script from being parsed, and ``vec`` is already set above.)
out = client.search(
    collection_name=cname,
    limit=100,
    data=[vec],
    output_fields=["filepath", "frame_number"],
    # Restrict hits to file paths containing either of the two dates.
    filter='(filepath like "%2024/09/20%") or (filepath like "%2024/09/23%")'
)
# ``out[0]`` holds the hits for the single query vector; show their file paths.
cpprint([x['entity']['filepath'] for x in out[0]])

View File

@@ -0,0 +1,75 @@
import os
import time

import numpy as np
from pymilvus import MilvusClient, DataType
from pymilvus.client.types import LoadState
client = MilvusClient(
    uri="http://localhost:19530"
)

# Make sure the target collection is loaded before anything is inserted.
res = client.get_load_state(
    collection_name="nuggets_so400m"
)
if res['state'] != LoadState.Loaded:
    client.load_collection(collection_name='nuggets_so400m')
    # Poll for up to ~10 seconds. The state is re-queried on every pass; the
    # original loop kept testing the stale ``res`` and so could never exit early.
    for i in range(10):
        res = client.get_load_state(
            collection_name="nuggets_so400m"
        )
        if res['state'] == LoadState.Loaded:
            break
        time.sleep(1)
# Suffixes of the side-car files derived from a source path.
_VEC_SUFFIX = '.oclip_embeds.npz'
_DB_DONE_SUFFIX = '.db_has_oclip_embeds'


def _embed_stem(vpath):
    """Return ``vpath`` minus its embeddings suffix or, failing that, its extension."""
    if vpath.endswith(_VEC_SUFFIX):
        return vpath[:-len(_VEC_SUFFIX)]
    return os.path.splitext(vpath)[0]


def get_vec_path(vpath):
    """Return the embeddings archive path (``<stem>.oclip_embeds.npz``) for ``vpath``.

    ``vpath`` may be a source file path (its extension is replaced) or an
    embeddings archive path, which is returned unchanged instead of getting
    the suffix appended a second time.
    """
    return _embed_stem(vpath) + _VEC_SUFFIX


def get_db_embed_done_path(vpath):
    """Return the "already uploaded" marker path (``<stem>.db_has_oclip_embeds``).

    Accepts the same two kinds of path as ``get_vec_path`` and yields the same
    marker for both, so the check and the write always agree.
    """
    return _embed_stem(vpath) + _DB_DONE_SUFFIX


def upload_vector_file(vector_file_to_upload):
    """Insert the embeddings stored next to ``vector_file_to_upload`` into Milvus.

    Reads the ``embeds`` and ``frame_numbers`` arrays from the embeddings
    archive, inserts one row per pair into the ``nuggets_so400m`` collection
    and then writes a marker file so later runs skip this file.

    Args:
        vector_file_to_upload: Path of the embeddings archive, or of the source
            file it was derived from.

    Returns:
        None. Returns early, without touching the database, when the marker
        file already exists.

    Raises:
        ValueError: If the text after the last ``_`` in the file name is not
            an integer, since it forms the leading part of the primary id.
    """
    # Resolve the marker once from the caller's path so the existence check and
    # the final write target the same file. (The original called an undefined
    # ``get_embed_done_path`` and derived the two paths from different inputs.)
    done_marker = get_db_embed_done_path(vector_file_to_upload)
    if os.path.exists(done_marker):
        print('Already exists in DB, skipping upload')
        return

    vector_file_to_upload = get_vec_path(vector_file_to_upload)
    # Close the archive as soon as both arrays have been read.
    with np.load(vector_file_to_upload) as vf:
        embeds = vf['embeds']
        fr_nums = vf['frame_numbers']

    # The primary id is <digits after the last '_' in the file name> followed by
    # the frame number zero-padded to five digits.
    fname_root = vector_file_to_upload.rsplit('/', 1)[-1].split('.')[0]
    fc = fname_root.split('_')[-1]

    # Strip the known FTP roots and everything from the first '.' onwards.
    # NOTE(review): the '/mergedfs/ftp' prefix is removed without its trailing
    # slash, so such paths keep a leading '/', unlike '/srv/ftp/' ones; confirm
    # which form existing rows use before normalising.
    filepath = vector_file_to_upload.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]

    data = [
        dict(
            # ``05d`` rather than ``05g``: ``g`` switches to exponent notation
            # for values >= 1e6, which ``int()`` then cannot parse.
            primary_id=int(fc + f'{int(frame_num):05d}'),
            filepath=filepath,
            frame_number=int(frame_num),
            so400m=embed,
        )
        for embed, frame_num in zip(embeds, fr_nums)
    ]

    print(f'Inserting into DB, {vector_file_to_upload}')
    client.insert(collection_name='nuggets_so400m', data=data)

    # Record the upload time so this file is skipped on the next run.
    with open(done_marker, 'w') as ff:
        ff.write(str(time.time()))
root_path = '/srv/ftp/railing/2024'

# Collect every embeddings archive below ``root_path`` first, then upload them
# one at a time in the order the walk produced them.
to_put = [
    os.path.join(root, x)
    for root, dirs, files in os.walk(root_path)
    for x in files
    if x.endswith('oclip_embeds.npz')
]
for x in to_put:
    upload_vector_file(x)