YACWC

2025-04-17 15:55:54 -04:00
parent 5ca30d5e11
commit 79f9a1dbc2
12 changed files with 611 additions and 80 deletions
--- a/milvus_migrate/dashboard
+++ b/milvus_migrate/dashboard
@@ -0,0 +1,15 @@
+from pymilvus import MilvusClient, DataType
+import numpy as np
+
+# Scratch script: connect to the local Milvus server, print the stats of one
+# collection and run a single search against it with a random query vector.
+client = MilvusClient(
+    uri="http://localhost:19530"
+)
+cname = 'nuggets_so400m'
+print(client.get_collection_stats(cname))
+
+# Random float16 query vector with 1152 components.
+vec = np.random.random(1152).astype(np.float16)
+
+# The script used to end on a bare `client.search` attribute access, which
+# does nothing; issue the search the query vector was built for.
+hits = client.search(collection_name=cname, data=[vec], limit=10)
+print(hits)
--- a/milvus_migrate/create_collection.py
+++ b/milvus_migrate/create_collection.py
@@ -0,0 +1,56 @@
+from pymilvus import MilvusClient, DataType
+
+# 1. Set up a Milvus client
+client = MilvusClient(
+    uri="http://localhost:19530"
+)
+COLLECTION_NAME = "nuggets_so400m"
+# %%
+
+# 2. Schema: caller-supplied INT64 primary key, a file path, a frame number
+#    and a 1152-dimensional float16 vector.
+schema = MilvusClient.create_schema(
+    auto_id=False,
+    enable_dynamic_field=False,
+)
+schema.add_field(field_name="primary_id", datatype=DataType.INT64, is_primary=True)
+schema.add_field(field_name="filepath", datatype=DataType.VARCHAR, max_length=128)
+schema.add_field(field_name="frame_number", datatype=DataType.INT32)
+schema.add_field(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1152)
+
+# 3. One index per indexed field; the vector index uses the cosine metric.
+index_params = client.prepare_index_params()
+index_params.add_index(field_name="primary_id", index_type="STL_SORT")
+index_params.add_index(field_name="filepath", index_type="Trie")
+index_params.add_index(
+    field_name="so400m",
+    index_type="IVF_FLAT",
+    metric_type="COSINE",
+    params={"nlist": 128},
+)
+
+# 4. Create the collection only when it is missing, so the script can be re-run.
+if not client.has_collection(collection_name=COLLECTION_NAME):
+    client.create_collection(
+        collection_name=COLLECTION_NAME,
+        schema=schema,
+        index_params=index_params,
+    )
+
+# The stats query used to run right after connecting, i.e. before the
+# collection was created; it is only meaningful once the collection exists.
+print(client.get_collection_stats(COLLECTION_NAME))
+
+# %%
+# 5. Load the collection, then read the load state.  The original fetched the
+#    state first and immediately overwrote `res` with load_collection's result.
+client.load_collection(collection_name=COLLECTION_NAME)
+res = client.get_load_state(collection_name=COLLECTION_NAME)
+print(res)
+
+
+
+
+
--- a/milvus_migrate/create_collection_v2.py
+++ b/milvus_migrate/create_collection_v2.py
@@ -0,0 +1,66 @@
+from pymilvus import MilvusClient, DataType
+
+
+client = MilvusClient(
+    uri="http://localhost:19530"
+)
+# DESTRUCTIVE: every collection on the server is dropped before the
+# per-camera collections are created below.
+for x in client.list_collections():
+    client.drop_collection(x)
+
+# %%
+
+import os
+# Each entry directly under the FTP root is treated as one camera name.
+out = os.listdir('/mergedfs/ftp/')
+
+# %%
+# Create one `nuggets_<cam>_so400m` collection per camera, partitioned by date.
+for cam in out:
+    schema = MilvusClient.create_schema(
+        auto_id=False,
+        # Was misspelled `nenable_dynamic_field`, so the intended option was
+        # never passed under its real name.
+        enable_dynamic_field=False,
+    )
+    schema.add_field(field_name="primary_id", datatype=DataType.INT64, is_primary=True)
+    schema.add_field(field_name="filepath", datatype=DataType.VARCHAR, max_length=128)
+    schema.add_field(field_name="frame_number", datatype=DataType.INT32)
+    # The date string is sized to hold values shaped like '20241220'.
+    schema.add_field(field_name="date", datatype=DataType.VARCHAR, max_length=len('20241220'), is_partition_key=True)
+    schema.add_field(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1152)
+
+    index_params = client.prepare_index_params()
+
+    index_params.add_index(
+        field_name="primary_id",
+        index_type="STL_SORT")
+
+    # Was "filepatph", which is not a field of the schema built above.
+    index_params.add_index(
+        field_name="filepath",
+        index_type="Trie")
+
+    index_params.add_index(
+        field_name="so400m",
+        index_type="IVF_SQ8",
+        metric_type="COSINE",
+        params={'nlist': 128})
+
+    index_params.add_index(
+        field_name='date',
+        index_type='Trie')
+
+    client.create_collection(
+        collection_name=f"nuggets_{cam}_so400m",
+        schema=schema,
+        index_params=index_params
+    )
+    print(cam)
+
+# %%
+# `nuggets_so400m` no longer exists after the drop loop at the top, so load
+# and report on the per-camera collections that were just created instead.
+for cam in out:
+    collection_name = f"nuggets_{cam}_so400m"
+    client.load_collection(collection_name=collection_name)
+    res = client.get_load_state(collection_name=collection_name)
+    print(collection_name, res)
+
+
+
+
+
--- a/milvus_migrate/create_collection_v3.py
+++ b/milvus_migrate/create_collection_v3.py
@@ -0,0 +1,64 @@
+from pymilvus import MilvusClient, DataType
+
+
+client = MilvusClient(
+    uri="http://localhost:19530"
+)
+#for x in client.list_collections():
+#    client.drop_collection(x)
+
+
+# %%
+
+import os
+# Each entry directly under the FTP root is treated as one camera name.
+out = os.listdir('/mergedfs/ftp/')
+
+# %%
+# Create one `nuggets_<cam>_so400m_siglip2` collection per camera,
+# partitioned by date.
+for cam in out:
+    schema = MilvusClient.create_schema(
+        auto_id=False,
+        # Was misspelled `nenable_dynamic_field`, so the intended option was
+        # never passed under its real name.
+        enable_dynamic_field=False,
+    )
+    schema.add_field(field_name="primary_id", datatype=DataType.INT64, is_primary=True)
+    schema.add_field(field_name="filepath", datatype=DataType.VARCHAR, max_length=128)
+    schema.add_field(field_name="frame_number", datatype=DataType.INT32)
+    # The date string is sized to hold values shaped like '20241220'.
+    schema.add_field(field_name="date", datatype=DataType.VARCHAR, max_length=len('20241220'), is_partition_key=True)
+    # NOTE(review): dim is 1024 here but 1152 in the other create_collection
+    # scripts -- confirm it matches the embeddings that will be inserted.
+    schema.add_field(field_name="so400m", datatype=DataType.FLOAT16_VECTOR, dim=1024)
+
+    index_params = client.prepare_index_params()
+
+    index_params.add_index(
+        field_name="primary_id",
+        index_type="STL_SORT")
+
+    index_params.add_index(
+        field_name="filepath",
+        index_type="Trie")
+
+    index_params.add_index(
+        field_name="so400m",
+        index_type="IVF_SQ8",
+        metric_type="COSINE",
+        params={'nlist': 128})
+
+    index_params.add_index(
+        field_name='date',
+        index_type='Trie')
+
+    client.create_collection(
+        collection_name=f"nuggets_{cam}_so400m_siglip2",
+        schema=schema,
+        index_params=index_params
+    )
+    print(cam)
+
+#res = client.get_load_state( collection_name="nuggets_ptz_so400m_siglip2")
+#res = client.load_collection(collection_name="nuggets_so400m")
+
+
+
+
+
--- a/milvus_migrate/search_try.py
+++ b/milvus_migrate/search_try.py
@@ -0,0 +1,25 @@
+from pymilvus import MilvusClient, DataType
+import numpy as np
+
+# 1. Set up a Milvus client
+# Scratch script: run one filtered search with a random query vector and
+# print the file paths of the hits.
+client = MilvusClient(uri="http://localhost:19530")
+cname = "nuggets_so400m"
+ou = client.get_collection_stats(cname)
+
+import random
+# Random query vector with 1152 components (plain Python floats).
+vec = [random.random() for x in range(1152)]
+# %%
+
+from prettyprinter import cpprint
+# This line used to be the unfinished statement `vec = random.`, a syntax
+# error that stopped the whole file from parsing.  Convert the query to
+# float16 instead, the element type create_collection.py declares for the
+# `so400m` field.
+vec = np.asarray(vec, dtype=np.float16)
+out = client.search(
+    collection_name=cname,
+    limit=100,
+    data=[vec],
+    output_fields=["filepath", "frame_number"],
+    # Restrict the search to rows whose path contains one of these two dates.
+    filter='(filepath like "%2024/09/20%") or (filepath like "%2024/09/23%")',
+)
+
+# `out[0]` holds the hits for the single query vector; show only their paths.
+cpprint([x['entity']['filepath'] for x in out[0]])
--- a/milvus_migrate/upload_from_folder.py
+++ b/milvus_migrate/upload_from_folder.py
@@ -0,0 +1,75 @@
+import os
+import time
+
+import numpy as np
+from pymilvus import MilvusClient, DataType
+from pymilvus.client.types import LoadState
+client = MilvusClient(
+    uri="http://localhost:19530"
+)
+
+
+# Make sure the target collection is loaded before anything is inserted.
+res = client.get_load_state(
+    collection_name="nuggets_so400m"
+)
+if res['state'] != LoadState.Loaded:
+    client.load_collection(collection_name='nuggets_so400m')
+    # Poll up to ten times, one second apart.  The state must be fetched
+    # again on every pass: the original loop kept testing the stale pre-load
+    # `res`, so it could never break early and always slept ten seconds.
+    for i in range(10):
+        res = client.get_load_state(collection_name="nuggets_so400m")
+        if res['state'] == LoadState.Loaded:
+            break
+        time.sleep(1)
+
+
+def get_vec_path(vpath):
+    """Return *vpath* with its final extension replaced by ``.oclip_embeds.npz``."""
+    stem, _ext = os.path.splitext(vpath)
+    return f'{stem}.oclip_embeds.npz'
+
+def get_db_embed_done_path(vpath):
+    """Return *vpath* with its final extension replaced by ``.db_has_oclip_embeds``."""
+    stem, _ext = os.path.splitext(vpath)
+    return f'{stem}.db_has_oclip_embeds'
+
+
+def upload_vector_file(vector_file_to_upload):
+    """Insert the embeddings stored for one file into ``nuggets_so400m``.
+
+    ``vector_file_to_upload`` may be the ``*.oclip_embeds.npz`` archive itself
+    (which is what the directory walk in this script passes) or a path whose
+    last extension is swapped for ``.oclip_embeds.npz`` to find the archive.
+    The archive must provide ``embeds`` and ``frame_numbers`` arrays.  After
+    the insert a ``.db_has_oclip_embeds`` marker file is written next to it;
+    if that marker already exists the upload is skipped.
+    """
+    if vector_file_to_upload.endswith('.oclip_embeds.npz'):
+        # Drop only '.npz'.  The path helpers swap the last extension, so
+        # passing the archive path through get_vec_path unchanged produced
+        # '<stem>.oclip_embeds.oclip_embeds.npz'.
+        vector_file_to_upload = vector_file_to_upload[:-len('.npz')]
+
+    # Was `get_embed_done_path`, a name that is not defined in this file.
+    # The marker path is computed once so the check and the write agree.
+    done_path = get_db_embed_done_path(vector_file_to_upload)
+    if os.path.exists(done_path):
+        print('Already exists in DB, skipping upload')
+        return
+
+    vec_path = get_vec_path(vector_file_to_upload)
+    # np.load keeps the .npz archive open; the context manager closes it.
+    with np.load(vec_path) as vf:
+        embeds = vf['embeds']
+        fr_nums = vf['frame_numbers']
+
+    # Id prefix: the text after the last '_' of the file name, taken before
+    # the name's first '.'.  It must be numeric for int() below to succeed.
+    fname_root = vec_path.rsplit('/', 1)[-1].split('.')[0]
+    fc = fname_root.split('_')[-1]
+
+    # Stored path: FTP-root prefix removed, cut at the first '.'.
+    # NOTE(review): '/srv/ftp/' is stripped with its trailing slash but
+    # '/mergedfs/ftp' without, so the two roots yield differently shaped
+    # values -- left as-is to keep stored paths unchanged; confirm intent.
+    filepath = vec_path.replace('/srv/ftp/', '').replace('/mergedfs/ftp', '').split('.')[0]
+
+    data = list()
+    for embed, frame_num in zip(embeds, fr_nums):
+        frame_number = int(frame_num)
+        # primary_id is the file's numeric suffix followed by the frame number
+        # zero-padded to five digits.  `d` replaces the old `g` format, which
+        # switches to exponent notation from one million upwards and then
+        # cannot be parsed by int().
+        id_num = int(f'{fc}{frame_number:05d}')
+        data.append(dict(primary_id=id_num, filepath=filepath, frame_number=frame_number, so400m=embed))
+
+    print(f'Inserting into DB, {vec_path}')
+    client.insert(collection_name='nuggets_so400m', data=data)
+
+    # Record the upload so later runs skip this file.
+    with open(done_path, 'w') as ff:
+        ff.write(str(time.time()))
+
+
+
+
+
+# Gather every file below `root_path` whose name ends in 'oclip_embeds.npz',
+# then upload them one after another.
+root_path = '/srv/ftp/railing/2024'
+to_put = list()
+for root, _dirs, files in os.walk(root_path):
+    to_put.extend(
+        os.path.join(root, name)
+        for name in files
+        if name.endswith('oclip_embeds.npz')
+    )
+
+
+for x in to_put:
+    upload_vector_file(x)
+
+
+