Files
SearchInterface/VectorService/vector_service.py
2026-03-07 11:37:37 -05:00

145 lines
4.5 KiB
Python

import uvicorn.protocols.http.httptools_impl
# orig_data_received = uvicorn.protocols.http.httptools_impl.HttpToolsProtocol.data_received
# def debug_data_received(self, data):
# try:
# print("RAW INVALID DATA RECEIVED: %r", data)
# except Exception:
# pass
# return orig_data_received(self, data)
# uvicorn.protocols.http.httptools_impl.HttpToolsProtocol.data_received = debug_data_received
from typing import Union, Optional, List
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel
from fastapi import FastAPI, Request, Depends
from CommonCode.settings import get_logger
from CommonCode.video_meta import FTPVideo
import logging
from fastapi.responses import JSONResponse, StreamingResponse
import os
import numpy as np
from fastapi import FastAPI, Request, status
import sys
import json
import time
from util import embed_scores as ES
from fastapi_server_session import SessionManager, RedisSessionInterface, Session
import redis
from datetime import timedelta
# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# Module logger, configured through the shared CommonCode helper.
logger = get_logger(
    __name__,
    "/var/log/vector_search_logs/main_embed_scores",
    stdout=True,
    systemd=False,
    level=logging.INFO,
)

# Redis client on db 15 of the local server; videos_json pushes its
# per-folder messages through this handle.
r = redis.Redis(host="localhost", port=6379, db=15)

# Session manager handed to endpoints via Depends(session_manager.use_session);
# it keeps its state in the local Redis server.
session_manager = SessionManager(
    interface=RedisSessionInterface(redis.from_url("redis://localhost"))
)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Log a request-validation failure and answer with a JSON 422 body.

    Args:
        request: The request whose parameters or body failed validation.
        exc: The validation error raised for that request.

    Returns:
        A ``JSONResponse`` with HTTP status 422 whose body holds the
        application code ``10422``, the flattened error text under
        ``message`` and ``data`` set to ``None``.
    """
    # Collapse newlines and runs of whitespace so the error fits on one line,
    # both in the log record and in the response message.
    exc_str = " ".join(str(exc).split())
    # Use the module logger (not the root ``logging`` functions) so the record
    # goes through the handlers configured for this service.
    logger.error("%s: %s", request, exc_str)
    content = {"status_code": 10422, "message": exc_str, "data": None}
    return JSONResponse(
        content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
    )
@app.get("/video_embeddings")
def get_video_embeddings(video_path: str):
    """Return the embeddings recorded for one video file.

    Looks the file up with ``ES.get_embeddings_for_a_file``. When that yields
    ``None`` the endpoint returns ``None`` as well; otherwise it returns a
    copy of the result in which ``embeds`` and ``frame_numbers`` have been
    converted with ``.tolist()`` so they can be serialised.
    """
    stored = ES.get_embeddings_for_a_file(video_path)
    if stored is None:
        return None

    # Work on a shallow copy so the object handed back by ES is left untouched.
    payload = dict(stored)
    for key in ("embeds", "frame_numbers"):
        payload[key] = payload[key].tolist()
    return payload
@app.get("/prompt_embedding")
def get_prompt_embedding(prompt: str):
    """Return whatever ``ES.get_query_vector`` produces for *prompt*."""
    query_vector = ES.get_query_vector(prompt)
    return query_vector
@app.get("/match_scores")
def get_clip_scores(video_path: str, prompt: str):
    """Score one video against a text prompt.

    The scores come from ``ES.get_clip_scores``; the result is transposed
    (``.T``) and converted to nested lists before being returned.
    """
    return ES.get_clip_scores(video_path, prompt).T.tolist()
class VideosPostRequest(BaseModel):
    """Request body accepted by ``POST /videos.json``."""

    # Text prompt, forwarded as ``query`` to the folder-scoring helper.
    query: str = "A cat and a human"
    # Forwarded as ``threshold`` to the folder-scoring helper.
    threshold: float = 0.10
    # Folders to score; when omitted the endpoint uses its built-in list.
    c_dirs: Optional[List[str]] = None
    # Redis list key onto which the endpoint pushes its per-folder messages.
    task_id: str = "compute_log"
# Folders scored when a request does not name any.
_DEFAULT_C_DIRS = (
    "/srv/ftp_tcc/leopards1/2025/09/08",
    "/srv/ftp_tcc/leopards1/2025/09/09",
    "/srv/ftp_tcc/leopards1/2025/09/10",
    "/srv/ftp_tcc/leopards1/2025/09/11",
)


def _score_folders(query, threshold, c_dirs, task_id):
    """Blocking part of ``videos_json``: announce the folders, then score them."""
    # One "QUEUEING" message per folder on the Redis list named by task_id.
    for folder in c_dirs:
        message = {"task": "QUEUEING", "when": str(folder), "time": time.time()}
        r.rpush(task_id, json.dumps(message))

    # The folders are passed as a tuple, as the original call did.
    folder_scores = ES.calculate_embedding_score_in_folders(
        tuple(c_dirs), threshold=threshold, query=query, redis_key=task_id
    )

    # NOTE(review): the scoring helper looks memoised (it is called with a
    # tuple, and cache_info() was consulted here at one point) — confirm.
    # Copy before adding keys so a shared/cached result is never mutated,
    # mirroring what get_video_embeddings does with its result.
    folder_scores = dict(folder_scores)
    folder_scores["breaks"] = ES.add_breaks_between_videos(folder_scores)
    folder_scores["videos"] = ES.collapse_scores_to_maxmin_avg(folder_scores)
    return folder_scores


@app.post("/videos.json")
async def videos_json(
    vpr: VideosPostRequest,
    session: Session = Depends(session_manager.use_session),
):
    """Score the videos in a set of folders against a text query.

    Args:
        vpr: Request body with the query text, the threshold, the folders to
            score (``None`` selects ``_DEFAULT_C_DIRS``) and the Redis list
            key used for the per-folder messages.
        session: Resolved by the ``session_manager.use_session`` dependency;
            not otherwise read by this handler.

    Returns:
        The mapping produced by ``ES.calculate_embedding_score_in_folders``
        with two extra keys: ``breaks`` (from
        ``ES.add_breaks_between_videos``) and ``videos`` (from
        ``ES.collapse_scores_to_maxmin_avg``).
    """
    # Imported locally so this handler carries its own dependency.
    from fastapi.concurrency import run_in_threadpool

    c_dirs = vpr.c_dirs
    if c_dirs is None:
        c_dirs = list(_DEFAULT_C_DIRS)

    # The Redis pushes and the scoring are synchronous calls. Running them
    # directly inside this coroutine would block the event loop — and with it
    # every other request — until scoring finishes, so hand them to the
    # worker thread pool (where the plain ``def`` endpoints already run).
    return await run_in_threadpool(
        _score_folders, vpr.query, vpr.threshold, c_dirs, vpr.task_id
    )
class ClickEvent(BaseModel):
    """Model carrying a single ``timestamp`` value."""

    # NOTE(review): units and reference point of the timestamp are not
    # visible in this file — confirm against the caller.
    timestamp: float
class ClickResponse(BaseModel):
    """Model exposing a ``path`` and a ``timeoffset`` field."""

    # NOTE(review): presumably a video file path and an offset into that
    # video — confirm against the code that builds this response.
    path: str
    timeoffset: float