# Decode video frames via a GStreamer/OpenCV pipeline, embed them with
# SigLIP2 (open_clip), export the vision tower to ONNX, verify the export
# with onnxruntime, and build a TensorRT engine from the ONNX graph.
import time
|
|
|
|
import cv2
|
|
import numpy
|
|
import numpy as np
|
|
import onnxruntime as rt
|
|
import open_clip
|
|
import pycuda.autoinit
|
|
import pycuda.driver as cuda
|
|
import tensorrt as trt
|
|
import torch
|
|
import os
|
|
from cuda import cuda as ccuda
|
|
from cuda import cudart
|
|
|
|
# GStreamer pipeline string: read the MP4, demux, parse/decode H.265 on the
# NVIDIA hardware decoder, then scale to 1280x1280 and hand raw frames to
# OpenCV via appsink. sync=false so decode runs as fast as possible.
cmd = "filesrc location=/home/thebears/local/source/full.mp4 ! qtdemux name=demux demux.video_0 ! queue ! h265parse ! nvv4l2decoder ! nvvidconv ! videoscale method=1 add-borders=false ! video/x-raw,width=1280,height=1280 ! appsink sync=false"

cap = cv2.VideoCapture(cmd, cv2.CAP_GSTREAMER)

st = time.time()
fr = 0  # total frames attempted (includes the final failed read)

# Batches of decoded frames, each stacked into a (N, H, W, C) uint8 tensor.
arrays_to_score = list()
array = list()
while True:
    good, frf = cap.read()
    fr += 1
    print(good, fr)
    if not good:
        break

    array.append(frf)

    if len(array) > 8:
        arrays_to_score.append(torch.from_numpy(np.asarray(array)))
        array = list()
        # NOTE(review): stops after the first 9-frame batch — looks like a
        # debugging shortcut; remove this break to process the whole video.
        break

# Flush the final partial batch, if any.
if len(array) > 0:
    arrays_to_score.append(torch.from_numpy(np.asarray(array)))

# Release the capture so the GStreamer pipeline shuts down cleanly.
cap.release()

et = time.time()

# BUG FIX: FPS was computed as fr / (st - et), i.e. divided by a negative
# duration; use end-minus-start so the rate comes out positive.
print(et - st, fr / (et - st))
|
|
|
|
|
|
|
|
|
|
|
|
# %%
|
|
# SigLIP2 ViT-L/16 at 512px input, pretrained on WebLI.
model_name = "ViT-L-16-SigLIP2-512"
pretrained_name = "webli"

# Smaller/faster alternative for quick experiments:
#model_name, pretrained_name = ('ViT-B-16-quickgelu', 'openai')

# preprocess carries the resize/normalize transforms applied before encoding.
model, _, preprocess = open_clip.create_model_and_transforms(
    model_name,
    pretrained=pretrained_name,
)
|
|
|
|
|
|
# %%
|
|
with torch.no_grad():
    # Time one forward pass through the image tower.
    # BUG FIX: the start/end timestamps were swapped (end captured before the
    # work, start after), so the commented throughput math came out negative.
    st = time.time()
    if True:
        # Synthetic smoke-test input; the dead else-branch below keeps the
        # real-frame path (decoded HWC uint8 frames -> NCHW float in [0, 1]).
        # tensor_raw = arrays_to_score[0][0,:,:,0:3][None,:,:,:]
        # tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
        tensor_perm = torch.rand(1, 3, 512, 512)
        tensor_reshaped = preprocess.transforms[0](tensor_perm)
        tensor_mean = preprocess.transforms[-1](tensor_reshaped)
    else:
        # Real frames: drop the alpha channel, permute to NCHW, scale to [0,1],
        # then apply the model's resize and normalization transforms.
        tensor_raw = torch.concat(arrays_to_score)[0:4, :, :, 0:3]
        tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
        tensor_reshaped = preprocess.transforms[1](preprocess.transforms[0](tensor_perm))
        tensor_mean = preprocess.transforms[-1](tensor_reshaped)
    imp = model.encode_image(tensor_mean)
    et = time.time()
    # seconds-per-image and images-per-second (batch size from tensor_mean):
    # print((et - st) / tensor_mean.shape[0], tensor_mean.shape[0] / (et - st))

# Reference embeddings to compare the ONNX/TensorRT outputs against.
from_model_on_gpu = imp.cpu().numpy()
|
|
|
|
# %%
|
|
|
|
# %%
|
|
#ONNX_FILE_PATH = "/home/thebears/local/source/engine_siglip2_512.onnx"
|
|
#ONNX_FILE_PATH = "/home/thebears/local/source/engine_small.onnx"
|
|
# Export only the vision tower, traced with the preprocessed tensor above.
#ONNX_FILE_PATH = "/home/thebears/local/source/engine_siglip2_512.onnx"
#ONNX_FILE_PATH = "/home/thebears/local/source/engine_small.onnx"
ONNX_FILE_PATH = "in_docker.onnx"

vision_tower = model.visual
torch.onnx.export(
    vision_tower,
    tensor_mean,
    ONNX_FILE_PATH,
    input_names=["input"],
    output_names=["output"],
)
|
|
# %%
|
|
# Re-run the exported graph through onnxruntime on the same input so the
# result can be compared against the in-process PyTorch embeddings.
X_test = tensor_mean.cpu().numpy()

providers = rt.get_available_providers()
sess = rt.InferenceSession(ONNX_FILE_PATH, providers=providers)

input_name = sess.get_inputs()[0].name
feed = {input_name: X_test.astype(numpy.float32)}
pred_onx = sess.run(None, feed)[0]
print(pred_onx)
|
|
|
|
def norm(v):
    """Return *v* with each row scaled to unit Euclidean length."""
    row_lengths = np.linalg.norm(v, axis=1, keepdims=True)
    return v / row_lengths
|
|
|
|
# Cosine-similarity matrix: ONNX embeddings vs. the PyTorch reference
# (diagonal should be ~1.0 if the export is faithful).
onnx_unit = norm(pred_onx)
ref_unit = norm(from_model_on_gpu)
print(np.dot(onnx_unit, ref_unit.T))
|
|
|
|
# %%
|
|
TRT_LOGGER = trt.Logger()

def build_engine_from_onnx(onnx_file_path, use_fp16=True):
    """
    Parse an ONNX model and build a serialized TensorRT engine.

    Args:
        onnx_file_path: Path to the ONNX model file.
        use_fp16: Enable FP16 precision if the GPU supports it.

    Returns:
        Serialized engine bytes (trt.IHostMemory) from
        build_serialized_network, or None if the build fails.

    Raises:
        ValueError: If the ONNX file fails to parse.
    """
    # Function-local logger (shadows the module-level one on purpose).
    logger = trt.Logger(trt.Logger.WARNING)

    # Builder and an explicit-batch network definition.
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )

    # Parse the ONNX graph into the network; surface every parser error
    # before failing so the problem is diagnosable from the log.
    parser = trt.OnnxParser(network, logger)
    with open(onnx_file_path, "rb") as model:
        if not parser.parse(model.read()):
            for error in range(parser.num_errors):
                print(f"ONNX parse error: {parser.get_error(error)}")
            raise ValueError(f"Failed to parse ONNX file: {onnx_file_path}")

    config = builder.create_builder_config()
    # NOTE(review): 1 MiB of workspace is very small for a ViT-L model;
    # confirm this is intentional — TensorRT may reject tactics for lack of
    # scratch memory and build a slower (or no) engine.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)  # 1 MiB

    # Enable FP16 precision if requested and if the GPU supports it.
    if use_fp16:
        if builder.platform_has_fast_fp16:
            # BUG FIX: this flag was commented out, so the function reported
            # "FP16 enabled" while still building a pure-FP32 engine.
            config.set_flag(trt.BuilderFlag.FP16)
            print("FP16 enabled successfully")
        else:
            print("Warning: GPU doesn't support fast FP16, using FP32 instead")

    # Dynamic-shape inputs would need an optimization profile here
    # (min/opt/max dimensions); this export uses a fixed shape.

    # Build and serialize the engine in one step.
    engine = builder.build_serialized_network(network, config)

    return engine
|
|
|
|
|
|
|
|
# Serialized engine lands next to the ONNX file with a .trt extension.
ENGINE_FILE_PATH = os.path.splitext(ONNX_FILE_PATH)[0] + '.trt'

engine = build_engine_from_onnx(ONNX_FILE_PATH, use_fp16=False)

# BUG FIX: build_serialized_network returns None on failure; writing None
# would raise an opaque TypeError, so fail with a clear message instead.
if engine is None:
    raise RuntimeError(f"TensorRT engine build failed for {ONNX_FILE_PATH}")

with open(ENGINE_FILE_PATH, "wb") as f:
    f.write(engine)
|
|
|
|
|
|
# %%
|