import os
import time
from datetime import datetime

import cv2
import numpy as np
import onnxruntime as rt
import open_clip
import pycuda.autoinit  # creates a CUDA context for the pycuda allocations used later
import pycuda.driver as cuda
import tensorrt as trt
import torch
from cuda import cuda as ccuda
from cuda import cudart

# GStreamer pipeline: NVDEC-decode the H.265 clip and scale frames to 1280x1280.
cmd = (
    "filesrc location=/home/thebears/local/source/short.mp4 ! "
    "qtdemux name=demux demux.video_0 ! queue ! h265parse ! nvv4l2decoder ! "
    "nvvidconv ! videoscale method=1 add-borders=false ! "
    "video/x-raw,width=1280,height=1280 ! appsink sync=false"
)
cap = cv2.VideoCapture(cmd, cv2.CAP_GSTREAMER)

st = time.time()
fr = 0
arrays_to_score = list()
imgs = list()
array = list()
while True:
    good, frf = cap.read()
    fr += 1
    print(good, fr)
    if not good:
        break
    array.append(frf)
    imgs.append(frf)
    if len(array) > 8:
        arrays_to_score.append(torch.from_numpy(np.asarray(array)))
        array = list()

if len(array) > 0:
    arrays_to_score.append(torch.from_numpy(np.asarray(array)))

et = time.time()
print(et - st, fr / (et - st))  # elapsed seconds, decode FPS

# %%
pretrained_name = "webli"
# model_name = "ViT-L-16-SigLIP2-512"
model_name = "ViT-SO400M-16-SigLIP2-512"
rt_dir = "/home/thebears/local/source/models/"
os.makedirs(rt_dir, exist_ok=True)
fname = model_name.replace("-", "_").lower() + "_" + datetime.now().strftime("%Y%m%d")
ONNX_FILE_PATH = os.path.join(rt_dir, fname + ".onnx")
ENGINE_FILE_PATH = os.path.splitext(ONNX_FILE_PATH)[0] + ".engine"

# %%
model, _, preprocess = open_clip.create_model_and_transforms(
    model_name, pretrained=pretrained_name
)

# %%
# Reference embeddings: score every decoded frame one at a time with the
# PyTorch model on the GPU.
model_gpu = model.cuda()
scores = list()
all_means = list()
with torch.no_grad():
    for fr_num, img in enumerate(imgs):
        tensor_raw = torch.tensor(img[None, :, :, 0:3])
        tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
        tensor_reshaped = preprocess.transforms[0](tensor_perm)  # resize
        tensor_mean = preprocess.transforms[-1](tensor_reshaped)  # normalize
        all_means.append(tensor_mean)
        imp = model_gpu.encode_image(tensor_mean.cuda())
        print(fr_num)
        scores.append((fr_num, imp.detach().cpu().numpy()))

# %%
np.save("dump_so400m", np.concatenate([x[1] for x in scores]))

# %%
# Batched scoring path; the else branch is an unused single-frame variant kept
# around for testing.
with torch.no_grad():
    t0 = time.time()
    if True:
        tensor_raw = torch.concat(arrays_to_score)[0:4, :, :, 0:3]
        tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
        tensor_reshaped = preprocess.transforms[0](tensor_perm)
        tensor_mean = preprocess.transforms[-1](tensor_reshaped)
    else:
        tensor_raw = torch.concat(arrays_to_score)[0:1, :, :, 0:3]  # keep batch dim
        tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
        tensor_reshaped = preprocess.transforms[1](preprocess.transforms[0](tensor_perm))
        tensor_mean = preprocess.transforms[-1](tensor_reshaped)
    # imp = model.encode_image(tensor_mean)
    imp = model_gpu.encode_image(tensor_mean.cuda())
    t1 = time.time()
    print((t1 - t0) / tensor_raw.shape[0], tensor_raw.shape[0] / (t1 - t0))  # s/frame, FPS
    from_model_on_gpu = imp.detach().cpu().numpy()

# %%
# Export the vision tower to ONNX and check it against the PyTorch embeddings
# with ONNX Runtime.
torch.onnx.export(
    model.visual.cuda(),
    tensor_mean.cuda(),
    ONNX_FILE_PATH,
    input_names=["input"],
    output_names=["output"],
)

X_test = tensor_mean.cpu().numpy()
sess = rt.InferenceSession(ONNX_FILE_PATH, providers=rt.get_available_providers())
input_name = sess.get_inputs()[0].name
pred_onx = sess.run(None, {input_name: X_test.astype(np.float32)})[0]
print(pred_onx)


def norm(v):
    return np.divide(v.T, np.linalg.norm(v, axis=1)).T


# Cosine similarities between ONNX Runtime and PyTorch embeddings; the diagonal
# should be close to 1.
print(np.dot(norm(pred_onx), norm(from_model_on_gpu).T))
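
# %%
# Sketch (not part of the original pipeline): the export above bakes the fixed
# batch of 4 into the ONNX graph. If a variable batch size is wanted at
# inference time, the graph can be re-exported with a dynamic batch axis.
# ONNX_DYNAMIC_PATH and the example profile shapes below are illustrative
# assumptions, not values taken from this script.
ONNX_DYNAMIC_PATH = os.path.splitext(ONNX_FILE_PATH)[0] + "_dynamic.onnx"
torch.onnx.export(
    model.visual.cuda(),
    tensor_mean.cuda(),
    ONNX_DYNAMIC_PATH,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},
)
# Building an engine from this file additionally needs an optimization profile
# in the builder config, roughly (assuming the 512x512 input of this SigLIP2 model):
#   profile = builder.create_optimization_profile()
#   profile.set_shape("input", (1, 3, 512, 512), (4, 3, 512, 512), (8, 3, 512, 512))
#   config.add_optimization_profile(profile)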

# %%
TRT_LOGGER = trt.Logger()


def build_engine_from_onnx(onnx_file_path, use_fp16=True):
    """
    Convert an ONNX model to a TensorRT engine, optionally with FP16 precision.

    Args:
        onnx_file_path: Path to the ONNX model
        use_fp16: Enable FP16 mode if the GPU supports it

    Returns:
        Serialized TensorRT engine (IHostMemory), ready to be written to disk
    """
    # Logger to capture info/warning/errors
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    # Create builder and network
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )

    # Create ONNX parser and parse the model
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(onnx_file_path, "rb") as model_file:
        if not parser.parse(model_file.read()):
            for error in range(parser.num_errors):
                print(f"ONNX parse error: {parser.get_error(error)}")
            raise ValueError(f"Failed to parse ONNX file: {onnx_file_path}")

    # Create builder config
    config = builder.create_builder_config()
    # 1 MiB workspace limit; raise this if the builder runs out of tactic memory
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)

    # Enable FP16 precision if requested and the GPU supports it
    if use_fp16:
        if builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
            print("FP16 enabled successfully")
        else:
            print("Warning: GPU doesn't support fast FP16, using FP32 instead")

    # For dynamic input shapes, an optimization profile with min/opt/max
    # dimensions would also be added to the config here.

    # Build and serialize the engine
    engine = builder.build_serialized_network(network, config)
    if engine is None:
        raise RuntimeError("TensorRT engine build failed")
    return engine


engine = build_engine_from_onnx(ONNX_FILE_PATH, use_fp16=True)

with open(ENGINE_FILE_PATH, "wb") as f:
    f.write(engine)
# %%
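# Sketch of running the serialized engine (assumes TensorRT >= 8.5 for the
# tensor-name execution API; buffer handling via pycuda, already imported above).
# The tensor names "input"/"output" come from the ONNX export earlier in the script.
runtime = trt.Runtime(TRT_LOGGER)
with open(ENGINE_FILE_PATH, "rb") as f:
    trt_engine = runtime.deserialize_cuda_engine(f.read())
context = trt_engine.create_execution_context()

input_host = np.ascontiguousarray(X_test.astype(np.float32))
output_host = np.empty(tuple(trt_engine.get_tensor_shape("output")), dtype=np.float32)

# Device buffers and an async stream (pycuda.autoinit created the CUDA context)
d_input = cuda.mem_alloc(input_host.nbytes)
d_output = cuda.mem_alloc(output_host.nbytes)
stream = cuda.Stream()

cuda.memcpy_htod_async(d_input, input_host, stream)
context.set_tensor_address("input", int(d_input))
context.set_tensor_address("output", int(d_output))
context.execute_async_v3(stream.handle)
cuda.memcpy_dtoh_async(output_host, d_output, stream)
stream.synchronize()

# FP16 engine output vs. the PyTorch embeddings: the diagonal should stay close to 1
print(np.dot(norm(output_host), norm(from_model_on_gpu).T))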