Compare commits

4 Commits: 1d87063655...main

| Author | SHA1 | Date |
|---|---|---|
|  | faff84dd4c |  |
|  | 74eff0a1fa |  |
|  | c8dbef2c0f |  |
|  | 21b7ccb794 |  |
.gitignore (vendored, 3 changed lines)

@@ -3,4 +3,7 @@
 *.pt
 *.onnx
 *.trt
+*.whl
+*.npy
+*.npz
 runs/
__pycache__/model_runner.cpython-310.pyc (new binary file)
Binary file not shown.
compare.py (25 changed lines)

@@ -1,6 +1,9 @@
-import numpy as np
 import json
-datum = np.load('dump.npz.npy')
+import numpy as np
+
+#datum = np.load('dump.npz.npy')
+datum = np.load('dump_so400m.npy')
+
 
 
 with open('dump.json','r') as rr:
@@ -29,25 +32,15 @@ def cosine_sim(emb_in_1, emb_in_2):
 
 
 arr_in_deepstream = np.asarray([y for _,y in emb_dict.items()])
 
 normed = np.divide(datum.T, np.linalg.norm(datum, axis=1)).T
 print('_________________________')
 
 
+print(len(emb_dict))
+print(len(datum))
 for fr, emb in emb_dict.items():
     emb1 = np.linalg.norm(emb)
     emb2 = np.linalg.norm(datum[fr])
-    # print( cosine_sim(emb, datum[fr]))
+    print( cosine_sim(emb, datum[fr]))
 
 
-print('Deepstream and Actual norm')
-print(np.max(np.dot(arr_in_deepstream, normed.T),axis=1))
-
-print('_________________________')
-for dat in datum:
-    # print(cosine_sim(dat, datum[0]))
-    pass
-
-
-#print(cosine_sim(datum[fr], datum[fr+1]))
-
-#print(cosine_sim(emb_dict[fr], emb_dict[fr+1]))
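The second hunk's header references compare.py's `cosine_sim` helper, whose body lies outside the changed lines. A minimal sketch of a cosine similarity consistent with how it is called above (an assumption, not the committed definition):

```python
import numpy as np

def cosine_sim(emb_in_1, emb_in_2):
    # Normalize both embeddings and take their dot product.
    a = np.asarray(emb_in_1, dtype=np.float32).ravel()
    b = np.asarray(emb_in_2, dtype=np.float32).ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
```

With the commented-out `print` re-enabled, the loop emits one such similarity per frame between the DeepStream embedding and the dumped reference embedding.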
(file name not shown)

@@ -178,7 +178,7 @@ def embed_results_probe(pad, info, u_data, list_add, frame_num=0):
     if True:
         for i in range(tensor_meta.num_output_layers):
             layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
-            if layer.layerName == "embedding":
+            if layer.layerName == "output":
 
                 ptr = ctypes.cast(
                     pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float)
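For context on the `layerName` change: once the probe matches the renamed output layer, the float buffer behind `layer.buffer` is read out through the ctypes pointer shown above. A minimal sketch of that readout, assuming a flat embedding of `emb_size` floats per frame (the size and variable names are illustrative, not taken from the diff; `pyds` and `layer` come from the probe body above):

```python
import ctypes

import numpy as np

emb_size = 1152  # illustrative width of the "output" layer, not from the diff
ptr = ctypes.cast(
    pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float)
)
# Copy the buffer into a NumPy array owned by Python before the GstBuffer is unmapped.
embedding = np.ctypeslib.as_array(ptr, shape=(emb_size,)).copy()
```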
Further hunks in the same file:

@@ -395,8 +395,8 @@ if True:
     streammux_embed.set_property("width", target_width_embed)
     streammux_embed.set_property("height", target_height_embed)
     streammux_embed.set_property("batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC)
-    streammux_embed.set_property("enable-padding", 1)
-    streammux_embed.set_property("batch-size", 4)
+    streammux_embed.set_property("enable-padding", 0)
+    streammux_embed.set_property("batch-size", 1)
 
     nugget_embed = Gst.ElementFactory.make("nvinfer", "primary-inference")
     nugget_embed.set_property(
@@ -426,7 +426,7 @@ if True:
     # capsfilter.link(tee)
     nvvidconv.link(tee)
 
-    if True:
+    if False:
         pipeline.add(queue_detect)
         pipeline.add(streammux_detect)
         pipeline.add(nugget_detector)
@@ -442,7 +442,7 @@ if True:
     os.environ["GST_DEBUG_DUMP_DOT_DIR"] = "/tmp"
     os.putenv("GST_DEBUG_DUMP_DIR_DIR", "/tmp")
 
-    if False:
+    if True:
 
         pipeline.add(queue_embed)
         pipeline.add(streammux_embed)
@@ -522,7 +522,13 @@ if True:
         pass
     # cleanup
     pipeline.set_state(Gst.State.NULL)
-    # return detector_list, embed_list
+    # return detector_list, embed_list\\
+    out = [detector_list, embed_list ]
+    import json
+    with open("dump.json", "w") as ff:
+        json.dump([out[0], out[1]], ff)
+    sys.exit()
+
 
 
 if __name__ == "__main__":
@@ -537,6 +543,3 @@ if __name__ == "__main__":
 
     import json
 
-    with open("dump.json", "w") as ff:
-        json.dump([out[0], out[1]], ff)
-    sys.exit()
min_repro.py (new file, 336 lines)
import io

import tensorrt as trt
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttentionUsingScaledDotProduct(nn.Module):
    """
    An alternative implementation of the Attention layer using `F.scaled_dot_product_attention`, which is ~50% faster,
    but doesn't compile correctly when using TensorRT v10.
    """

    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        attn_head_dim=None,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat(
                (
                    self.q_bias,
                    torch.zeros_like(self.v_bias, requires_grad=False),
                    self.v_bias,
                )
            )
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)

        x = F.scaled_dot_product_attention(
            q,
            k,
            v,
            dropout_p=self.attn_drop.p if self.training else 0.0,
            scale=self.scale,
        )

        x = x.transpose(1, 2).reshape(B, N, -1)

        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class ExplicitAttention(nn.Module):
    """
    The explicit, original version of the Attention layer from the VideoMAEv2 codebase.
    """

    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        attn_head_dim=None,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat(
                (
                    self.q_bias,
                    torch.zeros_like(self.v_bias, requires_grad=False),
                    self.v_bias,
                )
            )
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)

        q = q * self.scale
        attn = q @ k.transpose(-2, -1)

        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)

        x = self.proj(x)
        x = self.proj_drop(x)

        return x


class AttentionUsingMHAForward(nn.Module):
    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        attn_head_dim=None,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat(
                (
                    self.q_bias,
                    torch.zeros_like(self.v_bias, requires_grad=False),
                    self.v_bias,
                )
            )
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)

        # MHA expects [sequence, batch, embed_dim].
        x_t = x.transpose(0, 1)  # => [N, B, C]

        attn_out, _ = F.multi_head_attention_forward(
            x_t,
            x_t,
            x_t,
            embed_dim_to_check=C,
            num_heads=self.num_heads,
            # Since use_separate_proj_weight=False (default), then according to the docs:
            # "in_proj_weight will be used, which is a combination of q_proj_weight, k_proj_weight, v_proj_weight."
            in_proj_weight=self.qkv.weight,
            in_proj_bias=qkv_bias,
            bias_k=None,
            bias_v=None,
            add_zero_attn=False,
            dropout_p=self.attn_drop.p,
            out_proj_weight=self.proj.weight,
            out_proj_bias=self.proj.bias,
            training=self.training,
            key_padding_mask=None,
            need_weights=False,
            attn_mask=None,
        )

        # Transpose back to [B, N, C].
        x = attn_out.transpose(0, 1)

        return x


def onnx_to_trt(onnx_bytes: bytes) -> bytes:
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    builder = trt.Builder(TRT_LOGGER)

    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)

    parser.parse(onnx_bytes)

    config = builder.create_builder_config()
    config.builder_optimization_level = 0

    engine = builder.build_serialized_network(network, config)

    return engine


def build_trt_module(model, x):
    onnx_bytes = io.BytesIO()

    torch.onnx.export(
        model,
        (x,),
        onnx_bytes,
        export_params=True,
        opset_version=17,
        do_constant_folding=True,
        input_names=["x"],
        output_names=["y"],
    )

    trt_engine = onnx_to_trt(onnx_bytes.getvalue())
    return trt_engine


#@torch.inference_mode()
#def main():
with torch.no_grad():
    torch.manual_seed(0)

    EMB_DIM = 384
    x = torch.rand((6, 1568, EMB_DIM))

    explicit_attention = ExplicitAttention(EMB_DIM)
    sdpa = AttentionUsingScaledDotProduct(EMB_DIM)
    mha_fwd = AttentionUsingMHAForward(EMB_DIM)

    # Use the same params for all.
    sdpa.load_state_dict(explicit_attention.state_dict())
    mha_fwd.load_state_dict(explicit_attention.state_dict())

    sdpa_torch_y = sdpa(x)
    explicit_attention_torch_y = explicit_attention(x)
    mha_fwd_torch_y = mha_fwd(x)

    print(
        "Torch: [explicit<->sdpa] Is allclose?",
        sdpa_torch_y.allclose(explicit_attention_torch_y, atol=0.0001),
    )
    print(
        "Torch: [explicit<->mha_fwd] Is allclose?",
        mha_fwd_torch_y.allclose(explicit_attention_torch_y, atol=0.0001),
    )
    print(
        "Torch: [explicit<->sdpa] Total difference:",
        (sdpa_torch_y - explicit_attention_torch_y).abs().sum(),
    )
    print(
        "Torch: [explicit<->mha_fwd] Total difference:",
        (mha_fwd_torch_y - explicit_attention_torch_y).abs().sum(),
    )
    assert sdpa_torch_y.allclose(explicit_attention_torch_y, atol=0.0001), "Precheck"
    assert mha_fwd_torch_y.allclose(explicit_attention_torch_y, atol=0.0001), "Precheck"
    # %%

    explicit_attention_trt = build_trt_module(explicit_attention, x)
    with open('explicit_attention_trt.trt', 'wb') as ea:
        ea.write(explicit_attention_trt)

    sdpa_trt_model = build_trt_module(sdpa, x)
    with open('sdpa_trt.trt', 'wb') as ea:
        ea.write(sdpa_trt_model)

    mha_fwd_trt_model = build_trt_module(mha_fwd, x)
    with open('mha_trt.trt', 'wb') as ea:
        ea.write(mha_fwd_trt_model)
    # %%


    # %%
    explicit_attention_y = explicit_attention_trt(x.cuda())
    sdpa_y = sdpa_trt_model(x.cuda())
    mha_fwd_y = mha_fwd_trt_model(x.cuda())

    print(
        "TRT: [explicit<->sdpa] Is allclose?",
        sdpa_y.allclose(explicit_attention_y, atol=0.0001),
    )
    print(
        "TRT: [explicit<->sdpa] Total difference:",
        (sdpa_y - explicit_attention_y).abs().sum(),
    )

    print(
        "TRT: [explicit<->mha_fwd] Is allclose?",
        mha_fwd_y.allclose(explicit_attention_y, atol=0.0001),
    )
    print(
        "TRT: [explicit<->mha_fwd] Total difference:",
        (mha_fwd_y - explicit_attention_y).abs().sum(),
    )

    print("TRT: Explicit Attention:", explicit_attention_y[0, 0, :32])
    print("TRT: Scaled Dot Product Attention:", sdpa_y[0, 0, :32])
    print("TRT: MHA Forward:", mha_fwd_y[0, 0, :32])


if __name__ == "__main__":
    main()
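Note that `build_trt_module` returns a serialized engine (bytes), so calling `explicit_attention_trt(x.cuda())` as in the last block only works once those bytes are deserialized and wrapped in an execution context. A minimal sketch of running one of the saved engines with the TensorRT 10 Python API and Torch-managed buffers (the tensor names follow the `input_names=["x"]` / `output_names=["y"]` used in the export; the identical output shape is an assumption that holds for these attention modules):

```python
import tensorrt as trt
import torch

def run_trt_engine(engine_bytes: bytes, x: torch.Tensor) -> torch.Tensor:
    logger = trt.Logger(trt.Logger.INFO)
    runtime = trt.Runtime(logger)
    engine = runtime.deserialize_cuda_engine(engine_bytes)
    context = engine.create_execution_context()

    x = x.contiguous().cuda()
    y = torch.empty_like(x)  # attention output has the same shape as the input here

    # Bind device pointers to the tensor names used at export time.
    context.set_tensor_address("x", x.data_ptr())
    context.set_tensor_address("y", y.data_ptr())

    stream = torch.cuda.Stream()
    context.execute_async_v3(stream.cuda_stream)
    stream.synchronize()
    return y
```

With a wrapper like this, the three TRT outputs can be compared against the eager PyTorch results the same way the script already does for the Torch-only precheck.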
ml_run.py (new file, 8 lines)
from model_runner import ModelRunner
mr = ModelRunner()
# %%
mr.init_model_det()
mr.init_model_clip()
# %%
scored_results = mr.score_video('/home/thebears/source/ml_code/short.mp4')
print(scored_results)
model_runner.py (new file, 310 lines)
import sys

sys.path.insert(0, "/home/thebears/source/models/yolov7")
import time
import base64 as b64
from datetime import datetime
import cv2
import numpy as np
import json
from pymediainfo import MediaInfo
import inspect
import open_clip
import sys
import torch
import yaml
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression
from torchvision import transforms
import torch.nn.functional as F
import os
device = torch.device("cuda")


# %%
class ModelRunner:
    def __init__(self):
        self.pretrained_name = "webli"
        self.model_name = "ViT-SO400M-16-SigLIP2-512"
        self.det_root_path = "/home/thebears/source/model_weights"

    def init_model_clip(self):
        if hasattr(self, 'clip_preprocess'):
            return

        model_name = self.model_name
        pretrained_name = self.pretrained_name

        clip_model, _, clip_preprocess_og = open_clip.create_model_and_transforms(
            model_name, pretrained=pretrained_name
        )
        tokenizer = open_clip.get_tokenizer("hf-hub:timm/" + model_name)
        clip_model = clip_model.half().to(device)
        clip_dtype = next(clip_model.parameters()).dtype
        clip_img_size = clip_preprocess_og.transforms[0].size
        clip_model.encode_image(
            torch.rand(1, 3, *clip_img_size, dtype=clip_dtype, device=device))
        clip_preprocess = transforms.Compose(
            [clip_preprocess_og.transforms[x] for x in [0, 3]]
        )
        self.clip_model = clip_model
        self.clip_preprocess_og = clip_preprocess_og
        self.clip_tokenizer = tokenizer
        self.clip_dtype = clip_dtype
        self.clip_img_size = clip_img_size
        self.clip_preprocess = clip_preprocess

    def init_model_det(self):
        if hasattr(self, 'det_model'):
            return

        det_root_path = self.det_root_path
        det_model_weights_root = os.path.join(det_root_path, "yolov7")
        det_model_weights_path = os.path.join(det_model_weights_root, "best.pt")
        det_data_yaml_path = os.path.join(det_model_weights_root, "inaturalist.yaml")
        det_model = attempt_load(det_model_weights_path, map_location=device)
        det_model = det_model.half().to(device)

        det_dtype = next(det_model.parameters()).dtype
        det_imgsz = 1280
        det_stride = int(det_model.stride.max())
        det_imgsz = check_img_size(det_imgsz, s=det_stride)
        _ = det_model(
            torch.zeros(1, 3, det_imgsz, det_imgsz, dtype=det_dtype).to(device)
        )

        with open(det_data_yaml_path, "r") as ff:
            det_model_info = yaml.safe_load(ff)
        det_labels = det_model_info["names"]

        self.det_dtype = det_dtype
        self.det_imgsz = det_imgsz
        self.det_stride = det_stride
        self.det_model_info = det_model_info
        self.det_labels = det_labels
        self.det_model = det_model

    def get_det_vid_preprocessor(self, vid_h, vid_w):
        if not hasattr(self, "_det_vid_preprocessors"):
            self._det_vid_preprocessors = dict()
            self.curr_det_vid_preprocessor = None
        dict_key = (vid_h, vid_w)
        det_stride = self.det_stride
        if dict_key in self._det_vid_preprocessors:
            self.curr_det_vid_preprocessor = self._det_vid_preprocessors[dict_key]
            return self.curr_det_vid_preprocessor

        target_max = self.det_imgsz

        if vid_h > vid_w:
            target_h = target_max
            target_w = target_max * vid_w / vid_h
        elif vid_h == vid_w:
            target_h = target_max
            target_w = target_max
        elif vid_h < vid_w:
            target_h = target_max * vid_h / vid_w
            target_w = target_max

        target_h = int(target_h)
        target_w = int(target_w)

        pad_amt = [None, None, None, None]
        if target_w % det_stride != 0:
            off = det_stride - target_w % det_stride
            new_w = target_w + off
            pad_diff = new_w - target_w
            pad_left = round(pad_diff / 2)
            pad_right = pad_diff - pad_left
            pad_amt[0] = pad_left
            pad_amt[2] = pad_right
        else:
            pad_amt[0] = 0
            pad_amt[2] = 0

        if target_h % det_stride != 0:
            off = det_stride - target_h % det_stride
            new_h = target_h + off
            pad_diff = new_h - target_h
            pad_up = round(pad_diff / 2)
            pad_down = pad_diff - pad_up
            pad_amt[1] = pad_up
            pad_amt[3] = pad_down
        else:
            pad_amt[1] = 0
            pad_amt[3] = 0

        det_vid_preprocess = transforms.Compose(
            [transforms.Resize((target_h, target_w)), transforms.Pad(pad_amt, fill=127)]
        )

        self.target_h = target_h
        self.target_w = target_w
        self.pad_amt = pad_amt

        self._det_vid_preprocessors[dict_key] = det_vid_preprocess
        self.curr_det_vid_preprocessor = self._det_vid_preprocessors[dict_key]
        return self.curr_det_vid_preprocessor

    def score_frames_det(self, array_score, det_vid_preprocess=None):
        det_model = self.det_model
        if det_vid_preprocess is None:
            det_vid_preprocess = self.curr_det_vid_preprocessor

        frame_numbers = [x[0] for x in array_score]
        frame_values = [x[1] for x in array_score]
        frame_as_tensor = (
            torch.from_numpy(np.stack(frame_values)[:, :, :, 0:3])
            .to(torch.float16)
            .to(device)
            .permute([0, 3, 1, 2])
        )

        with torch.no_grad():
            frame_for_model = det_vid_preprocess(frame_as_tensor).div(255)[
                :, [2, 1, 0], :, :
            ]
            det_preds = det_model(frame_for_model)[0]
            det_pred_post_nms = non_max_suppression(det_preds, 0.25, 0.5)
            det_cpu_pred = [x.detach().cpu().numpy() for x in det_pred_post_nms]

        return {"det": det_cpu_pred, "fr#": frame_numbers}

    def score_frames_clip(self, clip_array_score):
        frame_numbers = [x[0] for x in clip_array_score]
        frame_values = [x[1] for x in clip_array_score]
        frame_as_tensor = (
            torch.from_numpy(np.stack(frame_values)[:, :, :, 0:3])
            .to(torch.float16)
            .to(device)
            .permute([0, 3, 1, 2])
        )

        with torch.no_grad():
            frame_for_clip = self.clip_preprocess(frame_as_tensor[:, [0, 1, 2], :, :])
            clip_pred = self.clip_model.encode_image(frame_for_clip).detach().cpu().numpy()

        return {"clip": clip_pred, "fr#": frame_numbers}

    def get_video_info(self, file_path):
        file_info = MediaInfo.parse(file_path)
        video_info = None
        frame_count = 0
        if len(file_info.video_tracks) > 0:
            video_info = file_info.video_tracks[0]

        video_info.frame_count = int(video_info.frame_count)
        return video_info

    def score_video(self, file_to_score, batch_size = 6, clip_interval = 10):
        video_info = self.get_video_info(file_to_score)
        vid_decoder = "h264parse"
        if video_info.format.lower() == "HEVC".lower():
            vid_decoder = "h265parse"

        gst_cmd = "filesrc location={file_to_score} ! qtdemux name=demux demux.video_0 ! queue ! {vid_decoder} ! nvv4l2decoder ! nvvidconv ! videoscale method=1 add-borders=false ! video/x-raw,width=1280,height=1280 ! appsink sync=false".format(
            file_to_score=file_to_score, vid_decoder=vid_decoder
        )
        cap_handle = cv2.VideoCapture(gst_cmd, cv2.CAP_GSTREAMER)
        vid_h = video_info.height
        vid_w = video_info.width

        vid_preprocessor = self.get_det_vid_preprocessor(vid_h, vid_w)
        target_w = self.target_w
        target_h = self.target_h
        pad_amt = self.pad_amt

        array_score = list()
        final_output = dict()
        final_output["start_score_time"] = time.time()
        final_output["num_frames"] = video_info.frame_count
        st = time.time()
        frame_numbers = list()
        det_results = list()
        clip_results = list()
        clip_frame_numbers = list()

        clip_array = list()

        for i in range(video_info.frame_count):
            success, frame_matrix = cap_handle.read()

            if not success:
                break

            array_score.append((i, frame_matrix))

            if len(array_score) >= batch_size:
                score_result = self.score_frames_det(array_score, det_vid_preprocess = vid_preprocessor)
                det_results.extend(score_result["det"])
                frame_numbers.extend(score_result["fr#"])
                array_score = list()

            if not (i % clip_interval):
                clip_score_result = self.score_frames_clip([(i, frame_matrix)])
                clip_results.extend(clip_score_result["clip"])
                clip_frame_numbers.extend(clip_score_result["fr#"])

        if len(array_score) > 0:
            score_result = self.score_frames_det(array_score, det_vid_preprocess = vid_preprocessor)
            det_results.extend(score_result["det"])
            frame_numbers.extend(score_result["fr#"])

        cap_handle.release()

        final_output["end_score_time"] = time.time()
        final_output["video"] = {
            "w": vid_w,
            "h": vid_h,
            "path": file_to_score,
            "target_w": target_w,
            "target_h": target_h,
            "pad_amt": pad_amt,
        }

        try:
            final_output["scoring_fps"] = final_output["num_frames"] / (
                final_output["end_score_time"] - final_output["start_score_time"]
            )
        except Exception as e:
            pass

        final_output["scores"] = list()

        clip_results_as_np = np.asarray(clip_results)

        for frame_number, frame in zip(frame_numbers, det_results):
            cframe_dict = dict()
            cframe_dict["frame"] = frame_number
            cframe_dict["detections"] = list()

            for det in frame:
                data = dict()
                data["coords"] = [float(x) for x in list(det[0:4])]
                data["score"] = float(det[4])
                data["idx"] = int(det[5])

                try:
                    data["name"] = det_labels[data["idx"]]
                except:
                    data["name"] = "Code failed"

                cframe_dict["detections"].append(data)

            final_output["scores"].append(cframe_dict)

        emb_dict = dict()

        emb_dict["frame_numbers"] = clip_frame_numbers
        emb_dict["array_size"] = clip_results_as_np.shape
        emb_dict["array_dtype"] = str(clip_results_as_np.dtype)
        emb_dict["array_binary"] = b64.b64encode(clip_results_as_np).decode()

        final_output["embeds"] = emb_dict

        return final_output
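`score_video` returns the SigLIP embeddings base64-encoded under `final_output["embeds"]`, alongside the dtype and shape needed to rebuild the array. A minimal sketch of reversing that encoding (assuming the dict produced by the method above):

```python
import base64

import numpy as np

def decode_embeds(final_output):
    emb = final_output["embeds"]
    # b64decode reverses the b64encode applied to the raw ndarray bytes in score_video.
    flat = np.frombuffer(base64.b64decode(emb["array_binary"]), dtype=emb["array_dtype"])
    return flat.reshape(emb["array_size"]), emb["frame_numbers"]
```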
report_dynamo_export.sarif (new file, 95638 lines)
File diff suppressed because one or more lines are too long
(file name not shown)

@@ -1,4 +1,5 @@
 import time
+from datetime import datetime
 
 import cv2
 import numpy
@@ -12,7 +13,7 @@ import torch
 from cuda import cuda as ccuda
 from cuda import cudart
 
-cmd = "filesrc location=/home/thebears/local/source/full.mp4 ! qtdemux name=demux demux.video_0 ! queue ! h265parse ! nvv4l2decoder ! nvvidconv ! videoscale method=1 add-borders=false ! video/x-raw,width=1280,height=1280 ! appsink sync=false"
+cmd = "filesrc location=/home/thebears/local/source/short.mp4 ! qtdemux name=demux demux.video_0 ! queue ! h265parse ! nvv4l2decoder ! nvvidconv ! videoscale method=1 add-borders=false ! video/x-raw,width=1280,height=1280 ! appsink sync=false"
 
 cap = cv2.VideoCapture(cmd, cv2.CAP_GSTREAMER)
 
@@ -21,6 +22,7 @@ fr = 0
 
 
 arrays_to_score = list()
+imgs = list()
 array = list()
 while True:
     good, frf = cap.read()
@@ -31,11 +33,11 @@ while True:
 
 
     array.append(frf)
+    imgs.append(frf)
     if len(array) > 8:
         arrays_to_score.append(torch.from_numpy(np.asarray(array)))
         array = list()
-        break
 
 
 if len(array) > 0:
@@ -45,55 +47,84 @@ if len(array) > 0:
 et = time.time()
 
 print(et - st, fr / (st - et))
+# %%
+
+from datetime import datetime
+
+pretrained_name = "webli"
+#model_name = "ViT-L-16-SigLIP2-512"
+model_name = 'ViT-SO400M-16-SigLIP2-512'
+rt_dir ='/home/thebears/local/source/models/'
+
+os.makedirs(rt_dir, exist_ok=True)
+fname = model_name.replace('-','_').lower() + '_'+datetime.now().strftime('%Y%m%d')
+ONNX_FILE_PATH=os.path.join(rt_dir, fname + '.onnx')
+ENGINE_FILE_PATH = os.path.splitext(ONNX_FILE_PATH)[0]+'.engine'
+
 # %%
-pretrained_name = "webli"
-model_name = "ViT-L-16-SigLIP-512"
-
-
-model_name = 'ViT-SO400M-16-SigLIP2-512'
-ONNX_FILE_PATH = "/home/thebears/local/source/so400m_siglip2_512.onnx"
-#model_name, pretrained_name = ('ViT-B-16-quickgelu', 'openai')
-
 model, _, preprocess = open_clip.create_model_and_transforms(
     model_name, pretrained=pretrained_name
 )
 
+
+# %%
+
+model_gpu = model.cuda()
+scores = list()
+all_means = list()
+with torch.no_grad():
+    for fr_num, img in enumerate(imgs):
+
+        tensor_raw = torch.tensor(img[None,:,:,0:3])
+        tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
+        tensor_reshaped = preprocess.transforms[0](tensor_perm)
+        tensor_mean = preprocess.transforms[-1](tensor_reshaped)
+        all_means.append(tensor_mean)
+        imp = model_gpu.encode_image(tensor_mean.cuda())
+
+        print(fr_num)
+        scores.append((fr_num, imp.detach().cpu().numpy()))
+# %%
+np.save('dump_so400m',np.concatenate([x[1] for x in scores]))
+
+
 # %%
 with torch.no_grad():
     et = time.time()
 
     if True:
-        tensor_raw = arrays_to_score[0][0,:,:,0:3][None,:,:,:]
+        tensor_raw = torch.concat(arrays_to_score)[0:4, :, :, 0:3]
         tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
         tensor_reshaped = preprocess.transforms[0](tensor_perm)
         tensor_mean = preprocess.transforms[-1](tensor_reshaped)
     else:
-        tensor_raw = torch.concat(arrays_to_score)[0:4, :, :, 0:3]
+        tensor_raw = torch.concat(arrays_to_score)[0, :, :, 0:3]
         tensor_perm = tensor_raw.permute([0, 3, 1, 2]).to(torch.float32) / 255
         tensor_reshaped = preprocess.transforms[1](preprocess.transforms[0](tensor_perm))
         tensor_mean = preprocess.transforms[-1](tensor_reshaped)
-    imp = model.encode_image(tensor_mean)
+    #imp = model.encode_image(tensor_mean)
+    imp = model_gpu.encode_image(tensor_mean.cuda())
     st = time.time()
     print((st - et) / tensor_raw.shape[0], tensor_raw.shape[0]/(st - et) )
 
-from_model_on_gpu = imp.cpu().numpy()
+from_model_on_gpu = imp.detach().cpu().numpy()
 
 
 
 # %%
 
-ENGINE_FILE_PATH = os.path.splitext(ONNX_FILE_PATH)[0]+'.trt'
 torch.onnx.export(
-    model.visual,
-    tensor_mean,
+    model.visual.cuda(),
+    tensor_mean.cuda(),
    ONNX_FILE_PATH,
    input_names=["input"],
    output_names=["output"],
 )
-# %%
 X_test = tensor_mean.cpu().numpy()
 sess = rt.InferenceSession(
     ONNX_FILE_PATH, providers=rt.get_available_providers())
@@ -106,7 +137,7 @@ def norm(v):
 
 print(np.dot(norm(pred_onx), norm(from_model_on_gpu).T))
 
-# %%
+
 TRT_LOGGER = trt.Logger()
 def build_engine_from_onnx(onnx_file_path, use_fp16=True):
     """
@@ -142,7 +173,7 @@ def build_engine_from_onnx(onnx_file_path, use_fp16=True):
     # Enable FP16 precision if requested and if the GPU supports it
     if use_fp16:
         if builder.platform_has_fast_fp16:
-            # config.set_flag(trt.BuilderFlag.FP16)
+            config.set_flag(trt.BuilderFlag.FP16)
            print("FP16 enabled successfully")
         else:
             print("Warning: GPU doesn't support fast FP16, using FP32 instead")
@@ -160,7 +191,7 @@ def build_engine_from_onnx(onnx_file_path, use_fp16=True):
 
 
 
-engine = build_engine_from_onnx(ONNX_FILE_PATH, use_fp16=False)
+engine = build_engine_from_onnx(ONNX_FILE_PATH, use_fp16=True)
 
 with open(ENGINE_FILE_PATH, "wb") as f:
     f.write(engine)
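In the `@@ -106` hunk above, `pred_onx` is compared against `from_model_on_gpu`, but its computation sits outside the changed lines. With the `input`/`output` names used in the `torch.onnx.export` call, it would typically come from the session created in the diff, along these lines (a sketch, not part of the commit):

```python
import onnxruntime as rt

sess = rt.InferenceSession(ONNX_FILE_PATH, providers=rt.get_available_providers())
pred_onx = sess.run(["output"], {"input": X_test})[0]
```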