# File listing metadata: 29 lines, 767 B, Python.
"""Export the vision tower of an open_clip model to an ONNX file.

The script loads the model named by ``model_name`` / ``pretrained_name``,
builds a random input sized from ``model.visual.image_size``, runs one
forward pass through ``model.visual`` in eval mode, and then writes the
vision tower to ``onnx_file_path`` with ``torch.onnx.export``.
"""

import open_clip
import torch

# Alternative checkpoint that was tried earlier, kept for reference:
# model_name = 'ViT-SO400M-14-SigLIP-384'
model_name = 'ViT-L-16-SigLIP2-512'
pretrained_name = 'webli'

# Only `model` is used below; `preprocess` is kept so the module-level
# names stay the same as before.
model, _, preprocess = open_clip.create_model_and_transforms(
    model_name, pretrained=pretrained_name
)

# Random input of shape (1, 3, *image_size) -- presumably
# (batch, channels, height, width); the trailing dims come straight from
# the vision tower's own `image_size` attribute.
sz_temp = (1, 3, *model.visual.image_size)
r_in = torch.randn(sz_temp)

# Switch the vision tower to eval mode before the forward pass and export.
model.visual.eval()

# One forward pass without gradient tracking. `torch_out` is not used
# further in this script; NOTE(review): it looks intended as a PyTorch
# reference output to compare against the exported model -- confirm.
with torch.no_grad():
    torch_out = model.visual(r_in)

onnx_file_path = 'siglip2_512.onnx'

torch.onnx.export(
    model.visual,
    r_in,
    onnx_file_path,
    export_params=True,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
)
# Dynamic spatial axes are intentionally left disabled, so the export is
# done with the fixed input size of `r_in`. The option that was tried:
# dynamic_axes={'input': {2: 'height', 3: 'width'}}