DeepFaceLive/xlib/onnxruntime/InferenceSession.py

36 lines
1.1 KiB
Python

import onnx
import onnxruntime as rt
from io import BytesIO
from .device import ORTDeviceInfo
def InferenceSession_with_device(onnx_model_or_path, device_info : ORTDeviceInfo):
    """
    Construct an onnxruntime.InferenceSession bound to the given device.

    Parameters:

     onnx_model_or_path     either an onnx.ModelProto, which is serialized
                            to bytes in memory first, or any other value,
                            which is handed to onnxruntime.InferenceSession
                            unchanged (presumably a model file path or
                            already-serialized model bytes).

     device_info            ORTDeviceInfo
                            supplies the execution provider name and the
                            device index.

    Returns the created onnxruntime.InferenceSession.

    Raises Exception if the device's execution provider is not reported by
    onnxruntime.get_available_providers(). Errors raised by onnx or
    onnxruntime while saving/loading the model are not caught here.
    """
    if isinstance(onnx_model_or_path, onnx.ModelProto):
        # Serialize the in-memory model so the session can be created from bytes.
        with BytesIO() as buffer:
            onnx.save(onnx_model_or_path, buffer)
            onnx_model_or_path = buffer.getvalue()

    device_ep = device_info.get_execution_provider()
    if device_ep not in rt.get_available_providers():
        raise Exception(f'{device_ep} is not available in onnxruntime')

    # Only the CUDA and DirectML providers are given an explicit device index.
    ep_flags = {}
    if device_ep in ('CUDAExecutionProvider', 'DmlExecutionProvider'):
        ep_flags['device_id'] = device_info.get_index()

    sess_options = rt.SessionOptions()
    # Severity 4 is the highest level in onnxruntime's scale (fatal only),
    # which keeps the runtime quiet during normal operation.
    sess_options.log_severity_level = 4
    sess_options.log_verbosity_level = -1
    if device_ep == 'DmlExecutionProvider':
        # NOTE(review): onnxruntime's DirectML documentation states that memory
        # pattern optimization is unsupported by this provider - confirm
        # against the onnxruntime version in use.
        sess_options.enable_mem_pattern = False

    return rt.InferenceSession(onnx_model_or_path,
                               providers=[(device_ep, ep_flags)],
                               sess_options=sess_options)