import sys
import ctypes
import os
import json
import numpy as np

# You can set DFL_TF_MIN_REQ_CAP manually for your build.
# We cannot check tensorflow.version here, because that would require importing tensorflow itself.
tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35))
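# For example, launching with DFL_TF_MIN_REQ_CAP=50 (a hypothetical value) would
# restrict detection to GPUs of compute capability 5.0 (Maxwell) or newer.
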
class device:
|
|
backend = None
|
|
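    # (assigned once by the detection code at the bottom of this module:
    #  "plaidML", "tensorflow", or "tensorflow-cpu")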
    class Config():
        force_gpu_idx = -1
        multi_gpu = False
        force_gpu_idxs = None
        choose_worst_gpu = False
        gpu_idxs = []
        gpu_names = []
        gpu_compute_caps = []
        gpu_vram_gb = []
        allow_growth = True
        use_fp16 = False
        cpu_only = False
        backend = None
        def __init__ (self, force_gpu_idx = -1,
                            multi_gpu = False,
                            force_gpu_idxs = None,
                            choose_worst_gpu = False,
                            allow_growth = True,
                            use_fp16 = False,
                            cpu_only = False,
                            **in_options):

            self.backend = device.backend
            self.use_fp16 = use_fp16
            self.cpu_only = cpu_only

            if not self.cpu_only:
                self.cpu_only = (self.backend == "tensorflow-cpu")

            if not self.cpu_only:
                self.force_gpu_idx = force_gpu_idx
                self.multi_gpu = multi_gpu
                self.force_gpu_idxs = force_gpu_idxs
                self.choose_worst_gpu = choose_worst_gpu
                self.allow_growth = allow_growth

                self.gpu_idxs = []

                if force_gpu_idxs is not None:
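                    # force_gpu_idxs is a comma-separated string of device indexes, e.g. "0,1"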
                    for idx in force_gpu_idxs.split(','):
                        idx = int(idx)
                        if device.isValidDeviceIdx(idx):
                            self.gpu_idxs.append(idx)
                else:
                    gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else \
                              (device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx())
                    if gpu_idx != -1:
                        if self.multi_gpu:
                            self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx )
                            if len(self.gpu_idxs) <= 1:
                                self.multi_gpu = False
                        else:
                            self.gpu_idxs = [gpu_idx]

                self.cpu_only = (len(self.gpu_idxs) == 0)

            if not self.cpu_only:
                self.gpu_names = []
                self.gpu_compute_caps = []
                self.gpu_vram_gb = []
                for gpu_idx in self.gpu_idxs:
                    self.gpu_names += [device.getDeviceName(gpu_idx)]
                    self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ]
                    self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ]
                self.cpu_only = (len(self.gpu_idxs) == 0)
            else:
                self.gpu_names = ['CPU']
                self.gpu_compute_caps = [99]
                self.gpu_vram_gb = [0]

            if self.cpu_only:
                self.backend = "tensorflow-cpu"
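
        # A minimal usage sketch (hypothetical caller; in DeepFaceLab, Config is
        # normally constructed by the entry points from parsed command-line
        # options):
        #
        #   cfg = device.Config(force_gpu_idx=0, allow_growth=True)
        #   print(cfg.backend, cfg.gpu_names, cfg.gpu_vram_gb)
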
    @staticmethod
    def getValidDeviceIdxsEnumerator():
        if device.backend == "plaidML":
            for i in range(plaidML_devices_count):
                yield i
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                yield dev['index']

    @staticmethod
    def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
        result = []
        if device.backend == "plaidML":
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024:
                    result.append (i)
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['total_mem'] >= totalmemsize_gb*1024*1024*1024:
                    result.append ( dev['index'] )

        return result

    @staticmethod
    def getValidDevicesIdxsWithNamesList():
        if device.backend == "plaidML":
            return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ]
        elif device.backend == "tensorflow":
            return [ ( dev['index'], dev['name'] ) for dev in cuda_devices ]
        elif device.backend == "tensorflow-cpu":
            return [ (0, 'CPU') ]

    @staticmethod
    def getDeviceVRAMTotalGb (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024)
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if idx == dev['index']:
                    return round ( dev['total_mem'] / (1024*1024*1024) )

        return 0

    @staticmethod
    def getBestValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = 0
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total > idx_mem:
                    idx = i
                    idx_mem = total

            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = 0
            for dev in cuda_devices:
                if dev['total_mem'] > idx_mem:
                    idx = dev['index']
                    idx_mem = dev['total_mem']

            return idx

    @staticmethod
    def getWorstValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = sys.maxsize
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total < idx_mem:
                    idx = i
                    idx_mem = total

            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = sys.maxsize
            for dev in cuda_devices:
                if dev['total_mem'] < idx_mem:
                    idx = dev['index']
                    idx_mem = dev['total_mem']

            return idx

    @staticmethod
    def isValidDeviceIdx(idx):
        if device.backend == "plaidML":
            return idx in [*device.getValidDeviceIdxsEnumerator()]
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if idx == dev['index']:
                    return True
        return False

    @staticmethod
    def getDeviceIdxsEqualModel(idx):
        if device.backend == "plaidML":
            result = []
            idx_name = plaidML_devices[idx]['description']
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['description'] == idx_name:
                    result.append (i)

            return result
        elif device.backend == "tensorflow":
            result = []
            idx_name = device.getDeviceName(idx)
            for dev in cuda_devices:
                if dev['name'] == idx_name:
                    result.append ( dev['index'] )

            return result

    @staticmethod
    def getDeviceName (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['description']
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['index'] == idx:
                    return dev['name']

        return None

    @staticmethod
    def getDeviceID (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['id'].decode()

        return None

    @staticmethod
    def getDeviceComputeCapability(idx):
        if device.backend == "plaidML":
            return 99
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['index'] == idx:
                    return dev['cc']
        return 0
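
# Backend auto-detection: the code below runs once at import time and fills in
# device.backend plus the plaidML_devices / cuda_devices tables used above.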
plaidML_build = os.environ.get("DFL_PLAIDML_BUILD", "0") == "1"
plaidML_devices = None
plaidML_devices_count = 0
cuda_devices = None
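
# plaidML builds enumerate OpenCL devices through plaidml; all other builds
# query CUDA devices directly through the CUDA driver API via ctypes, so
# tensorflow itself never needs to be imported here.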
if plaidML_build:
    if plaidML_devices is None:
        plaidML_devices = []
        # Use the plaidML OpenCL backend to enumerate system devices.
        try:
            os.environ['PLAIDML_EXPERIMENTAL'] = 'false' # lets plaidML run without 'plaidml-setup' having been executed first
            import plaidml
            ctx = plaidml.Context()
            for d in plaidml.devices(ctx, return_all=True)[0]:
                details = json.loads(d.details)
                if details['type'] == 'CPU': # skip OpenCL CPU devices
                    continue
                plaidML_devices += [ {'id':d.id,
                                      'globalMemSize' : int(details['globalMemSize']),
                                      'description' : d.description.decode()
                                     }]
            ctx.shutdown()
        except Exception:
            pass
    plaidML_devices_count = len(plaidML_devices)
    if plaidML_devices_count != 0:
        device.backend = "plaidML"
else:
    if cuda_devices is None:
        cuda_devices = []
        libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
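        # these names cover the CUDA driver library on Linux, macOS and Windows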
        cuda = None
        for libname in libnames:
            try:
                cuda = ctypes.CDLL(libname)
            except OSError:
                continue
            else:
                break

        if cuda is not None:
            nGpus = ctypes.c_int()
            name = ctypes.create_string_buffer(200) # mutable buffer; cuDeviceGetName writes into it
            cc_major = ctypes.c_int()
            cc_minor = ctypes.c_int()
            freeMem = ctypes.c_size_t()
            totalMem = ctypes.c_size_t()

            result = ctypes.c_int()
            device_t = ctypes.c_int()
            context = ctypes.c_void_p()
            error_str = ctypes.c_char_p()
            if cuda.cuInit(0) == 0 and \
               cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
                for i in range(nGpus.value):
                    if cuda.cuDeviceGet(ctypes.byref(device_t), i) != 0 or \
                       cuda.cuDeviceGetName(name, len(name), device_t) != 0 or \
                       cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device_t) != 0:
                        continue

                    if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device_t) == 0:
                        if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
                            cc = cc_major.value * 10 + cc_minor.value
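                            # e.g. compute capability 6.1 is encoded as 61;
                            # the default minimum is 35 (Kepler, CC 3.5)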
                            if cc >= tf_min_req_cap:
                                cuda_devices.append ( {'index':i,
                                                       'name':name.value.decode(),
                                                       'total_mem':totalMem.value,
                                                       'free_mem':freeMem.value,
                                                       'cc':cc
                                                      }
                                                    )
                        cuda.cuCtxDetach(context)

    if len(cuda_devices) != 0:
        device.backend = "tensorflow"

if device.backend is None:
    device.backend = "tensorflow-cpu"
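
# A minimal smoke test (hypothetical; not part of the original module): running
# this file directly prints the detected backend and the enumerated devices.
if __name__ == "__main__":
    print("backend:", device.backend)
    for idx, dev_name in device.getValidDevicesIdxsWithNamesList():
        print("  [%d] %s (cc %s, %s GB VRAM)" % (idx, dev_name,
              device.getDeviceComputeCapability(idx),
              device.getDeviceVRAMTotalGb(idx)))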