# DeepFaceLab/nnlib/device.py
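"""Device discovery and backend selection for nnlib.

Probes available devices once at import time and sets device.backend to
"plaidML", "tensorflow", or "tensorflow-cpu" accordingly.
"""
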
import sys
import ctypes
import os
import json
import numpy as np

# You can set DFL_TF_MIN_REQ_CAP manually for your build.
# We cannot read this from tensorflow.version, because doing so would require importing tensorflow.
tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35))
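# Note: compute capability is encoded as major*10 + minor, so the default of 35
# means CUDA compute capability 3.5 (commonly the minimum for prebuilt
# TensorFlow 1.x binaries).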

class device:
    backend = None

    class Config():
        force_gpu_idx = -1
        multi_gpu = False
        force_gpu_idxs = None
        choose_worst_gpu = False
        gpu_idxs = []
        gpu_names = []
        gpu_compute_caps = []
        gpu_vram_gb = []
        allow_growth = True
        use_fp16 = False
        cpu_only = False
        backend = None
        def __init__ (self, force_gpu_idx = -1,
                            multi_gpu = False,
                            force_gpu_idxs = None,
                            choose_worst_gpu = False,
                            allow_growth = True,
                            use_fp16 = False,
                            cpu_only = False,
                            **in_options):
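            """Resolve the device configuration for a model run.

            Selection order: an explicit comma-separated index list
            (force_gpu_idxs), else a single forced index (force_gpu_idx),
            else the valid device with the most VRAM (or the least, when
            choose_worst_gpu is set). Falls back to CPU when no valid GPU
            is found.
            """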
            self.backend = device.backend
            self.use_fp16 = use_fp16
            self.cpu_only = cpu_only

            if not self.cpu_only:
                self.cpu_only = (self.backend == "tensorflow-cpu")

            if not self.cpu_only:
                self.force_gpu_idx = force_gpu_idx
                self.multi_gpu = multi_gpu
                self.force_gpu_idxs = force_gpu_idxs
                self.choose_worst_gpu = choose_worst_gpu
                self.allow_growth = allow_growth

                self.gpu_idxs = []

                if force_gpu_idxs is not None:
                    for idx in force_gpu_idxs.split(','):
                        idx = int(idx)
                        if device.isValidDeviceIdx(idx):
                            self.gpu_idxs.append(idx)
                else:
                    if force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx):
                        gpu_idx = force_gpu_idx
                    elif not choose_worst_gpu:
                        gpu_idx = device.getBestValidDeviceIdx()
                    else:
                        gpu_idx = device.getWorstValidDeviceIdx()

                    if gpu_idx != -1:
                        if self.multi_gpu:
                            self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx )
                            if len(self.gpu_idxs) <= 1:
                                self.multi_gpu = False
                        else:
                            self.gpu_idxs = [gpu_idx]

                self.cpu_only = (len(self.gpu_idxs) == 0)

            if not self.cpu_only:
                self.gpu_names = []
                self.gpu_compute_caps = []
                self.gpu_vram_gb = []
                for gpu_idx in self.gpu_idxs:
                    self.gpu_names += [ device.getDeviceName(gpu_idx) ]
                    self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ]
                    self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ]
                self.cpu_only = (len(self.gpu_idxs) == 0)
            else:
                self.gpu_names = ['CPU']
                self.gpu_compute_caps = [99]
                self.gpu_vram_gb = [0]

            if self.cpu_only:
                self.backend = "tensorflow-cpu"

    @staticmethod
    def getValidDeviceIdxsEnumerator():
        if device.backend == "plaidML":
            for i in range(plaidML_devices_count):
                yield i
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                yield dev['index']

    @staticmethod
    def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
        result = []
        if device.backend == "plaidML":
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024:
                    result.append (i)
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['total_mem'] >= totalmemsize_gb*1024*1024*1024:
                    result.append ( dev['index'] )
        return result

    @staticmethod
    def getValidDevicesIdxsWithNamesList():
        if device.backend == "plaidML":
            return [ ( i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ]
        elif device.backend == "tensorflow":
            return [ ( dev['index'], dev['name'] ) for dev in cuda_devices ]
        elif device.backend == "tensorflow-cpu":
            return [ (0, 'CPU') ]

    @staticmethod
    def getDeviceVRAMTotalGb (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024)
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if idx == dev['index']:
                    return round ( dev['total_mem'] / (1024*1024*1024) )
        return 0

    @staticmethod
    def getBestValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = 0
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total > idx_mem:
                    idx = i
                    idx_mem = total
            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = 0
            for dev in cuda_devices:
                if dev['total_mem'] > idx_mem:
                    idx = dev['index']
                    idx_mem = dev['total_mem']
            return idx

    @staticmethod
    def getWorstValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = sys.maxsize
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total < idx_mem:
                    idx = i
                    idx_mem = total
            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = sys.maxsize
            for dev in cuda_devices:
                if dev['total_mem'] < idx_mem:
                    idx = dev['index']
                    idx_mem = dev['total_mem']
            return idx

    @staticmethod
    def isValidDeviceIdx(idx):
        if device.backend == "plaidML":
            return idx in [*device.getValidDeviceIdxsEnumerator()]
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if idx == dev['index']:
                    return True
        return False

    @staticmethod
    def getDeviceIdxsEqualModel(idx):
        if device.backend == "plaidML":
            result = []
            idx_name = plaidML_devices[idx]['description']
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['description'] == idx_name:
                    result.append (i)
            return result
        elif device.backend == "tensorflow":
            result = []
            idx_name = device.getDeviceName(idx)
            for dev in cuda_devices:
                if dev['name'] == idx_name:
                    result.append ( dev['index'] )
            return result

    @staticmethod
    def getDeviceName (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['description']
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['index'] == idx:
                    return dev['name']
        return None

    @staticmethod
    def getDeviceID (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['id'].decode()
        return None

    @staticmethod
    def getDeviceComputeCapability(idx):
        if device.backend == "plaidML":
            return 99
        elif device.backend == "tensorflow":
            for dev in cuda_devices:
                if dev['index'] == idx:
                    return dev['cc']
        return 0
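
# Module-level device discovery: runs once at import time and selects
# device.backend ("plaidML", "tensorflow", or "tensorflow-cpu").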
plaidML_build = os.environ.get("DFL_PLAIDML_BUILD", "0") == "1"
plaidML_devices = None
plaidML_devices_count = 0
cuda_devices = None

if plaidML_build:
    if plaidML_devices is None:
        plaidML_devices = []
        # Use the plaidML OpenCL backend to enumerate system devices.
        try:
            os.environ['PLAIDML_EXPERIMENTAL'] = 'false' # lets plaidML work without running 'plaidml-setup' first
            import plaidml
            ctx = plaidml.Context()
            for d in plaidml.devices(ctx, return_all=True)[0]:
                details = json.loads(d.details)
                if details['type'] == 'CPU': # skip OpenCL CPU devices
                    continue
                plaidML_devices += [ {'id' : d.id,
                                      'globalMemSize' : int(details['globalMemSize']),
                                      'description' : d.description.decode()
                                     } ]
            ctx.shutdown()
        except:
            pass
    plaidML_devices_count = len(plaidML_devices)
    if plaidML_devices_count != 0:
        device.backend = "plaidML"
else:
    if cuda_devices is None:
        cuda_devices = []
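        # Try the CUDA driver library under its Linux, macOS, and Windows names.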
        libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
        cuda = None
        for libname in libnames:
            try:
                cuda = ctypes.CDLL(libname)
            except:
                continue
            else:
                break

        if cuda is not None:
            nGpus = ctypes.c_int()
            name = b' ' * 200
            cc_major = ctypes.c_int()
            cc_minor = ctypes.c_int()
            freeMem = ctypes.c_size_t()
            totalMem = ctypes.c_size_t()
            result = ctypes.c_int()
            device_t = ctypes.c_int()
            context = ctypes.c_void_p()
            error_str = ctypes.c_char_p()
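
            # Each driver API call below returns 0 (CUDA_SUCCESS) on success;
            # 'name' is a 200-byte buffer that cuDeviceGetName fills with the
            # device's NUL-terminated name.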
            if cuda.cuInit(0) == 0 and \
               cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
                for i in range(nGpus.value):
                    if cuda.cuDeviceGet(ctypes.byref(device_t), i) != 0 or \
                       cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device_t) != 0 or \
                       cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device_t) != 0:
                        continue

                    if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device_t) == 0:
                        if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
                            cc = cc_major.value * 10 + cc_minor.value
                            if cc >= tf_min_req_cap:
                                cuda_devices.append ( {'index' : i,
                                                       'name' : name.split(b'\0', 1)[0].decode(),
                                                       'total_mem' : totalMem.value,
                                                       'free_mem' : freeMem.value,
                                                       'cc' : cc
                                                      } )
                        cuda.cuCtxDetach(context)

    if len(cuda_devices) != 0:
        device.backend = "tensorflow"

if device.backend is None:
    device.backend = "tensorflow-cpu"