mirror of
https://github.com/RfidResearchGroup/proxmark3.git
synced 2025-08-12 17:47:22 -07:00
move thread scheduler to threads.c, move opencl engine to opencl.c, minor fixes
This commit is contained in:
parent
0e18443b0e
commit
3ecebacdd2
7 changed files with 753 additions and 724 deletions
|
@ -151,7 +151,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn
|
|||
unsigned int tmp_sel = (unsigned int) strtoul(next, NULL, 10);
|
||||
if (errno == EINVAL || errno == ERANGE ||
|
||||
(tmp_sel < 1 || tmp_sel > 16)) {
|
||||
printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
|
||||
printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn
|
|||
} else {
|
||||
out[0] = (unsigned int) strtoul(in, NULL, 10);
|
||||
if (errno == EINVAL || errno == ERANGE) {
|
||||
printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
|
||||
printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
|
||||
return false;
|
||||
}
|
||||
*out_cnt = 1;
|
||||
|
@ -191,7 +191,6 @@ int main(int argc, char **argv) {
|
|||
unsigned int profile_selected = 2;
|
||||
unsigned int queue_type = 0;
|
||||
|
||||
uint32_t target = 0;
|
||||
uint32_t **matches_found = NULL;
|
||||
uint64_t **matches = NULL;
|
||||
|
||||
|
@ -211,7 +210,7 @@ int main(int argc, char **argv) {
|
|||
// 0: gpu, 1: cpu, 2: all
|
||||
device_types_selected = (unsigned int) strtoul(optarg, NULL, 10);
|
||||
if (device_types_selected > 2) {
|
||||
printf("! Invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n");
|
||||
printf("Error: invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n");
|
||||
usage(argv[0]);
|
||||
}
|
||||
break;
|
||||
|
@ -222,7 +221,7 @@ int main(int argc, char **argv) {
|
|||
case 'P':
|
||||
profile_selected = (unsigned int) strtoul(optarg, NULL, 10);
|
||||
if (profile_selected > 10) {
|
||||
printf("! Invalid PROFILE argument (accepted valuee: from 0 to 10)\n");
|
||||
printf("Error: invalid PROFILE argument (accepted valuee: from 0 to 10)\n");
|
||||
usage(argv[0]);
|
||||
}
|
||||
break;
|
||||
|
@ -233,7 +232,7 @@ int main(int argc, char **argv) {
|
|||
// 0: forward, 1: reverse, 2: random
|
||||
queue_type = (unsigned int) strtoul(optarg, NULL, 10);
|
||||
if (queue_type != QUEUE_TYPE_FORWARD && queue_type != QUEUE_TYPE_REVERSE && queue_type != QUEUE_TYPE_RANDOM) {
|
||||
printf("! Invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n");
|
||||
printf("Error: invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n");
|
||||
usage(argv[0]);
|
||||
}
|
||||
break;
|
||||
|
@ -298,13 +297,13 @@ int main(int argc, char **argv) {
|
|||
|
||||
printf("Device types selected : %s\n", (device_types_selected == CL_DEVICE_TYPE_GPU) ? "GPU" : (device_types_selected == CL_DEVICE_TYPE_CPU) ? "CPU" : "ALL");
|
||||
printf("Scheduler selected : %s\n", (thread_scheduler_type_selected == 0) ? "sequential" : "async");
|
||||
printf("Profile selected : %d\n", profile_selected);
|
||||
printf("Profile selected : %u\n", profile_selected);
|
||||
}
|
||||
|
||||
if (!show) {
|
||||
if ((argc - optind) < 5) {
|
||||
#if DEBUGME > 0
|
||||
printf("! Invalid extra arguments\n");
|
||||
printf("Error: invalid extra arguments\n");
|
||||
#endif
|
||||
usage(argv[0]);
|
||||
}
|
||||
|
@ -313,41 +312,41 @@ int main(int argc, char **argv) {
|
|||
switch (e) {
|
||||
case 0: // UID
|
||||
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid UID length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid UID length\n"); usage(argv[0]); }
|
||||
uid = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
|
||||
} else {
|
||||
if (strlen(argv[optind]) != 8) { printf("! Invalid UID length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 8) { printf("Error: invalid UID length\n"); usage(argv[0]); }
|
||||
uid = (uint32_t) rev32(hexreversetoulong(argv[optind]));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: // nR1
|
||||
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); }
|
||||
nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
|
||||
} else {
|
||||
if (strlen(argv[optind]) != 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); }
|
||||
nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind]));
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: // aR1
|
||||
if (strlen(argv[optind]) != 8) { printf("! Invalid aR1 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 8) { printf("Error: invalid aR1 length\n"); usage(argv[0]); }
|
||||
aR1 = (uint32_t) strtoul(argv[optind], NULL, 16);
|
||||
break;
|
||||
|
||||
case 3: // nR2
|
||||
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); }
|
||||
nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
|
||||
} else {
|
||||
if (strlen(argv[optind]) != 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); }
|
||||
nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind]));
|
||||
}
|
||||
break;
|
||||
|
||||
case 4: // aR2
|
||||
if (strlen(argv[optind]) != 8) { printf("! Invalid aR2 length\n"); usage(argv[0]); }
|
||||
if (strlen(argv[optind]) != 8) { printf("Error: invalid aR2 length\n"); usage(argv[0]); }
|
||||
aR2 = (uint32_t) strtoul(argv[optind], NULL, 16);
|
||||
break;
|
||||
|
||||
|
@ -371,7 +370,7 @@ int main(int argc, char **argv) {
|
|||
if (!show) {
|
||||
if (verbose) printf("uid: %u, aR2: %u, nR1: %u, nR2: %u\n", checks[0], checks[1], checks[2], checks[3]);
|
||||
|
||||
target = ~aR1;
|
||||
uint32_t target = ~aR1;
|
||||
// bitslice inverse target bits
|
||||
bitslice(~target, keystream);
|
||||
|
||||
|
@ -446,389 +445,33 @@ int main(int argc, char **argv) {
|
|||
close(fd);
|
||||
}
|
||||
|
||||
// now discover and set up compute device(s)
|
||||
int err = 0;
|
||||
cl_uint ocl_platform_cnt = 0;
|
||||
unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16
|
||||
|
||||
cl_platform_id *ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id));
|
||||
if (!ocl_platforms) {
|
||||
printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(ocl_platforms)
|
||||
|
||||
// enum platforms
|
||||
err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetPlatformIDs() failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (ocl_platform_cnt == 0) {
|
||||
printf("No platforms found, exit\n");
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
// allocate memory to hold info about platforms/devices
|
||||
compute_platform_ctx_t *cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t));
|
||||
if (!cd_ctx) {
|
||||
printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(err);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(cd_ctx)
|
||||
|
||||
cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION };
|
||||
unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(cl_platform_info);
|
||||
|
||||
cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS };
|
||||
unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(cl_device_info);
|
||||
|
||||
unsigned int info_idx = 0;
|
||||
size_t tmp_len = 0;
|
||||
char *tmp_buf = NULL;
|
||||
|
||||
unsigned int global_device_id = 0;
|
||||
size_t selected_platforms_cnt = 0;
|
||||
size_t selected_devices_cnt = 0;
|
||||
compute_platform_ctx_t *cd_ctx = NULL;
|
||||
|
||||
if (show) verbose = true;
|
||||
|
||||
if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt);
|
||||
|
||||
for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) {
|
||||
cd_ctx[platform_idx].platform_id = ocl_platforms[platform_idx];
|
||||
cd_ctx[platform_idx].selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0);
|
||||
|
||||
if (cd_ctx[platform_idx].selected) selected_platforms_cnt++;
|
||||
|
||||
if (verbose) printf("\n-- Platform ID: %d\n", platform_idx + 1);
|
||||
|
||||
for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) {
|
||||
cl_platform_info ocl_info = ocl_platforms_info[info_idx];
|
||||
|
||||
err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, 0, NULL, &tmp_len);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (tmp_len > 0) {
|
||||
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
|
||||
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(tmp_buf)
|
||||
|
||||
err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, tmp_len, tmp_buf, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetPlatformInfo(param) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
} else {
|
||||
tmp_len = 4;
|
||||
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
|
||||
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(tmp_buf)
|
||||
|
||||
strncpy(tmp_buf, "N/A\0", tmp_len);
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version";
|
||||
|
||||
printf("%14s: %s\n", tmp_info_desc, tmp_buf);
|
||||
}
|
||||
|
||||
switch (info_idx) {
|
||||
case 0:
|
||||
strncpy(cd_ctx[platform_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
|
||||
break;
|
||||
case 1:
|
||||
strncpy(cd_ctx[platform_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
|
||||
break;
|
||||
case 2:
|
||||
strncpy(cd_ctx[platform_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (info_idx == 1) {
|
||||
// todo: do the same for devices
|
||||
if (!strncmp(tmp_buf, "NVIDIA", 6)) cd_ctx[platform_idx].is_nv = true;
|
||||
else if (!strncmp(tmp_buf, "Apple", 5)) { cd_ctx[platform_idx].is_apple = true; cd_ctx[platform_idx].warning = true; }
|
||||
else if (!strncmp(tmp_buf, "Intel", 5)) cd_ctx[platform_idx].is_intel = true;
|
||||
}
|
||||
|
||||
MEMORY_FREE_DEL(tmp_buf)
|
||||
}
|
||||
|
||||
if (!show && verbose) {
|
||||
printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].selected) ? "yes" : "no");
|
||||
if (cd_ctx[platform_idx].warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning");
|
||||
}
|
||||
|
||||
// enum devices with this platform
|
||||
unsigned int ocl_device_cnt = 0;
|
||||
unsigned int ocl_device_max = MAX_OPENCL_DEVICES;
|
||||
|
||||
cl_device_id *ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id));
|
||||
if (!ocl_devices) {
|
||||
printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(ocl_devices)
|
||||
|
||||
err = clGetDeviceIDs(cd_ctx[platform_idx].platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt);
|
||||
if (ocl_device_cnt == 0) {
|
||||
if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %d\n", platform_idx);
|
||||
cd_ctx[platform_idx].device_cnt = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt);
|
||||
|
||||
cd_ctx[platform_idx].device_cnt = ocl_device_cnt;
|
||||
|
||||
for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) {
|
||||
memset(&cd_ctx[platform_idx].device[device_idx], 0, sizeof(compute_device_ctx_t));
|
||||
cl_device_id ocl_device = ocl_devices[device_idx];
|
||||
cd_ctx[platform_idx].device[device_idx].platform_id = cd_ctx[platform_idx].platform_id;
|
||||
|
||||
if (verbose) printf("---- * ID: %u\n", global_device_id + 1);
|
||||
|
||||
for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) {
|
||||
cl_device_info ocl_dev_info = ocl_devices_info[info_idx];
|
||||
|
||||
if (info_idx == 0) {
|
||||
cl_device_type device_type;
|
||||
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (device_type & CL_DEVICE_TYPE_GPU) cd_ctx[platform_idx].device[device_idx].is_gpu = 1;
|
||||
|
||||
if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other");
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected);
|
||||
global_device_id++;
|
||||
if (cd_ctx[platform_idx].device[device_idx].selected) selected_devices_cnt++;
|
||||
continue;
|
||||
} else if (info_idx == 5) {
|
||||
cl_device_local_mem_type local_mem_type;
|
||||
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) {
|
||||
if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global");
|
||||
|
||||
if (cd_ctx[platform_idx].is_apple) {
|
||||
if (strncmp(cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) != 0) cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
|
||||
} else if (cd_ctx[platform_idx].is_nv) cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
|
||||
/*
|
||||
// swap the 'if' comment for enable local memory with apple gpu's (my Iris crash, abort 6)
|
||||
// if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple && !cd_ctx[platform_idx].device[device_idx].is_gpu))
|
||||
if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple))
|
||||
{
|
||||
cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
|
||||
}
|
||||
*/
|
||||
} else {
|
||||
if (verbose) printf("%14s: None\n", "Local Mem Type");
|
||||
}
|
||||
|
||||
if (verbose) printf("%14s: %s\n", "Local Mem Opt", (cd_ctx[platform_idx].device[device_idx].have_local_memory) ? "yes" : "no");
|
||||
|
||||
continue;
|
||||
} else if (info_idx == 6) {
|
||||
size_t wis[3] = { 0 };
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]);
|
||||
|
||||
#if APPLE_GPU_BROKEN == 1
|
||||
if (wis[1] < GLOBAL_WS_1 && cd_ctx[platform_idx].device[device_idx].is_apple_gpu) {
|
||||
cd_ctx[platform_idx].device[device_idx].unsupported = true;
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
} else if (info_idx == 7) {
|
||||
cl_uint cores = 0;
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (verbose) printf("%14s: %u\n", "Compute Units", cores);
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].compute_units = cores;
|
||||
continue;
|
||||
}
|
||||
|
||||
tmp_len = 0;
|
||||
tmp_buf = NULL;
|
||||
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (tmp_len > 0) {
|
||||
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
|
||||
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(tmp_buf)
|
||||
|
||||
err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(param) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
} else {
|
||||
tmp_len = 4;
|
||||
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
|
||||
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(tmp_buf)
|
||||
|
||||
strncpy(tmp_buf, "N/A\0", tmp_len);
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor";
|
||||
|
||||
printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf);
|
||||
}
|
||||
|
||||
switch (info_idx) {
|
||||
case 1:
|
||||
strncpy(cd_ctx[platform_idx].device[device_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
|
||||
break;
|
||||
case 2:
|
||||
strncpy(cd_ctx[platform_idx].device[device_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
|
||||
break;
|
||||
case 3:
|
||||
strncpy(cd_ctx[platform_idx].device[device_idx].driver_version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
|
||||
break;
|
||||
case 4:
|
||||
strncpy(cd_ctx[platform_idx].device[device_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (info_idx == 4) {
|
||||
if (!strncmp(tmp_buf, "Intel", 5) && cd_ctx[platform_idx].is_apple) {
|
||||
// disable hitag2 with apple platform and not apple device vendor (< Apple M1)
|
||||
ctx.force_hitag2_opencl = false;
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].is_apple_gpu = cd_ctx[platform_idx].device[device_idx].is_gpu;
|
||||
}
|
||||
|
||||
if (!strncmp(tmp_buf, "NVIDIA", 6) && cd_ctx[platform_idx].is_nv) {
|
||||
unsigned int sm_maj = 0, sm_min = 0;
|
||||
|
||||
err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, 0);
|
||||
err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, 0);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err);
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].sm_maj = sm_maj;
|
||||
cd_ctx[platform_idx].device[device_idx].sm_min = sm_min;
|
||||
|
||||
if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min);
|
||||
|
||||
if (sm_maj >= 5) { // >= Maxwell
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3
|
||||
// Requires sm_50 or higher.
|
||||
cd_ctx[platform_idx].device[device_idx].have_lop3 = true;
|
||||
} else {
|
||||
cd_ctx[platform_idx].device[device_idx].warning = true;
|
||||
}
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].is_nv = true;
|
||||
} else {
|
||||
cd_ctx[platform_idx].device[device_idx].warning = true;
|
||||
}
|
||||
}
|
||||
|
||||
MEMORY_FREE_DEL(tmp_buf)
|
||||
}
|
||||
|
||||
if (!show && verbose) printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].device[device_idx].selected) ? "yes" : "no");
|
||||
|
||||
if (cd_ctx[platform_idx].device[device_idx].unsupported) {
|
||||
printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cd_ctx[platform_idx].device[device_idx].warning) {
|
||||
if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning");
|
||||
}
|
||||
|
||||
cd_ctx[platform_idx].device[device_idx].device_id = ocl_device;
|
||||
}
|
||||
MEMORY_FREE_DEL(ocl_devices)
|
||||
// now discover and set up compute device(s)
|
||||
if ((err = discoverDevices(profile_selected, device_types_selected, &ocl_platform_cnt, &selected_platforms_cnt, &selected_devices_cnt, &cd_ctx, plat_sel, plat_cnt, dev_sel, dev_cnt, verbose, show)) != 0)
|
||||
{
|
||||
printf ("Error: discoverDevices() failed\n");
|
||||
if (err < -5) free (cd_ctx);
|
||||
MEMORY_FREE_ALL
|
||||
exit (2);
|
||||
}
|
||||
MEMORY_FREE_DEL(ocl_platforms)
|
||||
|
||||
// new selection engine, need to support multi-gpu system (with the same platform)
|
||||
|
||||
if (verbose) printf("\n");
|
||||
|
||||
// new selection engine, need to support multi-gpu system (with the same platform)
|
||||
if (show) {
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(cd_ctx)
|
||||
|
||||
if (selected_platforms_cnt == 0) {
|
||||
printf("! No platform was selected ...\n");
|
||||
MEMORY_FREE_ALL
|
||||
|
@ -855,6 +498,11 @@ int main(int argc, char **argv) {
|
|||
for (q = 0; q < cd_ctx[w].device_cnt; q++) {
|
||||
if (!cd_ctx[w].device[q].selected) continue;
|
||||
|
||||
if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
|
||||
// disable hitag2 with apple platform and not apple device vendor (< Apple M1)
|
||||
ctx.force_hitag2_opencl = false;
|
||||
}
|
||||
|
||||
printf("%2zu - %s", z, cd_ctx[w].device[q].name);
|
||||
if (verbose) {
|
||||
printf(" (Lop3 %s, ", (cd_ctx[w].device[q].have_lop3) ? "yes" : "no");
|
||||
|
@ -989,14 +637,6 @@ int main(int argc, char **argv) {
|
|||
|
||||
MEMORY_FREE_ADD(ctx.local_ws)
|
||||
|
||||
if (!(ctx.profiles = (int *) calloc(selected_devices_cnt, sizeof(int)))) {
|
||||
printf("Error: calloc (ctx.profiles) failed (%d): %s\n", errno, strerror(errno));
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(ctx.profiles)
|
||||
|
||||
// show build log in case of error
|
||||
// todo: only for device models
|
||||
unsigned int build_errors = 0;
|
||||
|
@ -1121,7 +761,6 @@ int main(int argc, char **argv) {
|
|||
if (err != CL_SUCCESS) {
|
||||
printf("[%zu] Error: clGetProgramBuildInfo failed (%d)\n", z, err);
|
||||
continue;
|
||||
// exit (2);
|
||||
}
|
||||
|
||||
if (len == 0) continue;
|
||||
|
@ -1132,17 +771,13 @@ int main(int argc, char **argv) {
|
|||
if (!buffer) {
|
||||
printf("[%zu] Error: calloc (CL_PROGRAM_BUILD_LOG) failed (%d): %s\n", z, errno, strerror(errno));
|
||||
continue;
|
||||
// exit (2);
|
||||
}
|
||||
|
||||
MEMORY_FREE_ADD(buffer)
|
||||
|
||||
err = clGetProgramBuildInfo(ctx.programs[z], cd_ctx[w].device[q].device_id, CL_PROGRAM_BUILD_LOG, len, buffer, 0);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("[%zu] clGetProgramBuildInfo() failed (%d)\n", z, err);
|
||||
MEMORY_FREE_DEL(buffer)
|
||||
free (buffer);
|
||||
continue;
|
||||
// exit (2);
|
||||
}
|
||||
|
||||
#if DEBUGME > 0
|
||||
|
@ -1152,7 +787,7 @@ int main(int argc, char **argv) {
|
|||
printf("[%zu] Build log (len %zu):\n--------\n%s\n--------\n", z, len, buffer);
|
||||
}
|
||||
|
||||
MEMORY_FREE_DEL(buffer)
|
||||
free (buffer);
|
||||
|
||||
build_logs++;
|
||||
#if DEBUGME == 0
|
||||
|
@ -1196,76 +831,13 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// z is device counter, dolphin counter as well
|
||||
|
||||
// setup, phase 2 (select lower profile)
|
||||
|
||||
int profile = 0xff;
|
||||
|
||||
g = 0;
|
||||
|
||||
for (w = 0; w < ocl_platform_cnt; w++) {
|
||||
if (!cd_ctx[w].selected) continue;
|
||||
|
||||
for (q = 0; q < cd_ctx[w].device_cnt; q++) {
|
||||
if (!cd_ctx[w].device[q].selected) continue;
|
||||
|
||||
ctx.profiles[g] = (int) profile_selected; // start with default
|
||||
|
||||
#if DEBUGME > 1
|
||||
printf("[debug] Initial profile for device %zu: %d\n", z, ctx.profiles[g]);
|
||||
#endif
|
||||
|
||||
// force profile to 0 with Apple GPU's to get it stable, and 1 for CPU
|
||||
if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
|
||||
if (cd_ctx[w].device[q].is_gpu) {
|
||||
if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel GPU's, 2 is the old 0
|
||||
} else {
|
||||
if (profile_selected > 3) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel CPU's, 3 is the old 1
|
||||
}
|
||||
}
|
||||
|
||||
// force profile to 0 with Intel GPU and 2 with Intel CPU's
|
||||
if (cd_ctx[w].is_intel && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
|
||||
if (cd_ctx[w].device[q].is_gpu) {
|
||||
ctx.profiles[g] = 0; // Intel GPU, work better with a very slow profile
|
||||
} else {
|
||||
if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Intel CPU (2 is the old 0)
|
||||
}
|
||||
}
|
||||
|
||||
// force profile to 2 with NVIDIA GPU's with NVIDIA platform
|
||||
if (cd_ctx[w].is_nv && cd_ctx[w].device[q].is_gpu && !strncmp(cd_ctx[w].device[q].vendor, "NVIDIA", 6)) {
|
||||
if (profile_selected > 10) {
|
||||
// NVIDIA RTX 3090 perform better with 5
|
||||
ctx.profiles[g] = (cd_ctx[w].device[q].sm_maj >= 8) ? 5 : PROFILE_DEFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
// probably untested hw, set profile to 0
|
||||
if (profile_selected == 0xff) {
|
||||
profile_selected = 0;
|
||||
ctx.profiles[g] = 0;
|
||||
}
|
||||
|
||||
// with same devices will be selected the best
|
||||
// but for different devices in the same platform we need the worst for now (todo)
|
||||
if (ctx.profiles[q] < profile) profile = ctx.profiles[q];
|
||||
}
|
||||
}
|
||||
|
||||
// profile consistency check
|
||||
if (profile < 0 || profile > 10) {
|
||||
printf("! Error: the selected profile is not allowed (%d)\n", profile);
|
||||
MEMORY_FREE_OPENCL(ctx, z)
|
||||
MEMORY_FREE_LIST_Z(matches, z)
|
||||
MEMORY_FREE_LIST_Z(matches_found, z)
|
||||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
unsigned int profile = get_smallest_profile (cd_ctx, ocl_platform_cnt);
|
||||
|
||||
// setup, phase 3 (finish him)
|
||||
|
||||
// z is device counter, dolphin buggy counter as well
|
||||
|
||||
z = 0;
|
||||
|
||||
for (w = 0; w < ocl_platform_cnt; w++) {
|
||||
|
@ -1296,8 +868,7 @@ int main(int argc, char **argv) {
|
|||
MEMORY_FREE_ALL
|
||||
exit(2);
|
||||
}
|
||||
} else {
|
||||
// one
|
||||
} else { // one
|
||||
if (!(matches[z] = (uint64_t *) calloc(1, sizeof(uint64_t)))) {
|
||||
printf("[%zu] Error: calloc (matches) failed (%d): %s\n", z, errno, strerror(errno));
|
||||
MEMORY_FREE_OPENCL(ctx, z)
|
||||
|
@ -1374,7 +945,7 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates[z], CL_TRUE, 0, sizeof(uint16_t) * ((1 << 20) * 3), candidates, 0, NULL, NULL);
|
||||
// err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL);
|
||||
// err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("[%zu] Error: clEnqueueWriteBuffer(ctx.candidates) failed (%d)\n", z, err);
|
||||
MEMORY_FREE_OPENCL(ctx, z)
|
||||
|
@ -1492,17 +1063,14 @@ int main(int argc, char **argv) {
|
|||
t_arg[z].aR2 = aR2;
|
||||
t_arg[z].nR1 = nR1;
|
||||
t_arg[z].nR2 = nR2;
|
||||
t_arg[z].max_step = max_step;
|
||||
t_arg[z].max_slices = max_step;
|
||||
t_arg[z].ocl_ctx = &ctx;
|
||||
t_arg[z].device_id = z;
|
||||
t_arg[z].async = (ctx.thread_sched_type == THREAD_TYPE_ASYNC);
|
||||
t_arg[z].thread_ctx = &th_ctx;
|
||||
|
||||
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
|
||||
t_arg[z].matches = matches[z];
|
||||
t_arg[z].matches_found = matches_found[z];
|
||||
t_arg[z].status = TH_START;
|
||||
}
|
||||
t_arg[z].r = false;
|
||||
t_arg[z].matches = matches[z];
|
||||
t_arg[z].matches_found = matches_found[z];
|
||||
t_arg[z].status = TH_START;
|
||||
}
|
||||
|
||||
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
|
||||
|
@ -1550,202 +1118,20 @@ int main(int argc, char **argv) {
|
|||
printf("Attack 5 - opencl - start (Max Slices %u, %s order", max_step, wu_queue_strdesc(ctx.queue_ctx.queue_type));
|
||||
|
||||
if (!verbose) printf(")\n\n");
|
||||
else printf(", Profile %d, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no");
|
||||
else printf(", Profile %u, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no");
|
||||
|
||||
if (gettimeofday(&cpu_t_start, NULL) == -1) {
|
||||
printf("! gettimeofday(start) failed (%d): %s\n", errno, strerror(errno));
|
||||
printf("Error: gettimeofday(start) failed (%d): %s\n", errno, strerror(errno));
|
||||
show_overall_time = false;
|
||||
}
|
||||
|
||||
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
|
||||
// crack hitag key or die tryin'
|
||||
unsigned int th_cnt;
|
||||
|
||||
bool done = false;
|
||||
|
||||
do { // master
|
||||
th_cnt = 0;
|
||||
|
||||
for (z = 0; z < thread_count; z++) {
|
||||
#if TDEBUG >= 1 && DEBUGME == 1
|
||||
if (thread_count == 1) { printf("[%zu] get status from slave ...\n", z); fflush(stdout); }
|
||||
#endif
|
||||
|
||||
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
|
||||
thread_status_t cur_status = t_arg[z].status;
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
|
||||
#if TDEBUG >= 1 && DEBUGME == 1
|
||||
if (thread_count == 1) { printf("[%zu] slave status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); }
|
||||
#endif
|
||||
|
||||
if (found) {
|
||||
#if TDEBUG >= 3
|
||||
printf("[%zu] Processing exit logic\n", z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if (cur_status < TH_FOUND_KEY) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[%zu] key found from another thread, set quit\n", z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
|
||||
t_arg[z].status = TH_END;
|
||||
t_arg[z].quit = true;
|
||||
if (cur_status == TH_WAIT) pthread_cond_signal(&th_ctx.thread_conds[z]);
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
} else {
|
||||
if (thread_count == 1) {
|
||||
th_cnt++;
|
||||
#if TDEBUG >= 1
|
||||
printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, thread_count);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cur_status == TH_WAIT) {
|
||||
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
|
||||
|
||||
if (found) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[%zu] key is found in another thread 1\n", z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
t_arg[z].status = TH_END;
|
||||
t_arg[z].quit = true;
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wu_queue_done(&ctx.queue_ctx) != QUEUE_EMPTY) {
|
||||
t_arg[z].status = TH_PROCESSING;
|
||||
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] slave [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
pthread_cond_signal(&th_ctx.thread_conds[z]);
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
continue;
|
||||
} else {
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] slave [%zu], max step reached. Quit.\n", z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
cur_status = t_arg[z].status = TH_END;
|
||||
t_arg[z].quit = true;
|
||||
|
||||
pthread_cond_signal(&th_ctx.thread_conds[z]);
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_status == TH_PROCESSING) {
|
||||
if (th_ctx.enable_condusleep) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] before pthread_cond_wait, TH_PROCESSING\n");
|
||||
#endif
|
||||
pthread_mutex_lock(&th_ctx.thread_mutex_usleep);
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] slave [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_cond_wait(&th_ctx.thread_cond_usleep, &th_ctx.thread_mutex_usleep);
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] slave [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if (t_arg[z].status == TH_FOUND_KEY) found = true;
|
||||
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutex_usleep);
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] after pthread_cond_wait, TH_PROCESSING\n");
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[master] slave [%zu], the key is found. set TH_END from TH_PROCESSING\n", z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
|
||||
t_arg[z].status = TH_END;
|
||||
t_arg[z].quit = true;
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_status == TH_ERROR) {
|
||||
// something went wrong
|
||||
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
|
||||
t_arg[z].status = TH_END;
|
||||
t_arg[z].quit = true;
|
||||
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// todo, do more clean exit logic
|
||||
if (cur_status >= TH_FOUND_KEY) {
|
||||
th_cnt++;
|
||||
|
||||
if (cur_status == TH_FOUND_KEY) {
|
||||
thread_setEnd(&th_ctx, t_arg);
|
||||
found = true;
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (th_cnt == thread_count) done = true;
|
||||
|
||||
} while (!done);
|
||||
|
||||
// end of async engine
|
||||
} else if (ctx.thread_sched_type == THREAD_TYPE_SEQ) {
|
||||
uint32_t step = 0;
|
||||
bool quit = false;
|
||||
|
||||
for (step = 0; step < max_step; step += thread_count) {
|
||||
for (z = 0; z < thread_count; z++) {
|
||||
t_arg[z].r = found;
|
||||
t_arg[z].matches = matches[z];
|
||||
t_arg[z].matches_found = matches_found[z];
|
||||
}
|
||||
|
||||
if ((ret = thread_start(&th_ctx, t_arg)) != 0) {
|
||||
printf("Error: thread_start() failed (%d): %s\n", ret, thread_strerror(ret));
|
||||
thread_destroy(&th_ctx);
|
||||
MEMORY_FREE_OPENCL(ctx, z)
|
||||
MEMORY_FREE_LIST_Z(matches, z)
|
||||
MEMORY_FREE_LIST_Z(matches_found, z)
|
||||
MEMORY_FREE_ALL
|
||||
exit(3);
|
||||
}
|
||||
|
||||
// waiting threads return
|
||||
thread_stop(&th_ctx);
|
||||
|
||||
for (z = 0; z < th_ctx.thread_count; z++) {
|
||||
if (t_arg[z].r) found = true;
|
||||
|
||||
if (t_arg[z].err) {
|
||||
error = true;
|
||||
quit = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (found || quit) break;
|
||||
}
|
||||
// Hokuto Hyakuretsu Ken
|
||||
ret = thread_start_scheduler (&th_ctx, t_arg, &ctx.queue_ctx);
|
||||
if (ret < 0) {
|
||||
printf("Error: thread_start_scheduler() failed (%d): %s\n", ret, thread_strerror(ret));
|
||||
error = true;
|
||||
} else if (ret == 0) {
|
||||
found = true;
|
||||
}
|
||||
|
||||
// if found, show the key here
|
||||
|
@ -1755,7 +1141,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
if (thread_count > 1) printf("[%zu] ", y);
|
||||
|
||||
printf("Key found @ slice %lu/%lu: ", t_arg[y].slice, t_arg[y].max_step);
|
||||
printf("Key found @ slice %zu/%zu: ", t_arg[y].slice, t_arg[y].max_slices);
|
||||
for (int i = 0; i < 6; i++) {
|
||||
printf("%02X", (uint8_t)(t_arg[y].key & 0xff));
|
||||
t_arg[y].key = t_arg[y].key >> 8;
|
||||
|
@ -1770,14 +1156,13 @@ int main(int argc, char **argv) {
|
|||
if (gettimeofday(&cpu_t_end, NULL) == 0) {
|
||||
timersub(&cpu_t_end, &cpu_t_start, &cpu_t_result);
|
||||
} else {
|
||||
printf("! gettimeofday(end) failed (%d): %s\n", errno, strerror(errno));
|
||||
printf("Error. gettimeofday(end) failed (%d): %s\n", errno, strerror(errno));
|
||||
show_overall_time = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
printf("\nError. %s\n", (error) ? "something went wrong :(" : "Key not found :|");
|
||||
if (error) exit(-1);
|
||||
}
|
||||
|
||||
printf("\nAttack 5 - opencl - end");
|
||||
|
@ -1792,17 +1177,19 @@ int main(int argc, char **argv) {
|
|||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
thread_stop(&th_ctx);
|
||||
if (!error && th_ctx.type != THREAD_TYPE_SEQ) thread_stop(&th_ctx);
|
||||
|
||||
#if DEBUGME > 1
|
||||
printf("destroy threads\n");
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if ((ret = thread_destroy(&th_ctx)) != 0) {
|
||||
if (!error) {
|
||||
if ((ret = thread_destroy(&th_ctx)) != 0) {
|
||||
#if DEBUGME > 0
|
||||
printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret));
|
||||
printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUGME > 1
|
||||
|
|
|
@ -40,6 +40,452 @@ bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsi
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
 * Return the lowest `profile` value among all selected devices of all
 * selected platforms.
 *
 * cd_ctx           - array of platform descriptors
 * ocl_platform_cnt - number of entries in cd_ctx
 *
 * If no selected device is found, or the lowest value found is greater
 * than 10, 0 is returned instead.
 */
unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt)
{
    unsigned int profile = 0xff; // sentinel, higher than any value accepted below

    for (size_t x = 0; x < ocl_platform_cnt; x++) {
        if (!cd_ctx[x].selected) continue;

        for (size_t y = 0; y < cd_ctx[x].device_cnt; y++) {
            if (!cd_ctx[x].device[y].selected) continue;

#if DEBUGME > 1
            // was printing an undeclared 'z' with a signed specifier
            printf("[debug] Initial profile for device %zu: %u\n", y, cd_ctx[x].device[y].profile);
#endif

            // with same devices will be selected the best
            // but for different devices in the same platform we need the worst for now (todo)
            if (cd_ctx[x].device[y].profile < profile) profile = cd_ctx[x].device[y].profile;
        }
    }

    // at worst, set profile to 0
    if (profile > 10) profile = 0;

    return profile;
}
|
||||
|
||||
/*
 * Bounded copy used for the fixed-size name/vendor/version fields.
 * Copies src_len bytes when that fits in dst_cap, otherwise dst_cap - 1 bytes.
 * The destination is expected to be zero-filled beforehand (the callers below
 * use calloc/memset), which is what keeps a truncated copy NUL-terminated.
 */
static void ocl_copy_info(char *dst, size_t dst_cap, const char *src, size_t src_len)
{
    strncpy(dst, src, (src_len < dst_cap) ? src_len : dst_cap - 1);
}

/*
 * Enumerate the OpenCL platforms and their devices, fill a freshly allocated
 * array of compute_platform_ctx_t (returned through *cd_ctx) and mark which
 * platforms/devices are selected according to plat_sel/dev_sel and
 * device_types_selected.
 *
 * profile_selected       - profile requested by the user, used to derive the
 *                          per-device profile for some vendors
 * device_types_selected  - device type filter forwarded to plat_dev_enabled()
 * platform_detected_cnt  - out: number of platforms stored in *cd_ctx
 * selected_platforms_cnt - in/out: incremented once per selected platform
 * selected_devices_cnt   - in/out: incremented once per selected device
 * cd_ctx                 - out: allocated platform array; on error, and when
 *                          'show' is true, it is freed here and set to NULL
 * plat_sel/plat_cnt      - platform selection list forwarded to plat_dev_enabled()
 * dev_sel/dev_cnt        - device selection list forwarded to plat_dev_enabled()
 * verbose                - print the discovered information
 * show                   - listing mode: suppresses the "Selected"/warning
 *                          lines and releases *cd_ctx before returning
 *
 * Returns 0 on success, or a negative value:
 *   -2 platform id array allocation failed, -3 clGetPlatformIDs failed,
 *   -4 no platform found, -5 context array allocation failed,
 *   -6/-8 clGetPlatformInfo failed, -7 other allocation failed,
 *   -9 clGetDeviceIDs failed, -10 clGetDeviceInfo failed.
 *
 * At most MAX_OPENCL_DEVICES platforms, and MAX_OPENCL_DEVICES devices per
 * platform, are considered; anything beyond that is ignored with a warning.
 */
int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected,
                    cl_uint *platform_detected_cnt, size_t *selected_platforms_cnt,
                    size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx,
                    unsigned int *plat_sel, unsigned int plat_cnt,
                    unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show)
{
    static const cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION };
    static const cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS };

    const unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(ocl_platforms_info[0]);
    const unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(ocl_devices_info[0]);
    const unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16
    const unsigned int ocl_device_max = MAX_OPENCL_DEVICES;

    int err = 0;
    int rc = 0;
    cl_uint ocl_platform_cnt = 0;
    unsigned int info_idx = 0;
    unsigned int global_device_id = 0;
    size_t tmp_len = 0;

    // everything released by the common error path lives at function scope
    char *tmp_buf = NULL;
    cl_device_id *ocl_devices = NULL;
    cl_platform_id *ocl_platforms = NULL;

    ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id));
    if (!ocl_platforms) {
        printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno));
        return -2;
    }

    // enum platforms
    err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt);
    if (err != CL_SUCCESS) {
        printf("Error: clGetPlatformIDs() failed (%d)\n", err);
        free(ocl_platforms);
        return -3;
    }

    if (ocl_platform_cnt == 0) {
        printf("No platforms found, exit\n");
        free(ocl_platforms);
        return -4;
    }

    // the reported count is the number of platforms available, which may be
    // larger than the number of ids actually written into ocl_platforms
    if (ocl_platform_cnt > ocl_platform_max) {
        printf("Warning: %u platforms detected, only the first %u will be used\n", ocl_platform_cnt, ocl_platform_max);
        ocl_platform_cnt = ocl_platform_max;
    }

    // allocate memory to hold info about platforms/devices
    *cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t));
    if (*cd_ctx == NULL) {
        printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno));
        free(ocl_platforms);
        return -5;
    }

    if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt);

    for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) {
        compute_platform_ctx_t *plat = &(*cd_ctx)[platform_idx];

        plat->platform_id = ocl_platforms[platform_idx];
        plat->selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0);

        if (plat->selected) (*selected_platforms_cnt)++;

        if (verbose) printf("\n-- Platform ID: %u\n", platform_idx + 1);

        // platform string attributes: 0 = name, 1 = vendor, 2 = version
        for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) {
            cl_platform_info ocl_info = ocl_platforms_info[info_idx];

            // first call only asks for the size of the value
            err = clGetPlatformInfo(plat->platform_id, ocl_info, 0, NULL, &tmp_len);
            if (err != CL_SUCCESS) {
                printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err);
                rc = -6;
                goto fail;
            }

            if (tmp_len > 0) {
                if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
                    printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
                    rc = -7;
                    goto fail;
                }

                err = clGetPlatformInfo(plat->platform_id, ocl_info, tmp_len, tmp_buf, NULL);
                if (err != CL_SUCCESS) {
                    printf("Error: clGetPlatformInfo(param) failed (%d)\n", err);
                    rc = -8;
                    goto fail;
                }
            } else {
                // empty value: use a placeholder string
                tmp_len = 4;
                if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
                    printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
                    rc = -7;
                    goto fail;
                }

                strncpy(tmp_buf, "N/A\0", tmp_len);
            }

            if (verbose) {
                const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version";

                printf("%14s: %s\n", tmp_info_desc, tmp_buf);
            }

            switch (info_idx) {
                case 0:
                    ocl_copy_info(plat->name, 0xff, tmp_buf, tmp_len);
                    break;
                case 1:
                    ocl_copy_info(plat->vendor, 0x40, tmp_buf, tmp_len);
                    break;
                case 2:
                    ocl_copy_info(plat->version, 0x40, tmp_buf, tmp_len);
                    break;
            }

            // classify the platform by its vendor string
            if (info_idx == 1) {
                if (!strncmp(tmp_buf, "NVIDIA", 6)) {
                    plat->is_nv = true;
                } else if (!strncmp(tmp_buf, "Apple", 5)) {
                    plat->is_apple = true;
                    plat->warning = true;
                } else if (!strncmp(tmp_buf, "Intel", 5)) {
                    plat->is_intel = true;
                } else if (!strncmp(tmp_buf, "The pocl project", 16)) {
                    plat->is_pocl = true;
                }
            }

            free(tmp_buf);
            tmp_buf = NULL;
        }

        if (!show && verbose) {
            printf("%14s: %s\n", "Selected", (plat->selected) ? "yes" : "no");
            if (plat->warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning");
        }

        // enum devices with this platform
        unsigned int ocl_device_cnt = 0;

        ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id));
        if (!ocl_devices) {
            printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno));
            rc = -7;
            goto fail;
        }

        err = clGetDeviceIDs(plat->platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt);

        // a zero count is handled before looking at err, so a platform
        // without devices is skipped instead of aborting the whole discovery
        if (ocl_device_cnt == 0) {
            if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %u\n", platform_idx);
            plat->device_cnt = 0;
            free(ocl_devices); // was leaked on this path
            ocl_devices = NULL;
            continue;
        }

        if (err != CL_SUCCESS) {
            printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err);
            rc = -9;
            goto fail;
        }

        // same as for platforms: the count may exceed what fits in ocl_devices
        // and in the fixed-size device[] array of the platform descriptor
        if (ocl_device_cnt > ocl_device_max) {
            printf("Warning: %u devices detected, only the first %u will be used\n", ocl_device_cnt, ocl_device_max);
            ocl_device_cnt = ocl_device_max;
        }

        if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt);

        plat->device_cnt = ocl_device_cnt;

        for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) {
            compute_device_ctx_t *dev = &plat->device[device_idx];
            cl_device_id ocl_device = ocl_devices[device_idx];

            memset(dev, 0, sizeof(compute_device_ctx_t));
            dev->platform_id = plat->platform_id;

            if (verbose) printf("---- * ID: %u\n", global_device_id + 1);

            // device attributes: 0 = type, 1 = name, 2 = version, 3 = driver version,
            // 4 = vendor, 5 = local mem type, 6 = max work-item sizes, 7 = compute units
            for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) {
                cl_device_info ocl_dev_info = ocl_devices_info[info_idx];

                if (info_idx == 0) {
                    cl_device_type device_type;

                    err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, NULL);
                    if (err != CL_SUCCESS) {
                        printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err);
                        rc = -10;
                        goto fail;
                    }

                    if (device_type & CL_DEVICE_TYPE_GPU) {
                        dev->is_gpu = 1;
                    } else if ((device_type & CL_DEVICE_TYPE_CPU) && plat->is_pocl) {
                        // CPU devices on pocl are limited to profile 0 or 1
                        dev->profile = (profile_selected > 1) ? 0 : profile_selected;
                    }

                    if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other");

                    dev->selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected);
                    global_device_id++;
                    if (dev->selected) (*selected_devices_cnt)++;
                    continue;
                }

                if (info_idx == 5) {
                    cl_device_local_mem_type local_mem_type;

                    err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, NULL);
                    if (err != CL_SUCCESS) {
                        printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err);
                        rc = -10;
                        goto fail;
                    }

                    if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) {
                        if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global");

                        if (plat->is_apple) {
                            // the device vendor (index 4) has already been stored at this point
                            if (strncmp(dev->vendor, "Intel", 5) != 0) {
                                dev->have_local_memory = true;

                                if (dev->is_gpu) {
                                    if (profile_selected > 2) dev->profile = PROFILE_DEFAULT; // GPU on the Apple platform
                                } else {
                                    if (profile_selected > 3) dev->profile = PROFILE_DEFAULT; // CPU on the Apple platform
                                }
                            }
                        } else if (plat->is_nv) {
                            dev->have_local_memory = true;
                        }
                    } else {
                        if (verbose) printf("%14s: None\n", "Local Mem Type");
                    }

                    if (verbose) printf("%14s: %s\n", "Local Mem Opt", (dev->have_local_memory) ? "yes" : "no");

                    continue;
                }

                if (info_idx == 6) {
                    size_t wis[3] = { 0 };

                    err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, NULL);
                    if (err != CL_SUCCESS) {
                        printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err);
                        rc = -10;
                        goto fail;
                    }

                    if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]);

#if APPLE_GPU_BROKEN == 1
                    if (wis[1] < GLOBAL_WS_1 && dev->is_apple_gpu) {
                        dev->unsupported = true;
                    }
#endif
                    continue;
                }

                if (info_idx == 7) {
                    cl_uint cores = 0;

                    err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, NULL);
                    if (err != CL_SUCCESS) {
                        printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err);
                        rc = -10;
                        goto fail;
                    }

                    if (verbose) printf("%14s: %u\n", "Compute Units", cores);

                    dev->compute_units = cores;
                    continue;
                }

                // remaining attributes (1..4) are strings: query the size, then the value
                tmp_len = 0;

                err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len);
                if (err != CL_SUCCESS) {
                    printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err);
                    rc = -10;
                    goto fail;
                }

                if (tmp_len > 0) {
                    if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
                        printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
                        rc = -7;
                        goto fail;
                    }

                    err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, NULL);
                    if (err != CL_SUCCESS) {
                        printf("Error: clGetDeviceInfo(param) failed (%d)\n", err);
                        rc = -10;
                        goto fail;
                    }
                } else {
                    // empty value: use a placeholder string
                    tmp_len = 4;
                    if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
                        printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
                        rc = -7;
                        goto fail;
                    }

                    strncpy(tmp_buf, "N/A\0", tmp_len);
                }

                if (verbose) {
                    const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor";

                    printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf);
                }

                switch (info_idx) {
                    case 1:
                        ocl_copy_info(dev->name, 0xff, tmp_buf, tmp_len);
                        break;
                    case 2:
                        ocl_copy_info(dev->version, 0x40, tmp_buf, tmp_len);
                        break;
                    case 3:
                        ocl_copy_info(dev->driver_version, 0x40, tmp_buf, tmp_len);
                        break;
                    case 4:
                        ocl_copy_info(dev->vendor, 0x40, tmp_buf, tmp_len);
                        break;
                }

                if (info_idx == 1) {
                    // force profile to 0-1 with Jetson Nano
                    if (strstr(tmp_buf, "Tegra") && plat->is_pocl) {
                        dev->profile = (profile_selected > 1) ? 0 : profile_selected;
                    }
                } else if (info_idx == 4) {
                    if (!strncmp(tmp_buf, "Intel", 5)) {
                        if (plat->is_apple) {
                            dev->is_apple_gpu = dev->is_gpu;
                        }

                        // force profile to 0 with Intel GPU and 2 with Intel CPU's
                        if (plat->is_intel) {
                            if (dev->is_gpu) {
                                dev->profile = 0; // Intel GPU's, work better with a very slow profile
                            } else {
                                dev->profile = (profile_selected > 2) ? PROFILE_DEFAULT : profile_selected; // Intel CPU's
                            }
                        }
                    }

                    if (!strncmp(tmp_buf, "NVIDIA", 6) && plat->is_nv) {
                        unsigned int sm_maj = 0, sm_min = 0;

                        // 0x4000 / 0x4001: NVIDIA-specific compute capability major/minor
                        // queries (cl_nv_device_attribute_query) - TODO confirm against the extension header
                        err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, NULL);
                        err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, NULL);

                        if (err != CL_SUCCESS) {
                            printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err);
                            rc = -10;
                            goto fail;
                        }

                        dev->sm_maj = sm_maj;
                        dev->sm_min = sm_min;

                        if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min);

                        if (sm_maj >= 5) { // >= Maxwell
                            // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3
                            // Requires sm_50 or higher.
                            dev->have_lop3 = true;
                        } else {
                            dev->warning = true;
                        }

                        dev->is_nv = true;

                        if (dev->is_gpu) {
                            if (profile_selected > 10) {
                                // NVIDIA RTX 3090 perform better with 5
                                dev->profile = (sm_maj >= 8) ? 5 : PROFILE_DEFAULT;
                            }
                        }
                    } else {
                        dev->warning = true;
                    }
                }

                free(tmp_buf);
                tmp_buf = NULL;
            }

            if (!show && verbose) printf("%14s: %s\n", "Selected", (dev->selected) ? "yes" : "no");

            if (dev->unsupported) {
                printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning");
                continue; // device_id is intentionally left unset for unsupported devices
            }

            if (dev->warning) {
                if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning");
            }

            dev->device_id = ocl_device;
        }

        free(ocl_devices);
        ocl_devices = NULL;
    }

    free(ocl_platforms);
    ocl_platforms = NULL;

    *platform_detected_cnt = ocl_platform_cnt;

    // in listing mode the caller does not need the contexts
    if (show) {
        free(*cd_ctx);
        *cd_ctx = NULL; // do not hand a dangling pointer back to the caller
    }

    return 0;

fail:
    // common error exit once *cd_ctx has been allocated; free(NULL) is a no-op
    free(tmp_buf);
    free(ocl_devices);
    free(*cd_ctx);
    *cd_ctx = NULL;
    free(ocl_platforms);
    return rc;
}
|
||||
|
||||
int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id) {
|
||||
int err = 0;
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ License: GNU General Public License v3 or any later version (see LICENSE.txt)
|
|||
#include <stdbool.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
// max number of concurrent devices (tested up to 4x RTX 3090)
|
||||
#define MAX_OPENCL_DEVICES 16
|
||||
|
@ -54,9 +55,10 @@ typedef struct compute_device_ctx {
|
|||
bool warning, unsupported;
|
||||
|
||||
bool selected;
|
||||
bool enabled;
|
||||
|
||||
unsigned char pad1[4];
|
||||
unsigned char pad1[1];
|
||||
unsigned int profile;
|
||||
|
||||
unsigned int sm_maj;
|
||||
unsigned int sm_min;
|
||||
unsigned int compute_units;
|
||||
|
@ -70,11 +72,11 @@ typedef struct compute_platform_ctx {
|
|||
unsigned int device_cnt;
|
||||
unsigned int compute_units_max;
|
||||
|
||||
bool is_nv, is_apple, is_intel;
|
||||
bool is_nv, is_apple, is_intel, is_pocl;
|
||||
bool warning;
|
||||
bool selected;
|
||||
|
||||
unsigned char pad1[3];
|
||||
unsigned char pad1[2];
|
||||
compute_device_ctx_t device[0x10];
|
||||
|
||||
char name[0xff];
|
||||
|
@ -94,7 +96,7 @@ typedef struct opencl_ctx {
|
|||
|
||||
size_t *global_ws;
|
||||
size_t *local_ws;
|
||||
int *profiles;
|
||||
unsigned int *profiles;
|
||||
|
||||
cl_device_id *device_ids; // compute device id's array
|
||||
cl_context *contexts; // compute contexts
|
||||
|
@ -120,7 +122,8 @@ typedef struct opencl_ctx {
|
|||
} opencl_ctx_t;
|
||||
|
||||
bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsigned int cur_type, unsigned int allow_type);
|
||||
|
||||
unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt);
|
||||
int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected, cl_uint *ocl_platform_cnt, size_t *selected_platforms_cnt, size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx, unsigned int *plat_sel, unsigned int plat_cnt, unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show);
|
||||
int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id);
|
||||
|
||||
#endif // OPENCL_H
|
||||
|
|
|
@ -340,7 +340,7 @@ int wu_queue_pop(wu_queue_ctx_t *ctx, wu_queue_data_t *wu, short remove) {
|
|||
break;
|
||||
case QUEUE_TYPE_RANDOM: // from the head
|
||||
#if TEST_UNIT == 1
|
||||
fprintf(stdout, "pop id %ld\n", wu->id);
|
||||
fprintf(stdout, "pop id %zu\n", wu->id);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if (ptrPrev == NULL) {
|
||||
|
@ -441,9 +441,9 @@ int main(void) {
|
|||
|
||||
wu_queue_type_t types[4] = { QUEUE_TYPE_FORWARD, QUEUE_TYPE_REVERSE, QUEUE_TYPE_RANDOM, 1234 };
|
||||
int types_max = (int)(sizeof(types) / sizeof(wu_queue_type_t));
|
||||
int ret = 0;
|
||||
|
||||
for (i = 0; i < types_max; i++) {
|
||||
int ret = 0;
|
||||
printf("[%d] trying wu_queue_init() in %s mode\n", i, wu_queue_strdesc(types[i]));
|
||||
|
||||
if ((ret = wu_queue_init(&ctx, types[i])) != 0) {
|
||||
|
|
|
@ -95,7 +95,7 @@ typedef struct wu_queue_ctx {
|
|||
|
||||
// mutex
|
||||
pthread_mutexattr_t queue_mutex_attr;
|
||||
// unsigned char pad1[4];
|
||||
unsigned char pad1[4];
|
||||
pthread_mutex_t queue_mutex;
|
||||
|
||||
} wu_queue_ctx_t;
|
||||
|
|
|
@ -56,6 +56,8 @@ const char *thread_strerror(int error) {
|
|||
return (const char *) "GENERIC ERROR";
|
||||
case THREAD_ERROR_ALLOC:
|
||||
return (const char *) "ALLOC FAILED";
|
||||
case THREAD_ERROR_INTERNAL:
|
||||
return (const char *) "INTERNAL ERROR";
|
||||
}
|
||||
|
||||
return (const char *) "GENERIC";
|
||||
|
@ -174,6 +176,198 @@ int thread_init(thread_ctx_t *ctx, short type, size_t thread_count) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Drive the worker threads until the key is found or the work is exhausted.
 *
 * ctx       - initialized thread context (type, thread_count, mutexes/conds)
 * t_arg     - per-thread argument array, ctx->thread_count entries
 * queue_ctx - work-unit queue, consulted in ASYNC mode to decide whether a
 *             waiting thread gets more work
 *
 * THREAD_TYPE_SEQ: threads are started and stopped in rounds until a thread
 * reports a result (t_arg[z].r), an error occurs, or max_slices is reached.
 * THREAD_TYPE_ASYNC: the calling thread acts as master, polling each worker's
 * status and waking, ending or waiting on workers accordingly.
 *
 * Returns 0 when the key was found and 1 when it was not. In SEQ mode a
 * non-zero thread_start() result is returned as-is, and
 * THREAD_ERROR_INTERNAL is returned (after thread_destroy()) when a worker
 * flagged an error. Callers in this project appear to treat negative values
 * as failures - verify against the error code definitions.
 */
int thread_start_scheduler (thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx)
{
    size_t z = 0;
    bool found = false;
    bool done = false;
    unsigned int th_cnt = 0;

    if (ctx->type == THREAD_TYPE_SEQ) {
        bool error = false;

        // size_t counter: it is compared against max_slices and advanced by
        // thread_count, so a 32-bit counter could wrap before reaching the bound
        for (size_t slice = 0; slice < t_arg[0].max_slices; slice += ctx->thread_count) {
            int err = thread_start(ctx, t_arg);

            if (err != 0) {
                printf("Error: thread_start() failed (%d): %s\n", err, thread_strerror(err));
            } else {
                // waiting threads return
                thread_stop(ctx);
            }

            for (z = 0; z < ctx->thread_count; z++) {
                if (t_arg[z].r) {
                    found = true;
                    break;
                }

                if (t_arg[z].err) {
                    error = true;
                }
            }

            // internal error reported by a worker
            if (error && err == 0) {
                thread_destroy(ctx);
                err = THREAD_ERROR_INTERNAL;
            }

            if (err != 0) return err;

            if (found) break;
        }
    } else if (ctx->type == THREAD_TYPE_ASYNC) {

        // crack hitag key or die tryin'
        do { // master
            th_cnt = 0; // number of threads seen in a terminal state during this pass

            for (z = 0; z < ctx->thread_count; z++) {
#if TDEBUG >= 1 && DEBUGME == 1
                if (ctx->thread_count == 1) { printf("[%zu] get status from thread ...\n", z); fflush(stdout); }
#endif

                // snapshot of the worker status, taken under its own mutex
                pthread_mutex_lock(&ctx->thread_mutexs[z]);
                thread_status_t cur_status = t_arg[z].status;
                pthread_mutex_unlock(&ctx->thread_mutexs[z]);

#if TDEBUG >= 1 && DEBUGME == 1
                if (ctx->thread_count == 1) { printf("[%zu] thread status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); }
#endif

                // once the key is known, every remaining worker is only told to quit;
                // 'found' is therefore always false in the code after this block
                if (found) {
#if TDEBUG >= 3
                    printf("[%zu] Processing exit logic\n", z);
                    fflush(stdout);
#endif

                    if (cur_status < TH_FOUND_KEY) {
#if TDEBUG >= 1
                        printf("[%zu] key found from another thread, set quit\n", z);
                        fflush(stdout);
#endif
                        pthread_mutex_lock(&ctx->thread_mutexs[z]);
                        t_arg[z].status = TH_END;
                        t_arg[z].quit = true;
                        // a worker parked in TH_WAIT has to be woken to notice the quit flag
                        if (cur_status == TH_WAIT) pthread_cond_signal(&ctx->thread_conds[z]);
                        pthread_mutex_unlock(&ctx->thread_mutexs[z]);
                    } else {
                        if (ctx->thread_count == 1) {
                            th_cnt++;
#if TDEBUG >= 1
                            printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, ctx->thread_count);
                            fflush(stdout);
#endif
                        }
                    }
                    continue;
                }

                if (cur_status == TH_WAIT) {
                    pthread_mutex_lock(&ctx->thread_mutexs[z]);

                    if (wu_queue_done(queue_ctx) != QUEUE_EMPTY) {
                        // work left: hand the worker another round
                        t_arg[z].status = TH_PROCESSING;

#if TDEBUG >= 1
                        printf("[master] thread [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status));
                        fflush(stdout);
#endif

                        pthread_cond_signal(&ctx->thread_conds[z]);
                        pthread_mutex_unlock(&ctx->thread_mutexs[z]);
                        continue;
                    } else {
#if TDEBUG >= 1
                        printf("[master] thread [%zu], max step reached. Quit.\n", z);
                        fflush(stdout);
#endif

                        // queue exhausted: end this worker; cur_status is updated too so
                        // the terminal-state accounting below counts it in this pass
                        cur_status = t_arg[z].status = TH_END;
                        t_arg[z].quit = true;

                        pthread_cond_signal(&ctx->thread_conds[z]);
                        pthread_mutex_unlock(&ctx->thread_mutexs[z]);
                    }
                }

                if (cur_status == TH_PROCESSING) {
                    if (ctx->enable_condusleep) {
#if TDEBUG >= 1
                        printf("[master] before pthread_cond_wait, TH_PROCESSING\n");
                        fflush(stdout);
#endif
                        pthread_mutex_lock(&ctx->thread_mutex_usleep);
#if TDEBUG >= 1
                        printf("[master] thread [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status));
                        fflush(stdout);
#endif
                        // NOTE(review): single wait without a predicate loop; a spurious
                        // wakeup only causes an extra polling pass, but a signal sent
                        // before this point would be missed - confirm the worker side
                        pthread_cond_wait(&ctx->thread_cond_usleep, &ctx->thread_mutex_usleep);
#if TDEBUG >= 1
                        printf("[master] thread [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status));
                        fflush(stdout);
#endif
                        if (t_arg[z].status == TH_FOUND_KEY) found = true;

                        pthread_mutex_unlock(&ctx->thread_mutex_usleep);
#if TDEBUG >= 1
                        printf("[master] after pthread_cond_wait, TH_PROCESSING\n");
                        fflush(stdout);
#endif
                        continue;
                    }
                }

                if (cur_status == TH_ERROR) {
                    // something went wrong
                    pthread_mutex_lock(&ctx->thread_mutexs[z]);
                    t_arg[z].status = TH_END;
                    t_arg[z].quit = true;
                    pthread_mutex_unlock(&ctx->thread_mutexs[z]);
                    continue;
                }

                if (cur_status >= TH_FOUND_KEY) {
                    th_cnt++;

                    if (cur_status == TH_FOUND_KEY) {
                        thread_setEnd(ctx, t_arg);
                        found = true;
                        done = true;
                    }
                }
            }

            // all workers are in a terminal state
            if (th_cnt == ctx->thread_count) done = true;

        } while (!done);
    }

    return (found) ? 0 : 1;
}
|
||||
|
||||
int thread_destroy(thread_ctx_t *ctx) {
|
||||
if (!ctx) return -1;
|
||||
if (!ctx->init) return -2;
|
||||
|
@ -258,8 +452,6 @@ const char *thread_status_strdesc(thread_status_t s) {
|
|||
return (const char *) "PROCESSING";
|
||||
case TH_ERROR:
|
||||
return (const char *) "ERROR";
|
||||
case TH_STOP:
|
||||
return (const char *) "STOP";
|
||||
case TH_FOUND_KEY:
|
||||
return (const char *) "FOUND_KEY";
|
||||
case TH_END:
|
||||
|
@ -274,11 +466,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) {
|
|||
|
||||
size_t z;
|
||||
|
||||
int m_ret = 0;
|
||||
int c_ret = 0;
|
||||
|
||||
for (z = 0; z < ctx->thread_count; z++) {
|
||||
m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]);
|
||||
int m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]);
|
||||
if (m_ret != 0) {
|
||||
tprintf("[%zu] [%s] Error: pthread_mutex_lock() failed (%d): %s\n", z, __func__, m_ret, strerror(m_ret));
|
||||
}
|
||||
|
@ -296,10 +487,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) {
|
|||
}
|
||||
|
||||
#if DEBUGME > 0
|
||||
tprintf("[%zu] [%s] Set thread status to TH_STOP\n", z, __func__);
|
||||
tprintf("[%zu] [%s] Set thread status to TH_END\n", z, __func__);
|
||||
#endif
|
||||
|
||||
t_arg[z].status = TH_STOP;
|
||||
t_arg[z].status = TH_END;
|
||||
|
||||
if (tmp == TH_WAIT) {
|
||||
#if DEBUGME > 0
|
||||
|
@ -366,7 +557,7 @@ void *computing_process(void *arg) {
|
|||
|
||||
if (!ctx->force_hitag2_opencl) {
|
||||
#if DEBUGME >= 2
|
||||
printf("[slave][%zu] master, I found %5u candidates @ slice %zu\n", z, matches_found[0], a->slice + 1);
|
||||
printf("[%s][%zu] master, I found %5u candidates @ slice %zu\n", __func__, z, matches_found[0], a->slice + 1);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -378,7 +569,7 @@ void *computing_process(void *arg) {
|
|||
// the OpenCL kernel return only one key if found, else nothing
|
||||
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, I found the key @ slice %zu\n", z, a->slice + 1);
|
||||
printf("[%s][%zu] master, I found the key @ slice %zu\n", __func__, z, a->slice + 1);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -400,32 +591,31 @@ void *computing_process_async(void *arg) {
|
|||
// fetching data from thread struct, I hope they are good
|
||||
thread_status_t status = a->status;
|
||||
|
||||
uint64_t *matches = a->matches;
|
||||
uint32_t *matches_found = a->matches_found;
|
||||
uint32_t uid = a->uid;
|
||||
uint32_t aR2 = a->aR2;
|
||||
uint32_t nR1 = a->nR1;
|
||||
uint32_t nR2 = a->nR2;
|
||||
|
||||
uint64_t *matches = a->matches;
|
||||
uint32_t *matches_found = a->matches_found;
|
||||
size_t max_step = a->max_step;
|
||||
size_t max_slices = a->max_slices;
|
||||
|
||||
opencl_ctx_t *ctx = a->ocl_ctx;
|
||||
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
|
||||
uint64_t off = 0;
|
||||
// size_t slice = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (status == TH_START) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status));
|
||||
printf("[%s][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", __func__, z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status));
|
||||
#endif
|
||||
status = TH_WAIT;
|
||||
// proceed to next
|
||||
}
|
||||
|
||||
do { // slave
|
||||
do {
|
||||
if (status == TH_WAIT) {
|
||||
pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]);
|
||||
|
||||
|
@ -433,7 +623,7 @@ void *computing_process_async(void *arg) {
|
|||
|
||||
if (a->status == TH_END) { // other threads found the key
|
||||
fflush(stdout);
|
||||
status = TH_END;
|
||||
//status = TH_END;
|
||||
a->quit = true;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
pthread_exit(NULL);
|
||||
|
@ -444,7 +634,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slate][%zu] after pthread_cond_signal TH_WAIT\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
|
@ -452,7 +642,7 @@ void *computing_process_async(void *arg) {
|
|||
}
|
||||
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", z, thread_status_strdesc(status));
|
||||
printf("[%s][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", __func__, z, thread_status_strdesc(status));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -461,7 +651,7 @@ void *computing_process_async(void *arg) {
|
|||
status = a->status; // read new status from master
|
||||
|
||||
#if TDEBUG >= 2
|
||||
printf("[slave][%zu] master, got the signal with new state: %s.\n", z, thread_status_strdesc(status));
|
||||
printf("[%s][%zu] master, got the signal with new state: %s.\n", __func__, z, thread_status_strdesc(status));
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -469,7 +659,7 @@ void *computing_process_async(void *arg) {
|
|||
|
||||
if (status == TH_WAIT) {
|
||||
#if TDEBUG >=1
|
||||
printf("[slave] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n");
|
||||
printf("[%s] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n", __func__);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
break;
|
||||
|
@ -478,7 +668,7 @@ void *computing_process_async(void *arg) {
|
|||
|
||||
if (status == TH_ERROR) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, got error signal, proceed with exit\n", z);
|
||||
printf("[%s][%zu] master, got error signal, proceed with exit\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_exit(NULL);
|
||||
|
@ -486,7 +676,7 @@ void *computing_process_async(void *arg) {
|
|||
|
||||
if (status == TH_PROCESSING) {
|
||||
#if TDEBUG >= 2
|
||||
printf("[slave][%zu] master, got a work-unit, processing ...\n", z);
|
||||
printf("[%s][%zu] master, got a work-unit, processing ...\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -521,7 +711,7 @@ void *computing_process_async(void *arg) {
|
|||
a->status = TH_ERROR;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, something is broken, exit\n", z);
|
||||
printf("[%s][%zu] master, something is broken, exit\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -529,7 +719,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_ERROR\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_ERROR\n", __func__, z);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
}
|
||||
|
@ -539,12 +729,12 @@ void *computing_process_async(void *arg) {
|
|||
}
|
||||
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, process is done but no candidates found\n", z);
|
||||
printf("[%s][%zu] master, process is done but no candidates found\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]);
|
||||
|
||||
if (a->slice >= max_step) a->status = TH_END;
|
||||
if (a->slice >= max_slices) a->status = TH_END;
|
||||
else a->status = TH_WAIT;
|
||||
|
||||
status = a->status;
|
||||
|
@ -555,7 +745,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_WAIT\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
|
@ -566,7 +756,7 @@ void *computing_process_async(void *arg) {
|
|||
|
||||
if (!ctx->force_hitag2_opencl) {
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, we got %5u candidates. Proceed to validation\n", z, matches_found[0]);
|
||||
printf("[%s][%zu] master, we got %5u candidates. Proceed to validation\n", __func__, z, matches_found[0]);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -576,7 +766,7 @@ void *computing_process_async(void *arg) {
|
|||
a->status = TH_END;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, Another thread found the key, quit 2 \n", z);
|
||||
printf("[%s][%zu] master, Another thread found the key, quit 2 \n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -584,7 +774,8 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_END\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z);
|
||||
fflush (stdout);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
}
|
||||
|
@ -600,7 +791,7 @@ void *computing_process_async(void *arg) {
|
|||
a->quit = true;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", z, a->s, a->slice + 1);
|
||||
printf("[%s][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", __func__, z, a->s, a->slice + 1);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -608,7 +799,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
}
|
||||
|
@ -622,7 +813,7 @@ void *computing_process_async(void *arg) {
|
|||
a->status = TH_END;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, Another thread found the key, quit 1 \n", z);
|
||||
printf("[%s][%zu] master, Another thread found the key, quit 1 \n", __func__, z);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -630,7 +821,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_END\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
}
|
||||
|
@ -651,7 +842,7 @@ void *computing_process_async(void *arg) {
|
|||
a->quit = true;
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] master, I found the key at slice %zu\n", z, a->slice + 1);
|
||||
printf("[%s][%zu] master, I found the key at slice %zu\n", __func__, z, a->slice + 1);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
|
@ -659,7 +850,7 @@ void *computing_process_async(void *arg) {
|
|||
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
|
||||
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
|
||||
#if TDEBUG >= 1
|
||||
printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
|
||||
printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z);
|
||||
#endif
|
||||
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
|
||||
}
|
||||
|
@ -671,10 +862,10 @@ void *computing_process_async(void *arg) {
|
|||
if (status >= TH_FOUND_KEY) {
|
||||
#if TDEBUG >= 1
|
||||
if (status == TH_FOUND_KEY) {
|
||||
printf("[slave][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", z);
|
||||
printf("[%s][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", __func__, z);
|
||||
fflush(stdout);
|
||||
} else if (status == TH_END) {
|
||||
printf("[slave][%zu] master, TH_END reached\n", z);
|
||||
printf("[%s][%zu] master, TH_END reached\n", __func__, z);
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,6 @@ typedef enum thread_status {
|
|||
TH_START = 0,
|
||||
TH_WAIT,
|
||||
TH_PROCESSING,
|
||||
TH_STOP,
|
||||
TH_ERROR,
|
||||
TH_FOUND_KEY,
|
||||
TH_END
|
||||
|
@ -66,7 +65,8 @@ typedef enum thread_error {
|
|||
THREAD_ERROR_MUTEX_USLEEP = -11,
|
||||
THREAD_ERROR_COND_USLEEP = -12,
|
||||
THREAD_ERROR_GENERIC = -13,
|
||||
THREAD_ERROR_ALLOC = -14
|
||||
THREAD_ERROR_ALLOC = -14,
|
||||
THREAD_ERROR_INTERNAL = -15
|
||||
|
||||
} thread_error_t;
|
||||
|
||||
|
@ -92,6 +92,7 @@ typedef struct threads_ctx {
|
|||
pthread_attr_t attr;
|
||||
pthread_mutexattr_t mutex_attr;
|
||||
|
||||
unsigned char pad3[4];
|
||||
} thread_ctx_t;
|
||||
|
||||
// used by threads engine
|
||||
|
@ -105,13 +106,13 @@ typedef struct thread_arg {
|
|||
bool r;
|
||||
bool err;
|
||||
bool quit;
|
||||
bool async;
|
||||
|
||||
unsigned char pad2[1];
|
||||
uint64_t off;
|
||||
uint64_t *matches;
|
||||
uint32_t *matches_found;
|
||||
size_t slice;
|
||||
size_t max_step;
|
||||
size_t max_slices;
|
||||
size_t device_id;
|
||||
|
||||
uint64_t key;
|
||||
|
@ -124,11 +125,12 @@ typedef struct thread_arg {
|
|||
int thread_init(thread_ctx_t *ctx, short type, size_t thread_count);
|
||||
int thread_start(thread_ctx_t *ctx, thread_args_t *args);
|
||||
int thread_stop(thread_ctx_t *ctx);
|
||||
int thread_start_scheduler(thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx);
|
||||
bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg);
|
||||
|
||||
void tprintf(const char *restrict format, ...);
|
||||
const char *thread_strerror(int error);
|
||||
const char *thread_status_strdesc(thread_status_t s);
|
||||
bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg);
|
||||
|
||||
void *computing_process(void *arg);
|
||||
void *computing_process_async(void *arg);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue