move thread scheduler to threads.c, move opencl engine to opencl.c, minor fixes

This commit is contained in:
Gabriele Gristina 2021-01-16 01:08:30 +01:00
commit 3ecebacdd2
7 changed files with 753 additions and 724 deletions

View file

@ -151,7 +151,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn
unsigned int tmp_sel = (unsigned int) strtoul(next, NULL, 10);
if (errno == EINVAL || errno == ERANGE ||
(tmp_sel < 1 || tmp_sel > 16)) {
printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
return false;
}
@ -165,7 +165,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn
} else {
out[0] = (unsigned int) strtoul(in, NULL, 10);
if (errno == EINVAL || errno == ERANGE) {
printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'");
return false;
}
*out_cnt = 1;
@ -191,7 +191,6 @@ int main(int argc, char **argv) {
unsigned int profile_selected = 2;
unsigned int queue_type = 0;
uint32_t target = 0;
uint32_t **matches_found = NULL;
uint64_t **matches = NULL;
@ -211,7 +210,7 @@ int main(int argc, char **argv) {
// 0: gpu, 1: cpu, 2: all
device_types_selected = (unsigned int) strtoul(optarg, NULL, 10);
if (device_types_selected > 2) {
printf("! Invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n");
printf("Error: invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n");
usage(argv[0]);
}
break;
@ -222,7 +221,7 @@ int main(int argc, char **argv) {
case 'P':
profile_selected = (unsigned int) strtoul(optarg, NULL, 10);
if (profile_selected > 10) {
printf("! Invalid PROFILE argument (accepted valuee: from 0 to 10)\n");
printf("Error: invalid PROFILE argument (accepted valuee: from 0 to 10)\n");
usage(argv[0]);
}
break;
@ -233,7 +232,7 @@ int main(int argc, char **argv) {
// 0: forward, 1: reverse, 2: random
queue_type = (unsigned int) strtoul(optarg, NULL, 10);
if (queue_type != QUEUE_TYPE_FORWARD && queue_type != QUEUE_TYPE_REVERSE && queue_type != QUEUE_TYPE_RANDOM) {
printf("! Invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n");
printf("Error: invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n");
usage(argv[0]);
}
break;
@ -298,13 +297,13 @@ int main(int argc, char **argv) {
printf("Device types selected : %s\n", (device_types_selected == CL_DEVICE_TYPE_GPU) ? "GPU" : (device_types_selected == CL_DEVICE_TYPE_CPU) ? "CPU" : "ALL");
printf("Scheduler selected : %s\n", (thread_scheduler_type_selected == 0) ? "sequential" : "async");
printf("Profile selected : %d\n", profile_selected);
printf("Profile selected : %u\n", profile_selected);
}
if (!show) {
if ((argc - optind) < 5) {
#if DEBUGME > 0
printf("! Invalid extra arguments\n");
printf("Error: invalid extra arguments\n");
#endif
usage(argv[0]);
}
@ -313,41 +312,41 @@ int main(int argc, char **argv) {
switch (e) {
case 0: // UID
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid UID length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid UID length\n"); usage(argv[0]); }
uid = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
} else {
if (strlen(argv[optind]) != 8) { printf("! Invalid UID length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 8) { printf("Error: invalid UID length\n"); usage(argv[0]); }
uid = (uint32_t) rev32(hexreversetoulong(argv[optind]));
}
break;
case 1: // nR1
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); }
nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
} else {
if (strlen(argv[optind]) != 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); }
nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind]));
}
break;
case 2: // aR1
if (strlen(argv[optind]) != 8) { printf("! Invalid aR1 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 8) { printf("Error: invalid aR1 length\n"); usage(argv[0]); }
aR1 = (uint32_t) strtoul(argv[optind], NULL, 16);
break;
case 3: // nR2
if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) {
if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); }
nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2));
} else {
if (strlen(argv[optind]) != 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); }
nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind]));
}
break;
case 4: // aR2
if (strlen(argv[optind]) != 8) { printf("! Invalid aR2 length\n"); usage(argv[0]); }
if (strlen(argv[optind]) != 8) { printf("Error: invalid aR2 length\n"); usage(argv[0]); }
aR2 = (uint32_t) strtoul(argv[optind], NULL, 16);
break;
@ -371,7 +370,7 @@ int main(int argc, char **argv) {
if (!show) {
if (verbose) printf("uid: %u, aR2: %u, nR1: %u, nR2: %u\n", checks[0], checks[1], checks[2], checks[3]);
target = ~aR1;
uint32_t target = ~aR1;
// bitslice inverse target bits
bitslice(~target, keystream);
@ -446,389 +445,33 @@ int main(int argc, char **argv) {
close(fd);
}
// now discover and set up compute device(s)
int err = 0;
cl_uint ocl_platform_cnt = 0;
unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16
cl_platform_id *ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id));
if (!ocl_platforms) {
printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(ocl_platforms)
// enum platforms
err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformIDs() failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (ocl_platform_cnt == 0) {
printf("No platforms found, exit\n");
MEMORY_FREE_ALL
exit(2);
}
// allocate memory to hold info about platforms/devices
compute_platform_ctx_t *cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t));
if (!cd_ctx) {
printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno));
MEMORY_FREE_ALL
exit(err);
}
MEMORY_FREE_ADD(cd_ctx)
cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION };
unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(cl_platform_info);
cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS };
unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(cl_device_info);
unsigned int info_idx = 0;
size_t tmp_len = 0;
char *tmp_buf = NULL;
unsigned int global_device_id = 0;
size_t selected_platforms_cnt = 0;
size_t selected_devices_cnt = 0;
compute_platform_ctx_t *cd_ctx = NULL;
if (show) verbose = true;
if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt);
for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) {
cd_ctx[platform_idx].platform_id = ocl_platforms[platform_idx];
cd_ctx[platform_idx].selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0);
if (cd_ctx[platform_idx].selected) selected_platforms_cnt++;
if (verbose) printf("\n-- Platform ID: %d\n", platform_idx + 1);
for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) {
cl_platform_info ocl_info = ocl_platforms_info[info_idx];
err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, 0, NULL, &tmp_len);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (tmp_len > 0) {
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(tmp_buf)
err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, tmp_len, tmp_buf, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformInfo(param) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
} else {
tmp_len = 4;
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(tmp_buf)
strncpy(tmp_buf, "N/A\0", tmp_len);
}
if (verbose) {
const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version";
printf("%14s: %s\n", tmp_info_desc, tmp_buf);
}
switch (info_idx) {
case 0:
strncpy(cd_ctx[platform_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
break;
case 1:
strncpy(cd_ctx[platform_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 2:
strncpy(cd_ctx[platform_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
}
if (info_idx == 1) {
// todo: do the same this devices
if (!strncmp(tmp_buf, "NVIDIA", 6)) cd_ctx[platform_idx].is_nv = true;
else if (!strncmp(tmp_buf, "Apple", 5)) { cd_ctx[platform_idx].is_apple = true; cd_ctx[platform_idx].warning = true; }
else if (!strncmp(tmp_buf, "Intel", 5)) cd_ctx[platform_idx].is_intel = true;
}
MEMORY_FREE_DEL(tmp_buf)
}
if (!show && verbose) {
printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].selected) ? "yes" : "no");
if (cd_ctx[platform_idx].warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning");
}
// enum devices with this platform
unsigned int ocl_device_cnt = 0;
unsigned int ocl_device_max = MAX_OPENCL_DEVICES;
cl_device_id *ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id));
if (!ocl_devices) {
printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(ocl_devices)
err = clGetDeviceIDs(cd_ctx[platform_idx].platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt);
if (ocl_device_cnt == 0) {
if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %d\n", platform_idx);
cd_ctx[platform_idx].device_cnt = 0;
continue;
}
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt);
cd_ctx[platform_idx].device_cnt = ocl_device_cnt;
for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) {
memset(&cd_ctx[platform_idx].device[device_idx], 0, sizeof(compute_device_ctx_t));
cl_device_id ocl_device = ocl_devices[device_idx];
cd_ctx[platform_idx].device[device_idx].platform_id = cd_ctx[platform_idx].platform_id;
if (verbose) printf("---- * ID: %u\n", global_device_id + 1);
for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) {
cl_device_info ocl_dev_info = ocl_devices_info[info_idx];
if (info_idx == 0) {
cl_device_type device_type;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (device_type & CL_DEVICE_TYPE_GPU) cd_ctx[platform_idx].device[device_idx].is_gpu = 1;
if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other");
cd_ctx[platform_idx].device[device_idx].selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected);
global_device_id++;
if (cd_ctx[platform_idx].device[device_idx].selected) selected_devices_cnt++;
continue;
} else if (info_idx == 5) {
cl_device_local_mem_type local_mem_type;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) {
if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global");
if (cd_ctx[platform_idx].is_apple) {
if (strncmp(cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) != 0) cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
} else if (cd_ctx[platform_idx].is_nv) cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
/*
// swap the 'if' comment for enable local memory with apple gpu's (my Iris crash, abort 6)
// if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple && !cd_ctx[platform_idx].device[device_idx].is_gpu))
if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple))
{
cd_ctx[platform_idx].device[device_idx].have_local_memory = true;
}
*/
} else {
if (verbose) printf("%14s: None\n", "Local Mem Type");
}
if (verbose) printf("%14s: %s\n", "Local Mem Opt", (cd_ctx[platform_idx].device[device_idx].have_local_memory) ? "yes" : "no");
continue;
} else if (info_idx == 6) {
size_t wis[3] = { 0 };
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]);
#if APPLE_GPU_BROKEN == 1
if (wis[1] < GLOBAL_WS_1 && cd_ctx[platform_idx].device[device_idx].is_apple_gpu) {
cd_ctx[platform_idx].device[device_idx].unsupported = true;
}
#endif
continue;
} else if (info_idx == 7) {
cl_uint cores = 0;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (verbose) printf("%14s: %u\n", "Compute Units", cores);
cd_ctx[platform_idx].device[device_idx].compute_units = cores;
continue;
}
tmp_len = 0;
tmp_buf = NULL;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
if (tmp_len > 0) {
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(tmp_buf)
err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(param) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
} else {
tmp_len = 4;
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(tmp_buf)
strncpy(tmp_buf, "N/A\0", tmp_len);
}
if (verbose) {
const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor";
printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf);
}
switch (info_idx) {
case 1:
strncpy(cd_ctx[platform_idx].device[device_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
break;
case 2:
strncpy(cd_ctx[platform_idx].device[device_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 3:
strncpy(cd_ctx[platform_idx].device[device_idx].driver_version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 4:
strncpy(cd_ctx[platform_idx].device[device_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
}
if (info_idx == 4) {
if (!strncmp(tmp_buf, "Intel", 5) && cd_ctx[platform_idx].is_apple) {
// disable hitag2 with apple platform and not apple device vendor (< Apple M1)
ctx.force_hitag2_opencl = false;
cd_ctx[platform_idx].device[device_idx].is_apple_gpu = cd_ctx[platform_idx].device[device_idx].is_gpu;
}
if (!strncmp(tmp_buf, "NVIDIA", 6) && cd_ctx[platform_idx].is_nv) {
unsigned int sm_maj = 0, sm_min = 0;
err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, 0);
err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err);
MEMORY_FREE_ALL
exit(2);
}
cd_ctx[platform_idx].device[device_idx].sm_maj = sm_maj;
cd_ctx[platform_idx].device[device_idx].sm_min = sm_min;
if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min);
if (sm_maj >= 5) { // >= Maxwell
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3
// Requires sm_50 or higher.
cd_ctx[platform_idx].device[device_idx].have_lop3 = true;
} else {
cd_ctx[platform_idx].device[device_idx].warning = true;
}
cd_ctx[platform_idx].device[device_idx].is_nv = true;
} else {
cd_ctx[platform_idx].device[device_idx].warning = true;
}
}
MEMORY_FREE_DEL(tmp_buf)
}
if (!show && verbose) printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].device[device_idx].selected) ? "yes" : "no");
if (cd_ctx[platform_idx].device[device_idx].unsupported) {
printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning");
continue;
}
if (cd_ctx[platform_idx].device[device_idx].warning) {
if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning");
}
cd_ctx[platform_idx].device[device_idx].device_id = ocl_device;
}
MEMORY_FREE_DEL(ocl_devices)
// now discover and set up compute device(s)
if ((err = discoverDevices(profile_selected, device_types_selected, &ocl_platform_cnt, &selected_platforms_cnt, &selected_devices_cnt, &cd_ctx, plat_sel, plat_cnt, dev_sel, dev_cnt, verbose, show)) != 0)
{
printf ("Error: discoverDevices() failed\n");
if (err < -5) free (cd_ctx);
MEMORY_FREE_ALL
exit (2);
}
MEMORY_FREE_DEL(ocl_platforms)
// new selection engine, need to support multi-gpu system (with the same platform)
if (verbose) printf("\n");
// new selection engine, need to support multi-gpu system (with the same platform)
if (show) {
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(cd_ctx)
if (selected_platforms_cnt == 0) {
printf("! No platform was selected ...\n");
MEMORY_FREE_ALL
@ -855,6 +498,11 @@ int main(int argc, char **argv) {
for (q = 0; q < cd_ctx[w].device_cnt; q++) {
if (!cd_ctx[w].device[q].selected) continue;
if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
// disable hitag2 with apple platform and not apple device vendor (< Apple M1)
ctx.force_hitag2_opencl = false;
}
printf("%2zu - %s", z, cd_ctx[w].device[q].name);
if (verbose) {
printf(" (Lop3 %s, ", (cd_ctx[w].device[q].have_lop3) ? "yes" : "no");
@ -989,14 +637,6 @@ int main(int argc, char **argv) {
MEMORY_FREE_ADD(ctx.local_ws)
if (!(ctx.profiles = (int *) calloc(selected_devices_cnt, sizeof(int)))) {
printf("Error: calloc (ctx.profiles) failed (%d): %s\n", errno, strerror(errno));
MEMORY_FREE_ALL
exit(2);
}
MEMORY_FREE_ADD(ctx.profiles)
// show buidlog in case of error
// todo: only for device models
unsigned int build_errors = 0;
@ -1121,7 +761,6 @@ int main(int argc, char **argv) {
if (err != CL_SUCCESS) {
printf("[%zu] Error: clGetProgramBuildInfo failed (%d)\n", z, err);
continue;
// exit (2);
}
if (len == 0) continue;
@ -1132,17 +771,13 @@ int main(int argc, char **argv) {
if (!buffer) {
printf("[%zu] Error: calloc (CL_PROGRAM_BUILD_LOG) failed (%d): %s\n", z, errno, strerror(errno));
continue;
// exit (2);
}
MEMORY_FREE_ADD(buffer)
err = clGetProgramBuildInfo(ctx.programs[z], cd_ctx[w].device[q].device_id, CL_PROGRAM_BUILD_LOG, len, buffer, 0);
if (err != CL_SUCCESS) {
printf("[%zu] clGetProgramBuildInfo() failed (%d)\n", z, err);
MEMORY_FREE_DEL(buffer)
free (buffer);
continue;
// exit (2);
}
#if DEBUGME > 0
@ -1152,7 +787,7 @@ int main(int argc, char **argv) {
printf("[%zu] Build log (len %zu):\n--------\n%s\n--------\n", z, len, buffer);
}
MEMORY_FREE_DEL(buffer)
free (buffer);
build_logs++;
#if DEBUGME == 0
@ -1196,76 +831,13 @@ int main(int argc, char **argv) {
}
}
// z is device counter, dolphin counter as well
// setup, phase 2 (select lower profile)
int profile = 0xff;
g = 0;
for (w = 0; w < ocl_platform_cnt; w++) {
if (!cd_ctx[w].selected) continue;
for (q = 0; q < cd_ctx[w].device_cnt; q++) {
if (!cd_ctx[w].device[q].selected) continue;
ctx.profiles[g] = (int) profile_selected; // start with default
#if DEBUGME > 1
printf("[debug] Initial profile for device %zu: %d\n", z, ctx.profiles[g]);
#endif
// force profile to 0 with Apple GPU's to get it stable, and 1 for CPU
if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
if (cd_ctx[w].device[q].is_gpu) {
if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel GPU's, 2 is the old 0
} else {
if (profile_selected > 3) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel CPU's, 3 is the old 1
}
}
// force profile to 0 with Intel GPU and 2 wih Intel CPU's
if (cd_ctx[w].is_intel && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) {
if (cd_ctx[w].device[q].is_gpu) {
ctx.profiles[g] = 0; // Intel GPU, work better with a very slow profile
} else {
if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Intel CPU (2 is the old 0)
}
}
// force profile to 2 with NVIDIA GPU's with NVIDIA platform
if (cd_ctx[w].is_nv && cd_ctx[w].device[q].is_gpu && !strncmp(cd_ctx[w].device[q].vendor, "NVIDIA", 6)) {
if (profile_selected > 10) {
// NVIDIA RTX 3090 perform better with 5
ctx.profiles[g] = (cd_ctx[w].device[q].sm_maj >= 8) ? 5 : PROFILE_DEFAULT;
}
}
// probably unstested hw, set profile to 0
if (profile_selected == 0xff) {
profile_selected = 0;
ctx.profiles[g] = 0;
}
// with same devices will be selected the best
// but for different devices in the same platform we need the worst for now (todo)
if (ctx.profiles[q] < profile) profile = ctx.profiles[q];
}
}
// profile consistency check
if (profile < 0 || profile > 10) {
printf("! Error: the selected profile is not allowed (%d)\n", profile);
MEMORY_FREE_OPENCL(ctx, z)
MEMORY_FREE_LIST_Z(matches, z)
MEMORY_FREE_LIST_Z(matches_found, z)
MEMORY_FREE_ALL
exit(2);
}
unsigned int profile = get_smallest_profile (cd_ctx, ocl_platform_cnt);
// setup, phase 3 (finis him)
// z is device counter, dolphin buggy counter as well
z = 0;
for (w = 0; w < ocl_platform_cnt; w++) {
@ -1296,8 +868,7 @@ int main(int argc, char **argv) {
MEMORY_FREE_ALL
exit(2);
}
} else {
// one
} else { // one
if (!(matches[z] = (uint64_t *) calloc(1, sizeof(uint64_t)))) {
printf("[%zu] Error: calloc (matches) failed (%d): %s\n", z, errno, strerror(errno));
MEMORY_FREE_OPENCL(ctx, z)
@ -1374,7 +945,7 @@ int main(int argc, char **argv) {
}
err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates[z], CL_TRUE, 0, sizeof(uint16_t) * ((1 << 20) * 3), candidates, 0, NULL, NULL);
// err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL);
// err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf("[%zu] Error: clEnqueueWriteBuffer(ctx.candidates) failed (%d)\n", z, err);
MEMORY_FREE_OPENCL(ctx, z)
@ -1492,17 +1063,14 @@ int main(int argc, char **argv) {
t_arg[z].aR2 = aR2;
t_arg[z].nR1 = nR1;
t_arg[z].nR2 = nR2;
t_arg[z].max_step = max_step;
t_arg[z].max_slices = max_step;
t_arg[z].ocl_ctx = &ctx;
t_arg[z].device_id = z;
t_arg[z].async = (ctx.thread_sched_type == THREAD_TYPE_ASYNC);
t_arg[z].thread_ctx = &th_ctx;
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
t_arg[z].matches = matches[z];
t_arg[z].matches_found = matches_found[z];
t_arg[z].status = TH_START;
}
t_arg[z].r = false;
t_arg[z].matches = matches[z];
t_arg[z].matches_found = matches_found[z];
t_arg[z].status = TH_START;
}
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
@ -1550,202 +1118,20 @@ int main(int argc, char **argv) {
printf("Attack 5 - opencl - start (Max Slices %u, %s order", max_step, wu_queue_strdesc(ctx.queue_ctx.queue_type));
if (!verbose) printf(")\n\n");
else printf(", Profile %d, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no");
else printf(", Profile %u, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no");
if (gettimeofday(&cpu_t_start, NULL) == -1) {
printf("! gettimeofday(start) failed (%d): %s\n", errno, strerror(errno));
printf("Error: gettimeofday(start) failed (%d): %s\n", errno, strerror(errno));
show_overall_time = false;
}
if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) {
// crack hitag key or die tryin'
unsigned int th_cnt;
bool done = false;
do { // master
th_cnt = 0;
for (z = 0; z < thread_count; z++) {
#if TDEBUG >= 1 && DEBUGME == 1
if (thread_count == 1) { printf("[%zu] get status from slave ...\n", z); fflush(stdout); }
#endif
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
thread_status_t cur_status = t_arg[z].status;
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
#if TDEBUG >= 1 && DEBUGME == 1
if (thread_count == 1) { printf("[%zu] slave status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); }
#endif
if (found) {
#if TDEBUG >= 3
printf("[%zu] Processing exit logic\n", z);
fflush(stdout);
#endif
if (cur_status < TH_FOUND_KEY) {
#if TDEBUG >= 1
printf("[%zu] key found from another thread, set quit\n", z);
fflush(stdout);
#endif
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
if (cur_status == TH_WAIT) pthread_cond_signal(&th_ctx.thread_conds[z]);
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
} else {
if (thread_count == 1) {
th_cnt++;
#if TDEBUG >= 1
printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, thread_count);
fflush(stdout);
#endif
}
}
continue;
}
if (cur_status == TH_WAIT) {
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
if (found) {
#if TDEBUG >= 1
printf("[%zu] key is found in another thread 1\n", z);
fflush(stdout);
#endif
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
continue;
}
if (wu_queue_done(&ctx.queue_ctx) != QUEUE_EMPTY) {
t_arg[z].status = TH_PROCESSING;
#if TDEBUG >= 1
printf("[master] slave [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
pthread_cond_signal(&th_ctx.thread_conds[z]);
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
continue;
} else {
#if TDEBUG >= 1
printf("[master] slave [%zu], max step reached. Quit.\n", z);
fflush(stdout);
#endif
cur_status = t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_cond_signal(&th_ctx.thread_conds[z]);
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
}
}
if (cur_status == TH_PROCESSING) {
if (th_ctx.enable_condusleep) {
#if TDEBUG >= 1
printf("[master] before pthread_cond_wait, TH_PROCESSING\n");
#endif
pthread_mutex_lock(&th_ctx.thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] slave [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
pthread_cond_wait(&th_ctx.thread_cond_usleep, &th_ctx.thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] slave [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
if (t_arg[z].status == TH_FOUND_KEY) found = true;
pthread_mutex_unlock(&th_ctx.thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] after pthread_cond_wait, TH_PROCESSING\n");
#endif
continue;
}
if (found) {
#if TDEBUG >= 1
printf("[master] slave [%zu], the key is found. set TH_END from TH_PROCESSING\n", z);
fflush(stdout);
#endif
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
continue;
}
}
if (cur_status == TH_ERROR) {
// something went wrong
pthread_mutex_lock(&th_ctx.thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&th_ctx.thread_mutexs[z]);
continue;
}
// todo, do more clean exit logic
if (cur_status >= TH_FOUND_KEY) {
th_cnt++;
if (cur_status == TH_FOUND_KEY) {
thread_setEnd(&th_ctx, t_arg);
found = true;
done = true;
}
}
}
if (th_cnt == thread_count) done = true;
} while (!done);
// end of async engine
} else if (ctx.thread_sched_type == THREAD_TYPE_SEQ) {
uint32_t step = 0;
bool quit = false;
for (step = 0; step < max_step; step += thread_count) {
for (z = 0; z < thread_count; z++) {
t_arg[z].r = found;
t_arg[z].matches = matches[z];
t_arg[z].matches_found = matches_found[z];
}
if ((ret = thread_start(&th_ctx, t_arg)) != 0) {
printf("Error: thread_start() failed (%d): %s\n", ret, thread_strerror(ret));
thread_destroy(&th_ctx);
MEMORY_FREE_OPENCL(ctx, z)
MEMORY_FREE_LIST_Z(matches, z)
MEMORY_FREE_LIST_Z(matches_found, z)
MEMORY_FREE_ALL
exit(3);
}
// waiting threads return
thread_stop(&th_ctx);
for (z = 0; z < th_ctx.thread_count; z++) {
if (t_arg[z].r) found = true;
if (t_arg[z].err) {
error = true;
quit = true;
}
}
if (found || quit) break;
}
// Hokuto Hyakuretsu Ken
ret = thread_start_scheduler (&th_ctx, t_arg, &ctx.queue_ctx);
if (ret < 0) {
printf("Error: thread_start_scheduler() failed (%d): %s\n", ret, thread_strerror(ret));
error = true;
} else if (ret == 0) {
found = true;
}
// if found, show the key here
@ -1755,7 +1141,7 @@ int main(int argc, char **argv) {
if (thread_count > 1) printf("[%zu] ", y);
printf("Key found @ slice %lu/%lu: ", t_arg[y].slice, t_arg[y].max_step);
printf("Key found @ slice %zu/%zu: ", t_arg[y].slice, t_arg[y].max_slices);
for (int i = 0; i < 6; i++) {
printf("%02X", (uint8_t)(t_arg[y].key & 0xff));
t_arg[y].key = t_arg[y].key >> 8;
@ -1770,14 +1156,13 @@ int main(int argc, char **argv) {
if (gettimeofday(&cpu_t_end, NULL) == 0) {
timersub(&cpu_t_end, &cpu_t_start, &cpu_t_result);
} else {
printf("! gettimeofday(end) failed (%d): %s\n", errno, strerror(errno));
printf("Error. gettimeofday(end) failed (%d): %s\n", errno, strerror(errno));
show_overall_time = false;
}
}
if (!found) {
printf("\nError. %s\n", (error) ? "something went wrong :(" : "Key not found :|");
if (error) exit(-1);
}
printf("\nAttack 5 - opencl - end");
@ -1792,17 +1177,19 @@ int main(int argc, char **argv) {
fflush(stdout);
#endif
thread_stop(&th_ctx);
if (!error && th_ctx.type != THREAD_TYPE_SEQ) thread_stop(&th_ctx);
#if DEBUGME > 1
printf("destroy threads\n");
fflush(stdout);
#endif
if ((ret = thread_destroy(&th_ctx)) != 0) {
if (!error) {
if ((ret = thread_destroy(&th_ctx)) != 0) {
#if DEBUGME > 0
printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret));
printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret));
#endif
}
}
#if DEBUGME > 1

View file

@ -40,6 +40,452 @@ bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsi
return false;
}
/*
 * get_smallest_profile
 *
 * Scan every selected device of every selected platform and return the
 * lowest 'profile' value found among them.
 *
 * cd_ctx           : array of platform contexts (each holding its devices)
 * ocl_platform_cnt : number of entries in cd_ctx
 *
 * Returns the smallest profile seen. If the result is greater than 10
 * (which includes the case where no selected device lowered the initial
 * 0xff sentinel), 0 is returned instead.
 */
unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt)
{
    unsigned int profile = 0xff; // sentinel: larger than any accepted profile
    size_t x = 0, y = 0;

    for (x = 0; x < ocl_platform_cnt; x++) {
        if (!cd_ctx[x].selected) continue;

        for (y = 0; y < cd_ctx[x].device_cnt; y++) {
            if (!cd_ctx[x].device[y].selected) continue;

#if DEBUGME > 1
            // 'y' is the device index inside the current platform; the previous
            // code printed 'z', which does not exist in this function and broke
            // the build when DEBUGME > 1.
            printf("[debug] Initial profile for device %zu: %u\n", y, (unsigned int) cd_ctx[x].device[y].profile);
#endif

            // with same devices will be selected the best
            // but for different devices in the same platform we need the worst for now (todo)
            if (cd_ctx[x].device[y].profile < profile) profile = cd_ctx[x].device[y].profile;
        }
    }

    // at worst, set profile to 0
    if (profile > 10) profile = 0;

    return profile;
}
int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected, cl_uint *platform_detected_cnt, size_t *selected_platforms_cnt, size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx, unsigned int *plat_sel, unsigned int plat_cnt, unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show)
{
int err = 0;
unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16
cl_uint ocl_platform_cnt;
cl_platform_id *ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id));
if (!ocl_platforms) {
printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno));
return -2;
}
// enum platforms
err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformIDs() failed (%d)\n", err);
free (ocl_platforms);
return -3;
}
if (ocl_platform_cnt == 0) {
printf("No platforms found, exit\n");
free (ocl_platforms);
return -4;
}
// allocate memory to hold info about platforms/devices
*cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t));
if (*cd_ctx == NULL) {
printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno));
free (ocl_platforms);
return -5;
}
cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION };
unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(cl_platform_info);
cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS };
unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(cl_device_info);
unsigned int info_idx = 0;
size_t tmp_len = 0;
char *tmp_buf = NULL;
unsigned int global_device_id = 0;
if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt);
for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) {
(*cd_ctx)[platform_idx].platform_id = ocl_platforms[platform_idx];
(*cd_ctx)[platform_idx].selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0);
if ((*cd_ctx)[platform_idx].selected) (*selected_platforms_cnt)++;
if (verbose) printf("\n-- Platform ID: %d\n", platform_idx + 1);
for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) {
cl_platform_info ocl_info = ocl_platforms_info[info_idx];
err = clGetPlatformInfo((*cd_ctx)[platform_idx].platform_id, ocl_info, 0, NULL, &tmp_len);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err);
free (*cd_ctx);
free (ocl_platforms);
return -6;
}
if (tmp_len > 0) {
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
free (*cd_ctx);
free (ocl_platforms);
return -7;
}
err = clGetPlatformInfo((*cd_ctx)[platform_idx].platform_id, ocl_info, tmp_len, tmp_buf, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformInfo(param) failed (%d)\n", err);
free (tmp_buf);
free (*cd_ctx);
free (ocl_platforms);
return -8;
}
} else {
tmp_len = 4;
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
free (*cd_ctx);
free (ocl_platforms);
return -7;
}
strncpy(tmp_buf, "N/A\0", tmp_len);
}
if (verbose) {
const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version";
printf("%14s: %s\n", tmp_info_desc, tmp_buf);
}
switch (info_idx) {
case 0:
strncpy((*cd_ctx)[platform_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
break;
case 1:
strncpy((*cd_ctx)[platform_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 2:
strncpy((*cd_ctx)[platform_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
}
if (info_idx == 1) {
if (!strncmp(tmp_buf, "NVIDIA", 6)) (*cd_ctx)[platform_idx].is_nv = true;
else if (!strncmp(tmp_buf, "Apple", 5)) { (*cd_ctx)[platform_idx].is_apple = true; (*cd_ctx)[platform_idx].warning = true; }
else if (!strncmp(tmp_buf, "Intel", 5)) (*cd_ctx)[platform_idx].is_intel = true;
else if (!strncmp(tmp_buf, "The pocl project", 16)) (*cd_ctx)[platform_idx].is_pocl = true;
}
free (tmp_buf);
}
if (!show && verbose) {
printf("%14s: %s\n", "Selected", ((*cd_ctx)[platform_idx].selected) ? "yes" : "no");
if ((*cd_ctx)[platform_idx].warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning");
}
// enum devices with this platform
unsigned int ocl_device_cnt = 0;
unsigned int ocl_device_max = MAX_OPENCL_DEVICES;
cl_device_id *ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id));
if (!ocl_devices) {
printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno));
free (*cd_ctx);
free (ocl_platforms);
return -7;
}
err = clGetDeviceIDs((*cd_ctx)[platform_idx].platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt);
if (ocl_device_cnt == 0) {
if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %d\n", platform_idx);
(*cd_ctx)[platform_idx].device_cnt = 0;
continue;
}
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -9;
}
if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt);
(*cd_ctx)[platform_idx].device_cnt = ocl_device_cnt;
for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) {
memset(&(*cd_ctx)[platform_idx].device[device_idx], 0, sizeof(compute_device_ctx_t));
cl_device_id ocl_device = ocl_devices[device_idx];
(*cd_ctx)[platform_idx].device[device_idx].platform_id = (*cd_ctx)[platform_idx].platform_id;
if (verbose) printf("---- * ID: %u\n", global_device_id + 1);
for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) {
cl_device_info ocl_dev_info = ocl_devices_info[info_idx];
if (info_idx == 0) {
cl_device_type device_type;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
if (device_type & CL_DEVICE_TYPE_GPU) (*cd_ctx)[platform_idx].device[device_idx].is_gpu = 1;
else if ((device_type & CL_DEVICE_TYPE_CPU) && (*cd_ctx)[platform_idx].is_pocl) {
(*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 1) ? 0 : profile_selected;
}
if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other");
(*cd_ctx)[platform_idx].device[device_idx].selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected);
global_device_id++;
if ((*cd_ctx)[platform_idx].device[device_idx].selected) (*selected_devices_cnt)++;
continue;
} else if (info_idx == 5) {
cl_device_local_mem_type local_mem_type;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) {
if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global");
if ((*cd_ctx)[platform_idx].is_apple) {
if (strncmp((*cd_ctx)[platform_idx].device[device_idx].vendor, "Intel", 5) != 0) {
(*cd_ctx)[platform_idx].device[device_idx].have_local_memory = true;
if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) {
if (profile_selected > 2) (*cd_ctx)[platform_idx].device[device_idx].profile = PROFILE_DEFAULT; // Apple-Intel GPU's
} else {
if (profile_selected > 3) (*cd_ctx)[platform_idx].device[device_idx].profile = PROFILE_DEFAULT; // Apple-Intel CPU's
}
}
} else if ((*cd_ctx)[platform_idx].is_nv) {
(*cd_ctx)[platform_idx].device[device_idx].have_local_memory = true;
}
} else {
if (verbose) printf("%14s: None\n", "Local Mem Type");
}
if (verbose) printf("%14s: %s\n", "Local Mem Opt", ((*cd_ctx)[platform_idx].device[device_idx].have_local_memory) ? "yes" : "no");
continue;
} else if (info_idx == 6) {
size_t wis[3] = { 0 };
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]);
#if APPLE_GPU_BROKEN == 1
if (wis[1] < GLOBAL_WS_1 && (*cd_ctx)[platform_idx].device[device_idx].is_apple_gpu) {
(*cd_ctx)[platform_idx].device[device_idx].unsupported = true;
}
#endif
continue;
} else if (info_idx == 7) {
cl_uint cores = 0;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
if (verbose) printf("%14s: %u\n", "Compute Units", cores);
(*cd_ctx)[platform_idx].device[device_idx].compute_units = cores;
continue;
}
tmp_len = 0;
tmp_buf = NULL;
err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
if (tmp_len > 0) {
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -7;
}
err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(param) failed (%d)\n", err);
free (tmp_buf);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
} else {
tmp_len = 4;
if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) {
printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno));
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -7;
}
strncpy(tmp_buf, "N/A\0", tmp_len);
}
if (verbose) {
const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor";
printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf);
}
switch (info_idx) {
case 1:
strncpy((*cd_ctx)[platform_idx].device[device_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1);
break;
case 2:
strncpy((*cd_ctx)[platform_idx].device[device_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 3:
strncpy((*cd_ctx)[platform_idx].device[device_idx].driver_version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
case 4:
strncpy((*cd_ctx)[platform_idx].device[device_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1);
break;
}
if (info_idx == 1) {
// force profile to 0-1 with Jetson Nano
if (strstr(tmp_buf, "Tegra") && (*cd_ctx)[platform_idx].is_pocl) {
(*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 1) ? 0 : profile_selected;
}
} else if (info_idx == 4) {
if (!strncmp(tmp_buf, "Intel", 5)) {
if ((*cd_ctx)[platform_idx].is_apple) {
(*cd_ctx)[platform_idx].device[device_idx].is_apple_gpu = (*cd_ctx)[platform_idx].device[device_idx].is_gpu;
}
// force profile to 0 with Intel GPU and 2 wih Intel CPU's
if ((*cd_ctx)[platform_idx].is_intel) {
if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) {
(*cd_ctx)[platform_idx].device[device_idx].profile = 0; // Intel GPU's, work better with a very slow profile
} else {
(*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 2) ? PROFILE_DEFAULT : profile_selected; // Intel CPU's
}
}
}
if (!strncmp(tmp_buf, "NVIDIA", 6) && (*cd_ctx)[platform_idx].is_nv) {
unsigned int sm_maj = 0, sm_min = 0;
err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, 0);
err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, 0);
if (err != CL_SUCCESS) {
printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err);
free (tmp_buf);
free (ocl_devices);
free (*cd_ctx);
free (ocl_platforms);
return -10;
}
(*cd_ctx)[platform_idx].device[device_idx].sm_maj = sm_maj;
(*cd_ctx)[platform_idx].device[device_idx].sm_min = sm_min;
if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min);
if (sm_maj >= 5) { // >= Maxwell
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3
// Requires sm_50 or higher.
(*cd_ctx)[platform_idx].device[device_idx].have_lop3 = true;
} else {
(*cd_ctx)[platform_idx].device[device_idx].warning = true;
}
(*cd_ctx)[platform_idx].device[device_idx].is_nv = true;
if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) {
if (profile_selected > 10) {
// NVIDIA RTX 3090 perform better with 5
(*cd_ctx)[platform_idx].device[device_idx].profile = (sm_maj >= 8) ? 5 : PROFILE_DEFAULT;
}
}
} else {
(*cd_ctx)[platform_idx].device[device_idx].warning = true;
}
}
free (tmp_buf);
}
if (!show && verbose) printf("%14s: %s\n", "Selected", ((*cd_ctx)[platform_idx].device[device_idx].selected) ? "yes" : "no");
if ((*cd_ctx)[platform_idx].device[device_idx].unsupported) {
printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning");
continue;
}
if ((*cd_ctx)[platform_idx].device[device_idx].warning) {
if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning");
}
(*cd_ctx)[platform_idx].device[device_idx].device_id = ocl_device;
}
free (ocl_devices);
ocl_devices = NULL;
}
free (ocl_platforms);
ocl_platforms = NULL;
*platform_detected_cnt = ocl_platform_cnt;
if (show) free (*cd_ctx);
return 0;
}
int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id) {
int err = 0;

View file

@ -38,6 +38,7 @@ License: GNU General Public License v3 or any later version (see LICENSE.txt)
#include <stdbool.h>
#include <stdio.h>
#include <errno.h>
// max number of concurrent devices (tested up to 4x RTX 3090)
#define MAX_OPENCL_DEVICES 16
@ -54,9 +55,10 @@ typedef struct compute_device_ctx {
bool warning, unsupported;
bool selected;
bool enabled;
unsigned char pad1[4];
unsigned char pad1[1];
unsigned int profile;
unsigned int sm_maj;
unsigned int sm_min;
unsigned int compute_units;
@ -70,11 +72,11 @@ typedef struct compute_platform_ctx {
unsigned int device_cnt;
unsigned int compute_units_max;
bool is_nv, is_apple, is_intel;
bool is_nv, is_apple, is_intel, is_pocl;
bool warning;
bool selected;
unsigned char pad1[3];
unsigned char pad1[2];
compute_device_ctx_t device[0x10];
char name[0xff];
@ -94,7 +96,7 @@ typedef struct opencl_ctx {
size_t *global_ws;
size_t *local_ws;
int *profiles;
unsigned int *profiles;
cl_device_id *device_ids; // compute device id's array
cl_context *contexts; // compute contexts
@ -120,7 +122,8 @@ typedef struct opencl_ctx {
} opencl_ctx_t;
bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsigned int cur_type, unsigned int allow_type);
unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt);
int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected, cl_uint *ocl_platform_cnt, size_t *selected_platforms_cnt, size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx, unsigned int *plat_sel, unsigned int plat_cnt, unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show);
int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id);
#endif // OPENCL_H

View file

@ -340,7 +340,7 @@ int wu_queue_pop(wu_queue_ctx_t *ctx, wu_queue_data_t *wu, short remove) {
break;
case QUEUE_TYPE_RANDOM: // from the head
#if TEST_UNIT == 1
fprintf(stdout, "pop id %ld\n", wu->id);
fprintf(stdout, "pop id %zu\n", wu->id);
fflush(stdout);
#endif
if (ptrPrev == NULL) {
@ -441,9 +441,9 @@ int main(void) {
wu_queue_type_t types[4] = { QUEUE_TYPE_FORWARD, QUEUE_TYPE_REVERSE, QUEUE_TYPE_RANDOM, 1234 };
int types_max = (int)(sizeof(types) / sizeof(wu_queue_type_t));
int ret = 0;
for (i = 0; i < types_max; i++) {
int ret = 0;
printf("[%d] trying wu_queue_init() in %s mode\n", i, wu_queue_strdesc(types[i]));
if ((ret = wu_queue_init(&ctx, types[i])) != 0) {

View file

@ -95,7 +95,7 @@ typedef struct wu_queue_ctx {
// mutex
pthread_mutexattr_t queue_mutex_attr;
// unsigned char pad1[4];
unsigned char pad1[4];
pthread_mutex_t queue_mutex;
} wu_queue_ctx_t;

View file

@ -56,6 +56,8 @@ const char *thread_strerror(int error) {
return (const char *) "GENERIC ERROR";
case THREAD_ERROR_ALLOC:
return (const char *) "ALLOC FAILED";
case THREAD_ERROR_INTERNAL:
return (const char *) "INTERNAL ERROR";
}
return (const char *) "GENERIC";
@ -174,6 +176,198 @@ int thread_init(thread_ctx_t *ctx, short type, size_t thread_count) {
return 0;
}
/*
 * Drive the worker threads until a key is found or no work is left.
 *
 * ctx        thread context; ctx->type selects the scheduling mode
 * t_arg      per-thread argument array, ctx->thread_count entries
 * queue_ctx  work-unit queue, consulted only in THREAD_TYPE_ASYNC mode
 *
 * THREAD_TYPE_SEQ:   workers are started and joined in rounds until one of
 *                    them sets t_arg[z].r or the slice counter reaches
 *                    t_arg[0].max_slices.
 * THREAD_TYPE_ASYNC: the calling thread acts as master and polls each
 *                    worker's status under its mutex, waking, stopping or
 *                    waiting for workers as needed.
 *
 * Returns 0 when a key was found and 1 otherwise (also for any other
 * ctx->type). In SEQ mode a failing thread_start() makes the function
 * return that error code, and a worker reporting t_arg[z].err makes it
 * return THREAD_ERROR_INTERNAL after calling thread_destroy().
 */
int thread_start_scheduler (thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx)
{
size_t z = 0;
bool found = false;
bool done = false;
// number of workers seen in a final state (>= TH_FOUND_KEY) during one ASYNC pass
unsigned int th_cnt = 0;
if (ctx->type == THREAD_TYPE_SEQ) {
bool error = false;
uint32_t slice = 0;
// `slice` only counts rounds here; t_arg is not updated by this loop
// (presumably the workers advance their own slice - TODO confirm)
for (slice = 0; slice < t_arg[0].max_slices; slice += ctx->thread_count) {
int err = 0;
if ((err = thread_start(ctx, t_arg)) != 0) {
printf("Error: thread_start() failed (%d): %s\n", err, thread_strerror(err));
}
// waiting threads return
if (err == 0) thread_stop(ctx);
// collect the results of this round; a hit ends the scan immediately,
// so error flags of workers after the hit are not looked at
for (z = 0; z < ctx->thread_count; z++) {
if (t_arg[z].r) {
found = true;
break;
}
if (t_arg[z].err) {
error = true;
}
}
// internal error: a worker flagged t_arg[z].err although thread_start() succeeded
if (error && err == 0) {
thread_destroy(ctx);
err = THREAD_ERROR_INTERNAL;
}
// any error wins over `found`
if (err != 0) return err;
if (found) break;
}
} else if (ctx->type == THREAD_TYPE_ASYNC) {
// crack hitag key or die tryin'
do { // master
th_cnt = 0;
for (z = 0; z < ctx->thread_count; z++) {
#if TDEBUG >= 1 && DEBUGME == 1
if (ctx->thread_count == 1) { printf("[%zu] get status from thread ...\n", z); fflush(stdout); }
#endif
// take a snapshot of the worker status under its own mutex
pthread_mutex_lock(&ctx->thread_mutexs[z]);
thread_status_t cur_status = t_arg[z].status;
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
#if TDEBUG >= 1 && DEBUGME == 1
if (ctx->thread_count == 1) { printf("[%zu] thread status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); }
#endif
// exit logic: once the key is known, every worker not yet in a final state is told to quit
if (found) {
#if TDEBUG >= 3
printf("[%zu] Processing exit logic\n", z);
fflush(stdout);
#endif
if (cur_status < TH_FOUND_KEY) {
#if TDEBUG >= 1
printf("[%zu] key found from another thread, set quit\n", z);
fflush(stdout);
#endif
pthread_mutex_lock(&ctx->thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
// a worker parked in TH_WAIT has to be woken so it can see the new status
if (cur_status == TH_WAIT) pthread_cond_signal(&ctx->thread_conds[z]);
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
} else {
// NOTE(review): finished workers are counted here only when there is a single thread
if (ctx->thread_count == 1) {
th_cnt++;
#if TDEBUG >= 1
printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, ctx->thread_count);
fflush(stdout);
#endif
}
}
continue;
}
// worker is idle: hand it more work or tell it to finish
if (cur_status == TH_WAIT) {
pthread_mutex_lock(&ctx->thread_mutexs[z]);
// NOTE(review): `found` was already tested at the top of this iteration and
// is not modified in between, so this branch looks unreachable
if (found) {
#if TDEBUG >= 1
printf("[%zu] key is found in another thread 1\n", z);
fflush(stdout);
#endif
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
continue;
}
// queue not reported empty: let the worker process again
if (wu_queue_done(queue_ctx) != QUEUE_EMPTY) {
t_arg[z].status = TH_PROCESSING;
#if TDEBUG >= 1
printf("[master] thread [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
pthread_cond_signal(&ctx->thread_conds[z]);
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
continue;
} else {
#if TDEBUG >= 1
printf("[master] thread [%zu], max step reached. Quit.\n", z);
fflush(stdout);
#endif
// queue reported empty: end this worker; cur_status is updated too, so
// the `>= TH_FOUND_KEY` accounting below counts it in this same pass
cur_status = t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_cond_signal(&ctx->thread_conds[z]);
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
}
}
if (cur_status == TH_PROCESSING) {
// with enable_condusleep the master blocks on the shared condition
// until a signal arrives on thread_cond_usleep
if (ctx->enable_condusleep) {
#if TDEBUG >= 1
printf("[master] before pthread_cond_wait, TH_PROCESSING\n");
fflush(stdout);
#endif
pthread_mutex_lock(&ctx->thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] thread [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
// NOTE(review): waits without a predicate loop, so a spurious wakeup or a
// signal sent before this wait starts is not distinguished - confirm intended
pthread_cond_wait(&ctx->thread_cond_usleep, &ctx->thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] thread [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status));
fflush(stdout);
#endif
// NOTE(review): status is read while holding thread_mutex_usleep, not thread_mutexs[z]
if (t_arg[z].status == TH_FOUND_KEY) found = true;
pthread_mutex_unlock(&ctx->thread_mutex_usleep);
#if TDEBUG >= 1
printf("[master] after pthread_cond_wait, TH_PROCESSING\n");
fflush(stdout);
#endif
continue;
}
// NOTE(review): as above, `found` cannot be true here (it is only set in
// branches that `continue`), so this branch looks unreachable
if (found) {
#if TDEBUG >= 1
printf("[master] thread [%zu], the key is found. set TH_END from TH_PROCESSING\n", z);
fflush(stdout);
#endif
pthread_mutex_lock(&ctx->thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
continue;
}
}
if (cur_status == TH_ERROR) {
// something went wrong
// the worker is moved to TH_END and gets counted on a later pass
pthread_mutex_lock(&ctx->thread_mutexs[z]);
t_arg[z].status = TH_END;
t_arg[z].quit = true;
pthread_mutex_unlock(&ctx->thread_mutexs[z]);
continue;
}
// final states: TH_FOUND_KEY or TH_END
if (cur_status >= TH_FOUND_KEY) {
th_cnt++;
if (cur_status == TH_FOUND_KEY) {
// a worker has the key: ask all workers to end and leave the master loop
thread_setEnd(ctx, t_arg);
found = true;
done = true;
}
}
}
// every worker was seen in a final state during this pass
if (th_cnt == ctx->thread_count) done = true;
} while (!done);
}
return (found) ? 0 : 1;
}
int thread_destroy(thread_ctx_t *ctx) {
if (!ctx) return -1;
if (!ctx->init) return -2;
@ -258,8 +452,6 @@ const char *thread_status_strdesc(thread_status_t s) {
return (const char *) "PROCESSING";
case TH_ERROR:
return (const char *) "ERROR";
case TH_STOP:
return (const char *) "STOP";
case TH_FOUND_KEY:
return (const char *) "FOUND_KEY";
case TH_END:
@ -274,11 +466,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) {
size_t z;
int m_ret = 0;
int c_ret = 0;
for (z = 0; z < ctx->thread_count; z++) {
m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]);
int m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]);
if (m_ret != 0) {
tprintf("[%zu] [%s] Error: pthread_mutex_lock() failed (%d): %s\n", z, __func__, m_ret, strerror(m_ret));
}
@ -296,10 +487,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) {
}
#if DEBUGME > 0
tprintf("[%zu] [%s] Set thread status to TH_STOP\n", z, __func__);
tprintf("[%zu] [%s] Set thread status to TH_END\n", z, __func__);
#endif
t_arg[z].status = TH_STOP;
t_arg[z].status = TH_END;
if (tmp == TH_WAIT) {
#if DEBUGME > 0
@ -366,7 +557,7 @@ void *computing_process(void *arg) {
if (!ctx->force_hitag2_opencl) {
#if DEBUGME >= 2
printf("[slave][%zu] master, I found %5u candidates @ slice %zu\n", z, matches_found[0], a->slice + 1);
printf("[%s][%zu] master, I found %5u candidates @ slice %zu\n", __func__, z, matches_found[0], a->slice + 1);
fflush(stdout);
#endif
@ -378,7 +569,7 @@ void *computing_process(void *arg) {
// the OpenCL kernel return only one key if found, else nothing
#if TDEBUG >= 1
printf("[slave][%zu] master, I found the key @ slice %zu\n", z, a->slice + 1);
printf("[%s][%zu] master, I found the key @ slice %zu\n", __func__, z, a->slice + 1);
fflush(stdout);
#endif
@ -400,32 +591,31 @@ void *computing_process_async(void *arg) {
// fetching data from thread struct, I hope they are good
thread_status_t status = a->status;
uint64_t *matches = a->matches;
uint32_t *matches_found = a->matches_found;
uint32_t uid = a->uid;
uint32_t aR2 = a->aR2;
uint32_t nR1 = a->nR1;
uint32_t nR2 = a->nR2;
uint64_t *matches = a->matches;
uint32_t *matches_found = a->matches_found;
size_t max_step = a->max_step;
size_t max_slices = a->max_slices;
opencl_ctx_t *ctx = a->ocl_ctx;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
uint64_t off = 0;
// size_t slice = 0;
int ret = 0;
if (status == TH_START) {
#if TDEBUG >= 1
printf("[slave][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status));
printf("[%s][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", __func__, z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status));
#endif
status = TH_WAIT;
// proceed to next
}
do { // slave
do {
if (status == TH_WAIT) {
pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]);
@ -433,7 +623,7 @@ void *computing_process_async(void *arg) {
if (a->status == TH_END) { // other threads found the key
fflush(stdout);
status = TH_END;
//status = TH_END;
a->quit = true;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
pthread_exit(NULL);
@ -444,7 +634,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slate][%zu] after pthread_cond_signal TH_WAIT\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z);
fflush(stdout);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
@ -452,7 +642,7 @@ void *computing_process_async(void *arg) {
}
#if TDEBUG >= 1
printf("[slave][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", z, thread_status_strdesc(status));
printf("[%s][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", __func__, z, thread_status_strdesc(status));
fflush(stdout);
#endif
@ -461,7 +651,7 @@ void *computing_process_async(void *arg) {
status = a->status; // read new status from master
#if TDEBUG >= 2
printf("[slave][%zu] master, got the signal with new state: %s.\n", z, thread_status_strdesc(status));
printf("[%s][%zu] master, got the signal with new state: %s.\n", __func__, z, thread_status_strdesc(status));
fflush(stdout);
#endif
@ -469,7 +659,7 @@ void *computing_process_async(void *arg) {
if (status == TH_WAIT) {
#if TDEBUG >=1
printf("[slave] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n");
printf("[%s] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n", __func__);
fflush(stdout);
#endif
break;
@ -478,7 +668,7 @@ void *computing_process_async(void *arg) {
if (status == TH_ERROR) {
#if TDEBUG >= 1
printf("[slave][%zu] master, got error signal, proceed with exit\n", z);
printf("[%s][%zu] master, got error signal, proceed with exit\n", __func__, z);
fflush(stdout);
#endif
pthread_exit(NULL);
@ -486,7 +676,7 @@ void *computing_process_async(void *arg) {
if (status == TH_PROCESSING) {
#if TDEBUG >= 2
printf("[slave][%zu] master, got a work-unit, processing ...\n", z);
printf("[%s][%zu] master, got a work-unit, processing ...\n", __func__, z);
fflush(stdout);
#endif
@ -521,7 +711,7 @@ void *computing_process_async(void *arg) {
a->status = TH_ERROR;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf("[slave][%zu] master, something is broken, exit\n", z);
printf("[%s][%zu] master, something is broken, exit\n", __func__, z);
fflush(stdout);
#endif
@ -529,7 +719,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_ERROR\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_ERROR\n", __func__, z);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
}
@ -539,12 +729,12 @@ void *computing_process_async(void *arg) {
}
#if TDEBUG >= 1
printf("[slave][%zu] master, process is done but no candidates found\n", z);
printf("[%s][%zu] master, process is done but no candidates found\n", __func__, z);
fflush(stdout);
#endif
pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]);
if (a->slice >= max_step) a->status = TH_END;
if (a->slice >= max_slices) a->status = TH_END;
else a->status = TH_WAIT;
status = a->status;
@ -555,7 +745,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_WAIT\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z);
fflush(stdout);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
@ -566,7 +756,7 @@ void *computing_process_async(void *arg) {
if (!ctx->force_hitag2_opencl) {
#if TDEBUG >= 1
printf("[slave][%zu] master, we got %5u candidates. Proceed to validation\n", z, matches_found[0]);
printf("[%s][%zu] master, we got %5u candidates. Proceed to validation\n", __func__, z, matches_found[0]);
fflush(stdout);
#endif
@ -576,7 +766,7 @@ void *computing_process_async(void *arg) {
a->status = TH_END;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf("[slave][%zu] master, Another thread found the key, quit 2 \n", z);
printf("[%s][%zu] master, Another thread found the key, quit 2 \n", __func__, z);
fflush(stdout);
#endif
@ -584,7 +774,8 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_END\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z);
fflush (stdout);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
}
@ -600,7 +791,7 @@ void *computing_process_async(void *arg) {
a->quit = true;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf("[slave][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", z, a->s, a->slice + 1);
printf("[%s][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", __func__, z, a->s, a->slice + 1);
fflush(stdout);
#endif
@ -608,7 +799,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
}
@ -622,7 +813,7 @@ void *computing_process_async(void *arg) {
a->status = TH_END;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf("[slave][%zu] master, Another thread found the key, quit 1 \n", z);
printf("[%s][%zu] master, Another thread found the key, quit 1 \n", __func__, z);
fflush(stdout);
#endif
@ -630,7 +821,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_END\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
}
@ -651,7 +842,7 @@ void *computing_process_async(void *arg) {
a->quit = true;
pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf("[slave][%zu] master, I found the key at slice %zu\n", z, a->slice + 1);
printf("[%s][%zu] master, I found the key at slice %zu\n", __func__, z, a->slice + 1);
fflush(stdout);
#endif
@ -659,7 +850,7 @@ void *computing_process_async(void *arg) {
pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z);
#endif
pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep);
}
@ -671,10 +862,10 @@ void *computing_process_async(void *arg) {
if (status >= TH_FOUND_KEY) {
#if TDEBUG >= 1
if (status == TH_FOUND_KEY) {
printf("[slave][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", z);
printf("[%s][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", __func__, z);
fflush(stdout);
} else if (status == TH_END) {
printf("[slave][%zu] master, TH_END reached\n", z);
printf("[%s][%zu] master, TH_END reached\n", __func__, z);
fflush(stdout);
}
#endif

View file

@ -38,7 +38,6 @@ typedef enum thread_status {
TH_START = 0,
TH_WAIT,
TH_PROCESSING,
TH_STOP,
TH_ERROR,
TH_FOUND_KEY,
TH_END
@ -66,7 +65,8 @@ typedef enum thread_error {
THREAD_ERROR_MUTEX_USLEEP = -11,
THREAD_ERROR_COND_USLEEP = -12,
THREAD_ERROR_GENERIC = -13,
THREAD_ERROR_ALLOC = -14
THREAD_ERROR_ALLOC = -14,
THREAD_ERROR_INTERNAL = -15
} thread_error_t;
@ -92,6 +92,7 @@ typedef struct threads_ctx {
pthread_attr_t attr;
pthread_mutexattr_t mutex_attr;
unsigned char pad3[4];
} thread_ctx_t;
// used by threads engine
@ -105,13 +106,13 @@ typedef struct thread_arg {
bool r;
bool err;
bool quit;
bool async;
unsigned char pad2[1];
uint64_t off;
uint64_t *matches;
uint32_t *matches_found;
size_t slice;
size_t max_step;
size_t max_slices;
size_t device_id;
uint64_t key;
@ -124,11 +125,12 @@ typedef struct thread_arg {
int thread_init(thread_ctx_t *ctx, short type, size_t thread_count);
int thread_start(thread_ctx_t *ctx, thread_args_t *args);
int thread_stop(thread_ctx_t *ctx);
int thread_start_scheduler(thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx);
bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg);
void tprintf(const char *restrict format, ...);
const char *thread_strerror(int error);
const char *thread_status_strdesc(thread_status_t s);
bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg);
void *computing_process(void *arg);
void *computing_process_async(void *arg);