add crack5opencl

This commit is contained in:
Gabriele Gristina 2021-01-13 23:37:10 +01:00
parent 2646925adf
commit 6403ef679a
40 changed files with 14103 additions and 5 deletions

@ -5,7 +5,7 @@ include ../../Makefile.defs
all clean install uninstall check: %: crack2/% crack3/% crack4/% crack5/%
ifneq ($(SKIPGPU),1)
all clean install uninstall check: %: crack5gpu/%
all clean install uninstall check: %: crack5gpu/% crack5opencl/%
endif
crack2/%: FORCE
@ -28,6 +28,10 @@ crack5gpu/%: FORCE
$(info [*] MAKE $@)
$(Q)$(MAKE) --no-print-directory -C crack5gpu $(patsubst crack5gpu/%,%,$@) DESTDIR=$(MYDESTDIR)
crack5opencl/%: FORCE
$(info [*] MAKE $@)
$(Q)$(MAKE) --no-print-directory -C crack5opencl $(patsubst crack5opencl/%,%,$@) DESTDIR=$(MYDESTDIR)
FORCE: # Dummy target to force remake in the subdirectories, even if files exist (this Makefile doesn't know about the prerequisites)
.phony: crack2 crack3 crack4 crack5 crack5gpu FORCE
.phony: crack2 crack3 crack4 crack5 crack5gpu crack5opencl FORCE

@ -5,6 +5,7 @@ Authors:
* Attacks 1, 2, 3, 4 : Kevin Sheldrake <kev@headhacking.com>
* Attacks 5, 5gpu : anonymous, based on https://github.com/factoritbv/hitag2hell by FactorIT B.V.
* Attacks 5, 5opencl : Gabriele Gristina <gabriele.gristina@gmail.com>, based on 5gpu
Introduction
------------
@ -205,8 +206,8 @@ Stop once you got two pairs.
$ ./ht2crack5 <UID> <nR1> <aR1> <nR2> <aR2>
```
Usage details: Attack 5gpu
--------------------------
Usage details: Attack 5gpu/5opencl
----------------------------------
Attack 5gpu requires two encrypted nonce and challenge
response value pairs (nR, aR) for the tag's UID.
@ -220,6 +221,14 @@ Stop once you got two pairs.
$ ./ht2crack5gpu <UID> <nR1> <aR1> <nR2> <aR2>
```
Attack 5opencl requires the same of 5gpu version.
```
$ ./ht2crack5opencl <UID> <nR1> <aR1> <nR2> <aR2>
```
More details in crack5opencl/README.md
Usage details: Next steps
-------------------------

File diff suppressed because it is too large Load Diff

@ -0,0 +1,128 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( push )
#pragma warning( disable : 4201 )
#endif
#endif
#include <d3d10.h>
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_nv */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_nv */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

@ -0,0 +1,128 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( push )
#pragma warning( disable : 4201 )
#endif
#endif
#include <d3d11.h>
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

@ -0,0 +1,118 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

@ -0,0 +1,170 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_dx9_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <d3d9.h>
#include <dxvahd.h>
#include <wtypes.h>
#include <d3d9types.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_dx9_media_sharing extension *
****************************************/
#define cl_intel_dx9_media_sharing 1
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
/* error codes */
#define CL_INVALID_DX9_DEVICE_INTEL -1010
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL 0x4022
#define CL_D3D9EX_DEVICE_INTEL 0x4070
#define CL_DXVA_DEVICE_INTEL 0x4071
/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
/******************************************************************************/
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */

@ -0,0 +1,120 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

@ -0,0 +1,907 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
/* cl_khr_fp64 extension - no extension #define since it has no functions */
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
#if CL_TARGET_OPENCL_VERSION <= 110
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#endif
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
void (* pfn_notify)(cl_mem memobj, void * user_data),
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint num_entries,
cl_platform_id * platforms,
cl_uint * num_platforms);
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
cl_platform_id * platforms,
cl_uint * num_platforms);
/*******************************
* cl_khr_il_program extension *
*******************************/
#define cl_khr_il_program 1
/* New property to clGetDeviceInfo for retrieving supported intermediate
* languages
*/
#define CL_DEVICE_IL_VERSION_KHR 0x105B
/* New property to clGetProgramInfo for retrieving for retrieving the IL of a
* program
*/
#define CL_PROGRAM_IL_KHR 0x1169
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithILKHR(cl_context context,
const void * il,
size_t length,
cl_int * errcode_ret);
typedef CL_API_ENTRY cl_program
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
const void * il,
size_t length,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Extension: cl_khr_image2d_from_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without
* a copy. The type associated with a 2D image created from a buffer in an
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
* built-in functions are supported for 2D images and 2D images created from
* a buffer. Similarly, the write_image built-ins are also supported for 2D
* images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the
* width, height, image format (i.e. channel order and channel data type)
* and optionally the row pitch.
*
* The pitch specified must be a multiple of
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
* The base address of the buffer must be aligned to
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
*/
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
/**************************************
* cl_khr_initialize_memory extension *
**************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_CONTEXT_TERMINATED_KHR -1121
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
#define CL_CONTEXT_TERMINATE_KHR 0x2032
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/*****************************************
* cl_khr_create_command_queue extension *
*****************************************/
#define cl_khr_create_command_queue 1
typedef cl_properties cl_queue_properties_khr;
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithPropertiesKHR(cl_context context,
cl_device_id device,
const cl_queue_properties_khr* properties,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_command_queue
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
cl_device_id device,
const cl_queue_properties_khr* properties,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030
#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031
#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033
#define CL_DEVICE_PCIE_ID_AMD 0x4034
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
/***********************************
* cl_ext_device_fission extension
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT(cl_device_id in_device,
const cl_device_partition_property_ext * properties,
cl_uint num_entries,
cl_device_id * out_devices,
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
const cl_device_partition_property_ext * properties,
cl_uint num_entries,
cl_device_id * out_devices,
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/***********************************
* cl_ext_migrate_memobject extension definitions
***********************************/
#define cl_ext_migrate_memobject 1
typedef cl_bitfield cl_mem_migration_flags_ext;
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
cl_uint num_mem_objects,
const cl_mem * mem_objects,
cl_mem_migration_flags_ext flags,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
cl_uint num_mem_objects,
const cl_mem * mem_objects,
cl_mem_migration_flags_ext flags,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
/*********************************
* cl_ext_cxx_for_opencl extension
*********************************/
#define cl_ext_cxx_for_opencl 1
#define CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define cl_qcom_ext_host_ptr 1
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*******************************************
* cl_qcom_ext_host_ptr_iocoherent extension
********************************************/
/* Cache policy specifying io-coherence */
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
/*********************************
* cl_qcom_android_native_buffer_host_ptr extension
*********************************/
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
typedef struct _cl_mem_android_native_buffer_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
cl_mem_ext_host_ptr ext_host_ptr;
/* Virtual pointer to the android native buffer */
void* anb_ptr;
} cl_mem_android_native_buffer_host_ptr;
/******************************************
* cl_img_yuv_image extension *
******************************************/
/* Image formats used in clCreateImage */
#define CL_NV21_IMG 0x40D0
#define CL_YV12_IMG 0x40D1
/******************************************
* cl_img_cached_allocations extension *
******************************************/
/* Flag values used by clCreateBuffer */
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
/******************************************
* cl_img_use_gralloc_ptr extension *
******************************************/
#define cl_img_use_gralloc_ptr 1
/* Flag values used by clCreateBuffer */
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
/* To be used by clGetEventInfo: */
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
/* Error codes from clEnqueueAcquireGrallocObjectsIMG and clEnqueueReleaseGrallocObjectsIMG */
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
#define CL_INVALID_GRALLOC_OBJECT_IMG 0x40D5
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_img_generate_mipmap extension *
******************************************/
#define cl_img_generate_mipmap 1
typedef cl_uint cl_mipmap_filter_mode_img;
/* To be used by clEnqueueGenerateMipmapIMG */
#define CL_MIPMAP_FILTER_ANY_IMG 0x0
#define CL_MIPMAP_FILTER_BOX_IMG 0x1
/* To be used by clGetEventInfo */
#define CL_COMMAND_GENERATE_MIPMAP_IMG 0x40D6
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueGenerateMipmapIMG(cl_command_queue command_queue,
cl_mem src_image,
cl_mem dst_image,
cl_mipmap_filter_mode_img mipmap_filter_mode,
const size_t *array_region,
const size_t *mip_region,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_img_mem_properties extension *
******************************************/
#define cl_img_mem_properties 1
/* To be used by clCreateBufferWithProperties */
#define CL_MEM_ALLOC_FLAGS_IMG 0x40D7
/* To be used wiith the CL_MEM_ALLOC_FLAGS_IMG property */
typedef cl_bitfield cl_mem_alloc_flags_img;
/* To be used with cl_mem_alloc_flags_img */
#define CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG (1 << 0)
/*********************************
* cl_khr_subgroups extension
*********************************/
#define cl_khr_subgroups 1
#if !defined(CL_VERSION_2_1)
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
In hindsight, there should have been a khr suffix on this type for
the extension, but keeping it un-suffixed to maintain backwards
compatibility. */
typedef cl_uint cl_kernel_sub_group_info;
#endif
/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
cl_device_id in_device,
cl_kernel_sub_group_info param_name,
size_t input_value_size,
const void * input_value,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
cl_device_id in_device,
cl_kernel_sub_group_info param_name,
size_t input_value_size,
const void * input_value,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
/*********************************
* cl_khr_mipmap_image extension
*********************************/
/* cl_sampler_properties */
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
/*********************************
* cl_khr_priority_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_priority_hints 1
typedef cl_uint cl_queue_priority_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR 0x1096
/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
/*********************************
* cl_khr_throttle_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_throttle_hints 1
typedef cl_uint cl_queue_throttle_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR 0x1097
/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
/*********************************
* cl_khr_subgroup_named_barrier
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_subgroup_named_barrier 1
/* cl_device_info */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
/*********************************
* cl_khr_extended_versioning
*********************************/
#define cl_khr_extended_versioning 1
#define CL_VERSION_MAJOR_BITS_KHR (10)
#define CL_VERSION_MINOR_BITS_KHR (10)
#define CL_VERSION_PATCH_BITS_KHR (12)
#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1)
#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1)
#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1)
#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR))
#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR)
#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR)
#define CL_MAKE_VERSION_KHR(major, minor, patch) \
((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \
(((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \
((patch) & CL_VERSION_PATCH_MASK_KHR))
typedef cl_uint cl_version_khr;
#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64
typedef struct _cl_name_version_khr
{
cl_version_khr version;
char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR];
} cl_name_version_khr;
/* cl_platform_info */
#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906
#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907
/* cl_device_info */
#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E
#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F
#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060
#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061
#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062
/*********************************
* cl_khr_device_uuid extension
*********************************/
#define cl_khr_device_uuid 1
#define CL_UUID_SIZE_KHR 16
#define CL_LUID_SIZE_KHR 8
#define CL_DEVICE_UUID_KHR 0x106A
#define CL_DRIVER_UUID_KHR 0x106B
#define CL_DEVICE_LUID_VALID_KHR 0x106C
#define CL_DEVICE_LUID_KHR 0x106D
#define CL_DEVICE_NODE_MASK_KHR 0x106E
/**********************************
* cl_arm_import_memory extension *
**********************************/
#define cl_arm_import_memory 1
typedef intptr_t cl_import_properties_arm;
/* Default and valid proporties name for cl_arm_import_memory */
#define CL_IMPORT_TYPE_ARM 0x40B2
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
/* Protected memory property */
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2
/* Data consistency with host property */
#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3
/* Import memory size value to indicate a size for the whole buffer */
#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX
/* This extension adds a new function that allows for direct memory import into
* OpenCL via the clImportMemoryARM function.
*
* Memory imported through this interface will be mapped into the device's page
* tables directly, providing zero copy access. It will never fall back to copy
* operations and aliased buffers.
*
* Types of memory supported for import are specified as additional extension
* strings.
*
* This extension produces cl_mem allocations which are compatible with all other
* users of cl_mem in the standard API.
*
* This extension maps pages with the same properties as the normal buffer creation
* function clCreateBuffer.
*/
extern CL_API_ENTRY cl_mem CL_API_CALL
clImportMemoryARM( cl_context context,
cl_mem_flags flags,
const cl_import_properties_arm *properties,
void *memory,
size_t size,
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
/******************************************
* cl_arm_shared_virtual_memory extension *
******************************************/
#define cl_arm_shared_virtual_memory 1
/* Used by clGetDeviceInfo */
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
/* Used by clGetMemObjectInfo */
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
/* Used by clSetKernelExecInfoARM: */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
/* To be used by clGetEventInfo: */
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
/* Flag values used by clSVMAllocARM: */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
typedef cl_bitfield cl_svm_mem_flags_arm;
typedef cl_uint cl_kernel_exec_info_arm;
typedef cl_bitfield cl_device_svm_capabilities_arm;
extern CL_API_ENTRY void * CL_API_CALL
clSVMAllocARM(cl_context context,
cl_svm_mem_flags_arm flags,
size_t size,
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY void CL_API_CALL
clSVMFreeARM(cl_context context,
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFreeARM(cl_command_queue command_queue,
cl_uint num_svm_pointers,
void * svm_pointers[],
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
cl_uint num_svm_pointers,
void * svm_pointers[],
void * user_data),
void * user_data,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
cl_bool blocking_copy,
void * dst_ptr,
const void * src_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
void * svm_ptr,
const void * pattern,
size_t pattern_size,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMapARM(cl_command_queue command_queue,
cl_bool blocking_map,
cl_map_flags flags,
void * svm_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
void * svm_ptr,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointerARM(cl_kernel kernel,
cl_uint arg_index,
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfoARM(cl_kernel kernel,
cl_kernel_exec_info_arm param_name,
size_t param_value_size,
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
/********************************
* cl_arm_get_core_id extension *
********************************/
#ifdef CL_VERSION_1_2
#define cl_arm_get_core_id 1
/* Device info property for bitfield of cores present */
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
#endif /* CL_VERSION_1_2 */
/*********************************
* cl_arm_job_slot_selection
*********************************/
#define cl_arm_job_slot_selection 1
/* cl_device_info */
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
/* cl_command_queue_properties */
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
/*********************************
* cl_arm_scheduling_controls
*********************************/
#define cl_arm_scheduling_controls 1
/* cl_device_info */
#define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM 0x41E4
#define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM (1 << 0)
#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM (1 << 1)
#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2)
/* cl_kernel_info */
#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5
#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6
/* cl_queue_properties */
#define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

@ -0,0 +1,731 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2020 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_ext_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __CL_EXT_INTEL_H
#define __CL_EXT_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_thread_local_exec extension *
****************************************/
#define cl_intel_thread_local_exec 1
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
/***********************************************
* cl_intel_device_partition_by_names extension *
************************************************/
#define cl_intel_device_partition_by_names 1
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
/************************************************
* cl_intel_accelerator extension *
* cl_intel_motion_estimation extension *
* cl_intel_advanced_motion_estimation extension *
*************************************************/
#define cl_intel_accelerator 1
#define cl_intel_motion_estimation 1
#define cl_intel_advanced_motion_estimation 1
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
typedef cl_uint cl_accelerator_type_intel;
typedef cl_uint cl_accelerator_info_intel;
typedef struct _cl_motion_estimation_desc_intel {
cl_uint mb_block_type;
cl_uint subpixel_mode;
cl_uint sad_adjust_mode;
cl_uint search_path_type;
} cl_motion_estimation_desc_intel;
/* error codes */
#define CL_INVALID_ACCELERATOR_INTEL -1094
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
/* cl_accelerator_type_intel */
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
/* cl_accelerator_info_intel */
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
/* cl_motion_detect_desc_intel flags */
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
/* cl_device_info */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
#define CL_ME_VERSION_LEGACY_INTEL 0x0
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
cl_context context,
cl_accelerator_type_intel accelerator_type,
size_t descriptor_size,
const void* descriptor,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
cl_context context,
cl_accelerator_type_intel accelerator_type,
size_t descriptor_size,
const void* descriptor,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetAcceleratorInfoINTEL(
cl_accelerator_intel accelerator,
cl_accelerator_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
cl_accelerator_intel accelerator,
cl_accelerator_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainAcceleratorINTEL(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseAcceleratorINTEL(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_intel_simultaneous_sharing extension *
*******************************************/
#define cl_intel_simultaneous_sharing 1
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
/***********************************
* cl_intel_egl_image_yuv extension *
************************************/
#define cl_intel_egl_image_yuv 1
#define CL_EGL_YUV_PLANE_INTEL 0x4107
/********************************
* cl_intel_packed_yuv extension *
*********************************/
#define cl_intel_packed_yuv 1
#define CL_YUYV_INTEL 0x4076
#define CL_UYVY_INTEL 0x4077
#define CL_YVYU_INTEL 0x4078
#define CL_VYUY_INTEL 0x4079
/********************************************
* cl_intel_required_subgroup_size extension *
*********************************************/
#define cl_intel_required_subgroup_size 1
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
/****************************************
* cl_intel_driver_diagnostics extension *
*****************************************/
#define cl_intel_driver_diagnostics 1
typedef cl_uint cl_diagnostics_verbose_level;
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
/********************************
* cl_intel_planar_yuv extension *
*********************************/
#define CL_NV12_INTEL 0x410E
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
/*******************************************************
* cl_intel_device_side_avc_motion_estimation extension *
********************************************************/
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
/*******************************************
* cl_intel_unified_shared_memory extension *
********************************************/
/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */
#define cl_intel_unified_shared_memory 1
/* cl_device_info */
#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191
#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192
#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1)
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2)
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3)
typedef cl_properties cl_mem_properties_intel;
/* cl_mem_properties_intel */
#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195
typedef cl_bitfield cl_mem_alloc_flags_intel;
/* cl_mem_alloc_flags_intel - bitfield */
#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0)
typedef cl_uint cl_mem_info_intel;
/* cl_mem_alloc_info_intel */
#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
/* Enum values 0x419E-0x419F are reserved for future queries. */
typedef cl_uint cl_unified_shared_memory_type_intel;
/* cl_unified_shared_memory_type_intel */
#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196
#define CL_MEM_TYPE_HOST_INTEL 0x4197
#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
#define CL_MEM_TYPE_SHARED_INTEL 0x4199
typedef cl_uint cl_mem_advice_intel;
/* cl_mem_advice_intel */
/* Enum values 0x4208-0x420F are reserved for future memory advices. */
/* cl_kernel_exec_info */
#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200
#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201
#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202
#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203
/* cl_command_type */
#define CL_COMMAND_MEMFILL_INTEL 0x4204
#define CL_COMMAND_MEMCPY_INTEL 0x4205
#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
#define CL_COMMAND_MEMADVISE_INTEL 0x4207
extern CL_API_ENTRY void* CL_API_CALL
clHostMemAllocINTEL(
cl_context context,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
typedef CL_API_ENTRY void* (CL_API_CALL *
clHostMemAllocINTEL_fn)(
cl_context context,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
extern CL_API_ENTRY void* CL_API_CALL
clDeviceMemAllocINTEL(
cl_context context,
cl_device_id device,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
typedef CL_API_ENTRY void* (CL_API_CALL *
clDeviceMemAllocINTEL_fn)(
cl_context context,
cl_device_id device,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
extern CL_API_ENTRY void* CL_API_CALL
clSharedMemAllocINTEL(
cl_context context,
cl_device_id device,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
typedef CL_API_ENTRY void* (CL_API_CALL *
clSharedMemAllocINTEL_fn)(
cl_context context,
cl_device_id device,
const cl_mem_properties_intel* properties,
size_t size,
cl_uint alignment,
cl_int* errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clMemFreeINTEL(
cl_context context,
void* ptr);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clMemFreeINTEL_fn)(
cl_context context,
void* ptr);
extern CL_API_ENTRY cl_int CL_API_CALL
clMemBlockingFreeINTEL(
cl_context context,
void* ptr);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clMemBlockingFreeINTEL_fn)(
cl_context context,
void* ptr);
extern CL_API_ENTRY cl_int CL_API_CALL
clGetMemAllocInfoINTEL(
cl_context context,
const void* ptr,
cl_mem_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clGetMemAllocInfoINTEL_fn)(
cl_context context,
const void* ptr,
cl_mem_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgMemPointerINTEL(
cl_kernel kernel,
cl_uint arg_index,
const void* arg_value);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clSetKernelArgMemPointerINTEL_fn)(
cl_kernel kernel,
cl_uint arg_index,
const void* arg_value);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMemsetINTEL( /* Deprecated */
cl_command_queue command_queue,
void* dst_ptr,
cl_int value,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clEnqueueMemsetINTEL_fn)( /* Deprecated */
cl_command_queue command_queue,
void* dst_ptr,
cl_int value,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMemFillINTEL(
cl_command_queue command_queue,
void* dst_ptr,
const void* pattern,
size_t pattern_size,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clEnqueueMemFillINTEL_fn)(
cl_command_queue command_queue,
void* dst_ptr,
const void* pattern,
size_t pattern_size,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMemcpyINTEL(
cl_command_queue command_queue,
cl_bool blocking,
void* dst_ptr,
const void* src_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clEnqueueMemcpyINTEL_fn)(
cl_command_queue command_queue,
cl_bool blocking,
void* dst_ptr,
const void* src_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
#ifdef CL_VERSION_1_2
/* Because these APIs use cl_mem_migration_flags, they require
OpenCL 1.2: */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemINTEL(
cl_command_queue command_queue,
const void* ptr,
size_t size,
cl_mem_migration_flags flags,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clEnqueueMigrateMemINTEL_fn)(
cl_command_queue command_queue,
const void* ptr,
size_t size,
cl_mem_migration_flags flags,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMemAdviseINTEL(
cl_command_queue command_queue,
const void* ptr,
size_t size,
cl_mem_advice_intel advice,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *
clEnqueueMemAdviseINTEL_fn)(
cl_command_queue command_queue,
const void* ptr,
size_t size,
cl_mem_advice_intel advice,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event);
/***************************************************
* cl_intel_create_buffer_with_properties extension *
****************************************************/
#define cl_intel_create_buffer_with_properties 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBufferWithPropertiesINTEL(
cl_context context,
const cl_mem_properties_intel* properties,
cl_mem_flags flags,
size_t size,
void * host_ptr,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *
clCreateBufferWithPropertiesINTEL_fn)(
cl_context context,
const cl_mem_properties_intel* properties,
cl_mem_flags flags,
size_t size,
void * host_ptr,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
/******************************************
* cl_intel_mem_channel_property extension *
*******************************************/
#define CL_MEM_CHANNEL_INTEL 0x4213
/*********************************
* cl_intel_mem_force_host_memory *
**********************************/
#define cl_intel_mem_force_host_memory 1
/* cl_mem_flags */
#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20)
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_INTEL_H */

@ -0,0 +1,159 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#ifdef CL_VERSION_1_2
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
#endif
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#ifdef CL_VERSION_1_2
#define CL_GL_NUM_SAMPLES 0x2012
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint bufobj,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint renderbuffer,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem memobj,
cl_gl_object_type * gl_object_type,
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem memobj,
cl_gl_texture_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

@ -0,0 +1,40 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl_gl.h>
/*
* cl_khr_gl_event extension
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context context,
cl_GLsync sync,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

@ -0,0 +1,440 @@
/*******************************************************************************
* Copyright (c) 2019-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
/**
* This is a header-only utility library that provides OpenCL host code with
* routines for converting to/from cl_half values.
*
* Example usage:
*
* #include <CL/cl_half.h>
* ...
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
* cl_float f = cl_half_to_float(h);
*/
#ifndef OPENCL_CL_HALF_H
#define OPENCL_CL_HALF_H
#include <CL/cl_platform.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Rounding mode used when converting to cl_half.
*/
typedef enum
{
CL_HALF_RTE, // round to nearest even
CL_HALF_RTZ, // round towards zero
CL_HALF_RTP, // round towards positive infinity
CL_HALF_RTN, // round towards negative infinity
} cl_half_rounding_mode;
/* Private utility macros. */
#define CL_HALF_EXP_MASK 0x7C00
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
/*
* Utility to deal with values that overflow when converting to half precision.
*/
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
uint16_t sign)
{
if (rounding_mode == CL_HALF_RTZ)
{
// Round overflow towards zero -> largest finite number (preserving sign)
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
}
else if (rounding_mode == CL_HALF_RTP && sign)
{
// Round negative overflow towards positive infinity -> most negative finite number
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
}
else if (rounding_mode == CL_HALF_RTN && !sign)
{
// Round positive overflow towards negative infinity -> largest finite number
return CL_HALF_MAX_FINITE_MAG;
}
// Overflow to infinity
return (sign << 15) | CL_HALF_EXP_MASK;
}
/*
* Utility to deal with values that underflow when converting to half precision.
*/
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
uint16_t sign)
{
if (rounding_mode == CL_HALF_RTP && !sign)
{
// Round underflow towards positive infinity -> smallest positive value
return (sign << 15) | 1;
}
else if (rounding_mode == CL_HALF_RTN && sign)
{
// Round underflow towards negative infinity -> largest negative value
return (sign << 15) | 1;
}
// Flush to zero
return (sign << 15);
}
/**
* Convert a cl_float to a cl_half.
*/
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
{
// Type-punning to get direct access to underlying bits
union
{
cl_float f;
uint32_t i;
} f32;
f32.f = f;
// Extract sign bit
uint16_t sign = f32.i >> 31;
// Extract FP32 exponent and mantissa
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
// Remove FP32 exponent bias
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
// Add FP16 exponent bias
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
// Position of the bit that will become the FP16 mantissa LSB
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
// Check for NaN / infinity
if (f_exp == 0xFF)
{
if (f_mant)
{
// NaN -> propagate mantissa and silence it
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
h_mant |= 0x200;
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
}
else
{
// Infinity -> zero mantissa
return (sign << 15) | CL_HALF_EXP_MASK;
}
}
// Check for zero
if (!f_exp && !f_mant)
{
return (sign << 15);
}
// Check for overflow
if (exp >= CL_HALF_MAX_EXP)
{
return cl_half_handle_overflow(rounding_mode, sign);
}
// Check for underflow
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
{
return cl_half_handle_underflow(rounding_mode, sign);
}
// Check for value that will become denormal
if (exp < -14)
{
// Denormal -> include the implicit 1 from the FP32 mantissa
h_exp = 0;
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
// Mantissa shift amount depends on exponent
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
}
// Generate FP16 mantissa by shifting FP32 mantissa
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
// Check whether we need to round
uint32_t halfway = 1 << (lsb_pos - 1);
uint32_t mask = (halfway << 1) - 1;
switch (rounding_mode)
{
case CL_HALF_RTE:
if ((f_mant & mask) > halfway)
{
// More than halfway -> round up
h_mant += 1;
}
else if ((f_mant & mask) == halfway)
{
// Exactly halfway -> round to nearest even
if (h_mant & 0x1)
h_mant += 1;
}
break;
case CL_HALF_RTZ:
// Mantissa has already been truncated -> do nothing
break;
case CL_HALF_RTP:
if ((f_mant & mask) && !sign)
{
// Round positive numbers up
h_mant += 1;
}
break;
case CL_HALF_RTN:
if ((f_mant & mask) && sign)
{
// Round negative numbers down
h_mant += 1;
}
break;
}
// Check for mantissa overflow
if (h_mant & 0x400)
{
h_exp += 1;
h_mant = 0;
}
return (sign << 15) | (h_exp << 10) | h_mant;
}
/**
* Convert a cl_double to a cl_half.
*/
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
{
// Type-punning to get direct access to underlying bits
union
{
cl_double d;
uint64_t i;
} f64;
f64.d = d;
// Extract sign bit
uint16_t sign = f64.i >> 63;
// Extract FP64 exponent and mantissa
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
// Remove FP64 exponent bias
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
// Add FP16 exponent bias
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
// Position of the bit that will become the FP16 mantissa LSB
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
// Check for NaN / infinity
if (d_exp == 0x7FF)
{
if (d_mant)
{
// NaN -> propagate mantissa and silence it
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
h_mant |= 0x200;
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
}
else
{
// Infinity -> zero mantissa
return (sign << 15) | CL_HALF_EXP_MASK;
}
}
// Check for zero
if (!d_exp && !d_mant)
{
return (sign << 15);
}
// Check for overflow
if (exp >= CL_HALF_MAX_EXP)
{
return cl_half_handle_overflow(rounding_mode, sign);
}
// Check for underflow
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
{
return cl_half_handle_underflow(rounding_mode, sign);
}
// Check for value that will become denormal
if (exp < -14)
{
// Include the implicit 1 from the FP64 mantissa
h_exp = 0;
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
// Mantissa shift amount depends on exponent
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
}
// Generate FP16 mantissa by shifting FP64 mantissa
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
// Check whether we need to round
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
uint64_t mask = (halfway << 1) - 1;
switch (rounding_mode)
{
case CL_HALF_RTE:
if ((d_mant & mask) > halfway)
{
// More than halfway -> round up
h_mant += 1;
}
else if ((d_mant & mask) == halfway)
{
// Exactly halfway -> round to nearest even
if (h_mant & 0x1)
h_mant += 1;
}
break;
case CL_HALF_RTZ:
// Mantissa has already been truncated -> do nothing
break;
case CL_HALF_RTP:
if ((d_mant & mask) && !sign)
{
// Round positive numbers up
h_mant += 1;
}
break;
case CL_HALF_RTN:
if ((d_mant & mask) && sign)
{
// Round negative numbers down
h_mant += 1;
}
break;
}
// Check for mantissa overflow
if (h_mant & 0x400)
{
h_exp += 1;
h_mant = 0;
}
return (sign << 15) | (h_exp << 10) | h_mant;
}
/**
* Convert a cl_half to a cl_float.
*/
static inline cl_float cl_half_to_float(cl_half h)
{
// Type-punning to get direct access to underlying bits
union
{
cl_float f;
uint32_t i;
} f32;
// Extract sign bit
uint16_t sign = h >> 15;
// Extract FP16 exponent and mantissa
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = h & 0x3FF;
// Remove FP16 exponent bias
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
// Add FP32 exponent bias
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
// Check for NaN / infinity
if (h_exp == 0x1F)
{
if (h_mant)
{
// NaN -> propagate mantissa and silence it
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
f_mant |= 0x400000;
f32.i = (sign << 31) | 0x7F800000 | f_mant;
return f32.f;
}
else
{
// Infinity -> zero mantissa
f32.i = (sign << 31) | 0x7F800000;
return f32.f;
}
}
// Check for zero / denormal
if (h_exp == 0)
{
if (h_mant == 0)
{
// Zero -> zero exponent
f_exp = 0;
}
else
{
// Denormal -> normalize it
// - Shift mantissa to make most-significant 1 implicit
// - Adjust exponent accordingly
uint32_t shift = 0;
while ((h_mant & 0x400) == 0)
{
h_mant <<= 1;
shift++;
}
h_mant &= 0x3FF;
f_exp -= shift - 1;
}
}
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
return f32.f;
}
#undef CL_HALF_EXP_MASK
#undef CL_HALF_MAX_FINITE_MAG
#ifdef __cplusplus
}
#endif
#endif /* OPENCL_CL_HALF_H */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,160 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_va_api_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
#define cl_intel_va_api_media_sharing 1
/* error codes */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */

@ -0,0 +1,81 @@
/*******************************************************************************
* Copyright (c) 2018-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __CL_VERSION_H
#define __CL_VERSION_H
/* Detect which version to target */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
#define CL_TARGET_OPENCL_VERSION 300
#endif
#if CL_TARGET_OPENCL_VERSION != 100 && \
CL_TARGET_OPENCL_VERSION != 110 && \
CL_TARGET_OPENCL_VERSION != 120 && \
CL_TARGET_OPENCL_VERSION != 200 && \
CL_TARGET_OPENCL_VERSION != 210 && \
CL_TARGET_OPENCL_VERSION != 220 && \
CL_TARGET_OPENCL_VERSION != 300
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 300
#endif
/* OpenCL Version */
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
#define CL_VERSION_3_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
#define CL_VERSION_2_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
#define CL_VERSION_2_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
#define CL_VERSION_2_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
#define CL_VERSION_1_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
#define CL_VERSION_1_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
#define CL_VERSION_1_0 1
#endif
/* Allow deprecated APIs for older OpenCL versions. */
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#endif
#endif /* __CL_VERSION_H */

@ -0,0 +1,33 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

@ -0,0 +1 @@
A reminder that this issue tracker is managed by the Khronos Group. Interactions here should follow the Khronos Code of Conduct (https://www.khronos.org/developers/code-of-conduct), which prohibits aggressive or derogatory language. Please keep the discussion friendly and civil.

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,74 @@
# OpenCL<sup>TM</sup> API Headers
This repository contains C language headers for the OpenCL API.
The authoritative public repository for these headers is located at:
https://github.com/KhronosGroup/OpenCL-Headers
Issues, proposed fixes for issues, and other suggested changes should be
created using Github.
## CMake Package
While the headers may just be copied as-is, this repository also contains a
CMake script with an install rule to allow for packaging the headers.
```bash
cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/chosen/install/prefix
cmake --build build --target install
```
To consume the package:
```bash
cmake path/to/opencl/app -DOpenCLHeaders_ROOT=/chosen/install/prefix
```
```cmake
cmake_minimum_required(VERSION 3.0)
cmake_policy(VERSION 3.0...3.18.4)
project(proj)
add_executable(app main.cpp)
find_package(OpenCLHeaders REQUIRED)
target_link_libraries(app PRIVATE OpenCL::Headers)
```
## Branch Structure
The OpenCL API headers in this repository are Unified headers and are designed
to work with all released OpenCL versions. This differs from previous OpenCL
API headers, where version-specific API headers either existed in separate
branches, or in separate folders in a branch.
## Compiling for a Specific OpenCL Version
By default, the OpenCL API headers in this repository are for the latest
OpenCL version (currently OpenCL 2.2). To use these API headers to target
a different OpenCL version, an application may `#define` the preprocessor
value `CL_TARGET_OPENCL_VERSION` before including the OpenCL API headers.
The `CL_TARGET_OPENCL_VERSION` is a three digit decimal value representing
the OpenCL API version.
For example, to enforce usage of no more than the OpenCL 1.2 APIs, you may
include the OpenCL API headers as follows:
```c
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/opencl.h>
```
## Directory Structure
```
README.md This file
LICENSE Source license for the OpenCL API headers
CL/ Unified OpenCL API headers tree
```
## License
See [LICENSE](LICENSE).
---
OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.

@ -208,7 +208,9 @@
// We want the crypto functions to be as fast as possible, so optimize!
// The best compiler optimization in Microchip's free XC32 edition is -O1
#ifndef __APPLE__
#pragma GCC optimize("O1")
#endif
// private, nonlinear function to generate 1 crypto bit
static uint32_t hitag2_crypt(uint64_t x);
@ -367,4 +369,6 @@ uint32_t hitag2_nstep(Hitag_State *pstate, uint32_t steps) {
}
// end of crypto core, revert to default optimization level
#ifndef __APPLE__
#pragma GCC reset_options
#endif

@ -427,7 +427,8 @@ static void *sorttable(void *dd) {
close(fdin);
// sort it
qsort_r(table, numentries, DATASIZE, datacmp, NULL);
void *dummy = NULL; // clang
qsort_r(table, numentries, DATASIZE, datacmp, dummy);
// write to file
sprintf(outfile, "sorted/%02x/%02x.bin", i, j);

@ -2,6 +2,9 @@ MYSRCPATHS = ../common
MYSRCS = ht2crackutils.c hitagcrypto.c
MYCFLAGS =
MYDEFS =
platform = $(shell uname)
ifeq ($(platform),Darwin)
MYLDLIBS ?= -framework OpenCL
else
@ -11,6 +14,7 @@ else
MYLDLIBS ?= -L/opt/nvidia/cuda/lib64 -lOpenCL
endif
MYINCLUDES +=-I ../common
MYINCLUDES +=-I ../common/OpenCL-Headers
BINS = ht2crack5gpu
INSTALLTOOLS = $(BINS)

@ -0,0 +1,2 @@
ht2crack5opencl
ht2crack5opencl.exe

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

@ -0,0 +1,77 @@
MYSRCS = queue.c threads.c opencl.c hitag2.c
MYCFLAGS =
MYDEFS = -D TEST_UNIT=0
platform = $(shell uname)
ifeq ($(platform),Darwin)
MYLDLIBS ?= -framework OpenCL
else
#MYINCLUDES ?=-I/usr/local/cuda-7.5/include
#MYINCLUDES ?=-I/opt/nvidia/cuda/include
#MYLDLIBS ?= -L/usr/local/cuda-7.5/lib64 -lOpenCL
MYLDLIBS ?= -L/opt/nvidia/cuda/lib64 -lOpenCL
endif
MYLDLIBS += -lpthread
MYINCLUDES +=-I ../common
MYINCLUDES +=-I ../common/OpenCL-Headers
BINS = ht2crack5opencl
INSTALLTOOLS = $(BINS)
include ../../../Makefile.host
# checking platform can be done only after Makefile.host
ifneq (,$(findstring MINGW,$(platform)))
# Mingw uses by default Microsoft printf, we want the GNU printf (e.g. for %z)
# and setting _ISOC99_SOURCE sets internally __USE_MINGW_ANSI_STDIO=1
CFLAGS += -D_ISOC99_SOURCE
endif
WITH_CLANG=$(shell $(CC) --version 2>&1 | grep -c "clang")
# disable sanitize on Linux
ifeq ($(SANITIZE),1)
ifeq ($(platform),Linux)
CFLAGS := $(filter-out -fsanitize=address,$(CFLAGS))
CFLAGS := $(filter-out -fno-omit-frame-pointer,$(CFLAGS))
LDFLAGS := $(filter-out -fsanitize=address,$(CFLAGS))
define errMsg
Disabling SANITIZE here, is incompatibe with OpenCL on Linux, due to a bug.
Check this (https://github.com/google/sanitizers/issues/611).
endef
$(warning $(errMsg))
endif
endif
# clang
ifeq ($(WITH_CLANG),1)
ifeq ($(platform),Linux)
CFLAGS += -fPIE
endif
endif
# if debug and clang, add more CFLAGS
ifeq ($(DEBUG),1)
ifeq ($(WITH_CLANG),1)
CFLAGS += -Weverything
CFLAGS += -Wno-reserved-id-macro
ifeq ($(platform),Linux)
CFLAGS += -Wno-error=reserved-id-macro
CFLAGS += -Wno-error=disabled-macro-expansion
endif
endif
$(info CFLAGS are: $(CFLAGS))
endif
ht2crack5opencl : $(OBJDIR)/ht2crack5opencl.o ${MYOBJS}
ifeq ($(platform),Darwin)
ht2crack5opencl_clean:
@rm -rf *.dSYM
clean: ht2crack5opencl_clean
endif

@ -0,0 +1,57 @@
ht2crack5opencl
Build
-----
It requires an OpenCL framework.
If required, edit Makefile and adjust INCLUDE and LIBS directives to your setup.
```
make clean
make
```
Run
---
You'll need just two nR aR pairs. These are the
encrypted nonces and challenge response values. They should be in hex.
```
./ht2crack5opencl <UID> <nR1> <aR1> <nR2> <aR2>
```
UID is the UID of the tag that you used to gather the nR aR values.
Following the help:
```
$ ./ht2crack5opencl
./ht2crack5opencl [options] {UID} {nR1} {aR1} {nR2} {aR2}
Options:
-p : select OpenCL Platform(s). Multiple allowed (1,2,3,etc.). [Default: all]
-d : select OpenCL Device(s). Multiple allowed (1,2,3,etc.). [Default: all]
-D : select OpenCL Device Type. 0: GPU, 1: CPU, 2: all. [Default: GPU]
-S : select the thread scheduler type. 0: sequential, 1: asynchronous. [Default 1]
-P : select the Profile, from 0 to 10. [Default: auto-tuning]
-F : force verify key with OpenCL instead of CPU. [Default: disabled]
-Q : select queue engine. 0: forward, 1: reverse, 2: random. [Default: 0]
-s : show the list of OpenCL platforms/devices, then exit
-V : enable debug messages
-v : show the version
-h : show this help
Example, select devices 1, 2 and 3 using platform 1 and 2, with random queue engine:
./ht2crack5opencl -D 2 -Q 2 -p 1,2 -d 1,2,3 2ab12bf2 4B71E49D 6A606453 D79BD94B 16A2255B
```
You can found the correct OpenCL Platform ID's (-p) and Device ID's (-d) by:
```
./ht2crack5opencl -s
```

@ -0,0 +1,38 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
// trust me...i'm a dolphin :P
// too many allocations, too many free to manage, I need dolphin macros :)
// they could be buggy, but if you know how to fix them, do it
#define MEMORY_FREE_ADD(a) { freeList[freeListIdx++] = (void *)(a); }
#define MEMORY_FREE_ALL { int t=freeListIdx; while (t-- > 0) if (freeList[t]!=NULL) { free (freeList[t]); freeList[t]=NULL; } if (freeList!=NULL) { free (freeList); freeList=NULL; } }
#define MEMORY_FREE_DEL(a) { for (int i=0;i<freeListIdx;i++) { if(freeList[i] && a==freeList[i]) { free(freeList[i]); freeList[i]=NULL; break; } } }
#define MEMORY_FREE_LIST(a,i) { if (i > 0) { int t=(int)i; do { if (a[t]!=NULL) { free(a[t]); a[t]=NULL; } } while (--t >= 0); MEMORY_FREE_DEL(a) } }
#define MEMORY_FREE_LIST_Z(a,i) { int t=(int)i; do { if (a[t]!=NULL) { free(a[t]); a[t]=NULL; } } while (--t >= 0); MEMORY_FREE_DEL(a) }
#define MEMORY_FREE_OPENCL(c,i) { int t=(int)i; do { if (c.contexts[t]) clReleaseContext (c.contexts[t]); if (c.keystreams[t]) clReleaseMemObject (c.keystreams[t]); \
if (c.candidates[t]) clReleaseMemObject (c.candidates[t]); if (c.matches[t]) clReleaseMemObject (c.matches[t]); \
if (c.matches_found[t]) clReleaseMemObject (c.matches_found[t]); if (c.commands[t]) clReleaseCommandQueue (c.commands[t]); \
if (c.kernels[t]) clReleaseKernel (c.kernels[t]); if (c.programs[t]) clReleaseProgram (c.programs[t]); } while (--t >= 0); }

@ -0,0 +1,258 @@
#include "ht2crack5opencl.h"
#include "hitag2.h"
//#if FORCE_HITAG2_FULL == 0
// return a single bit from a value
int bitn (uint64_t x, int bit)
{
const uint64_t bitmask = (uint64_t)(1) << bit;
return (x & bitmask) ? 1 : 0;
}
// the sub-function R that rollback depends upon
int fnR (uint64_t x)
{
// renumbered bits because my state is 0-47, not 1-48
return (bitn(x, 1) ^ bitn(x, 2) ^ bitn(x, 5) ^
bitn(x, 6) ^ bitn(x, 7) ^ bitn(x, 15) ^
bitn(x, 21) ^ bitn(x, 22) ^ bitn(x, 25) ^
bitn(x, 29) ^ bitn(x, 40) ^ bitn(x, 41) ^
bitn(x, 42) ^ bitn(x, 45) ^ bitn(x, 46) ^ bitn(x, 47));
}
// the three filter sub-functions that feed fnf
int fa(unsigned int i) {
return bitn(0x2C79, (int)i);
}
int fb(unsigned int i) {
return bitn(0x6671, (int)i);
}
// the filter function that generates a bit of output from the prng state
int fnf (uint64_t s)
{
const unsigned int x1 = (unsigned int)((bitn(s, 2) << 0) | (bitn(s, 3) << 1) | (bitn(s, 5) << 2) | (bitn(s, 6) << 3));
const unsigned int x2 = (unsigned int)((bitn(s, 8) << 0) | (bitn(s, 12) << 1) | (bitn(s, 14) << 2) | (bitn(s, 15) << 3));
const unsigned int x3 = (unsigned int)((bitn(s, 17) << 0) | (bitn(s, 21) << 1) | (bitn(s, 23) << 2) | (bitn(s, 26) << 3));
const unsigned int x4 = (unsigned int)((bitn(s, 28) << 0) | (bitn(s, 29) << 1) | (bitn(s, 31) << 2) | (bitn(s, 33) << 3));
const unsigned int x5 = (unsigned int)((bitn(s, 34) << 0) | (bitn(s, 43) << 1) | (bitn(s, 44) << 2) | (bitn(s, 46) << 3));
const unsigned int x6 = (unsigned int)((fa(x1) << 0) | (fb(x2) << 1) | (fb(x3) << 2) | (fb(x4) << 3) | (fa(x5) << 4));
return bitn (0x7907287B, (int) x6);
}
uint32_t hitag2_crypt (uint64_t x) {
const uint32_t ht2_function4a = 0x2C79; // 0010 1100 0111 1001
const uint32_t ht2_function4b = 0x6671; // 0110 0110 0111 0001
const uint32_t ht2_function5c = 0x7907287B; // 0111 1001 0000 0111 0010 1000 0111 1011
uint32_t bitindex;
bitindex = (ht2_function4a >> pickbits2_2(x, 1, 4)) & 1;
bitindex |= ((ht2_function4b << 1) >> pickbits1_1_2(x, 7, 11, 13)) & 0x02;
bitindex |= ((ht2_function4b << 2) >> pickbits1x4(x, 16, 20, 22, 25)) & 0x04;
bitindex |= ((ht2_function4b << 3) >> pickbits2_1_1(x, 27, 30, 32)) & 0x08;
bitindex |= ((ht2_function4a << 4) >> pickbits1_2_1(x, 33, 42, 45)) & 0x10;
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("hitag2_crypt bitindex = %02x\n", bitindex);
#endif
return (ht2_function5c >> bitindex) & 1;
}
/*
* Return up to 32 crypto bits.
* Last bit is in least significant bit, earlier bits are shifted left.
* Note that the Hitag transmission protocol is least significant bit,
* so we may want to change this, or add a function, that returns the
* crypto output bits in the other order.
*
* Parameters:
* Hitag_State* pstate - in/out, internal cipher state after initialisation
* uint32_t steps - number of bits requested, (capped at 32)
*/
uint32_t hitag2_nstep(Hitag_State *pstate, uint32_t steps) {
uint64_t cur_state = pstate->shiftreg;
uint32_t result = 0;
uint64_t lfsr = pstate->lfsr;
if (steps == 0) return 0;
do {
// update shift registers
if (lfsr & 1) {
cur_state = (cur_state >> 1) | 0x800000000000;
lfsr = (lfsr >> 1) ^ 0xB38083220073;
// accumulate next bit of crypto
result = (result << 1) | hitag2_crypt(cur_state);
} else {
cur_state >>= 1;
lfsr >>= 1;
result = (result << 1) | hitag2_crypt(cur_state);
}
} while (--steps);
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#ifdef _ISOC99_SOURCE
printf ("hitag2_nstep cur_state = %012I64x, result %02x\n", cur_state, result);
#else
printf ("hitag2_nstep cur_state = %012" STR(OFF_FORMAT_X) ", result %02x\n", cur_state, result);
#endif
#endif // DEBUG_HITAG2
pstate->shiftreg = cur_state;
pstate->lfsr = lfsr;
return result;
}
/*
* Parameters:
* Hitag_State* pstate - output, internal state after initialisation
* uint64_t sharedkey - 48 bit key shared between reader & tag
* uint32_t serialnum - 32 bit tag serial number
* uint32_t initvector - 32 bit random IV from reader, part of tag authentication
*/
void hitag2_init (Hitag_State *pstate, uint64_t sharedkey, uint32_t serialnum, uint32_t initvector)
{
// init state, from serial number and lowest 16 bits of shared key
uint64_t cur_state = ((sharedkey & 0xFFFF) << 32) | serialnum;
// mix the initialisation vector and highest 32 bits of the shared key
initvector ^= (uint32_t) (sharedkey >> 16);
// move 16 bits from (IV xor Shared Key) to top of uint64_t state
// these will be XORed in turn with output of the crypto function
cur_state |= (uint64_t) initvector << 48;
initvector >>= 16;
// unrolled loop is faster on PIC32 (MIPS), do 32 times
// shift register, then calc new bit
cur_state >>= 1;
int i;
for (i = 0; i < 16; i++) cur_state = (cur_state >> 1) ^ (uint64_t) hitag2_crypt(cur_state) << 46;
// highest 16 bits of IV XOR Shared Key
cur_state |= (uint64_t) initvector << 47;
for (i = 0; i < 15; i++) cur_state = (cur_state >> 1) ^ (uint64_t) hitag2_crypt(cur_state) << 46;
cur_state ^= (uint64_t) hitag2_crypt (cur_state) << 47;
pstate->shiftreg = cur_state;
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#ifdef _ISOC99_SOURCE
printf ("hitag2_init shiftreg = %012I64x\n", pstate->shiftreg);
#else
printf ("hitag2_init shiftreg = %012" STR(OFF_FORMAT_X) "\n", pstate->shiftreg);
#endif
#endif // DEBUG_HITAG2
/* naive version for reference, LFSR has 16 taps
pstate->lfsr = state ^ (state >> 2) ^ (state >> 3) ^ (state >> 6)
^ (state >> 7) ^ (state >> 8) ^ (state >> 16) ^ (state >> 22)
^ (state >> 23) ^ (state >> 26) ^ (state >> 30) ^ (state >> 41)
^ (state >> 42) ^ (state >> 43) ^ (state >> 46) ^ (state >> 47);
*/
// optimise with one 64-bit intermediate
uint64_t temp = cur_state ^ (cur_state >> 1);
pstate->lfsr = cur_state ^ (cur_state >> 6) ^ (cur_state >> 16) ^
(cur_state >> 26) ^ (cur_state >> 30) ^ (cur_state >> 41) ^
(temp >> 2) ^ (temp >> 7) ^ (temp >> 22) ^ (temp >> 42) ^ (temp >> 46);
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#ifdef _ISOC99_SOURCE
printf ("hitag2_init lfsr = %012I64x\n", pstate->lfsr);
#else
printf ("hitag2_init lfsr = %012" STR(OFF_FORMAT_X) "\n", pstate->lfsr);
#endif
#endif // DEBUG_HITAG2
}
// try state
// todo, changes arguments, only what is needed
bool try_state (uint64_t s, uint32_t uid, uint32_t aR2, uint32_t nR1, uint32_t nR2, uint64_t *key)
{
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("s : %lu, uid: %u, aR2: %u, nR1: %u, nR2: %u\n", s, uid, aR2, nR1, nR2);
fflush (stdout);
#endif
Hitag_State hstate;
uint64_t keyrev, nR1xk;
uint32_t b = 0;
hstate.shiftreg = s;
//rollback(&hstate, 2);
hstate.shiftreg = (uint64_t)(((hstate.shiftreg << 1) & 0xffffffffffff) | (uint64_t)fnR(hstate.shiftreg));
hstate.shiftreg = (uint64_t)(((hstate.shiftreg << 1) & 0xffffffffffff) | (uint64_t)fnR(hstate.shiftreg));
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("shiftreg : %lu\n", hstate.shiftreg);
fflush (stdout);
#endif
// recover key
keyrev = hstate.shiftreg & 0xffff;
nR1xk = (hstate.shiftreg >> 16) & 0xffffffff;
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("keyrev: %lu, nR1xk: %lu\n", keyrev, nR1xk);
fflush (stdout);
#endif
for (int i = 0; i < 32; i++)
{
hstate.shiftreg = ((hstate.shiftreg) << 1) | ((uid >> (31 - i)) & 0x1);
b = (b << 1) | (unsigned int) fnf(hstate.shiftreg);
}
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("shiftreg: %lu\n", hstate.shiftreg);
fflush (stdout);
#endif
keyrev |= (nR1xk ^ nR1 ^ b) << 16;
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
printf ("keyrev: %lu\n", keyrev);
fflush (stdout);
#endif
// test key
hitag2_init (&hstate, keyrev, uid, nR2);
if ((aR2 ^ hitag2_nstep (&hstate, 32)) == 0xffffffff)
{
*key = rev64 (keyrev);
#if DEBUGME >= 2
#if ENABLE_EMOJ == 1
printf ("\nKey found ╭☞ ");
#else
printf ("\nKey found: ");
#endif
for (int i = 0; i < 6; i++) {
printf ("%02X", (uint8_t)(*key & 0xff));
*key = *key >> 8;
}
printf ("\n");
#endif
return true;
}
return false;
}
//#endif // FORCE_HITAG2_FULL = 0

@ -0,0 +1,84 @@
#ifndef HITAG2_H
#define HITAG2_H
#include <stdint.h>
#include <stdbool.h>
// as the HITAG2 original implementation, with some minor changes
#define i4(x,a,b,c,d) ((uint32_t)((((x)>>(a))&1)<<3)|(((x)>>(b))&1)<<2|(((x)>>(c))&1)<<1|(((x)>>(d))&1))
#define f(state) ((0xdd3929b >> ( (((0x3c65 >> i4(state, 2, 3, 5, 6) ) & 1) <<4) \
| ((( 0xee5 >> i4(state, 8,12,14,15) ) & 1) <<3) \
| ((( 0xee5 >> i4(state,17,21,23,26) ) & 1) <<2) \
| ((( 0xee5 >> i4(state,28,29,31,33) ) & 1) <<1) \
| (((0x3c65 >> i4(state,34,43,44,46) ) & 1) ))) & 1)
#define get_bit(n, word) ((word >> (n)) & 1)
/*
* Hitag Crypto support macros
* These macros reverse the bit order in a byte, or *within* each byte of a
* 16 , 32 or 64 bit unsigned integer. (Not across the whole 16 etc bits.)
*/
#define rev8(X) ((((X) >> 7) &1) + (((X) >> 5) &2) + (((X) >> 3) &4) \
+ (((X) >> 1) &8) + (((X) << 1) &16) + (((X) << 3) &32) \
+ (((X) << 5) &64) + (((X) << 7) &128) )
#define rev16(X) (rev8 (X) + (rev8 (X >> 8) << 8))
#define rev32(X) (rev16(X) + (rev16(X >> 16) << 16))
#define rev64(X) (rev32(X) + (rev32(X >> 32) << 32))
typedef struct
{
uint64_t shiftreg; // naive shift register, required for nonlinear fn input
uint64_t lfsr; // fast lfsr, used to make software faster
} Hitag_State;
// return a single bit from a value
int bitn (uint64_t x, int bit);
// the sub-function R that rollback depends upon
int fnR (uint64_t x);
// the three filter sub-functions that feed fnf
int fa(unsigned int i);
int fb(unsigned int i);
// the filter function that generates a bit of output from the prng state
int fnf (uint64_t s);
// macros to pick out 4 bits in various patterns of 1s & 2s & make a new number
#define pickbits2_2(S, A, B) ( ((S >> A) & 3) | ((S >> (B - 2)) & 0xC) )
#define pickbits1x4(S, A, B, C, D) ( ((S >> A) & 1) | ((S >> (B - 1)) & 2) | ((S >> (C - 2)) & 4) | ((S >> (D - 3)) & 8) )
#define pickbits1_1_2(S, A, B, C) ( ((S >> A) & 1) | ((S >> (B - 1)) & 2) | ((S >> (C - 2)) & 0xC) )
#define pickbits2_1_1(S, A, B, C) ( ((S >> A) & 3) | ((S >> (B - 2)) & 4) | ((S >> (C - 3)) & 8) )
#define pickbits1_2_1(S, A, B, C) ( ((S >> A) & 1) | ((S >> (B - 1)) & 6) | ((S >> (C - 3)) & 8) )
uint32_t hitag2_crypt (uint64_t x);
/*
* Return up to 32 crypto bits.
* Last bit is in least significant bit, earlier bits are shifted left.
* Note that the Hitag transmission protocol is least significant bit,
* so we may want to change this, or add a function, that returns the
* crypto output bits in the other order.
*
* Parameters:
* Hitag_State* pstate - in/out, internal cipher state after initialisation
* uint32_t steps - number of bits requested, (capped at 32)
*/
uint32_t hitag2_nstep(Hitag_State *pstate, uint32_t steps);
/*
* Parameters:
* Hitag_State* pstate - output, internal state after initialisation
* uint64_t sharedkey - 48 bit key shared between reader & tag
* uint32_t serialnum - 32 bit tag serial number
* uint32_t initvector - 32 bit random IV from reader, part of tag authentication
*/
void hitag2_init (Hitag_State *pstate, uint64_t sharedkey, uint32_t serialnum, uint32_t initvector);
// try_state
bool try_state (uint64_t s, uint32_t uid, uint32_t aR2, uint32_t nR1, uint32_t nR2, uint64_t *key);
#endif // HITAG2_H

File diff suppressed because it is too large Load Diff

@ -0,0 +1,47 @@
#ifndef HT2CRACK5OPENCL_H
#define HT2CRACK5OPENCL_H
#define VERSION "1.0"
// enable generic debug messages
#define DEBUGME 0
//#define DEBUG_HITAG2 0 // you can set this (1) to enable debug messages in hitag2 cpu code
#ifdef __APPLE__
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#define OFF_FORMAT_U llu
#define OFF_FORMAT_X llx
#endif // DEBUG_HITAG2
#else // ! Apple
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#define OFF_FORMAT_U lu
#define OFF_FORMAT_X lx
#endif // DEBUG_HITAG2
#endif // __APPLE__
#if defined(DEBUG_HITAG2) && DEBUG_HITAG2 == 1
#define STR_STRING(x) #x
#define STR(x) STR_STRING(x)
#endif // DEBUG_HITAG2
// some defines
#define APPLE_GPU_BROKEN 0 // if your Apple GPU is broken, try set to (1).
//#define MAX_OPENCL_DEVICES 16 // max number of concurrent devices (tested up to 4x RTX 3090)
#define GLOBAL_WS_1 1024 // default size of 2nd work-items dimension
#define GLOBAL_WS_2 1 // default size of 3rd work-items dimension
#define PROFILE_DEFAULT 2 // (0) is the best for Intel GPU's (NEO) and Apple GPU's (only Iris tested), (2) for all others. Some limitations are applyed later
#define TDEBUG 0 // (0) hide or (1) enable thread's debug messages
#define EXPERIMENTAL_RECOVERY 0 // untested work-unit recovery logic, in case of failure. supported only with THREAD_SCHEDULER_TYPE as (0)
//#define CLEAN_EXIT 1 // (1) seems to be fixed, but add a global cond_wait/cond_signal pair to make sure threads end before free memory maybe an idea
#define ENABLE_EMOJ 0 // only for fun
#define WGS_MATCHES_FACTOR_MID 1.41421 // Pythagoras, the square of 2, not full but probably good trade-off
#define WGS_MATCHES_FACTOR_FULL 3.14159265359 // Pi, maybe is the correct one
#define WGS_MATCHES_FACTOR WGS_MATCHES_FACTOR_MID // trying with Pythagoras, but if you got the following error, change to Pi: 'clEnqueueReadBuffer(matches) failed (-30)'
#endif // HT2CRACK5OPENCL_H

@ -0,0 +1,866 @@
/* ht2crack5opencl_kernel.cl
* -------------------------
* This code is heavily based on crack5gpu implementation.
*
* Additional changes done by Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
* - generic code optimizations
* - using local memory for keystream, if enabled by OpenCL host engine
* - added the two macros bs_res () and bs_res_lut_1, used during the generation of intermediate results
* - split lut3 function to some variants (tentative to reduce registers usage)
* - add support for devices without lop3.b32 instruction (for ! NVIDIA platforms/devices, like Intel and Apple CPU/GPU, not tested on ADM)
* - add HITAG2 routine to perform key verification, if enabled
* - using local memory for uid, aR2, nR1, nR2 (if HITAG2 routine is enabled)
*/
#define MAX_BITSLICES 32
#define KEYSTREAM_LENGTH 32
typedef uint bitslice_t __attribute__((aligned(MAX_BITSLICES / 8)));
#ifndef HAVE_LOP3
// (0xf0&(0xcc^0xaa))
inline static uint lut3_0x60(uint a, uint b, uint c) {
const uint r = b ^ c;
return a & r;
}
// (~((0xf0|0xcc)^0xaa))
inline static uint lut3_0xa9(uint a, uint b, uint c) {
const uint r = a | b;
return ~(r ^ c);
}
// (~((0xf0|0xcc|0xaa)))
inline static uint lut3_0x01(uint a, uint b, uint c) {
const uint r = a | b;
return ~(r | c);
}
// (((0xf0|0xcc)&0xaa))
inline static uint lut3_0xa8(uint a, uint b, uint c) {
const uint r = a | b;
return r & c;
}
// (((0xf0|0xcc)&0xaa)^0xcc)
inline static uint lut3_0x64(uint a, uint b, uint c) {
const uint r = (a | b) & c;
return r ^ b;
}
// (0xf0^0xaa^0xcc)
inline static uint lut3_0x96(uint a, uint b, uint c) {
const uint r = a ^ b;
return r ^ c;
}
// (((0xf0^0xcc)|0xaa))
inline static uint lut3_0xbe(uint a, uint b, uint c) {
const uint r = a ^ b;
return r | c;
}
// (~((0xf0^0xcc)&(0xaa^0xcc)))
inline static uint lut3_0xdb(uint a, uint b, uint c) {
const uint r = a ^ b;
const uint r2 = c ^ b;
return ~(r & r2);
}
/*
// (0xf0|(0xcc&0xaa))
inline static uint lut3_0xf8(uint a, uint b, uint c) {
const uint r = b & c;
return a | r;
}
*/
/*
// (0xf0|(0xcc&(0x01)))
inline static uint lut3_0xf8_0x1(uint a, uint b) {
const uint r = b & 0x1;
return a | r;
}
*/
#ifdef WITH_HITAG2_FULL
// (0xf0|(0xcc&(0xC)))
inline static uint lut3_0xf8_0xC(uint a, uint b) {
const uint r = b & 0xC;
return a | r;
}
// (0xf0|0xcc|0xaa)
inline static uint lut3_0xfe(uint a, uint b, uint c) {
const uint r = a | b;
return r | c;
}
#endif // WITH_HITAG2_FULL
#else // HAVE_LOP3
inline static uint lut3_0x01(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0x01;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0x60(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0x60;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0x64(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0x64;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0x96(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0xa8(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xa8;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0xa9(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xa9;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0xbe(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xbe;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
inline static uint lut3_0xdb(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xdb;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
/*
inline static uint lut3_0xf8(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xf8;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
*/
/*
inline static uint lut3_0xf8_0x1(uint a, uint b) {
uint r;
asm("lop3.b32 %0, %1, %2, 0x1, 0xf8;" : "=r"(r): "r"(a), "r"(b));
return r;
}
*/
#ifdef WITH_HITAG2_FULL
inline static uint lut3_0xf8_0xC(uint a, uint b) {
uint r;
asm("lop3.b32 %0, %1, %2, 0xC, 0xf8;" : "=r"(r): "r"(a), "r"(b));
return r;
}
inline static uint lut3_0xfe(uint a, uint b, uint c) {
uint r;
asm("lop3.b32 %0, %1, %2, %3, 0xfe;" : "=r"(r): "r"(a), "r"(b), "r"(c));
return r;
}
#endif // WITH_HITAG2_FULL
#endif // HAVE_LOP3
#define f_a_bs(a,b,c,d) (lut3_0xa9(a,d,lut3_0x64(a,b,c))) // 2 luts
#define f_b_bs(a,b,c,d) (lut3_0xa8(d,c,a^b) ^ lut3_0x01(d,a,b)) // 2 luts, 2 xors
#define f_c_bs(a,b,c,d,e) (((lut3_0xdb((lut3_0xbe(c,e,d) & a), b, c)) ^ (lut3_0xbe(d,e,a) & lut3_0xbe(d,b,c))))
#define bs_res(a,b,c) (lut3_0x60(a,b,c))
#define lfsr_bs(i) (lut3_0x96(lut3_0x96(lut3_0x96(state[-2+i+ 0], state[-2+i+ 2], state[-2+i+ 3]), \
lut3_0x96(state[-2+i+ 6], state[-2+i+ 7], state[-2+i+ 8]), \
lut3_0x96(state[-2+i+16], state[-2+i+22], state[-2+i+23])), \
lut3_0x96(state[-2+i+26], state[-2+i+30], state[-2+i+41]), \
lut3_0x96(state[-2+i+42], state[-2+i+43], state[-2+i+46])) ^ state[-2+i+47])
// 46 iterations * 4 ops
static inline void bitslice (bitslice_t *restrict b, ulong x) {
for (uint i = 0; i < 46; ++i) {
b[i] = -(x & 1);
x >>= 1;
}
}
// don't care about the complexity of this function
static inline ulong unbitslice (const bitslice_t *restrict b, const uint s) {
const bitslice_t mask = ((bitslice_t) 1) << s;
ulong result = 0;
for (int i = 47; i >= 0; --i) {
result <<= 1;
result |= (bool)(b[i] & mask);
}
return result;
}
///////////////////////////////
#ifdef WITH_HITAG2_FULL
/*
* Hitag Crypto support macros
* These macros reverse the bit order in a byte, or *within* each byte of a
* 16 , 32 or 64 bit unsigned integer. (Not across the whole 16 etc bits.)
*/
#define rev8(X) ((((X) >> 7) &1) + (((X) >> 5) &2) + (((X) >> 3) &4) \
+ (((X) >> 1) &8) + (((X) << 1) &16) + (((X) << 3) &32) \
+ (((X) << 5) &64) + (((X) << 7) &128) )
#define rev16(X) (rev8 (X) + (rev8 (X >> 8) << 8))
#define rev32(X) (rev16(X) + (rev16(X >> 16) << 16))
#define rev64(X) (rev32(X) + (rev32(X >> 32) << 32))
// (0xf0|(0xcc&0xaa))
#define pickbits2_2_lut(a,b) (lut3_0xf8_0xC(a,b))
#define pickbits2_2(S) pickbits2_2_lut( ((S >> 1) & 3) , (S >> 2) )
// (0xf0|0xcc|0xaa)
#define pickbits1_1_2_lut(a,b,c) (lut3_0xfe(a,b,c))
#define pickbits1_1_2(S) pickbits1_1_2_lut( ((S >> 7) & 1) , ((S >> 10) & 2) , ((S >> 11) & 0xC) )
#define pickbits1x4_lut(a,b,c,d) (lut3_0xfe(a,b,c) | d)
#define pickbits1x4(S) pickbits1x4_lut( ((S >> 16) & 1) , ((S >> 19) & 2) , ((S >> 20) & 4) , ((S >> 22) & 8) )
#define pickbits2_1_1_lut(a,b,c) (lut3_0xfe(a,b,c))
#define pickbits2_1_1(S) pickbits2_1_1_lut( ((S >> 27) & 3) , ((S >> 28) & 4) , ((S >> 29) & 8) )
#define pickbits1_2_1_lut(a,b,c) (lut3_0xfe(a,b,c))
#define pickbits1_2_1(S) pickbits1_2_1_lut( ((S >> 33) & 1) , ((S >> 41) & 6) , ((S >> 42) & 8) )
static uint hitag2_crypt (ulong x)
{
const uint ht2_function4a = 0x2C79; // 0010 1100 0111 1001
const uint ht2_function4b = 0x6671; // 0110 0110 0111 0001
const uint ht2_function5c = 0x7907287B; // 0111 1001 0000 0111 0010 1000 0111 1011
uint bitindex;
bitindex = (ht2_function4a >> pickbits2_2(x)) & 1;
bitindex |= ((ht2_function4b << 1) >> pickbits1_1_2(x)) & 0x02;
bitindex |= ((ht2_function4b << 2) >> pickbits1x4(x)) & 0x04;
bitindex |= ((ht2_function4b << 3) >> pickbits2_1_1(x)) & 0x08;
bitindex |= ((ht2_function4a << 4) >> pickbits1_2_1(x)) & 0x10;
return (ht2_function5c >> bitindex) & 1;
}
static void hitag2_init2 (ulong *shiftreg, ulong *lfsr, const ulong sharedkey, const uint serialnum, uint initvector)
{
ulong state = ((sharedkey & 0xFFFF) << 32) | serialnum;
initvector ^= (uint)(sharedkey >> 16);
state |= (ulong) initvector << 48;
initvector >>= 16;
state >>= 1;
for (uint x = 0; x < 16; x++) state = (state >> 1) ^ (ulong) hitag2_crypt (state) << 46;
state |= (ulong) initvector << 47;
for (uint x = 0; x < 15; x++) state = (state >> 1) ^ (ulong) hitag2_crypt (state) << 46;
state ^= (ulong) hitag2_crypt(state) << 47;
*shiftreg = state;
// ulong temp = state ^ (state >> 1);
// *lfsr = state ^ (state >> 6) ^ (state >> 16) ^ (state >> 26) ^ (state >> 30) ^ (state >> 41) ^
// (temp >> 2) ^ (temp >> 7) ^ (temp >> 22) ^ (temp >> 42) ^ (temp >> 46);
*lfsr = (ulong) state ^
lut3_0x96 ((state >> 2), (state >> 3), (state >> 6)) ^
lut3_0x96 ((state >> 7), (state >> 8), (state >> 16)) ^
lut3_0x96 ((state >> 22), (state >> 23), (state >> 26)) ^
lut3_0x96 ((state >> 30), (state >> 41), (state >> 42)) ^
lut3_0x96 ((state >> 43), (state >> 46), (state >> 47));
}
static uint hitag2_nstep2 (ulong state, ulong lfsr)
{
uint result = 0;
for (uint nsteps = 32; nsteps > 0; nsteps--)
{
if (lfsr & 1)
{
state = (state >> 1) | 0x800000000000;
lfsr = (lfsr >> 1) ^ 0xB38083220073;
result = (result << 1) | hitag2_crypt (state);
}
else
{
state >>= 1;
lfsr >>= 1;
result = (result << 1) | hitag2_crypt (state);
}
}
return result;
}
inline static int bitn(ulong x, int bit)
{
const ulong bitmask = 1UL << bit;
return (x & bitmask) ? 1 : 0;
}
static int fnR (ulong x)
{
return (bitn(x, 1) ^ bitn(x, 2) ^ bitn(x, 5) ^ bitn(x, 6) ^ bitn(x, 7) ^
bitn(x, 15) ^ bitn(x, 21) ^ bitn(x, 22) ^ bitn(x, 25) ^ bitn(x, 29) ^ bitn(x, 40) ^
bitn(x, 41) ^ bitn(x, 42) ^ bitn(x, 45) ^ bitn(x, 46) ^ bitn(x, 47));
}
inline static int fa(unsigned int i) {
return bitn(0x2C79, i);
}
inline static int fb(unsigned int i) {
return bitn(0x6671, i);
}
static int fnf (ulong s)
{
const uint x1 = (bitn(s, 2) << 0) | lut3_0x96( (bitn(s, 3) << 1), (bitn(s, 5) << 2), (bitn(s, 6) << 3));
const uint x2 = (bitn(s, 8) << 0) | lut3_0x96( (bitn(s, 12) << 1), (bitn(s, 14) << 2), (bitn(s, 15) << 3));
const uint x3 = (bitn(s, 17) << 0) | lut3_0x96( (bitn(s, 21) << 1), (bitn(s, 23) << 2), (bitn(s, 26) << 3));
const uint x4 = (bitn(s, 28) << 0) | lut3_0x96( (bitn(s, 29) << 1), (bitn(s, 31) << 2), (bitn(s, 33) << 3));
const uint x5 = (bitn(s, 34) << 0) | lut3_0x96( (bitn(s, 43) << 1), (bitn(s, 44) << 2), (bitn(s, 46) << 3));
const uint x6 = lut3_0x96( (fa(x1) << 0), (fb(x2) << 1), lut3_0x96( (fb(x3) << 2), (fb(x4) << 3), (fa(x5) << 4)));
return bitn(0x7907287B, x6);
}
#endif // WITH_HITAG2_FULL
// format this array with 32 bitsliced vectors of ones and zeroes representing the inverted keystream
__kernel
__attribute__((vec_type_hint(bitslice_t)))
void find_state(const uint candidate_index_base,
__global const ushort *restrict candidates,
// __global const ulong *restrict candidates,
__global const bitslice_t *restrict _keystream,
__global ulong *restrict matches,
#ifndef WITH_HITAG2_FULL
__global uint *restrict matches_found)
#else
__global uint *restrict matches_found,
__global const uint *restrict _checks)
#endif
{
const size_t gid[2] = { get_global_id(0), get_global_id(1) };
// if (gid[0] == 0) printf("work-item 1,%u\n", gid[1]);
#ifdef HAVE_LOCAL_MEMORY
const size_t lid = get_local_id(0);
const size_t lsize = get_local_size(0);
#endif // HAVE_LOCAL_MEMORY
const uint index = 3 * (candidate_index_base + gid[0]); // dimension 0 should at least keep the execution units saturated - 8k is fine
const ulong3 c = { candidates[index], candidates[index + 1], candidates[index + 2] };
const ulong candidate = ( c.x << 32 | c.y << 16 | c.z );
#ifdef HAVE_LOCAL_MEMORY
// store keystream in local memory
__local bitslice_t keystream[32];
for (size_t i = lid; i < 32; i+= lsize) keystream[i] = _keystream[i];
#ifdef WITH_HITAG2_FULL
// store uid, aR2, nR1, nR2 in local memory
__local uint checks[4];
for (uint i = lid; i < 4; i+= lsize) checks[i] = _checks[i];
#endif
// threads syncronization
barrier (CLK_LOCAL_MEM_FENCE);
#else
#define keystream _keystream
#define checks _checks
#endif // HAVE_LOCAL_MEMORY
// we never actually set or use the lowest 2 bits the initial state, so we can save 2 bitslices everywhere
bitslice_t state[-2 + 48 + KEYSTREAM_LENGTH] = { 0 };
// set bits 0+2, 0+3, 0+5, 0+6, 0+8, 0+12, 0+14, 0+15, 0+17, 0+21, 0+23, 0+26, 0+28, 0+29, 0+31, 0+33, 0+34, 0+43, 0+44, 0+46
// get the 48-bit cipher states as 3 16-bit words from the host memory queue (to save 25% throughput)
// using 64bit candidate
// const uint index = (candidate_index_base + gid[0]); // dimension 0 should at least keep the execution units saturated - 8k is fine
// const ulong candidate = candidates[index];
// bitslice (&state[-2 + 2], candidate >> 2);
// set all 48 state bits except the lowest 2
bitslice (&state[-2 + 2], candidate);
// set bits 3, 6, 8, 12, 15
state[-2 + 1 + 3] = 0xaaaaaaaa;
state[-2 + 1 + 6] = 0xcccccccc;
state[-2 + 1 + 8] = 0xf0f0f0f0;
state[-2 + 1 + 12] = 0xff00ff00;
state[-2 + 1 + 15] = 0xffff0000;
const uint i1 = gid[1]; // dimension 1 should be 1024
state[-2 + 18] = -((bool)(i1 & 0x1));
state[-2 + 22] = -((bool)(i1 & 0x2));
state[-2 + 24] = -((bool)(i1 & 0x4));
state[-2 + 27] = -((bool)(i1 & 0x8));
state[-2 + 30] = -((bool)(i1 & 0x10));
state[-2 + 32] = -((bool)(i1 & 0x20));
state[-2 + 35] = -((bool)(i1 & 0x40));
state[-2 + 45] = -((bool)(i1 & 0x80));
state[-2 + 47] = -((bool)(i1 & 0x100));
state[-2 + 48] = -((bool)(i1 & 0x200)); // guess lfsr output 0
// 0xfc07fef3f9fe
const bitslice_t filter1_0 = f_a_bs(state[-2 + 3], state[-2 + 4], state[-2 + 6], state[-2 + 7]);
const bitslice_t filter1_1 = f_b_bs(state[-2 + 9], state[-2 + 13], state[-2 + 15], state[-2 + 16]);
const bitslice_t filter1_2 = f_b_bs(state[-2 + 18], state[-2 + 22], state[-2 + 24], state[-2 + 27]);
const bitslice_t filter1_3 = f_b_bs(state[-2 + 29], state[-2 + 30], state[-2 + 32], state[-2 + 34]);
const bitslice_t filter1_4 = f_a_bs(state[-2 + 35], state[-2 + 44], state[-2 + 45], state[-2 + 47]);
const bitslice_t filter1 = f_c_bs(filter1_0, filter1_1, filter1_2, filter1_3, filter1_4);
const bitslice_t results1 = filter1 ^ keystream[1];
if (!results1) return;
const bitslice_t filter2_0 = f_a_bs(state[-2 + 4], state[-2 + 5], state[-2 + 7], state[-2 + 8]);
const bitslice_t filter2_3 = f_b_bs(state[-2 + 30], state[-2 + 31], state[-2 + 33], state[-2 + 35]);
const bitslice_t filter3_0 = f_a_bs(state[-2 + 5], state[-2 + 6], state[-2 + 8], state[-2 + 9]);
const bitslice_t filter5_2 = f_b_bs(state[-2 + 22], state[-2 + 26], state[-2 + 28], state[-2 + 31]);
const bitslice_t filter6_2 = f_b_bs(state[-2 + 23], state[-2 + 27], state[-2 + 29], state[-2 + 32]);
const bitslice_t filter7_2 = f_b_bs(state[-2 + 24], state[-2 + 28], state[-2 + 30], state[-2 + 33]);
const bitslice_t filter9_1 = f_b_bs(state[-2 + 17], state[-2 + 21], state[-2 + 23], state[-2 + 24]);
const bitslice_t filter9_2 = f_b_bs(state[-2 + 26], state[-2 + 30], state[-2 + 32], state[-2 + 35]);
const bitslice_t filter10_0 = f_a_bs(state[-2 + 12], state[-2 + 13], state[-2 + 15], state[-2 + 16]);
const bitslice_t filter11_0 = f_a_bs(state[-2 + 13], state[-2 + 14], state[-2 + 16], state[-2 + 17]);
const bitslice_t filter12_0 = f_a_bs(state[-2 + 14], state[-2 + 15], state[-2 + 17], state[-2 + 18]);
const bitslice_t filter14_1 = f_b_bs(state[-2 + 22], state[-2 + 26], state[-2 + 28], state[-2 + 29]);
const bitslice_t filter15_1 = f_b_bs(state[-2 + 23], state[-2 + 27], state[-2 + 29], state[-2 + 30]);
const bitslice_t filter15_3 = f_b_bs(state[-2 + 43], state[-2 + 44], state[-2 + 46], state[-2 + 48]);
const bitslice_t filter16_1 = f_b_bs(state[-2 + 24], state[-2 + 28], state[-2 + 30], state[-2 + 31]);
for (uint i2 = 0; i2 < 32; i2++) {
state[-2 + 10] = -((bool)(i2 & 0x1));
state[-2 + 19] = -((bool)(i2 & 0x2));
state[-2 + 25] = -((bool)(i2 & 0x4));
state[-2 + 36] = -((bool)(i2 & 0x8));
state[-2 + 49] = -((bool)(i2 & 0x10)); // guess lfsr output 1
// 0xfe07fffbfdff
const bitslice_t filter2_1 = f_b_bs(state[-2 + 10], state[-2 + 14], state[-2 + 16], state[-2 + 17]);
const bitslice_t filter2_2 = f_b_bs(state[-2 + 19], state[-2 + 23], state[-2 + 25], state[-2 + 28]);
const bitslice_t filter2_4 = f_a_bs(state[-2 + 36], state[-2 + 45], state[-2 + 46], state[-2 + 48]);
const bitslice_t filter2 = f_c_bs(filter2_0, filter2_1, filter2_2, filter2_3, filter2_4);
const bitslice_t results2 = bs_res(results1,filter2,keystream[2]);
if (!results2) continue;
state[-2 + 50] = lfsr_bs(2);
const bitslice_t filter3_3 = f_b_bs(state[-2 + 31], state[-2 + 32], state[-2 + 34], state[-2 + 36]);
const bitslice_t filter4_0 = f_a_bs(state[-2 + 6], state[-2 + 7], state[-2 + 9], state[-2 + 10]);
const bitslice_t filter4_1 = f_b_bs(state[-2 + 12], state[-2 + 16], state[-2 + 18], state[-2 + 19]);
const bitslice_t filter4_2 = f_b_bs(state[-2 + 21], state[-2 + 25], state[-2 + 27], state[-2 + 30]);
const bitslice_t filter7_0 = f_a_bs(state[-2 + 9], state[-2 + 10], state[-2 + 12], state[-2 + 13]);
const bitslice_t filter7_1 = f_b_bs(state[-2 + 15], state[-2 + 19], state[-2 + 21], state[-2 + 22]);
const bitslice_t filter8_2 = f_b_bs(state[-2 + 25], state[-2 + 29], state[-2 + 31], state[-2 + 34]);
const bitslice_t filter10_1 = f_b_bs(state[-2 + 18], state[-2 + 22], state[-2 + 24], state[-2 + 25]);
const bitslice_t filter10_2 = f_b_bs(state[-2 + 27], state[-2 + 31], state[-2 + 33], state[-2 + 36]);
const bitslice_t filter11_1 = f_b_bs(state[-2 + 19], state[-2 + 23], state[-2 + 25], state[-2 + 26]);
const bitslice_t filter13_0 = f_a_bs(state[-2 + 15], state[-2 + 16], state[-2 + 18], state[-2 + 19]);
const bitslice_t filter13_1 = f_b_bs(state[-2 + 21], state[-2 + 25], state[-2 + 27], state[-2 + 28]);
const bitslice_t filter16_0 = f_a_bs(state[-2 + 18], state[-2 + 19], state[-2 + 21], state[-2 + 22]);
const bitslice_t filter16_3 = f_b_bs(state[-2 + 44], state[-2 + 45], state[-2 + 47], state[-2 + 49]);
const bitslice_t filter17_1 = f_b_bs(state[-2 + 25], state[-2 + 29], state[-2 + 31], state[-2 + 32]);
const bitslice_t filter17_3 = f_b_bs(state[-2 + 45], state[-2 + 46], state[-2 + 48], state[-2 + 50]);
for (uint i3 = 0; i3 < 8; i3++) {
state[-2 + 11] = -((bool)(i3 & 0x1));
state[-2 + 20] = -((bool)(i3 & 0x2));
state[-2 + 37] = -((bool)(i3 & 0x4));
// 0xff07ffffffff
const bitslice_t filter3_1 = f_b_bs(state[-2 + 11], state[-2 + 15], state[-2 + 17], state[-2 + 18]);
const bitslice_t filter3_2 = f_b_bs(state[-2 + 20], state[-2 + 24], state[-2 + 26], state[-2 + 29]);
const bitslice_t filter3_4 = f_a_bs(state[-2 + 37], state[-2 + 46], state[-2 + 47], state[-2 + 49]);
const bitslice_t filter3 = f_c_bs(filter3_0, filter3_1, filter3_2, filter3_3, filter3_4);
const bitslice_t results3 = bs_res(results2,filter3,keystream[3]);
if (!results3) continue;
state[-2 + 51] = lfsr_bs(3);
state[-2 + 52] = lfsr_bs(4);
state[-2 + 53] = lfsr_bs(5);
state[-2 + 54] = lfsr_bs(6);
state[-2 + 55] = lfsr_bs(7);
const bitslice_t filter4_3 = f_b_bs(state[-2 + 32], state[-2 + 33], state[-2 + 35], state[-2 + 37]);
const bitslice_t filter5_0 = f_a_bs(state[-2 + 7], state[-2 + 8], state[-2 + 10], state[-2 + 11]);
const bitslice_t filter5_1 = f_b_bs(state[-2 + 13], state[-2 + 17], state[-2 + 19], state[-2 + 20]);
const bitslice_t filter6_0 = f_a_bs(state[-2 + 8], state[-2 + 9], state[-2 + 11], state[-2 + 12]);
const bitslice_t filter6_1 = f_b_bs(state[-2 + 14], state[-2 + 18], state[-2 + 20], state[-2 + 21]);
const bitslice_t filter8_0 = f_a_bs(state[-2 + 10], state[-2 + 11], state[-2 + 13], state[-2 + 14]);
const bitslice_t filter8_1 = f_b_bs(state[-2 + 16], state[-2 + 20], state[-2 + 22], state[-2 + 23]);
const bitslice_t filter9_0 = f_a_bs(state[-2 + 11], state[-2 + 12], state[-2 + 14], state[-2 + 15]);
const bitslice_t filter9_4 = f_a_bs(state[-2 + 43], state[-2 + 52], state[-2 + 53], state[-2 + 55]);
const bitslice_t filter11_2 = f_b_bs(state[-2 + 28], state[-2 + 32], state[-2 + 34], state[-2 + 37]);
const bitslice_t filter12_1 = f_b_bs(state[-2 + 20], state[-2 + 24], state[-2 + 26], state[-2 + 27]);
const bitslice_t filter14_0 = f_a_bs(state[-2 + 16], state[-2 + 17], state[-2 + 19], state[-2 + 20]);
const bitslice_t filter15_0 = f_a_bs(state[-2 + 17], state[-2 + 18], state[-2 + 20], state[-2 + 21]);
const bitslice_t filter17_0 = f_a_bs(state[-2 + 19], state[-2 + 20], state[-2 + 22], state[-2 + 23]);
for (uint i4 = 0; i4 < 2; i4++) {
state[-2 + 38] = -i4;
// 0xff87ffffffff
const bitslice_t filter4_4 = f_a_bs(state[-2 + 38], state[-2 + 47], state[-2 + 48], state[-2 + 50]);
const bitslice_t filter4 = f_c_bs(filter4_0, filter4_1, filter4_2, filter4_3, filter4_4);
const bitslice_t results4 = bs_res(results3,filter4,keystream[4]);
if (!results4) continue;
state[-2 + 56] = lfsr_bs(8);
const bitslice_t filter5_3 = f_b_bs(state[-2 + 33], state[-2 + 34], state[-2 + 36], state[-2 + 38]);
const bitslice_t filter10_4 = f_a_bs(state[-2 + 44], state[-2 + 53], state[-2 + 54], state[-2 + 56]);
const bitslice_t filter12_2 = f_b_bs(state[-2 + 29], state[-2 + 33], state[-2 + 35], state[-2 + 38]);
for (uint i5 = 0; i5 < 2; i5++) {
state[-2 + 39] = -i5;
// 0xffc7ffffffff
const bitslice_t filter5_4 = f_a_bs(state[-2 + 39], state[-2 + 48], state[-2 + 49], state[-2 + 51]);
const bitslice_t filter5 = f_c_bs(filter5_0, filter5_1, filter5_2, filter5_3, filter5_4);
const bitslice_t results5 = bs_res(results4,filter5,keystream[5]);
if (!results5) continue;
state[-2 + 57] = lfsr_bs(9);
const bitslice_t filter6_3 = f_b_bs(state[-2 + 34], state[-2 + 35], state[-2 + 37], state[-2 + 39]);
const bitslice_t filter11_4 = f_a_bs(state[-2 + 45], state[-2 + 54], state[-2 + 55], state[-2 + 57]);
const bitslice_t filter13_2 = f_b_bs(state[-2 + 30], state[-2 + 34], state[-2 + 36], state[-2 + 39]);
for (uint i6 = 0; i6 < 2; i6++) {
state[-2 + 40] = -i6;
// 0xffe7ffffffff
const bitslice_t filter6_4 = f_a_bs(state[-2 + 40], state[-2 + 49], state[-2 + 50], state[-2 + 52]);
const bitslice_t filter6 = f_c_bs(filter6_0, filter6_1, filter6_2, filter6_3, filter6_4);
const bitslice_t results6 = bs_res(results5,filter6,keystream[6]);
if (!results6) continue;
state[-2 + 58] = lfsr_bs(10);
const bitslice_t filter7_3 = f_b_bs(state[-2 + 35], state[-2 + 36], state[-2 + 38], state[-2 + 40]);
const bitslice_t filter12_4 = f_a_bs(state[-2 + 46], state[-2 + 55], state[-2 + 56], state[-2 + 58]);
const bitslice_t filter14_2 = f_b_bs(state[-2 + 31], state[-2 + 35], state[-2 + 37], state[-2 + 40]);
const bitslice_t filter17_2 = f_b_bs(state[-2 + 34], state[-2 + 38], state[-2 + 40], state[-2 + 43]);
#pragma unroll
for (uint i7 = 0; i7 < 2; i7++) {
state[-2 + 41] = -i7;
// 0xfff7ffffffff
const bitslice_t filter7_4 = f_a_bs(state[-2 + 41], state[-2 + 50], state[-2 + 51], state[-2 + 53]);
const bitslice_t filter7 = f_c_bs(filter7_0, filter7_1, filter7_2, filter7_3, filter7_4);
const bitslice_t results7 = bs_res(results6,filter7,keystream[7]);
if (!results7) continue;
state[-2 + 59] = lfsr_bs(11);
const bitslice_t filter8_3 = f_b_bs(state[-2 + 36], state[-2 + 37], state[-2 + 39], state[-2 + 41]);
const bitslice_t filter10_3 = f_b_bs(state[-2 + 38], state[-2 + 39], state[-2 + 41], state[-2 + 43]);
const bitslice_t filter10 = f_c_bs(filter10_0, filter10_1, filter10_2, filter10_3, filter10_4);
const bitslice_t filter12_3 = f_b_bs(state[-2 + 40], state[-2 + 41], state[-2 + 43], state[-2 + 45]);
const bitslice_t filter12 = f_c_bs(filter12_0, filter12_1, filter12_2, filter12_3, filter12_4);
const bitslice_t filter13_4 = f_a_bs(state[-2 + 47], state[-2 + 56], state[-2 + 57], state[-2 + 59]);
const bitslice_t filter15_2 = f_b_bs(state[-2 + 32], state[-2 + 36], state[-2 + 38], state[-2 + 41]);
#pragma unroll
for (uint i8 = 0; i8 < 2; i8++) {
state[-2 + 42] = -i8;
// 0xffffffffffff
const bitslice_t filter8_4 = f_a_bs(state[-2 + 42], state[-2 + 51], state[-2 + 52], state[-2 + 54]);
const bitslice_t filter8 = f_c_bs(filter8_0, filter8_1, filter8_2, filter8_3, filter8_4);
bitslice_t results8 = bs_res(results7,filter8,keystream[8]);
if (!results8) continue;
const bitslice_t filter9_3 = f_b_bs(state[-2 + 37], state[-2 + 38], state[-2 + 40], state[-2 + 42]);
const bitslice_t filter9 = f_c_bs(filter9_0, filter9_1, filter9_2, filter9_3, filter9_4);
results8 &= (filter9 ^ keystream[9]);
if (!results8) continue;
results8 &= (filter10 ^ keystream[10]);
if (!results8) continue;
const bitslice_t filter11_3 = f_b_bs(state[-2 + 39], state[-2 + 40], state[-2 + 42], state[-2 + 44]);
const bitslice_t filter11 = f_c_bs(filter11_0, filter11_1, filter11_2, filter11_3, filter11_4);
results8 &= (filter11 ^ keystream[11]);
if (!results8) continue;
results8 &= (filter12 ^ keystream[12]);
if (!results8) continue;
const bitslice_t filter13_3 = f_b_bs(state[-2 + 41], state[-2 + 42], state[-2 + 44], state[-2 + 46]);
const bitslice_t filter13 = f_c_bs(filter13_0, filter13_1, filter13_2, filter13_3, filter13_4);
results8 &= (filter13 ^ keystream[13]);
if (!results8) continue;
state[-2 + 60] = lfsr_bs(12);
const bitslice_t filter14_3 = f_b_bs(state[-2 + 42], state[-2 + 43], state[-2 + 45], state[-2 + 47]);
const bitslice_t filter14_4 = f_a_bs(state[-2 + 48], state[-2 + 57], state[-2 + 58], state[-2 + 60]);
const bitslice_t filter14 = f_c_bs(filter14_0, filter14_1, filter14_2, filter14_3, filter14_4);
results8 &= (filter14 ^ keystream[14]);
if (!results8) continue;
state[-2 + 61] = lfsr_bs(13);
const bitslice_t filter15_4 = f_a_bs(state[-2 + 49], state[-2 + 58], state[-2 + 59], state[-2 + 61]);
const bitslice_t filter15 = f_c_bs(filter15_0, filter15_1, filter15_2, filter15_3, filter15_4);
results8 &= (filter15 ^ keystream[15]);
if (!results8) continue;
state[-2 + 62] = lfsr_bs(14);
const bitslice_t filter16_2 = f_b_bs(state[-2 + 33], state[-2 + 37], state[-2 + 39], state[-2 + 42]);
const bitslice_t filter16_4 = f_a_bs(state[-2 + 50], state[-2 + 59], state[-2 + 60], state[-2 + 62]);
const bitslice_t filter16 = f_c_bs(filter16_0, filter16_1, filter16_2, filter16_3, filter16_4);
results8 &= (filter16 ^ keystream[16]);
if (!results8) continue;
state[-2 + 63] = lfsr_bs(15);
const bitslice_t filter17_4 = f_a_bs(state[-2 + 51], state[-2 + 60], state[-2 + 61], state[-2 + 63]);
const bitslice_t filter17 = f_c_bs(filter17_0, filter17_1, filter17_2, filter17_3, filter17_4);
results8 &= (filter17 ^ keystream[17]);
if (!results8) continue;
state[-2 + 64] = lfsr_bs(16);
const bitslice_t filter18_0 = f_a_bs(state[-2 + 20], state[-2 + 21], state[-2 + 23], state[-2 + 24]);
const bitslice_t filter18_1 = f_b_bs(state[-2 + 26], state[-2 + 30], state[-2 + 32], state[-2 + 33]);
const bitslice_t filter18_2 = f_b_bs(state[-2 + 35], state[-2 + 39], state[-2 + 41], state[-2 + 44]);
const bitslice_t filter18_3 = f_b_bs(state[-2 + 46], state[-2 + 47], state[-2 + 49], state[-2 + 51]);
const bitslice_t filter18_4 = f_a_bs(state[-2 + 52], state[-2 + 61], state[-2 + 62], state[-2 + 64]);
const bitslice_t filter18 = f_c_bs(filter18_0, filter18_1, filter18_2, filter18_3, filter18_4);
results8 &= (filter18 ^ keystream[18]);
if (!results8) continue;
state[-2 + 65] = lfsr_bs(17);
const bitslice_t filter19_0 = f_a_bs(state[-2 + 21], state[-2 + 22], state[-2 + 24], state[-2 + 25]);
const bitslice_t filter19_1 = f_b_bs(state[-2 + 27], state[-2 + 31], state[-2 + 33], state[-2 + 34]);
const bitslice_t filter19_2 = f_b_bs(state[-2 + 36], state[-2 + 40], state[-2 + 42], state[-2 + 45]);
const bitslice_t filter19_3 = f_b_bs(state[-2 + 47], state[-2 + 48], state[-2 + 50], state[-2 + 52]);
const bitslice_t filter19_4 = f_a_bs(state[-2 + 53], state[-2 + 62], state[-2 + 63], state[-2 + 65]);
const bitslice_t filter19 = f_c_bs(filter19_0, filter19_1, filter19_2, filter19_3, filter19_4);
results8 &= (filter19 ^ keystream[19]);
if (!results8) continue;
state[-2 + 66] = lfsr_bs(18);
const bitslice_t filter20_0 = f_a_bs(state[-2 + 22], state[-2 + 23], state[-2 + 25], state[-2 + 26]);
const bitslice_t filter20_1 = f_b_bs(state[-2 + 28], state[-2 + 32], state[-2 + 34], state[-2 + 35]);
const bitslice_t filter20_2 = f_b_bs(state[-2 + 37], state[-2 + 41], state[-2 + 43], state[-2 + 46]);
const bitslice_t filter20_3 = f_b_bs(state[-2 + 48], state[-2 + 49], state[-2 + 51], state[-2 + 53]);
const bitslice_t filter20_4 = f_a_bs(state[-2 + 54], state[-2 + 63], state[-2 + 64], state[-2 + 66]);
const bitslice_t filter20 = f_c_bs(filter20_0, filter20_1, filter20_2, filter20_3, filter20_4);
results8 &= (filter20 ^ keystream[20]);
if (!results8) continue;
state[-2 + 67] = lfsr_bs(19);
const bitslice_t filter21_0 = f_a_bs(state[-2 + 23], state[-2 + 24], state[-2 + 26], state[-2 + 27]);
const bitslice_t filter21_1 = f_b_bs(state[-2 + 29], state[-2 + 33], state[-2 + 35], state[-2 + 36]);
const bitslice_t filter21_2 = f_b_bs(state[-2 + 38], state[-2 + 42], state[-2 + 44], state[-2 + 47]);
const bitslice_t filter21_3 = f_b_bs(state[-2 + 49], state[-2 + 50], state[-2 + 52], state[-2 + 54]);
const bitslice_t filter21_4 = f_a_bs(state[-2 + 55], state[-2 + 64], state[-2 + 65], state[-2 + 67]);
const bitslice_t filter21 = f_c_bs(filter21_0, filter21_1, filter21_2, filter21_3, filter21_4);
results8 &= (filter21 ^ keystream[21]);
if (!results8) continue;
state[-2 + 68] = lfsr_bs(20);
const bitslice_t filter22_0 = f_a_bs(state[-2 + 24], state[-2 + 25], state[-2 + 27], state[-2 + 28]);
const bitslice_t filter22_1 = f_b_bs(state[-2 + 30], state[-2 + 34], state[-2 + 36], state[-2 + 37]);
const bitslice_t filter22_2 = f_b_bs(state[-2 + 39], state[-2 + 43], state[-2 + 45], state[-2 + 48]);
const bitslice_t filter22_3 = f_b_bs(state[-2 + 50], state[-2 + 51], state[-2 + 53], state[-2 + 55]);
const bitslice_t filter22_4 = f_a_bs(state[-2 + 56], state[-2 + 65], state[-2 + 66], state[-2 + 68]);
const bitslice_t filter22 = f_c_bs(filter22_0, filter22_1, filter22_2, filter22_3, filter22_4);
results8 &= (filter22 ^ keystream[22]);
if (!results8) continue;
state[-2 + 69] = lfsr_bs(21);
const bitslice_t filter23_0 = f_a_bs(state[-2 + 25], state[-2 + 26], state[-2 + 28], state[-2 + 29]);
const bitslice_t filter23_1 = f_b_bs(state[-2 + 31], state[-2 + 35], state[-2 + 37], state[-2 + 38]);
const bitslice_t filter23_2 = f_b_bs(state[-2 + 40], state[-2 + 44], state[-2 + 46], state[-2 + 49]);
const bitslice_t filter23_3 = f_b_bs(state[-2 + 51], state[-2 + 52], state[-2 + 54], state[-2 + 56]);
const bitslice_t filter23_4 = f_a_bs(state[-2 + 57], state[-2 + 66], state[-2 + 67], state[-2 + 69]);
const bitslice_t filter23 = f_c_bs(filter23_0, filter23_1, filter23_2, filter23_3, filter23_4);
results8 &= (filter23 ^ keystream[23]);
if (!results8) continue;
state[-2 + 70] = lfsr_bs(22);
const bitslice_t filter24_0 = f_a_bs(state[-2 + 26], state[-2 + 27], state[-2 + 29], state[-2 + 30]);
const bitslice_t filter24_1 = f_b_bs(state[-2 + 32], state[-2 + 36], state[-2 + 38], state[-2 + 39]);
const bitslice_t filter24_2 = f_b_bs(state[-2 + 41], state[-2 + 45], state[-2 + 47], state[-2 + 50]);
const bitslice_t filter24_3 = f_b_bs(state[-2 + 52], state[-2 + 53], state[-2 + 55], state[-2 + 57]);
const bitslice_t filter24_4 = f_a_bs(state[-2 + 58], state[-2 + 67], state[-2 + 68], state[-2 + 70]);
const bitslice_t filter24 = f_c_bs(filter24_0, filter24_1, filter24_2, filter24_3, filter24_4);
results8 &= (filter24 ^ keystream[24]);
if (!results8) continue;
state[-2 + 71] = lfsr_bs(23);
const bitslice_t filter25_0 = f_a_bs(state[-2 + 27], state[-2 + 28], state[-2 + 30], state[-2 + 31]);
const bitslice_t filter25_1 = f_b_bs(state[-2 + 33], state[-2 + 37], state[-2 + 39], state[-2 + 40]);
const bitslice_t filter25_2 = f_b_bs(state[-2 + 42], state[-2 + 46], state[-2 + 48], state[-2 + 51]);
const bitslice_t filter25_3 = f_b_bs(state[-2 + 53], state[-2 + 54], state[-2 + 56], state[-2 + 58]);
const bitslice_t filter25_4 = f_a_bs(state[-2 + 59], state[-2 + 68], state[-2 + 69], state[-2 + 71]);
const bitslice_t filter25 = f_c_bs(filter25_0, filter25_1, filter25_2, filter25_3, filter25_4);
results8 &= (filter25 ^ keystream[25]);
if (!results8) continue;
state[-2 + 72] = lfsr_bs(24);
const bitslice_t filter26_0 = f_a_bs(state[-2 + 28], state[-2 + 29], state[-2 + 31], state[-2 + 32]);
const bitslice_t filter26_1 = f_b_bs(state[-2 + 34], state[-2 + 38], state[-2 + 40], state[-2 + 41]);
const bitslice_t filter26_2 = f_b_bs(state[-2 + 43], state[-2 + 47], state[-2 + 49], state[-2 + 52]);
const bitslice_t filter26_3 = f_b_bs(state[-2 + 54], state[-2 + 55], state[-2 + 57], state[-2 + 59]);
const bitslice_t filter26_4 = f_a_bs(state[-2 + 60], state[-2 + 69], state[-2 + 70], state[-2 + 72]);
const bitslice_t filter26 = f_c_bs(filter26_0, filter26_1, filter26_2, filter26_3, filter26_4);
results8 &= (filter26 ^ keystream[26]);
if (!results8) continue;
state[-2 + 73] = lfsr_bs(25);
const bitslice_t filter27_0 = f_a_bs(state[-2 + 29], state[-2 + 30], state[-2 + 32], state[-2 + 33]);
const bitslice_t filter27_1 = f_b_bs(state[-2 + 35], state[-2 + 39], state[-2 + 41], state[-2 + 42]);
const bitslice_t filter27_2 = f_b_bs(state[-2 + 44], state[-2 + 48], state[-2 + 50], state[-2 + 53]);
const bitslice_t filter27_3 = f_b_bs(state[-2 + 55], state[-2 + 56], state[-2 + 58], state[-2 + 60]);
const bitslice_t filter27_4 = f_a_bs(state[-2 + 61], state[-2 + 70], state[-2 + 71], state[-2 + 73]);
const bitslice_t filter27 = f_c_bs(filter27_0, filter27_1, filter27_2, filter27_3, filter27_4);
results8 &= (filter27 ^ keystream[27]);
if (!results8) continue;
state[-2 + 74] = lfsr_bs(26);
const bitslice_t filter28_0 = f_a_bs(state[-2 + 30], state[-2 + 31], state[-2 + 33], state[-2 + 34]);
const bitslice_t filter28_1 = f_b_bs(state[-2 + 36], state[-2 + 40], state[-2 + 42], state[-2 + 43]);
const bitslice_t filter28_2 = f_b_bs(state[-2 + 45], state[-2 + 49], state[-2 + 51], state[-2 + 54]);
const bitslice_t filter28_3 = f_b_bs(state[-2 + 56], state[-2 + 57], state[-2 + 59], state[-2 + 61]);
const bitslice_t filter28_4 = f_a_bs(state[-2 + 62], state[-2 + 71], state[-2 + 72], state[-2 + 74]);
const bitslice_t filter28 = f_c_bs(filter28_0, filter28_1, filter28_2, filter28_3, filter28_4);
results8 &= (filter28 ^ keystream[28]);
if (!results8) continue;
state[-2 + 75] = lfsr_bs(27);
const bitslice_t filter29_0 = f_a_bs(state[-2 + 31], state[-2 + 32], state[-2 + 34], state[-2 + 35]);
const bitslice_t filter29_1 = f_b_bs(state[-2 + 37], state[-2 + 41], state[-2 + 43], state[-2 + 44]);
const bitslice_t filter29_2 = f_b_bs(state[-2 + 46], state[-2 + 50], state[-2 + 52], state[-2 + 55]);
const bitslice_t filter29_3 = f_b_bs(state[-2 + 57], state[-2 + 58], state[-2 + 60], state[-2 + 62]);
const bitslice_t filter29_4 = f_a_bs(state[-2 + 63], state[-2 + 72], state[-2 + 73], state[-2 + 75]);
const bitslice_t filter29 = f_c_bs(filter29_0, filter29_1, filter29_2, filter29_3, filter29_4);
results8 &= (filter29 ^ keystream[29]);
if (!results8) continue;
state[-2 + 76] = lfsr_bs(28);
const bitslice_t filter30_0 = f_a_bs(state[-2 + 32], state[-2 + 33], state[-2 + 35], state[-2 + 36]);
const bitslice_t filter30_1 = f_b_bs(state[-2 + 38], state[-2 + 42], state[-2 + 44], state[-2 + 45]);
const bitslice_t filter30_2 = f_b_bs(state[-2 + 47], state[-2 + 51], state[-2 + 53], state[-2 + 56]);
const bitslice_t filter30_3 = f_b_bs(state[-2 + 58], state[-2 + 59], state[-2 + 61], state[-2 + 63]);
const bitslice_t filter30_4 = f_a_bs(state[-2 + 64], state[-2 + 73], state[-2 + 74], state[-2 + 76]);
const bitslice_t filter30 = f_c_bs(filter30_0, filter30_1, filter30_2, filter30_3, filter30_4);
results8 &= (filter30 ^ keystream[30]);
if (!results8) continue;
state[-2 + 77] = lfsr_bs(29);
const bitslice_t filter31_0 = f_a_bs(state[-2 + 33], state[-2 + 34], state[-2 + 36], state[-2 + 37]);
const bitslice_t filter31_1 = f_b_bs(state[-2 + 39], state[-2 + 43], state[-2 + 45], state[-2 + 46]);
const bitslice_t filter31_2 = f_b_bs(state[-2 + 48], state[-2 + 52], state[-2 + 54], state[-2 + 57]);
const bitslice_t filter31_3 = f_b_bs(state[-2 + 59], state[-2 + 60], state[-2 + 62], state[-2 + 64]);
const bitslice_t filter31_4 = f_a_bs(state[-2 + 65], state[-2 + 74], state[-2 + 75], state[-2 + 77]);
const bitslice_t filter31 = f_c_bs(filter31_0, filter31_1, filter31_2, filter31_3, filter31_4);
results8 &= (filter31 ^ keystream[31]);
if (!results8) continue;
for (uint match_index = 0; match_index < MAX_BITSLICES && results8;) {
const uint shift = clz(results8) + 1;
match_index += shift;
#ifdef WITH_HITAG2_FULL
ulong state_check = unbitslice (&state[-2 + 2], MAX_BITSLICES - match_index);
// 2 rollback
state_check = (ulong)(((state_check << 1) & 0xffffffffffff) | (ulong)fnR(state_check));
state_check = (ulong)(((state_check << 1) & 0xffffffffffff) | (ulong)fnR(state_check));
// recover key
ulong keyrev = state_check & 0xffff;
ulong nR1xk = (state_check >> 16) & 0xffffffff;
uint b = 0;
for (uint i = 0; i < 32; i++) {
state_check = ((state_check) << 1) | ((checks[0] >> (31 - i)) & 0x1);
b = (b << 1) | fnf (state_check);
}
keyrev |= (nR1xk ^ checks[2] ^ b) << 16;
ulong lfsr = 0;
// test key
hitag2_init2 (&state_check, &lfsr, keyrev, checks[0], checks[3]);
if ((checks[1] ^ hitag2_nstep2 (state_check, lfsr)) == 0xffffffff)
{
// there can be only one (Highlander) :P
matches[atomic_inc(matches_found)] = rev64 (keyrev);
return;
}
#else
// take the state from layer 2 so we can recover the lowest 2 bits on the host by inverting the LFSR
matches[atomic_inc(matches_found)] = unbitslice (&state[-2 + 2], MAX_BITSLICES - match_index);
#endif // WITH_HITAG2_FULL
results8 <<= shift;
} // key check
} // 8
} // 7
} // 6
} // 5
} // 4
} // 3
} // 2
} // 1

@ -0,0 +1,163 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "opencl.h"
bool plat_dev_enabled (unsigned int id, unsigned int *sel, unsigned int cnt, unsigned int cur_type, unsigned int allow_type)
{
// usefull only with devices
if (allow_type != CL_DEVICE_TYPE_ALL)
{
if (cur_type != allow_type) return false;
}
if (sel[0] == 0xff) return true; // all
else
{
for (unsigned int i = 0; i < cnt; i++)
{
if (sel[i] == (id+1)) return true;
}
}
return false;
}
int runKernel (opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id)
{
int err = 0;
size_t global_ws[3] = { ctx->global_ws[id], GLOBAL_WS_1, GLOBAL_WS_2 };
size_t local_ws[3] = { ctx->local_ws[id], 1, 1 };
if (ctx->profiling)
{
printf ("[%zu] global_ws %zu, ctx->local_ws: %zu\n", id, global_ws[0], local_ws[0]);
fflush (stdout);
}
*matches_found = 0;
// Write our data set into the input array in device memory
err = clEnqueueWriteBuffer (ctx->commands[id], ctx->matches_found[id], CL_TRUE, 0, sizeof(uint32_t), matches_found, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clEnqueueWriteBuffer (matches_found) failed (%d)\n", id, err);
return -1;
}
// Set the arguments to our compute kernel
err = clSetKernelArg (ctx->kernels[id], 0, sizeof(uint32_t), &cand_base);
err |= clSetKernelArg (ctx->kernels[id], 4, sizeof(cl_mem), &ctx->matches_found[id]);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clSetKernelArg (cand_base|ctx->matches_found) failed (%d)\n", id, err);
return -1;
}
cl_event event;
err = clEnqueueNDRangeKernel (ctx->commands[id], ctx->kernels[id], 2, NULL, global_ws, local_ws, 0, NULL, &event);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clEnqueueNDRangeKernel() failed (%d)\n", id, err);
return -1;
}
// todo, check if is possible remove
err = clFlush (ctx->commands[id]);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clFlush() failed (%d)\n", id, err);
return -1;
}
if (ctx->profiling)
{
err = clWaitForEvents (1, &event);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clWaitForEvents() failed (%d)\n", id, err);
return -1;
}
cl_ulong gpu_t_start = 0, gpu_t_end = 0;
err = clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &gpu_t_start, NULL);
err |= clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &gpu_t_end, NULL);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clGetEventOPENCL_PROFILINGInfo() failed (%d)\n", id, err);
return -1;
}
const double time_ms = (double) (gpu_t_end - gpu_t_start) / 1000000;
printf ("[%zu] kernel exec time (ms): %.2f]\n", id, time_ms);
fflush (stdout);
}
err = clReleaseEvent (event);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clReleaseEvent() failed (%d)\n", id, err);
return -1;
}
// Wait for the command commands to get serviced before reading back results
// todo, check if is possible remove, because of blocking clEnqueueReadBuffer (CL_TRUE)
err = clFinish (ctx->commands[id]);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clFinish() failed (%d)\n", id, err);
return -1;
}
// read back the matches counter first
err = clEnqueueReadBuffer (ctx->commands[id], ctx->matches_found[id], CL_TRUE, 0, sizeof(uint32_t), matches_found, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clEnqueueReadBuffer(matches_found) failed (%d)\n", id, err);
return -1;
}
if (matches_found[0] > 0)
{
if (ctx->force_hitag2_opencl)
{
if (matches_found[0] != 1) printf ("[%zu] BUG: if match the counter must be 1. Here %u are founds\n", id, matches_found[0]);
}
else
{
if (matches_found[0] > (uint32_t)(ctx->global_ws[id]*WGS_MATCHES_FACTOR))
{
printf ("[%zu] BUG: the next clEnqueueReadBuffer will crash. 'matches' buffer (%u) is lower than requested (%u)\n", id, (uint32_t)(ctx->global_ws[id]*WGS_MATCHES_FACTOR), matches_found[0]);
}
}
err = clEnqueueReadBuffer(ctx->commands[id], ctx->matches[id], CL_TRUE, 0, sizeof(uint64_t) * matches_found[0], matches, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf ("[%zu] Error: clEnqueueReadBuffer(matches) failed (%d)\n", id, err);
return -1;
}
// key found
return 1;
}
// nada
return 0;
}

@ -0,0 +1,129 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#ifndef OPENCL_H
#define OPENCL_H
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#define CL_TARGET_OPENCL_VERSION 220
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl.h>
#endif
#include "ht2crack5opencl.h"
#include "queue.h"
#include <stdbool.h>
#include <stdio.h>
// max number of concurrent devices (tested up to 4x RTX 3090)
#define MAX_OPENCL_DEVICES 16
// defines structures
typedef struct compute_device_ctx
{
char name[0xff];
char vendor[0x40];
char version[0x40];
char driver_version[0x40];
bool is_gpu, is_apple_gpu, is_nv;
bool have_lop3, have_local_memory;
bool warning, unsupported;
bool selected;
bool enabled;
unsigned char pad1[4];
unsigned int sm_maj;
unsigned int sm_min;
unsigned int compute_units;
cl_device_id device_id;
cl_platform_id platform_id;
} compute_device_ctx_t;
typedef struct compute_platform_ctx
{
unsigned int device_cnt;
unsigned int compute_units_max;
bool is_nv, is_apple, is_intel;
bool warning;
bool selected;
unsigned char pad1[3];
compute_device_ctx_t device[0x10];
char name[0xff];
char vendor[0x40];
char version[0x40];
unsigned char pad2[1];
cl_platform_id platform_id;
cl_context context;
cl_program program;
} compute_platform_ctx_t;
typedef struct opencl_ctx
{
char *kernelSource[1];
size_t kernelSource_len;
size_t *global_ws;
size_t *local_ws;
int *profiles;
cl_device_id *device_ids; // compute device id's array
cl_context *contexts; // compute contexts
cl_command_queue *commands; // compute command queue (for each device)
cl_program *programs; // compute program's
cl_kernel *kernels; // compute kernel's
// cl_mem cand_base; // device memory used for the candidate base
cl_mem *keystreams; // device memory used for the keystream array
cl_mem *candidates; // device memory used for the candidates array
cl_mem *matches; // device memory used for the matches array
cl_mem *matches_found; // device memory used for the matches_found array
cl_mem *checks; // device memory used for uid, aR2, nR1, nR2
wu_queue_ctx_t queue_ctx;
bool profiling;
unsigned char pad2[1];
short thread_sched_type;
bool force_hitag2_opencl;
unsigned char pad3[3];
} opencl_ctx_t;
bool plat_dev_enabled (unsigned int id, unsigned int *sel, unsigned int cnt, unsigned int cur_type, unsigned int allow_type);
int runKernel (opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id);
#endif // OPENCL_H

@ -0,0 +1,551 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "queue.h"
#if TEST_UNIT == 1
int wu_queue_print (wu_queue_ctx_t *ctx)
{
wu_queue_item_t *ptr = 0; //NULL;
size_t sum = 0;
int ret = -1;
if (!ctx) return -1;
if (!ctx->init) return ERROR_CTX_IS_NOT_INIT;
pthread_mutex_lock (&ctx->queue_mutex);
if ((ret = wu_queue_done (ctx)) != 0)
{
pthread_mutex_unlock (&ctx->queue_mutex);
return ret;
}
switch (ctx->queue_type)
{
case QUEUE_TYPE_FORWARD:
ptr = ctx->queue_head;
printf ("> show queue contents in FORWARD mode, from head\n");
break;
case QUEUE_TYPE_REVERSE:
ptr = ctx->queue_tail;
printf ("> show queue contents in REVERSE mode, from tail\n");
break;
case QUEUE_TYPE_RANDOM:
ptr = ctx->queue_head;
printf ("> show queue contents in RANDOM mode, from head\n");
default:
pthread_mutex_unlock (&ctx->queue_mutex);
return ERROR_QUEUE_TYPE_INVALID;
}
int cnt = 0;
printf ("# Queue size: %zu\n", ctx->queue_size);
do
{
sum += ptr->data.id;
if (cnt++ < 4) printf ("# ID %zu, OFF %zu, MAX %zu\n", ptr->data.id, ptr->data.off, ptr->data.max);
if (ctx->queue_type == QUEUE_TYPE_FORWARD || ctx->queue_type == QUEUE_TYPE_RANDOM)
{
if (!ptr->next) break;
ptr = ptr->next;
}
else if (ctx->queue_type == QUEUE_TYPE_REVERSE)
{
if (!ptr->prev) break;
ptr = ptr->prev;
}
} while (ptr);
if (!ptr)
{
printf ("! Fail: ptr must be not null here\n");
pthread_mutex_unlock (&ctx->queue_mutex);
return -1;
}
switch (ctx->queue_type)
{
case QUEUE_TYPE_RANDOM:
printf ("> show queue contents in RANDOM mode, from end to head\n");
break;
case QUEUE_TYPE_FORWARD:
printf ("> show queue contents in FORWARD mode, from end to head\n");
break;
case QUEUE_TYPE_REVERSE:
printf ("> show queue contents in REVERSE mode, from start to tail\n");
break;
}
cnt = 0;
do
{
sum -= ptr->data.id;
if (cnt++ < 4) printf ("# ID %zu, OFF %zu, MAX %zu\n", ptr->data.id, ptr->data.off, ptr->data.max);
if (ctx->queue_type == QUEUE_TYPE_FORWARD || ctx->queue_type == QUEUE_TYPE_RANDOM)
{
if (!ptr->prev) break;
ptr = ptr->prev;
}
else if (ctx->queue_type == QUEUE_TYPE_REVERSE)
{
if (!ptr->next) break;
ptr = ptr->next;
}
} while (ptr);
if (sum != 0)
{
printf ("! Fail: sum is not zero\n");
pthread_mutex_unlock (&ctx->queue_mutex);
return -1;
}
pthread_mutex_unlock (&ctx->queue_mutex);
return 0;
}
#endif
const char *wu_queue_strerror (int error)
{
switch (error)
{
case QUEUE_EMPTY: return (const char *) "QUERY_EMPTY";
case NO_ERROR: return (const char *) "NO_ERROR";
case ERROR_GENERIC: return (const char *) "ERROR_GENERIC";
case ERROR_QUEUE_TYPE_INVALID: return (const char *) "ERROR_QUEUE_TYPE_INVALID";
case ERROR_CTX_NULL: return (const char *) "ERROR_CTX_NULL";
case ERROR_CTX_IS_INIT: return (const char *) "ERROR_CTX_IS_INIT";
case ERROR_CTX_IS_NOT_INIT: return (const char *) "ERROR_CTX_IS_NOT_INIT";
case ERROR_MUTEXATTR_INIT: return (const char *) "ERROR_MUTEXATTR_INIT";
case ERROR_MUTEXATTR_SETTYPE: return (const char *) "ERROR_MUTEXATTR_SETTYPE";
case ERROR_MUTEX_INIT: return (const char *) "ERROR_MUTEX_INIT";
case ERROR_ALLOC: return (const char *) "ERROR_ALLOC";
case ERROR_UNDEFINED:
default:
return (const char *) "ERROR_UNDEFINED";
}
}
const char *wu_queue_strdesc (wu_queue_type_t type)
{
switch (type)
{
case QUEUE_TYPE_FORWARD: return (const char *) "FORWARD";
case QUEUE_TYPE_REVERSE: return (const char *) "REVERSE";
case QUEUE_TYPE_RANDOM: return (const char *) "RANDOM";
}
return (const char *) "UNKNOWN";
}
int wu_queue_init (wu_queue_ctx_t *ctx, wu_queue_type_t queue_type)
{
#if TEST_UNIT == 1
fprintf (stdout, "[%s] enter\n", __func__);
fflush (stdout);
#endif
if (!ctx) return ERROR_CTX_NULL;
// Conditional jump or move depends on uninitialised value(s). It's good as it
if (ctx->init) return ERROR_CTX_IS_INIT;
if (queue_type == QUEUE_TYPE_RANDOM) srand ((unsigned int) time(0));
else if (queue_type != QUEUE_TYPE_FORWARD && queue_type != QUEUE_TYPE_REVERSE)
{
#if TEST_UNIT == 1
fprintf (stderr, "! Error, invalid 'queue_type'.\n");
#endif
return ERROR_QUEUE_TYPE_INVALID;
}
memset (ctx, 0, sizeof (wu_queue_ctx_t));
ctx->queue_type = queue_type;
ctx->queue_head = 0; //NULL;
ctx->queue_tail = 0; //NULL;
int ret = 0;
if ((ret = pthread_mutexattr_init (&ctx->queue_mutex_attr)) != 0)
{
#if TEST_UNIT == 1
fprintf (stderr, "! Error, pthread_mutexattr_init() failed (%d): %s\n", ret, strerror (ret));
#endif
memset (ctx, 0, sizeof (wu_queue_ctx_t));
return ERROR_MUTEXATTR_INIT;
}
if ((ret = pthread_mutexattr_settype (&ctx->queue_mutex_attr, PTHREAD_MUTEX_ERRORCHECK)) != 0)
{
#if TEST_UNIT == 1
fprintf (stderr, "! Error, pthread_mutexattr_settype(PTHREAD_MUTEX_ERRORCHECK) failed (%d): %s\n", ret, strerror (ret));
#endif
pthread_mutexattr_destroy (&ctx->queue_mutex_attr);
memset (ctx, 0, sizeof (wu_queue_ctx_t));
return ERROR_MUTEXATTR_SETTYPE;
}
if ((ret = pthread_mutex_init (&ctx->queue_mutex, &ctx->queue_mutex_attr)) != 0)
{
#if TEST_UNIT == 1
fprintf (stderr, "! Error, pthread_mutex_init() failed (%d): %s\n", ret, strerror (ret));
#endif
pthread_mutexattr_destroy (&ctx->queue_mutex_attr);
memset (ctx, 0, sizeof (wu_queue_ctx_t));
return ERROR_MUTEX_INIT;
}
ctx->init = 1;
return NO_ERROR;
}
int wu_queue_done (wu_queue_ctx_t *ctx)
{
if (!ctx) return ERROR_CTX_NULL;
if (!ctx->init) return ERROR_CTX_IS_NOT_INIT;
switch (ctx->queue_type)
{
case QUEUE_TYPE_RANDOM: return (ctx->queue_head == NULL);
case QUEUE_TYPE_FORWARD: return (ctx->queue_head == NULL);
case QUEUE_TYPE_REVERSE: return (ctx->queue_tail == NULL);
}
return ERROR_QUEUE_TYPE_INVALID;
}
int wu_queue_push (wu_queue_ctx_t *ctx, size_t id, size_t off, size_t max)
{
if (!ctx) return ERROR_CTX_NULL;
if (!ctx->init) return ERROR_CTX_IS_NOT_INIT;
pthread_mutex_lock (&ctx->queue_mutex);
wu_queue_item_t *ptr = 0; //NULL;
short first = 0;
if (ctx->queue_head == 0) first = 1;
if (!(ptr = (wu_queue_item_t *) malloc (sizeof (wu_queue_item_t))))
{
#if TEST_UNIT == 1
fprintf (stderr, "! Error: malloc() failed (%d): %s\n", errno, strerror (errno));
#endif
pthread_mutex_unlock (&ctx->queue_mutex);
return ERROR_ALLOC;
}
ptr->data.id = id;
ptr->data.off = off;
ptr->data.max = max;
ptr->next = 0; //NULL;
ptr->prev = 0; //NULL;
if (first) {
ctx->queue_head = ptr;
ctx->queue_tail = ptr;
ctx->queue_size++;
pthread_mutex_unlock (&ctx->queue_mutex);
return NO_ERROR;
}
// set tail
ptr->prev = ctx->queue_tail;
ctx->queue_tail = ptr;
// set head
wu_queue_item_t *last = ctx->queue_head;
while (last->next != 0) last = last->next;
last->next = ptr;
ctx->queue_size++;
pthread_mutex_unlock (&ctx->queue_mutex);
return NO_ERROR;
}
int wu_queue_pop (wu_queue_ctx_t *ctx, wu_queue_data_t *wu, short remove)
{
if (!ctx) return ERROR_CTX_NULL;
if (!ctx->init) return ERROR_CTX_IS_NOT_INIT;
int ret = -1;
int rnd = 0;
wu_queue_item_t *ptr = 0, *ptrPrev = 0;
pthread_mutex_lock (&ctx->queue_mutex);
if ((ret = wu_queue_done (ctx)) != 0)
{
#if TEST_UNIT == 1
fprintf (stderr, "ret from wu_queue_done() (%d): %s\n", ret, wu_queue_strerror (ret));
#endif
pthread_mutex_unlock (&ctx->queue_mutex);
return ret;
}
switch (ctx->queue_type)
{
case QUEUE_TYPE_FORWARD:
ptr = ctx->queue_head;
break;
case QUEUE_TYPE_REVERSE:
ptr = ctx->queue_tail;
break;
case QUEUE_TYPE_RANDOM:
ptr = ctx->queue_head;
rnd = rand() % (int) ctx->queue_size;
for (int r = 0; r < rnd; r++)
{
ptrPrev = ptr;
ptr = ptr->next;
}
break;
}
if (!ptr)
{
pthread_mutex_unlock (&ctx->queue_mutex);
return ERROR_GENERIC;
}
if (!remove)
{
wu->id = ptr->data.id;
wu->off = ptr->data.off;
wu->max = ptr->data.max;
}
switch (ctx->queue_type)
{
case QUEUE_TYPE_FORWARD:
ctx->queue_head = (ctx->queue_head)->next;
break;
case QUEUE_TYPE_REVERSE:
ctx->queue_tail = (ctx->queue_tail)->prev;
break;
case QUEUE_TYPE_RANDOM: // from the head
#if TEST_UNIT == 1
fprintf (stdout, "pop id %ld\n", wu->id);
fflush (stdout);
#endif
if (ptrPrev == NULL)
{
ctx->queue_head = (ctx->queue_head)->next;
}
else
{
ptrPrev->next = ptr->next;
}
break;
}
memset (ptr, 0, sizeof (wu_queue_item_t));
free (ptr);
ptr = 0; //NULL;
ctx->queue_size--;
if (!remove) wu->rem = ctx->queue_size;
pthread_mutex_unlock (&ctx->queue_mutex);
return NO_ERROR;
}
int wu_queue_destroy (wu_queue_ctx_t *ctx)
{
#if TEST_UNIT == 1
fprintf (stdout, "[%s] enter\n", __func__);
fflush (stdout);
#endif
if (!ctx) return ERROR_CTX_NULL;
if (!ctx->init) return ERROR_CTX_IS_NOT_INIT;
pthread_mutex_lock (&ctx->queue_mutex);
int ret = -1;
// unload the queue
while ((ret = wu_queue_pop (ctx, 0, 1)) == 0) {};
if (ret != QUEUE_EMPTY)
{
#if TEST_UNIT
fprintf (stderr, "! Error, wu_queue_pop() failed (%d): %s\n", ret, wu_queue_strerror (ret));
#endif
return ret;
}
#if TEST_UNIT == 1
printf ("ret from wu_queue_pop() (%d): %s\n", ret, wu_queue_strerror (ret));
#endif
#if TEST_UNIT == 1
if (ctx->queue_head != 0) fprintf (stderr, "queue_head not null\n");
if (ctx->queue_tail != 0) fprintf (stderr, "queue_tail not null\n");
#endif
ctx->queue_head = 0; //NULL;
ctx->queue_tail = 0; //NULL;
ctx->init = 0;
pthread_mutex_unlock (&ctx->queue_mutex);
pthread_mutex_destroy (&ctx->queue_mutex);
pthread_mutexattr_destroy (&ctx->queue_mutex_attr);
memset (ctx, 0, sizeof (wu_queue_ctx_t));
//ctx = 0; //NULL;
return (ret == 1) ? NO_ERROR : ERROR_GENERIC;
}
#if TEST_UNIT == 1
int main (void)
{
unsigned int profiles[11][2] =
{
{ 16384, 5 }, // 0, best for Intel GPU's with Neo
{ 8192, 6 }, // 1, only for Intel NEO
{ 4096, 7 }, // 2 (old 0) seems the best for all others (also NVIDIA) :D Apple/Intel GPU's stable here
{ 2048, 8 }, // 3 (old 1) usefull for any kind of CPU's
{ 1024, 9 },
{ 512, 10 },
{ 256, 11 },
{ 128, 12 }, // 7, (old 5) the last good value with NVIDIA GPU's
{ 64, 13 },
{ 32, 14 },
{ 16, 15 },
};
size_t err = 0, err_max = 1;
size_t id = 0;
size_t max = profiles[0][0];
size_t chunk = profiles[0][1];
size_t sum = 0;
int i = 0;
wu_queue_ctx_t ctx;
memset (&ctx, 0, sizeof (wu_queue_ctx_t));
printf("Selected the following config: max %zu, chunk %zu\n", max, chunk);
fflush (stdout);
wu_queue_type_t types[4] = { QUEUE_TYPE_FORWARD, QUEUE_TYPE_REVERSE, QUEUE_TYPE_RANDOM, 1234 };
int types_max = (int) (sizeof (types) / sizeof (wu_queue_type_t));
int ret = 0;
for (i = 0; i < types_max; i++)
{
printf ("[%d] trying wu_queue_init() in %s mode\n", i, wu_queue_strdesc (types[i]));
if ((ret = wu_queue_init (&ctx, types[i])) != 0)
{
fprintf (stderr, "[%d] Error: wu_queue_init(%s) failed (%d): %s\n", i, wu_queue_strdesc (types[i]), ret, wu_queue_strerror (ret));
err++;
continue;
}
printf ("[%d] trying wu_queue_push()\n", i);
for (id = 0; id < max; id++)
{
sum += id;
ret = wu_queue_push (&ctx, id, id << chunk, max);
if (ret != 0)
{
fprintf (stderr, "[%d] Error: wu_queue_push(%zu) failed (%d): %s\n", i, id, ret, wu_queue_strerror (ret));
err++;
continue;
}
}
printf("[%d] push sum: %zu\n", i, sum);
if (wu_queue_print (&ctx) == -1)
{
fprintf (stderr, "[%d] wu_queue_print() error\n", i);
err++;
continue;
}
wu_queue_data_t wu;
while ((ret = wu_queue_pop (&ctx, &wu, 0)) == 0) sum -= wu.id;
if (ret != QUEUE_EMPTY)
{
fprintf (stderr, "[%d] Error: wu_queue_pop() failed (%d): %s\n", i, ret, wu_queue_strerror (ret));
err++;
continue;
}
printf("[%d] pop sum: %zu\n", i, sum);
if (sum != 0)
{
fprintf (stderr, "[%d] Fail: sum is not zero (%zu)\n", i, sum);
err++;
continue;
}
if (wu_queue_print (&ctx) == -1)
{
fprintf (stderr, "[%d] wu_queue_print() error\n", i);
err++;
continue;
}
printf ("[%d] trying wu_queue_destroy()\n", i);
if ((ret = wu_queue_destroy (&ctx)) != 0)
{
fprintf (stderr, "! Error: wu_queue_destroy() failed (%d): %s\n", ret, wu_queue_strerror (ret));
err++;
continue;
}
}
printf ("Catched %zu/%zu error(s).\n", err, err_max);
if (err == err_max)
{
printf ("Self-Test pass\n");
return 0;
}
printf ("Self-Test fail\n");
return -1;
}
#endif // TEST_UNIT

@ -0,0 +1,122 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#ifndef QUEUE_H
#define QUEUE_H
// set 1 to enable the test unit
#ifndef TEST_UNIT
#define TEST_UNIT 0
#endif
#if TEST_UNIT == 1
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#endif
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <pthread.h>
// enum errors
typedef enum wu_queue_error
{
QUEUE_EMPTY = 1,
NO_ERROR = 0,
ERROR_GENERIC = -1,
ERROR_QUEUE_TYPE_INVALID = -2,
ERROR_CTX_NULL = -3,
ERROR_CTX_IS_INIT = -4,
ERROR_CTX_IS_NOT_INIT = -5,
ERROR_MUTEXATTR_INIT = -6,
ERROR_MUTEXATTR_SETTYPE = -7,
ERROR_MUTEX_INIT = -8,
ERROR_ALLOC = -9,
ERROR_UNDEFINED = -10
} wu_queue_error_t;
// enum queue types
typedef enum wu_queue_type
{
QUEUE_TYPE_FORWARD = 0,
QUEUE_TYPE_REVERSE,
QUEUE_TYPE_RANDOM
} wu_queue_type_t;
// hold wu data
typedef struct wu_queue_data
{
size_t id;
size_t off;
size_t max;
size_t rem;
} wu_queue_data_t;
// lists
typedef struct wu_queue_item wu_queue_item_t;
struct wu_queue_item
{
wu_queue_data_t data;
wu_queue_item_t *next;
wu_queue_item_t *prev;
};
// main ctx
typedef struct wu_queue_ctx
{
unsigned int init;
wu_queue_type_t queue_type;
wu_queue_item_t *queue_head;
wu_queue_item_t *queue_tail;
size_t queue_size;
// mutex
pthread_mutexattr_t queue_mutex_attr;
// unsigned char pad1[4];
pthread_mutex_t queue_mutex;
} wu_queue_ctx_t;
// exports
int wu_queue_init (wu_queue_ctx_t *ctx, wu_queue_type_t queue_type);
int wu_queue_done (wu_queue_ctx_t *ctx);
int wu_queue_push (wu_queue_ctx_t *ctx, size_t id, size_t off, size_t max);
int wu_queue_pop (wu_queue_ctx_t *ctx, wu_queue_data_t *wu, short remove);
int wu_queue_destroy (wu_queue_ctx_t *ctx);
const char *wu_queue_strdesc (wu_queue_type_t type);
const char *wu_queue_strerror (int error);
#if TEST_UNIT == 1
int wu_queue_print (wu_queue_ctx_t *ctx);
#endif
#endif // QUEUE_H

@ -0,0 +1,730 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "threads.h"
const char *thread_strerror (int error)
{
switch (error)
{
case THREAD_NOERROR: return (const char *) "No error";
}
return (const char *) "GENERIC";
}
int thread_init (thread_ctx_t *ctx, short type, size_t thread_count)
{
if (!ctx) return THREAD_ERROR_CTX_IS_NULL;
if (ctx->init) return THREAD_ERROR_CTX_IS_INIT;
if (type != THREAD_TYPE_ASYNC && type != THREAD_TYPE_SEQ) return THREAD_ERROR_TYPE_INVALID;
if (thread_count == 0) return THREAD_ERROR_COUNT_INVALID;
memset (ctx, 0, sizeof (thread_ctx_t));
ctx->thread_count = thread_count;
ctx->type = type;
ctx->enable_condusleep = (type == THREAD_TYPE_ASYNC && thread_count == 1);
ctx->thread_handles = (pthread_t *) calloc (thread_count, sizeof (pthread_t));
if (!ctx->thread_handles)
{
return THREAD_ERROR_ALLOC;
}
ctx->thread_mutexs = (pthread_mutex_t *) calloc (thread_count, sizeof (pthread_mutex_t));
if (!ctx->thread_mutexs)
{
free (ctx->thread_handles);
return THREAD_ERROR_ALLOC;
}
ctx->thread_conds = (pthread_cond_t *) calloc (thread_count, sizeof (pthread_cond_t));
if (!ctx->thread_conds)
{
free (ctx->thread_handles);
free (ctx->thread_mutexs);
return THREAD_ERROR_ALLOC;
}
if (pthread_attr_init (&ctx->attr) != 0)
{
free (ctx->thread_handles);
free (ctx->thread_mutexs);
free (ctx->thread_conds);
return THREAD_ERROR_ATTR;
}
pthread_attr_setdetachstate (&ctx->attr, PTHREAD_CREATE_JOINABLE);
if (pthread_mutexattr_init (&ctx->mutex_attr) != 0)
{
free (ctx->thread_handles);
free (ctx->thread_mutexs);
free (ctx->thread_conds);
pthread_attr_destroy (&ctx->attr);
return THREAD_ERROR_MUTEXATTR;
}
pthread_mutexattr_settype (&ctx->mutex_attr, PTHREAD_MUTEX_ERRORCHECK);
if (ctx->enable_condusleep)
{
if (pthread_mutex_init (&ctx->thread_mutex_usleep, NULL) != 0)
{
free (ctx->thread_handles);
free (ctx->thread_mutexs);
free (ctx->thread_conds);
pthread_attr_destroy (&ctx->attr);
pthread_mutexattr_destroy (&ctx->mutex_attr);
return THREAD_ERROR_MUTEX_USLEEP;
}
if (pthread_cond_init (&ctx->thread_cond_usleep, NULL) != 0)
{
free (ctx->thread_handles);
free (ctx->thread_mutexs);
free (ctx->thread_conds);
pthread_mutex_destroy (&ctx->thread_mutex_usleep);
pthread_attr_destroy (&ctx->attr);
pthread_mutexattr_destroy (&ctx->mutex_attr);
return THREAD_ERROR_COND_USLEEP;
}
}
int err = 0;
int z = 0;
for (z = 0; z < (int) ctx->thread_count; z++)
{
if (ctx->type == THREAD_TYPE_ASYNC)
{
if (pthread_mutex_init (&ctx->thread_mutexs[z], NULL) != 0)
{
err = THREAD_ERROR_MUTEX;
break;
}
if (pthread_cond_init (&ctx->thread_conds[z], NULL) != 0)
{
pthread_mutex_destroy (&ctx->thread_mutexs[z]);
err = THREAD_ERROR_COND;
break;
}
}
}
if (err != 0)
{
z--; // step back
for (; z >= 0; z--)
{
pthread_cond_destroy (&ctx->thread_conds[z]);
pthread_mutex_destroy (&ctx->thread_mutexs[z]);
}
if (ctx->enable_condusleep)
{
pthread_mutex_destroy (&ctx->thread_mutex_usleep);
pthread_cond_destroy (&ctx->thread_cond_usleep);
}
free (ctx->thread_handles);
free (ctx->thread_mutexs);
free (ctx->thread_conds);
pthread_attr_destroy (&ctx->attr);
pthread_mutexattr_destroy (&ctx->mutex_attr);
return err;
}
ctx->init = 1;
return 0;
}
int thread_destroy (thread_ctx_t *ctx)
{
if (!ctx) return -1;
if (!ctx->init) return -2;
if (ctx->enable_condusleep)
{
pthread_cond_destroy (&ctx->thread_cond_usleep);
pthread_mutex_destroy (&ctx->thread_mutex_usleep);
}
for (size_t z = 0; z < ctx->thread_count; z++)
{
pthread_cond_destroy (&ctx->thread_conds[z]);
pthread_mutex_destroy (&ctx->thread_mutexs[z]);
}
pthread_mutexattr_destroy (&ctx->mutex_attr);
pthread_attr_init (&ctx->attr);
free (ctx->thread_conds);
free (ctx->thread_mutexs);
free (ctx->thread_handles);
memset (ctx, 0, sizeof (thread_ctx_t));
ctx->init = 0;
return 0;
}
int thread_start (thread_ctx_t *ctx, thread_args_t *t_arg)
{
int err = 0;
int z = 0;
for (z = 0; z < (int) ctx->thread_count; z++)
{
if (pthread_create (&ctx->thread_handles[z], &ctx->attr, (ctx->type == THREAD_TYPE_ASYNC) ? computing_process_async : computing_process, (void *) &t_arg[z]) != 0)
{
err = THREAD_ERROR_CREATE;
break;
}
}
if (err != 0)
{
z--; // step back
for (; z >= 0; z++)
{
pthread_cancel (ctx->thread_handles[z]);
pthread_join (ctx->thread_handles[z], NULL);
}
return err;
}
return 0;
}
int thread_stop (thread_ctx_t *ctx)
{
for (size_t z = 0; z < ctx->thread_count; z++)
{
if (ctx->type == THREAD_TYPE_ASYNC) pthread_cancel (ctx->thread_handles[z]);
pthread_join (ctx->thread_handles[z], NULL);
}
return 0;
}
__attribute__ ((format (printf, 1, 2)))
void tprintf (const char * restrict format, ...)
{
flockfile (stdout);
va_list va_args;
va_start (va_args, format);
vprintf (format, va_args);
va_end (va_args);
funlockfile (stdout);
fflush (stdout);
}
const char *thread_status_strdesc (thread_status_t s)
{
switch (s)
{
case TH_START: return (const char *) "START";
case TH_WAIT: return (const char *) "WAIT";
case TH_PROCESSING: return (const char *) "PROCESSING";
case TH_ERROR: return (const char *) "ERROR";
case TH_STOP: return (const char *) "STOP";
case TH_FOUND_KEY: return (const char *) "FOUND_KEY";
case TH_END: return (const char *) "END";
}
return (const char *) "... or die tryin'";
}
bool thread_setEnd (thread_ctx_t *ctx, thread_args_t *t_arg)
{
bool found = false;
size_t z;
int m_ret = 0;
int c_ret = 0;
for (z = 0; z < ctx->thread_count; z++)
{
m_ret = pthread_mutex_lock (&ctx->thread_mutexs[z]);
if (m_ret != 0)
{
tprintf ("[%zu] [%s] Error: pthread_mutex_lock() failed (%d): %s\n", z, __func__, m_ret, strerror (m_ret));
}
thread_status_t tmp = t_arg[z].status;
#if DEBUGME > 0
tprintf ("[%zu] [%s] Thread status: %s\n", z, __func__, thread_status_strdesc(t_arg[z].status));
#endif
if (tmp == TH_FOUND_KEY || tmp == TH_END || tmp == TH_ERROR)
{
if (tmp == TH_FOUND_KEY) found = true;
pthread_mutex_unlock (&ctx->thread_mutexs[z]);
continue;
}
#if DEBUGME > 0
tprintf ("[%zu] [%s] Set thread status to TH_STOP\n", z, __func__);
#endif
t_arg[z].status = TH_STOP;
if (tmp == TH_WAIT)
{
#if DEBUGME > 0
tprintf ("[%zu] [%s] Send cond_signal to thread\n", z, __func__);
#endif
c_ret = pthread_cond_signal (&ctx->thread_conds[z]);
if (c_ret != 0)
{
tprintf ("[%zu] [%s] Error: pthread_cond_signal() failed (%d): %s\n", z, __func__, c_ret, strerror (c_ret));
}
}
pthread_mutex_unlock (&ctx->thread_mutexs[z]);
}
return found;
}
void *computing_process (void *arg)
{
thread_args_t *a = (thread_args_t *) arg;
uint64_t off = 0;
size_t z = a->device_id;
uint64_t *matches = a->matches;
uint32_t *matches_found = a->matches_found;
uint32_t uid = a->uid;
uint32_t aR2 = a->aR2;
uint32_t nR1 = a->nR1;
uint32_t nR2 = a->nR2;
opencl_ctx_t *ctx = a->ocl_ctx;
wu_queue_data_t wu;
wu_queue_pop (&ctx->queue_ctx, &wu, false);
off = wu.off;
a->slice = wu.id+1;
if (ctx->queue_ctx.queue_type == QUEUE_TYPE_RANDOM)
{
#if DEBUGME > 0
printf ("[%zu] Slice %zu (off %zu), max %zu, remain %zu slice(s)\n", z, wu.id+1, wu.off, wu.max, wu.rem);
#else
printf ("[%zu] Slice %zu/%zu (%zu remain)\n", z, wu.id+1, wu.max, wu.rem);
#endif // DEBUGME
}
else
{
#if DEBUGME > 0
printf ("[%zu] Slice %zu/%zu, off %zu\n", z, wu.id+1, wu.max, wu.off);
#else
printf ("[%zu] Slice %zu/%zu\n", z, wu.id+1, wu.max);
#endif // DEBUGME
}
fflush (stdout);
int ret = runKernel (ctx, (uint32_t) off, matches, matches_found, z);
a->r = false;
a->err = false;
if (ret < 1) // error or nada
{
if (ret == -1) a->err = true;
pthread_exit (NULL);
}
if (!ctx->force_hitag2_opencl)
{
#if DEBUGME >= 2
printf ("[slave][%zu] master, I found %5u candidates @ slice %zu\n", z, matches_found[0], a->slice+1);
fflush (stdout);
#endif
for (uint32_t match = 0; match < matches_found[0]; match++)
{
a->r = try_state (matches[match], uid, aR2, nR1, nR2, &a->key);
if (a->r) break;
}
}
else
{
// the OpenCL kernel return only one key if found, else nothing
#if TDEBUG >= 1
printf ("[slave][%zu] master, I found the key @ slice %zu\n", z, a->slice+1);
fflush (stdout);
#endif
a->r = true;
a->key = matches[0];
}
pthread_exit (NULL);
}
void *computing_process_async (void *arg)
{
thread_args_t *a = (thread_args_t *) arg;
size_t z = a->device_id;
// TH_START, not really needed lock with mutex here
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
// fetching data from thread struct, I hope they are good
thread_status_t status = a->status;
uint32_t uid = a->uid;
uint32_t aR2 = a->aR2;
uint32_t nR1 = a->nR1;
uint32_t nR2 = a->nR2;
uint64_t *matches = a->matches;
uint32_t *matches_found = a->matches_found;
size_t max_step = a->max_step;
opencl_ctx_t *ctx = a->ocl_ctx;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
uint64_t off = 0;
// size_t slice = 0;
int ret = 0;
if (status == TH_START)
{
#if TDEBUG >= 1
printf ("[slave][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc (status));
#endif
status = TH_WAIT;
// proceed to next
}
do // slave
{
if (status == TH_WAIT)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
// update thread status to WAIT, todo: check with multiple devices
if (a->status == TH_END) // other threads found the key
{
fflush(stdout);
status = TH_END;
a->quit = true;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
pthread_exit (NULL);
}
else
{
a->status = TH_WAIT;
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slate][%zu] after pthread_cond_signal TH_WAIT\n", z);
fflush (stdout);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
}
#if TDEBUG >= 1
printf ("[slave][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", z, thread_status_strdesc (status));
fflush (stdout);
#endif
pthread_cond_wait (&a->thread_ctx->thread_conds[z], &a->thread_ctx->thread_mutexs[z]);
status = a->status; // read new status from master
#if TDEBUG >= 2
printf ("[slave][%zu] master, got the signal with new state: %s.\n", z, thread_status_strdesc (status));
fflush (stdout);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
if (status == TH_WAIT)
{
#if TDEBUG >=1
printf ("[slave] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n");
fflush (stdout);
#endif
break;
}
}
if (status == TH_ERROR)
{
#if TDEBUG >= 1
printf ("[slave][%zu] master, got error signal, proceed with exit\n", z);
fflush (stdout);
#endif
pthread_exit (NULL);
}
if (status == TH_PROCESSING)
{
#if TDEBUG >= 2
printf ("[slave][%zu] master, got a work-unit, processing ...\n", z);
fflush (stdout);
#endif
wu_queue_data_t wu;
wu_queue_pop (&ctx->queue_ctx, &wu, false);
off = wu.off;
a->slice = wu.id+1;
if (ctx->queue_ctx.queue_type == QUEUE_TYPE_RANDOM)
{
#if DEBUGME > 0
printf ("[%zu] Slice %zu (off %zu), max %zu, remain %zu slice(s)\n", z, wu.id+1, wu.off, wu.max, wu.rem);
#else
printf ("[%zu] Slice %zu/%zu (%zu remain)\n", z, wu.id+1, wu.max, wu.rem);
#endif // DEBUGME
}
else
{
#if DEBUGME > 0
printf ("[%zu] Slice %zu/%zu, off %zu\n", z, wu.id+1, wu.max, wu.off);
#else
printf ("[%zu] Slice %zu/%zu\n", z, wu.id+1, wu.max);
#endif // DEBUGME
}
fflush (stdout);
ret = runKernel (ctx, (uint32_t) off, matches, matches_found, z);
if (ret < 1) // error or nada
{
if (ret == -1)
{
// untested code
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
a->err = true;
a->status = TH_ERROR;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf ("[slave][%zu] master, something is broken, exit\n", z);
fflush (stdout);
#endif
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_ERROR\n", z);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
pthread_exit (NULL);
// end of unstested code
}
#if TDEBUG >= 1
printf ("[slave][%zu] master, process is done but no candidates found\n", z);
fflush (stdout);
#endif
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
if (a->slice >= max_step) a->status = TH_END;
else a->status = TH_WAIT;
status = a->status;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_WAIT\n", z);
fflush (stdout);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
continue;
}
if (!ctx->force_hitag2_opencl)
{
#if TDEBUG >= 1
printf ("[slave][%zu] master, we got %5u candidates. Proceed to validation\n", z, matches_found[0]);
fflush (stdout);
#endif
for (uint32_t match = 0; match < matches_found[0]; match++)
{
if (a->quit)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
a->status = TH_END;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf ("[slave][%zu] master, Another thread found the key, quit 2 \n", z);
fflush (stdout);
#endif
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_END\n", z);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
pthread_exit (NULL);
}
a->r = try_state (matches[match], uid, aR2, nR1, nR2, &a->key);
if (a->r)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
a->s = matches[match];
status = a->status = TH_FOUND_KEY;
a->quit = true;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf ("[slave][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", z, a->s, a->slice+1);
fflush (stdout);
#endif
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
pthread_exit (NULL);
}
}
if (a->quit)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
a->status = TH_END;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf ("[slave][%zu] master, Another thread found the key, quit 1 \n", z);
fflush (stdout);
#endif
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_END\n", z);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
pthread_exit (NULL);
}
// setting internal status to wait
status = TH_WAIT;
continue;
}
else
{
// the OpenCL kernel return only one key if found, else nothing
pthread_mutex_lock (&a->thread_ctx->thread_mutexs[z]);
a->r = true;
a->key = matches[0];
status = a->status = TH_FOUND_KEY;
a->quit = true;
pthread_mutex_unlock (&a->thread_ctx->thread_mutexs[z]);
#if TDEBUG >= 1
printf ("[slave][%zu] master, I found the key at slice %zu\n", z, a->slice+1);
fflush (stdout);
#endif
if (a->thread_ctx->enable_condusleep)
{
pthread_mutex_lock (&a->thread_ctx->thread_mutex_usleep);
pthread_cond_signal (&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond
#if TDEBUG >= 1
printf ("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z);
#endif
pthread_mutex_unlock (&a->thread_ctx->thread_mutex_usleep);
}
pthread_exit (NULL);
}
}
if (status >= TH_FOUND_KEY)
{
#if TDEBUG >= 1
if (status == TH_FOUND_KEY)
{
printf ("[slave][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", z);
fflush (stdout);
}
else if (status == TH_END)
{
printf ("[slave][%zu] master, TH_END reached\n", z);
fflush (stdout);
}
#endif
pthread_exit (NULL);
}
} while (status < TH_FOUND_KEY);
pthread_exit (NULL);
}

@ -0,0 +1,143 @@
/****************************************************************************
Author : Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
Date : Sun Jan 10 13:59:37 CET 2021
Version: 0.1beta
License: GNU General Public License v3 or any later version (see LICENSE.txt)
*****************************************************************************
Copyright (C) 2020-2021 <Gabriele Gristina>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#ifndef THREADS_H
#define THREADS_H
#include <stdio.h>
#include <stdarg.h>
#include <stdbool.h>
#include <pthread.h>
#include "ht2crack5opencl.h"
#include "opencl.h"
#include "hitag2.h"
typedef enum thread_status
{
TH_START = 0,
TH_WAIT,
TH_PROCESSING,
TH_STOP,
TH_ERROR,
TH_FOUND_KEY,
TH_END
} thread_status_t;
typedef enum thread_type
{
THREAD_TYPE_SEQ = 0,
THREAD_TYPE_ASYNC
} thread_type_t;
typedef enum thread_error
{
THREAD_NOERROR = 0,
THREAD_ERROR_CTX_IS_NULL = -1,
THREAD_ERROR_CTX_IS_INIT = -2,
THREAD_ERROR_TYPE_INVALID = -3,
THREAD_ERROR_COUNT_INVALID = -4,
THREAD_ERROR_ATTR_SETDETACH = -5,
THREAD_ERROR_ATTR = -6,
THREAD_ERROR_MUTEXATTR = -7,
THREAD_ERROR_CREATE = -8,
THREAD_ERROR_MUTEX = -9,
THREAD_ERROR_COND = -10,
THREAD_ERROR_MUTEX_USLEEP = -11,
THREAD_ERROR_COND_USLEEP = -12,
THREAD_ERROR_GENERIC = -13,
THREAD_ERROR_ALLOC = -14
} thread_error_t;
typedef struct threads_ctx
{
short init;
short type;
unsigned char pad1[4];
size_t thread_count;
pthread_t *thread_handles;
pthread_mutex_t *thread_mutexs;
pthread_cond_t *thread_conds;
short enable_condusleep;
// get rid of sleep/usleep call to synchronize threads
unsigned char pad2[6];
pthread_mutex_t thread_mutex_usleep;
pthread_cond_t thread_cond_usleep;
pthread_attr_t attr;
pthread_mutexattr_t mutex_attr;
} thread_ctx_t;
// used by threads engine
typedef struct thread_arg
{
thread_status_t status;
unsigned char pad1[4];
size_t max_threads;
uint64_t s;
uint32_t uid, nR1, aR1, nR2, aR2;
bool r;
bool err;
bool quit;
bool async;
uint64_t off;
uint64_t *matches;
uint32_t *matches_found;
size_t slice;
size_t max_step;
size_t device_id;
uint64_t key;
opencl_ctx_t *ocl_ctx;
thread_ctx_t *thread_ctx;
} thread_args_t;
int thread_init (thread_ctx_t *ctx, short type, size_t thread_count);
int thread_start (thread_ctx_t *ctx, thread_args_t *args);
int thread_stop (thread_ctx_t *ctx);
void tprintf (const char * restrict format, ...);
const char *thread_strerror (int error);
const char *thread_status_strdesc (thread_status_t s);
bool thread_setEnd (thread_ctx_t *ctx, thread_args_t *t_arg);
void *computing_process (void *arg);
void *computing_process_async (void *arg);
int thread_destroy (thread_ctx_t *ctx);
#endif // THREADS_H