sycl/syclwrap

Search:
Group by:

SYCL wrapper for Nim - Multi-Type Native Kernel Edition

This module provides low-level SYCL bindings using C++ interop. Supports: float32, float64, int32, int64

The SYCL C++ wrapper (sycl_wrapper.cpp) is compiled separately into libreliq_sycl.so and loaded at runtime.

Types

ESycl = object of CatchableError
SYCL exception type
SyclBuffer = pointer
SyclContext = pointer
SyclDevice = pointer
SyclDeviceType = enum
  sdtDefault = 0, sdtCPU = 1, sdtGPU = 2, sdtAccelerator = 3, sdtAll = 4
SyclElementType = enum
  setFloat32, setFloat64, setInt32, setInt64
Element type codes for dispatch
SyclEvent = pointer
SyclQueue = pointer
SyclResult = enum
  srSuccess = 0, srInvalidDevice = 1, srInvalidContext = 2, srInvalidQueue = 3,
  srInvalidBuffer = 4, srInvalidKernel = 5, srCompilationError = 6, srRuntimeError = 7

Procs

proc allocate(queue: SyclQueue; size: int): SyclBuffer {....raises: [ESycl],
    tags: [], forbids: [].}
proc check(result: SyclResult) {....raises: [ESycl], tags: [], forbids: [].}
Check SYCL result and raise exception on error
proc createQueue(dtype: SyclDeviceType = sdtDefault; deviceIdx: int = 0): SyclQueue {.
    ...raises: [ESycl], tags: [], forbids: [].}
proc deallocate(queue: SyclQueue; buf: SyclBuffer) {....raises: [], tags: [],
    forbids: [].}
proc destroyQueue(queue: SyclQueue) {....raises: [], tags: [], forbids: [].}
proc finish(queue: SyclQueue): int {.discardable, ...raises: [], tags: [],
                                     forbids: [].}
Wait for queue to finish and return 0
proc getBufferPtr(buffer: SyclBuffer): pointer {....raises: [], tags: [],
    forbids: [].}
proc getDeviceCount(dtype: SyclDeviceType = sdtDefault): int {....raises: [],
    tags: [], forbids: [].}
proc getDeviceName(dtype: SyclDeviceType = sdtDefault; deviceIdx: int = 0): string {.
    ...raises: [], tags: [], forbids: [].}
proc isCpu(dtype: SyclDeviceType = sdtDefault; deviceIdx: int = 0): bool {.
    ...raises: [], tags: [], forbids: [].}
proc isGpu(dtype: SyclDeviceType = sdtDefault; deviceIdx: int = 0): bool {.
    ...raises: [], tags: [], forbids: [].}
proc kernelAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numElements: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelAdd[T](queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                  numElements: int; dummy: typedesc[T]) {.inline.}
proc kernelComplexAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                      numComplexElements: int) {.inline, ...raises: [], tags: [],
    forbids: [].}
proc kernelComplexMatAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numSites: int; rows, cols: int; vectorWidth: int;
                         numVectorGroups: int) {.inline, ...raises: [], tags: [],
    forbids: [].}
proc kernelComplexMatMul(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numSites: int; rows, cols, inner: int;
                         vectorWidth: int; numVectorGroups: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc kernelComplexMatVec(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                         numSites: int; rows, cols: int; vectorWidth: int;
                         numVectorGroups: int) {.inline, ...raises: [], tags: [],
    forbids: [].}
proc kernelComplexScalarMul(queue: SyclQueue; bufA: SyclBuffer; re, im: float64;
                            bufC: SyclBuffer; numComplexElements: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc kernelComplexTensorScalarMul(queue: SyclQueue; bufA: SyclBuffer;
                                  re, im: float64; bufC: SyclBuffer;
                                  numSites: int; elemsPerSite: int;
                                  vectorWidth: int; numVectorGroups: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelComplexVecAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numSites: int; vecLen: int; vectorWidth: int;
                         numVectorGroups: int) {.inline, ...raises: [], tags: [],
    forbids: [].}
proc kernelCopy(queue: SyclQueue; bufA, bufC: SyclBuffer; numElements: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
Copy buffer A to buffer C (float64 default for backward compatibility)
proc kernelCopy[T](queue: SyclQueue; bufA, bufC: SyclBuffer; numElements: int;
                   dummy: typedesc[T]) {.inline.}
Type-specific copy kernel
proc kernelMatAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numSites: int;
                  rows, cols: int; vectorWidth: int; numVectorGroups: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelMatMul(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numSites: int;
                  rows, cols, inner: int; vectorWidth: int; numVectorGroups: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelMatMul[T](queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                     numSites: int; rows, cols, inner: int; vectorWidth: int;
                     numVectorGroups: int; dummy: typedesc[T]) {.inline.}
proc kernelMatVec(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer; numSites: int;
                  rows, cols: int; vectorWidth: int; numVectorGroups: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelMatVec[T](queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                     numSites: int; rows, cols: int; vectorWidth: int;
                     numVectorGroups: int; dummy: typedesc[T]) {.inline.}
proc kernelMul(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numElements: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelMul[T](queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                  numElements: int; dummy: typedesc[T]) {.inline.}
proc kernelScalarAdd(queue: SyclQueue; bufA: SyclBuffer; scalar: float64;
                     bufC: SyclBuffer; numElements: int) {.inline, ...raises: [],
    tags: [], forbids: [].}
proc kernelScalarAdd[T](queue: SyclQueue; bufA: SyclBuffer; scalar: T;
                        bufC: SyclBuffer; numElements: int; dummy: typedesc[T]) {.
    inline.}
proc kernelScalarMul(queue: SyclQueue; bufA: SyclBuffer; scalar: float64;
                     bufC: SyclBuffer; numElements: int) {.inline, ...raises: [],
    tags: [], forbids: [].}
proc kernelScalarMul[T](queue: SyclQueue; bufA: SyclBuffer; scalar: T;
                        bufC: SyclBuffer; numElements: int; dummy: typedesc[T]) {.
    inline.}
proc kernelSetElement(queue: SyclQueue; bufC: SyclBuffer; elementIdx: int;
                      value: float64; numSites: int; elemsPerSite: int;
                      vectorWidth: int; numVectorGroups: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc kernelSetElements(queue: SyclQueue; bufC: SyclBuffer;
                       elementIndices: openArray[int32];
                       values: openArray[float64]; numSites: int;
                       elemsPerSite: int; vectorWidth: int; numVectorGroups: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelStencilAdd(queue: SyclQueue;
                      bufSrcA, bufSrcB, bufDst, bufOffsets: SyclBuffer;
                      pointIdx: int; nPoints: int; numSites: int;
                      elemsPerSite: int; vectorWidth: int) {.inline, ...raises: [],
    tags: [], forbids: [].}
Gather add: dst[n] = srcA[n] + srcB[neighbor(n, pointIdx)]
proc kernelStencilAdd[T](queue: SyclQueue;
                         bufSrcA, bufSrcB, bufDst, bufOffsets: SyclBuffer;
                         pointIdx: int; nPoints: int; numSites: int;
                         elemsPerSite: int; vectorWidth: int; dummy: typedesc[T]) {.
    inline.}
proc kernelStencilCopy(queue: SyclQueue; bufSrc, bufDst, bufOffsets: SyclBuffer;
                       pointIdx: int; nPoints: int; numSites: int;
                       elemsPerSite: int; vectorWidth: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
Gather copy: dst[n] = src[neighbor(n, pointIdx)]
proc kernelStencilCopy[T](queue: SyclQueue;
                          bufSrc, bufDst, bufOffsets: SyclBuffer; pointIdx: int;
                          nPoints: int; numSites: int; elemsPerSite: int;
                          vectorWidth: int; dummy: typedesc[T]) {.inline.}
proc kernelStencilScalarMul(queue: SyclQueue; bufSrc: SyclBuffer;
                            scalar: float64; bufDst, bufOffsets: SyclBuffer;
                            pointIdx: int; nPoints: int; numSites: int;
                            elemsPerSite: int; vectorWidth: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
Gather scalar mul: dst[n] = scalar * src[neighbor(n, pointIdx)]
proc kernelStencilScalarMul[T](queue: SyclQueue; bufSrc: SyclBuffer; scalar: T;
                               bufDst, bufOffsets: SyclBuffer; pointIdx: int;
                               nPoints: int; numSites: int; elemsPerSite: int;
                               vectorWidth: int; dummy: typedesc[T]) {.inline.}
proc kernelSub(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numElements: int) {.
    inline, ...raises: [], tags: [], forbids: [].}
proc kernelSub[T](queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                  numElements: int; dummy: typedesc[T]) {.inline.}
proc kernelTensorScalarAdd(queue: SyclQueue; bufA: SyclBuffer; scalar: float64;
                           bufC: SyclBuffer; numSites: int; elemsPerSite: int;
                           vectorWidth: int; numVectorGroups: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc kernelTensorScalarMul(queue: SyclQueue; bufA: SyclBuffer; scalar: float64;
                           bufC: SyclBuffer; numSites: int; elemsPerSite: int;
                           vectorWidth: int; numVectorGroups: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc kernelVecAdd(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer; numSites: int;
                  vecLen: int; vectorWidth: int; numVectorGroups: int) {.inline,
    ...raises: [], tags: [], forbids: [].}
proc read(queue: SyclQueue; dest: pointer; buf: SyclBuffer; size: int) {.
    ...raises: [], tags: [], forbids: [].}
proc read[T](queue: SyclQueue; data: var openArray[T]; buf: SyclBuffer)
proc sycl_allocate(queue: SyclQueue; size: csize_t): SyclBuffer {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_create_queue(dtype: cint; deviceIdx: cint): SyclQueue {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_deallocate(queue: SyclQueue; buf: SyclBuffer) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_destroy_queue(queue: SyclQueue) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_device_is_cpu(dtype: cint; deviceIdx: cint): cint {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_device_is_gpu(dtype: cint; deviceIdx: cint): cint {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_get_buffer_ptr(buffer: SyclBuffer): pointer {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_get_device_count(dtype: cint): cint {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_get_device_name(dtype: cint; deviceIdx: cint): cstring {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_add_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_add_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_add_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_add_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_complex_add_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                                 numComplexElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_add_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                                 numComplexElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matadd_f32(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; rows, cols: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matadd_f64(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; rows, cols: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matmul_f32(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; rows, cols, inner: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matmul_f64(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; rows, cols, inner: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matvec_f32(queue: SyclQueue;
                                    bufA, bufX, bufY: SyclBuffer;
                                    numSites: csize_t; rows, cols: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_matvec_f64(queue: SyclQueue;
                                    bufA, bufX, bufY: SyclBuffer;
                                    numSites: csize_t; rows, cols: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_scalar_mul_f32(queue: SyclQueue; bufA: SyclBuffer;
                                        scalar_re, scalar_im: cfloat;
                                        bufC: SyclBuffer;
                                        numComplexElements: csize_t) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_scalar_mul_f64(queue: SyclQueue; bufA: SyclBuffer;
                                        scalar_re, scalar_im: cdouble;
                                        bufC: SyclBuffer;
                                        numComplexElements: csize_t) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_tensor_scalar_mul_f32(queue: SyclQueue;
    bufA: SyclBuffer; scalar_re, scalar_im: cfloat; bufC: SyclBuffer;
    numSites: csize_t; elemsPerSite: cint; vectorWidth: cint;
    numVectorGroups: csize_t) {.cdecl, importc, ...raises: [], tags: [],
                                forbids: [].}
proc sycl_kernel_complex_tensor_scalar_mul_f64(queue: SyclQueue;
    bufA: SyclBuffer; scalar_re, scalar_im: cdouble; bufC: SyclBuffer;
    numSites: csize_t; elemsPerSite: cint; vectorWidth: cint;
    numVectorGroups: csize_t) {.cdecl, importc, ...raises: [], tags: [],
                                forbids: [].}
proc sycl_kernel_complex_vecadd_f32(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; vecLen: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_complex_vecadd_f64(queue: SyclQueue;
                                    bufA, bufB, bufC: SyclBuffer;
                                    numSites: csize_t; vecLen: cint;
                                    vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_copy_f32(queue: SyclQueue; bufA, bufC: SyclBuffer;
                          numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_copy_f64(queue: SyclQueue; bufA, bufC: SyclBuffer;
                          numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_copy_i32(queue: SyclQueue; bufA, bufC: SyclBuffer;
                          numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_copy_i64(queue: SyclQueue; bufA, bufC: SyclBuffer;
                          numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_matadd_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matadd_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matadd_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matadd_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matmul_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols, inner: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matmul_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols, inner: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matmul_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols, inner: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matmul_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; rows, cols, inner: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matvec_f32(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matvec_f64(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matvec_i32(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_matvec_i64(queue: SyclQueue; bufA, bufX, bufY: SyclBuffer;
                            numSites: csize_t; rows, cols: cint;
                            vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_mul_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_mul_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_mul_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_mul_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_scalar_add_f32(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cfloat; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_add_f64(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cdouble; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_add_i32(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cint; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_add_i64(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: clonglong; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_mul_f32(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cfloat; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_mul_f64(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cdouble; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_mul_i32(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: cint; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_scalar_mul_i64(queue: SyclQueue; bufA: SyclBuffer;
                                scalar: clonglong; bufC: SyclBuffer;
                                numElements: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_element_f32(queue: SyclQueue; bufC: SyclBuffer;
                                 elementIdx: cint; value: cfloat;
                                 numSites: csize_t; elemsPerSite: cint;
                                 vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_element_f64(queue: SyclQueue; bufC: SyclBuffer;
                                 elementIdx: cint; value: cdouble;
                                 numSites: csize_t; elemsPerSite: cint;
                                 vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_element_i32(queue: SyclQueue; bufC: SyclBuffer;
                                 elementIdx: cint; value: cint;
                                 numSites: csize_t; elemsPerSite: cint;
                                 vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_element_i64(queue: SyclQueue; bufC: SyclBuffer;
                                 elementIdx: cint; value: clonglong;
                                 numSites: csize_t; elemsPerSite: cint;
                                 vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_elements_f32(queue: SyclQueue; bufC: SyclBuffer;
                                  elementIndices: ptr cint; values: ptr cfloat;
                                  numWrites: cint; numSites: csize_t;
                                  elemsPerSite: cint; vectorWidth: cint;
                                  numVectorGroups: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_elements_f64(queue: SyclQueue; bufC: SyclBuffer;
                                  elementIndices: ptr cint; values: ptr cdouble;
                                  numWrites: cint; numSites: csize_t;
                                  elemsPerSite: cint; vectorWidth: cint;
                                  numVectorGroups: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_elements_i32(queue: SyclQueue; bufC: SyclBuffer;
                                  elementIndices: ptr cint; values: ptr cint;
                                  numWrites: cint; numSites: csize_t;
                                  elemsPerSite: cint; vectorWidth: cint;
                                  numVectorGroups: csize_t) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_set_elements_i64(queue: SyclQueue; bufC: SyclBuffer;
                                  elementIndices: ptr cint;
                                  values: ptr clonglong; numWrites: cint;
                                  numSites: csize_t; elemsPerSite: cint;
                                  vectorWidth: cint; numVectorGroups: csize_t) {.
    cdecl, importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_add_f32(queue: SyclQueue; bufSrcA, bufSrcB, bufDst,
    bufOffsets: SyclBuffer; pointIdx: cint; nPoints: cint; numSites: csize_t;
                                 elemsPerSite: cint; vectorWidth: cint) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_add_f64(queue: SyclQueue; bufSrcA, bufSrcB, bufDst,
    bufOffsets: SyclBuffer; pointIdx: cint; nPoints: cint; numSites: csize_t;
                                 elemsPerSite: cint; vectorWidth: cint) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_add_i32(queue: SyclQueue; bufSrcA, bufSrcB, bufDst,
    bufOffsets: SyclBuffer; pointIdx: cint; nPoints: cint; numSites: csize_t;
                                 elemsPerSite: cint; vectorWidth: cint) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_add_i64(queue: SyclQueue; bufSrcA, bufSrcB, bufDst,
    bufOffsets: SyclBuffer; pointIdx: cint; nPoints: cint; numSites: csize_t;
                                 elemsPerSite: cint; vectorWidth: cint) {.cdecl,
    importc, ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_copy_f32(queue: SyclQueue;
                                  bufSrc, bufDst, bufOffsets: SyclBuffer;
                                  pointIdx: cint; nPoints: cint;
                                  numSites: csize_t; elemsPerSite: cint;
                                  vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_copy_f64(queue: SyclQueue;
                                  bufSrc, bufDst, bufOffsets: SyclBuffer;
                                  pointIdx: cint; nPoints: cint;
                                  numSites: csize_t; elemsPerSite: cint;
                                  vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_copy_i32(queue: SyclQueue;
                                  bufSrc, bufDst, bufOffsets: SyclBuffer;
                                  pointIdx: cint; nPoints: cint;
                                  numSites: csize_t; elemsPerSite: cint;
                                  vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_copy_i64(queue: SyclQueue;
                                  bufSrc, bufDst, bufOffsets: SyclBuffer;
                                  pointIdx: cint; nPoints: cint;
                                  numSites: csize_t; elemsPerSite: cint;
                                  vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_scalar_mul_f32(queue: SyclQueue; bufSrc: SyclBuffer;
                                        scalar: cfloat;
                                        bufDst, bufOffsets: SyclBuffer;
                                        pointIdx: cint; nPoints: cint;
                                        numSites: csize_t; elemsPerSite: cint;
                                        vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_scalar_mul_f64(queue: SyclQueue; bufSrc: SyclBuffer;
                                        scalar: cdouble;
                                        bufDst, bufOffsets: SyclBuffer;
                                        pointIdx: cint; nPoints: cint;
                                        numSites: csize_t; elemsPerSite: cint;
                                        vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_scalar_mul_i32(queue: SyclQueue; bufSrc: SyclBuffer;
                                        scalar: cint;
                                        bufDst, bufOffsets: SyclBuffer;
                                        pointIdx: cint; nPoints: cint;
                                        numSites: csize_t; elemsPerSite: cint;
                                        vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_stencil_scalar_mul_i64(queue: SyclQueue; bufSrc: SyclBuffer;
                                        scalar: clonglong;
                                        bufDst, bufOffsets: SyclBuffer;
                                        pointIdx: cint; nPoints: cint;
                                        numSites: csize_t; elemsPerSite: cint;
                                        vectorWidth: cint) {.cdecl, importc,
    ...raises: [], tags: [], forbids: [].}
proc sycl_kernel_sub_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_sub_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_sub_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_sub_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                         numElements: csize_t) {.cdecl, importc, ...raises: [],
    tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_add_f32(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cfloat; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_add_f64(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cdouble; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_add_i32(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cint; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_add_i64(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: clonglong; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_mul_f32(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cfloat; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_mul_f64(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cdouble; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_mul_i32(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: cint; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_tensor_scalar_mul_i64(queue: SyclQueue; bufA: SyclBuffer;
                                       scalar: clonglong; bufC: SyclBuffer;
                                       numSites: csize_t; elemsPerSite: cint;
                                       vectorWidth: cint;
                                       numVectorGroups: csize_t) {.cdecl,
    importc, raises: [], tags: [], forbids: [].}
proc sycl_kernel_vecadd_f32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; vecLen: cint; vectorWidth: cint;
                            numVectorGroups: csize_t) {.cdecl, importc,
    raises: [], tags: [], forbids: [].}
proc sycl_kernel_vecadd_f64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; vecLen: cint; vectorWidth: cint;
                            numVectorGroups: csize_t) {.cdecl, importc,
    raises: [], tags: [], forbids: [].}
proc sycl_kernel_vecadd_i32(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; vecLen: cint; vectorWidth: cint;
                            numVectorGroups: csize_t) {.cdecl, importc,
    raises: [], tags: [], forbids: [].}
proc sycl_kernel_vecadd_i64(queue: SyclQueue; bufA, bufB, bufC: SyclBuffer;
                            numSites: csize_t; vecLen: cint; vectorWidth: cint;
                            numVectorGroups: csize_t) {.cdecl, importc,
    raises: [], tags: [], forbids: [].}
proc sycl_read(queue: SyclQueue; dest: pointer; buf: SyclBuffer; size: csize_t) {.
    cdecl, importc, raises: [], tags: [], forbids: [].}
proc sycl_wait(queue: SyclQueue) {.cdecl, importc, raises: [], tags: [],
                                   forbids: [].}
proc sycl_write(queue: SyclQueue; buf: SyclBuffer; src: pointer; size: csize_t) {.
    cdecl, importc, raises: [], tags: [], forbids: [].}
proc wait(queue: SyclQueue) {.raises: [], tags: [], forbids: [].}
proc write(queue: SyclQueue; buf: SyclBuffer; src: pointer; size: int) {.
    raises: [], tags: [], forbids: [].}
proc write[T](queue: SyclQueue; buf: SyclBuffer; data: openArray[T])