SYCL Dispatch Module for TensorFieldView - Native Kernel Edition
This module provides the `each` macro for the SYCL backend, using native pre-compiled C++ SYCL kernels instead of JIT-compiled OpenCL C.
Architecture:
- Macro analyzes the loop body at compile time to detect operation type
- At runtime, dispatches to the appropriate pre-compiled native kernel
- This approach works on all SYCL devices (CPU, GPU, accelerators)
Supported Operations:
- Copy: `C[n] = A[n]`
- Add/Sub: `C[n] = A[n] +/- B[n]`
- Scalar multiply: `C[n] = scalar * A[n]`
- Matrix multiply: `C[n] = A[n] * B[n]`
- Matrix-vector multiply: `y[n] = M[n] * x[n]`
Types
DispatchInfo = object kind*: DispatchKind lhsView*: string rhsViews*: seq[string] scalar*: float64 isComplex*: bool
DispatchKind = enum dkCopy, dkAdd, dkSub, dkMul, dkScalarMul, dkScalarAdd, dkMatMul, dkMatVec, dkMatAdd, dkVecAdd, dkStencilCopy, dkStencilScalarMul, dkStencilAdd, dkUnknown
ElementType = enum etFloat32, etFloat64, etInt32, etInt64
- Element type for kernel dispatch
ExecOperand = object isTemp*: bool viewName*: string tempId*: int
- Represents an operand: either a view name or a temp buffer ID
ExecPlan = object steps*: seq[ExecStep] numTemps*: int finalDest*: ExecOperand
ExecStep = object kind*: ExecStepKind dest*: ExecOperand srcA*: ExecOperand srcB*: ExecOperand scalar*: float64
ExecStepKind = enum eskCopy, eskAdd, eskSub, eskMatMul, eskMatVec, eskScalarMul, eskScalarAdd
ExprInfo = object kind*: ExprKind viewName*: string viewRank*: int isComplex*: bool isNeighborAccess*: bool scalar*: float64 scalarIm*: float64 left*, right*: ref ExprInfo
ExprKind = enum ekSiteProxy, ekMatMul, ekMatVec, ekMatAdd, ekMatSub, ekScalarMul, ekScalarAdd, ekLiteral, ekUnknown
KernelInfo = object views*: seq[ViewInfo] viewRanks*: Table[string, int] loopVar*: NimNode loopVarStr*: string outputRank*: int outputRows*: int outputCols*: int isComplex*: bool elemType*: ElementType stencilBindings*: seq[StencilBinding] hasStencil*: bool
Consts
DebugKernels {.booldefine.} = false
VectorWidth {.intdefine.} = 8
Procs
proc analyzeExpr(n: NimNode; viewNames: seq[string]; nbrVarNames: seq[string] = @[]): ExprInfo {.raises: [], tags: [], forbids: [].}
- Analyze an expression to determine its type and structure. `nbrVarNames`: variable names that are stencil neighbor indices.
Exports
-
isGpu, kernelScalarAdd, kernelStencilAdd, syclQueues, buffer, kernelMatMul, SyclResult, kernelScalarAdd, kernelCopy, kernelMatAdd, wait, write, SyclEvent, kernelScalarMul, kernelStencilAdd, release, kernelScalarMul, clQueues, kernelComplexScalarMul, SyclDevice, TClResult, kernelComplexMatVec, kernelComplexMatMul, kernelMatVec, PCommandQueue, kernelSetElement, kernelStencilScalarMul, clDevices, createQueue, SyclDeviceType, kernelStencilScalarMul, initSycl, getBufferPtr, destroyQueue, finish, kernelComplexAdd, getDeviceName, bufferLike, kernelStencilCopy, kernelSetElements, kernelComplexMatAdd, kernelSub, deallocate, clContext, kernelComplexVecAdd, PMem, read, check, kernelMatVec, finalizeSycl, allocate, kernelCopy, kernelAdd, kernelVecAdd, release, read, kernelMatMul, kernelSub, kernelTensorScalarAdd, write, kernelMul, SyclQueue, SyclBuffer, kernelStencilCopy, SyclContext, kernelTensorScalarMul, kernelComplexTensorScalarMul, syclDeviceType, kernelMul, ESycl, kernelAdd, isCpu, getDeviceCount, SyclQueue, SyclBuffer