simd/simdtypes

Search:
Group by:

SIMD Vector Types

This module provides generic SIMD vector wrapper types that abstract over different SIMD widths (SSE, AVX2, AVX-512) and support runtime SIMD width selection within the array-of-structures-of-arrays (AoSoA) memory layout.

The SimdVec[N, T] type represents N values of type T that can be processed in parallel using SIMD instructions.

Reference: QEX simdWrap.nim and nimsimd for design patterns

Types

SimdF32x4 = SimdVec[4, float32]
SSE: 4 x float32
SimdF32x8 = SimdVec[8, float32]
AVX2: 8 x float32
SimdF32x16 = SimdVec[16, float32]
AVX-512: 16 x float32
SimdF64x2 = SimdVec[2, float64]
SSE: 2 x float64
SimdF64x4 = SimdVec[4, float64]
AVX2: 4 x float64
SimdF64x8 = SimdVec[8, float64]
AVX-512: 8 x float64
SimdI32x4 = SimdVec[4, int32]
SSE: 4 x int32
SimdI32x8 = SimdVec[8, int32]
AVX2: 8 x int32
SimdI32x16 = SimdVec[16, int32]
AVX-512: 16 x int32
SimdI64x2 = SimdVec[2, int64]
SSE: 2 x int64
SimdI64x4 = SimdVec[4, int64]
AVX2: 4 x int64
SimdI64x8 = SimdVec[8, int64]
AVX-512: 8 x int64
SimdVec[N; T] = object
  data*: array[N, T]
Generic SIMD vector holding N elements of type T. N is the number of SIMD lanes (e.g., 2, 4, 8, 16); T is the scalar type (float32, float64, int32, int64).
SimdVecDyn[T] = object
  width*: int
  data*: seq[T]
Dynamic SIMD vector with a runtime-determined width. Used when the SIMD width is not known at compile time.

Procs

proc `$`[N: static[int]; T](v: SimdVec[N, T]): string
proc `$`[T](v: SimdVecDyn[T]): string
proc `*`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
Element-wise multiplication
proc `*`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `*`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `*`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `*`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `*`[T](a: T; b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `*=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `*=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `+`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
Element-wise addition
proc `+`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `+`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `+`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `+`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `+`[T](a: T; b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `+=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `+=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `-`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
Element-wise subtraction
proc `-`[N: static[int]; T](a: SimdVec[N, T]): SimdVec[N, T] {.inline.}
Unary negation
proc `-`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `-`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `-`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `-`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `-=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `-=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `/`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
Element-wise division
proc `/`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `/`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `/`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `/=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `/=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `[]`[N: static[int]; T](v: SimdVec[N, T]; i: int): T {.inline.}
Access individual lane
proc `[]=`[N: static[int]; T](v: var SimdVec[N, T]; i: int; val: T) {.inline.}
Set individual lane
proc len[N: static[int]; T](v: SimdVec[N, T]): int {.inline.}
Return number of SIMD lanes
proc load[N: static[int]; T](p: ptr T): SimdVec[N, T] {.inline.}
Load N consecutive values from memory
proc loadStrided[N: static[int]; T](p: ptr T; stride: int): SimdVec[N, T] {.
    inline.}
Load N values with given stride (gather)
proc max[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}
Find maximum across all lanes
proc min[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}
Find minimum across all lanes
proc newSimdVecDyn[T](width: int): SimdVecDyn[T]
Create a zero-initialized dynamic SIMD vector
proc newSimdVecDyn[T](width: int; val: T): SimdVecDyn[T]
Create a dynamic SIMD vector filled with a scalar value
proc product[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}
Multiply all lanes
proc splat[N: static[int]; T](val: T): SimdVec[N, T] {.inline.}
Broadcast a scalar to all SIMD lanes
proc store[N: static[int]; T](v: SimdVec[N, T]; p: ptr T) {.inline.}
Store N values to consecutive memory locations
proc storeStrided[N: static[int]; T](v: SimdVec[N, T]; p: ptr T; stride: int) {.
    inline.}
Store N values with given stride (scatter)
proc sum[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}
Sum all lanes
proc sum[T](v: SimdVecDyn[T]): T {.inline.}
proc zero[N: static[int]; T](): SimdVec[N, T] {.inline.}
Create a zero-initialized SIMD vector