SIMD Vector Types

This module provides generic SIMD vector wrapper types that abstract over different SIMD widths (SSE, AVX2, AVX-512) and support runtime SIMD width selection within the AoSoA memory layout.

The SimdVec[N, T] type represents N values of type T that can be processed in parallel using SIMD instructions.

Reference: QEX simdWrap.nim and nimsimd for design patterns

Imports

x86wrap, avx2wrap, avx512wrap

Types

SimdF32x4 = SimdVec[4, float32]: SSE: 4 x float32
SimdF32x8 = SimdVec[8, float32]: AVX2: 8 x float32
SimdF32x16 = SimdVec[16, float32]: AVX-512: 16 x float32
SimdF64x2 = SimdVec[2, float64]: SSE: 2 x float64
SimdF64x4 = SimdVec[4, float64]: AVX2: 4 x float64
SimdF64x8 = SimdVec[8, float64]: AVX-512: 8 x float64
SimdI32x4 = SimdVec[4, int32]: SSE: 4 x int32
SimdI32x8 = SimdVec[8, int32]: AVX2: 8 x int32
SimdI32x16 = SimdVec[16, int32]: AVX-512: 16 x int32
SimdI64x2 = SimdVec[2, int64]: SSE: 2 x int64
SimdI64x4 = SimdVec[4, int64]: AVX2: 4 x int64
SimdI64x8 = SimdVec[8, int64]: AVX-512: 8 x int64
SimdVec[N; T] = object data*: array[N, T]: Generic SIMD vector holding N elements of type T. N is the number of SIMD lanes (e.g., 4, 8, 16); T is the scalar type (float32, float64, int32, int64).
SimdVecDyn[T] = object width*: int data*: seq[T]: Dynamic SIMD vector with runtime-determined width. Used when the SIMD width is not known at compile time.

Procs

proc `$`[N: static[int]; T](v: SimdVec[N, T]): string
proc `$`[T](v: SimdVecDyn[T]): string
proc `*`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}: Element-wise multiplication
proc `*`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `*`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `*`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `*`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `*`[T](a: T; b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `*=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `*=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `+`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}: Element-wise addition
proc `+`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `+`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `+`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `+`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `+`[T](a: T; b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `+=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `+=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `-`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}: Element-wise subtraction
proc `-`[N: static[int]; T](a: SimdVec[N, T]): SimdVec[N, T] {.inline.}: Unary negation
proc `-`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `-`[N: static[int]; T](a: T; b: SimdVec[N, T]): SimdVec[N, T] {.inline.}
proc `-`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `-`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `-=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `-=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `/`[N: static[int]; T](a, b: SimdVec[N, T]): SimdVec[N, T] {.inline.}: Element-wise division
proc `/`[N: static[int]; T](a: SimdVec[N, T]; b: T): SimdVec[N, T] {.inline.}
proc `/`[T](a, b: SimdVecDyn[T]): SimdVecDyn[T] {.inline.}
proc `/`[T](a: SimdVecDyn[T]; b: T): SimdVecDyn[T] {.inline.}
proc `/=`[N: static[int]; T](a: var SimdVec[N, T]; b: SimdVec[N, T]) {.inline.}
proc `/=`[N: static[int]; T](a: var SimdVec[N, T]; b: T) {.inline.}
proc `[]`[N: static[int]; T](v: SimdVec[N, T]; i: int): T {.inline.}: Access individual lane
proc `[]=`[N: static[int]; T](v: var SimdVec[N, T]; i: int; val: T) {.inline.}: Set individual lane
proc len[N: static[int]; T](v: SimdVec[N, T]): int {.inline.}: Return number of SIMD lanes
proc load[N: static[int]; T](p: ptr T): SimdVec[N, T] {.inline.}: Load N consecutive values from memory
proc loadStrided[N: static[int]; T](p: ptr T; stride: int): SimdVec[N, T] {.inline.}: Load N values with given stride (gather)
proc max[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}: Find maximum across all lanes
proc min[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}: Find minimum across all lanes
proc newSimdVecDyn[T](width: int): SimdVecDyn[T]: Create a zero-initialized dynamic SIMD vector
proc newSimdVecDyn[T](width: int; val: T): SimdVecDyn[T]: Create a dynamic SIMD vector filled with a scalar value
proc product[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}: Multiply all lanes
proc splat[N: static[int]; T](val: T): SimdVec[N, T] {.inline.}: Broadcast a scalar to all SIMD lanes
proc store[N: static[int]; T](v: SimdVec[N, T]; p: ptr T) {.inline.}: Store N values to consecutive memory locations
proc storeStrided[N: static[int]; T](v: SimdVec[N, T]; p: ptr T; stride: int) {.inline.}: Store N values with given stride (scatter)
proc sum[N: static[int]; T](v: SimdVec[N, T]): T {.inline.}: Sum all lanes
proc sum[T](v: SimdVecDyn[T]): T {.inline.}
proc zero[N: static[int]; T](): SimdVec[N, T] {.inline.}: Create a zero-initialized SIMD vector

Exports

mmask16, m128d, m512, m512h, m64, m512d, m256d, m128, mmask8, m256i, m256, m512i, m128h, m256h, mmask64, mmask32, m128i

simd/simdtypes

Imports

Types

Procs

Exports