456 lines
11 KiB
D

/**
* `core.simd` emulation layer.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
* cet 2024.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;
pure:
nothrow:
@nogc:
version(GNU)
{
// Note: for GDC support, be sure to use https://explore.dgnu.org/
// Future: just detect vectors, do not base upon arch.
version(X86_64)
{
enum MMXSizedVectorsAreEmulated = false;
enum SSESizedVectorsAreEmulated = false;
// Does GDC support AVX-sized vectors?
static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
{
enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
}
else
{
enum AVXSizedVectorsAreEmulated = true;
}
import gcc.builtins;
}
else
{
enum MMXSizedVectorsAreEmulated = true;
enum SSESizedVectorsAreEmulated = true;
enum AVXSizedVectorsAreEmulated = true;
}
}
else version(LDC)
{
public import ldc.simd;
// Use this alias to mention it should only be used with LDC,
// for example when emulated shufflevector would just be wasteful.
alias shufflevectorLDC = shufflevector;
enum MMXSizedVectorsAreEmulated = false;
enum SSESizedVectorsAreEmulated = false;
enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
public import core.simd;
static if (__VERSION__ >= 2100)
{
// Note: turning this true is very desirable for DMD performance,
// but also leads to many bugs being discovered upstream.
// The fact that it works at all relies on many workardounds.
// In particular intel-intrinsics with this "on" is a honeypot for DMD backend bugs,
// and a very strong DMD codegen test suite.
// What happens typically is that contributors end up on a DMD bug in their PR.
// But finally, in 2022 D_SIMD has been activated, at least for SSE and some instructions.
enum bool tryToEnableCoreSimdWithDMD = true;
}
else
{
enum bool tryToEnableCoreSimdWithDMD = false;
}
version(D_SIMD)
{
enum MMXSizedVectorsAreEmulated = true;
enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;
// Note: with DMD, AVX-sized vectors can't be enabled yet.
// On linux + x86_64, this will fail since a few operands seem to be missing.
// FUTURE: enable AVX-sized vectors in DMD. :)
//
// Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284
// Probably other, unreported issues.
version(D_AVX)
enum AVXSizedVectorsAreEmulated = true;
else
enum AVXSizedVectorsAreEmulated = true;
}
else
{
// Some DMD 32-bit targets don't have D_SIMD
enum MMXSizedVectorsAreEmulated = true;
enum SSESizedVectorsAreEmulated = true;
enum AVXSizedVectorsAreEmulated = true;
}
}
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;
static if (CoreSimdIsEmulated)
{
// core.simd is emulated in some capacity: introduce `VectorOps`
mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
{
enum Count = N;
alias Base = BaseType;
BaseType* ptr() return pure nothrow @nogc
{
return array.ptr;
}
// Unary operators
VectorType opUnary(string op)() pure nothrow @safe @nogc
{
VectorType res = void;
mixin("res.array[] = " ~ op ~ "array[];");
return res;
}
// Binary operators
VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
{
VectorType res = void;
mixin("res.array[] = array[] " ~ op ~ " other.array[];");
return res;
}
// Assigning a BaseType value
void opAssign(BaseType e) pure nothrow @safe @nogc
{
array[] = e;
}
// Assigning a static array
void opAssign(ArrayType v) pure nothrow @safe @nogc
{
array[] = v[];
}
void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
{
mixin("array[] " ~ op ~ "= other.array[];");
}
// Assigning a dyn array
this(ArrayType v) pure nothrow @safe @nogc
{
array[] = v[];
}
// Broadcast constructor
this(BaseType x) pure nothrow @safe @nogc
{
array[] = x;
}
/// We can't support implicit conversion but do support explicit casting.
/// "Vector types of the same size can be implicitly converted among each other."
/// Casting to another vector type is always just a raw copy.
VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
if (VecDest.sizeof == VectorType.sizeof)
{
VecDest dest = void;
// Copy
dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
return dest;
}
ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
{
return array[i];
}
}
}
else
{
public import core.simd;
// GDC cannot convert implicitely __vector from signed to unsigned, but LDC can
// And GDC sometimes need those unsigned vector types for some intrinsics.
// For internal use only.
package alias ushort8 = Vector!(ushort[8]);
package alias ubyte8 = Vector!(ubyte[8]);
package alias ubyte16 = Vector!(ubyte[16]);
static if (!AVXSizedVectorsAreEmulated)
{
package alias ushort16 = Vector!(ushort[16]);
package alias ubyte32 = Vector!(ubyte[32]);
}
}
// Emulate ldc.simd cmpMask and other masks.
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{}
else
{
// TODO: deprecated and write plain versions instead
private template BaseType(V)
{
alias typeof( ( { V v; return v; }()).array[0]) BaseType;
}
private template TrueMask(V)
{
alias Elem = BaseType!V;
static if (is(Elem == float))
{
immutable uint m1 = 0xffffffff;
enum Elem TrueMask = *cast(float*)(&m1);
}
else static if (is(Elem == double))
{
immutable ulong m1 = 0xffffffff_ffffffff;
enum Elem TrueMask = *cast(double*)(&m1);
}
else // integer case
{
enum Elem TrueMask = -1;
}
}
Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
{
enum size_t Count = Vec.array.length;
Vec result;
foreach(int i; 0..Count)
{
bool cond = a.array[i] == b.array[i];
result.ptr[i] = cond ? TrueMask!Vec : 0;
}
return result;
}
Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
{
enum size_t Count = Vec.array.length;
Vec result;
foreach(int i; 0..Count)
{
bool cond = a.array[i] > b.array[i];
result.ptr[i] = cond ? TrueMask!Vec : 0;
}
return result;
}
}
unittest
{
float4 a = [1, 3, 5, 7];
float4 b = [2, 3, 4, 5];
int4 c = cast(int4)(greaterMask!float4(a, b));
static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
assert(c.array == correct);
}
static if (MMXSizedVectorsAreEmulated)
{
/// MMX-like SIMD types
struct float2
{
float[2] array;
mixin VectorOps!(float2, float[2]);
}
struct byte8
{
byte[8] array;
mixin VectorOps!(byte8, byte[8]);
}
struct short4
{
short[4] array;
mixin VectorOps!(short4, short[4]);
}
struct int2
{
int[2] array;
mixin VectorOps!(int2, int[2]);
}
struct long1
{
long[1] array;
mixin VectorOps!(long1, long[1]);
}
}
else
{
// For this compiler, defining MMX-sized vectors is working.
public import core.simd;
alias long1 = Vector!(long[1]);
alias float2 = Vector!(float[2]);
alias int2 = Vector!(int[2]);
alias short4 = Vector!(short[4]);
alias byte8 = Vector!(byte[8]);
}
static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);
static if (SSESizedVectorsAreEmulated)
{
/// SSE-like SIMD types
struct float4
{
float[4] array;
mixin VectorOps!(float4, float[4]);
}
struct byte16
{
byte[16] array;
mixin VectorOps!(byte16, byte[16]);
}
struct short8
{
short[8] array;
mixin VectorOps!(short8, short[8]);
}
struct int4
{
int[4] array;
mixin VectorOps!(int4, int[4]);
}
struct long2
{
long[2] array;
mixin VectorOps!(long2, long[2]);
}
struct double2
{
double[2] array;
mixin VectorOps!(double2, double[2]);
}
}
static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);
static if (AVXSizedVectorsAreEmulated)
{
/// AVX-like SIMD types
struct float8
{
float[8] array;
mixin VectorOps!(float8, float[8]);
}
struct byte32
{
byte[32] array;
mixin VectorOps!(byte32, byte[32]);
}
struct short16
{
short[16] array;
mixin VectorOps!(short16, short[16]);
}
struct int8
{
int[8] array;
mixin VectorOps!(int8, int[8]);
}
struct long4
{
long[4] array;
mixin VectorOps!(long4, long[4]);
}
struct double4
{
double[4] array;
mixin VectorOps!(double4, double[4]);
}
}
else
{
public import core.simd;
}
static assert(float8.sizeof == 32);
static assert(byte32.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(long4.sizeof == 32);
static assert(double4.sizeof == 32);
alias __m256 = float8;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang
alias __m256d = double4;
alias __m128 = float4;
alias __m128i = int4;
alias __m128d = double2;
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long
int _MM_SHUFFLE2(int x, int y) pure @safe
{
assert(x >= 0 && x <= 1);
assert(y >= 0 && y <= 1);
return (x << 1) | y;
}
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
assert(x >= 0 && x <= 3);
assert(y >= 0 && y <= 3);
assert(z >= 0 && z <= 3);
assert(w >= 0 && w <= 3);
return (z<<6) | (y<<4) | (x<<2) | w;
}
// test assignment from scalar to vector type
unittest
{
float4 A = 3.0f;
float[4] correctA = [3.0f, 3.0f, 3.0f, 3.0f];
assert(A.array == correctA);
int2 B = 42;
int[2] correctB = [42, 42];
assert(B.array == correctB);
}