/**
* `core.simd` emulation layer.
*
* Copyright: Copyright Guillaume Piolat 2016-2020, Stefanos Baziotis 2019.
*            cet 2024.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.types;


pure:
nothrow:
@nogc:

// Per-compiler configuration.
// Each supported compiler defines three compile-time flags telling whether
// 8-byte (MMX), 16-byte (SSE) and 32-byte (AVX) vector types must be emulated
// with plain structs (`true`) or are provided natively by the compiler (`false`).
version(GNU)
{
    // Note: for GDC support, be sure to use https://explore.dgnu.org/

    // Future: just detect vectors, do not base upon arch.

    version(X86_64)
    {
        enum MMXSizedVectorsAreEmulated = false;
        enum SSESizedVectorsAreEmulated = false;

        // Does GDC support AVX-sized vectors?
        static if (__VERSION__ >= 2100) // Starting at GDC 12.1 only.
        {
            // Probe the compiler: emulate only if the 32-byte vector type does not exist.
            enum AVXSizedVectorsAreEmulated = !(is(__vector(double[4])));
        }
        else
        {
            enum AVXSizedVectorsAreEmulated = true;
        }

        import gcc.builtins;
    }
    else
    {
        // Non-x86_64 GDC targets: everything is emulated.
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else version(LDC)
{
    public import ldc.simd;

    // Use this alias to mention it should only be used with LDC,
    // for example when emulated shufflevector would just be wasteful.
    alias shufflevectorLDC = shufflevector;

    enum MMXSizedVectorsAreEmulated = false;
    enum SSESizedVectorsAreEmulated = false;
    enum AVXSizedVectorsAreEmulated = false;
}
else version(DigitalMars)
{
    public import core.simd;

    static if (__VERSION__ >= 2100)
    {
        // Note: turning this true is very desirable for DMD performance,
        // but also leads to many bugs being discovered upstream.
        // The fact that it works at all relies on many workarounds.
        // In particular intel-intrinsics with this "on" is a honeypot for DMD backend bugs,
        // and a very strong DMD codegen test suite.
        // What happens typically is that contributors end up on a DMD bug in their PR.
        // But finally, in 2022 D_SIMD has been activated, at least for SSE and some instructions.
        enum bool tryToEnableCoreSimdWithDMD = true;
    }
    else
    {
        enum bool tryToEnableCoreSimdWithDMD = false;
    }

    version(D_SIMD)
    {
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = !tryToEnableCoreSimdWithDMD;

        // Note: with DMD, AVX-sized vectors can't be enabled yet.
        // On linux + x86_64, this will fail since a few operands seem to be missing.
        // FUTURE: enable AVX-sized vectors in DMD. :)
        //
        // Blockers: https://issues.dlang.org/show_bug.cgi?id=24283 and 24284
        // Probably other, unreported issues.
        //
        // Both branches are deliberately `true` for now; the `version(D_AVX)`
        // split is kept as the place to flip once the blockers are fixed.
        version(D_AVX)
            enum AVXSizedVectorsAreEmulated = true;
        else
            enum AVXSizedVectorsAreEmulated = true;
    }
    else
    {
        // Some DMD 32-bit targets don't have D_SIMD
        enum MMXSizedVectorsAreEmulated = true;
        enum SSESizedVectorsAreEmulated = true;
        enum AVXSizedVectorsAreEmulated = true;
    }
}
else
{
    // Without this, an unknown compiler would fail below with undefined-identifier
    // errors on the *SizedVectorsAreEmulated flags; report the real cause instead.
    static assert(false, "inteli.types: unsupported D compiler (expected GDC, LDC or DMD)");
}

/// True when at least one vector size class is emulated with structs.
enum CoreSimdIsEmulated = MMXSizedVectorsAreEmulated || SSESizedVectorsAreEmulated || AVXSizedVectorsAreEmulated;

static if (CoreSimdIsEmulated)
{
    // core.simd is emulated in some capacity: introduce `VectorOps`

    /// Operations shared by all emulated vector structs.
    /// The struct mixing this in must declare a member `ArrayType array;`.
    ///
    /// Params:
    ///     VectorType = the struct type the template is mixed into
    ///     ArrayType  = the static array type of the `array` member, `BaseType[N]`
    ///     BaseType   = element type (deduced from `ArrayType`)
    ///     N          = number of elements (deduced from `ArrayType`)
    mixin template VectorOps(VectorType, ArrayType: BaseType[N], BaseType, size_t N)
    {
        /// Number of elements in the vector.
        enum Count = N;

        /// Element type of the vector.
        alias Base = BaseType;

        /// Returns: a pointer to the first element of `array`.
        BaseType* ptr() return pure nothrow @nogc
        {
            return array.ptr;
        }

        /// Element-wise unary operator.
        /// `const` like `opBinary`, so that it can be applied to const vectors
        /// (the operand is only read; the result is a fresh vector).
        VectorType opUnary(string op)() pure const nothrow @safe @nogc
        {
            VectorType res = void; // every element is written by the array operation below
            mixin("res.array[] = " ~ op ~ "array[];");
            return res;
        }

        /// Element-wise binary operator between two vectors of the same type.
        VectorType opBinary(string op)(VectorType other) pure const nothrow @safe @nogc
        {
            VectorType res = void; // every element is written by the array operation below
            mixin("res.array[] = array[] " ~ op ~ " other.array[];");
            return res;
        }

        /// Assigning a BaseType value: broadcasts `e` to all elements.
        void opAssign(BaseType e) pure nothrow @safe @nogc
        {
            array[] = e;
        }

        /// Assigning a static array: copies all elements of `v`.
        void opAssign(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        /// Element-wise compound assignment (`+=`, `-=`, ...) with another vector.
        void opOpAssign(string op)(VectorType other) pure nothrow @safe @nogc
        {
            mixin("array[] " ~ op ~ "= other.array[];");
        }

        /// Construct from a static array of `N` elements.
        this(ArrayType v) pure nothrow @safe @nogc
        {
            array[] = v[];
        }

        /// Broadcast constructor: all elements are set to `x`.
        this(BaseType x) pure nothrow @safe @nogc
        {
            array[] = x;
        }

        /// We can't support implicit conversion but do support explicit casting.
        /// "Vector types of the same size can be implicitly converted among each other."
        /// Casting to another vector type is always just a raw copy.
        VecDest opCast(VecDest)() pure const nothrow @trusted @nogc
            if (VecDest.sizeof == VectorType.sizeof)
        {
            VecDest dest = void;
            // Reinterpret our storage as the destination array type, then copy it.
            dest.array[] = (cast(typeof(dest.array))cast(void[VectorType.sizeof])array)[];
            return dest;
        }

        /// Returns: a reference to element `i` of `array`.
        ref inout(BaseType) opIndex(size_t i) inout return pure nothrow @safe @nogc
        {
            return array[i];
        }
    }
}
else
{
    public import core.simd;

    // GDC cannot convert implicitly __vector from signed to unsigned, but LDC can
    // And GDC sometimes need those unsigned vector types for some intrinsics.
    // For internal use only.
    package alias ushort8 = Vector!(ushort[8]);
    package alias ubyte8  = Vector!(ubyte[8]);
    package alias ubyte16 = Vector!(ubyte[16]);

    static if (!AVXSizedVectorsAreEmulated)
    {
        package alias ushort16 = Vector!(ushort[16]);
        package alias ubyte32  = Vector!(ubyte[32]);
    }
}

// Emulate ldc.simd cmpMask and other masks.
// Note: these should be deprecated on non-LDC,
// since it's slower to generate that code.
version(LDC)
{}
else
{
    // TODO: deprecated and write plain versions instead

    /// Element type of vector type `V`, taken from the type of `V.array[0]`.
    private template BaseType(V)
    {
        alias BaseType = typeof( ( { V v; return v; }()).array[0]);
    }

    /// The "true" lane value for vector type `V`: all bits set.
    /// For `float` and `double` this is the all-ones bit pattern reinterpreted
    /// as a floating-point value; for integer element types it is -1.
    private template TrueMask(V)
    {
        alias Elem = BaseType!V;

        static if (is(Elem == float))
        {
            immutable uint m1 = 0xffffffff;
            enum Elem TrueMask = *cast(float*)(&m1);
        }
        else static if (is(Elem == double))
        {
            immutable ulong m1 = 0xffffffff_ffffffff;
            enum Elem TrueMask = *cast(double*)(&m1);
        }
        else // integer case
        {
            enum Elem TrueMask = -1;
        }
    }

    /// Lane-wise `a == b`. Each result lane is `TrueMask!Vec` where the
    /// comparison holds, 0 otherwise.
    Vec equalMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "oeq" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] == b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }

    /// Lane-wise `a > b`. Each result lane is `TrueMask!Vec` where the
    /// comparison holds, 0 otherwise.
    Vec greaterMask(Vec)(Vec a, Vec b) @trusted // for floats, equivalent to "ogt" comparison
    {
        enum size_t Count = Vec.array.length;
        Vec result;
        foreach(int i; 0..Count)
        {
            bool cond = a.array[i] > b.array[i];
            result.ptr[i] = cond ? TrueMask!Vec : 0;
        }
        return result;
    }
}

unittest
{
    float4 a = [1, 3, 5, 7];
    float4 b = [2, 3, 4, 5];
    int4 c = cast(int4)(greaterMask!float4(a, b));
    static immutable int[4] correct = [0, 0, 0xffff_ffff, 0xffff_ffff];
    assert(c.array == correct);
}

static if (MMXSizedVectorsAreEmulated)
{
    /// MMX-like SIMD types
    struct float2
    {
        float[2] array;
        mixin VectorOps!(float2, float[2]);
    }

    struct byte8
    {
        byte[8] array;
        mixin VectorOps!(byte8, byte[8]);
    }

    struct short4
    {
        short[4] array;
        mixin VectorOps!(short4, short[4]);
    }

    struct int2
    {
        int[2] array;
        mixin VectorOps!(int2, int[2]);
    }

    struct long1
    {
        long[1] array;
        mixin VectorOps!(long1, long[1]);
    }
}
else
{
    // For this compiler, defining MMX-sized vectors is working.
    public import core.simd;
    alias long1 = Vector!(long[1]);
    alias float2 = Vector!(float[2]);
    alias int2 = Vector!(int[2]);
    alias short4 = Vector!(short[4]);
    alias byte8 = Vector!(byte[8]);
}

static assert(float2.sizeof == 8);
static assert(byte8.sizeof == 8);
static assert(short4.sizeof == 8);
static assert(int2.sizeof == 8);
static assert(long1.sizeof == 8);


static if (SSESizedVectorsAreEmulated)
{
    /// SSE-like SIMD types

    struct float4
    {
        float[4] array;
        mixin VectorOps!(float4, float[4]);
    }

    struct byte16
    {
        byte[16] array;
        mixin VectorOps!(byte16, byte[16]);
    }

    struct short8
    {
        short[8] array;
        mixin VectorOps!(short8, short[8]);
    }

    struct int4
    {
        int[4] array;
        mixin VectorOps!(int4, int[4]);
    }

    struct long2
    {
        long[2] array;
        mixin VectorOps!(long2, long[2]);
    }

    struct double2
    {
        double[2] array;
        mixin VectorOps!(double2, double[2]);
    }
}

static assert(float4.sizeof == 16);
static assert(byte16.sizeof == 16);
static assert(short8.sizeof == 16);
static assert(int4.sizeof == 16);
static assert(long2.sizeof == 16);
static assert(double2.sizeof == 16);


static if (!AVXSizedVectorsAreEmulated)
{
    // This compiler provides 32-byte vectors natively.
    public import core.simd;
}
else
{
    /// AVX-like SIMD types

    struct float8
    {
        float[8] array;
        mixin VectorOps!(float8, float[8]);
    }

    struct byte32
    {
        byte[32] array;
        mixin VectorOps!(byte32, byte[32]);
    }

    struct short16
    {
        short[16] array;
        mixin VectorOps!(short16, short[16]);
    }

    struct int8
    {
        int[8] array;
        mixin VectorOps!(int8, int[8]);
    }

    struct long4
    {
        long[4] array;
        mixin VectorOps!(long4, long[4]);
    }

    struct double4
    {
        double[4] array;
        mixin VectorOps!(double4, double[4]);
    }
}

// Whether native or emulated, every AVX-sized type must occupy 32 bytes.
static assert(byte32.sizeof == 32);
static assert(short16.sizeof == 32);
static assert(int8.sizeof == 32);
static assert(long4.sizeof == 32);
static assert(float8.sizeof == 32);
static assert(double4.sizeof == 32);

// Intel-style names for the vector types.
alias __m64 = long1; // like in Clang, __m64 is a vector of 1 long

alias __m128 = float4;
alias __m128d = double2;
alias __m128i = int4;

alias __m256 = float8;
alias __m256d = double4;
alias __m256i = long4; // long long __vector with ICC, GCC, and clang

/// Builds a 2-bit shuffle immediate: `x` goes to bit 1, `y` to bit 0.
/// Both arguments are asserted to be 0 or 1.
int _MM_SHUFFLE2(int x, int y) pure @safe
{
    assert(!(x < 0 || x > 1));
    assert(!(y < 0 || y > 1));
    immutable int upper = x << 1;
    return upper | y;
}

/// Builds an 8-bit shuffle immediate from four 2-bit selectors:
/// `z` in bits 7..6, `y` in bits 5..4, `x` in bits 3..2, `w` in bits 1..0.
/// All arguments are asserted to be in the range 0..3.
int _MM_SHUFFLE(int z, int y, int x, int w) pure @safe
{
    assert(!(x < 0 || x > 3));
    assert(!(y < 0 || y > 3));
    assert(!(z < 0 || z > 3));
    assert(!(w < 0 || w > 3));
    immutable int highPair = (z << 6) | (y << 4);
    immutable int lowPair = (x << 2) | w;
    return highPair | lowPair;
}

// test assignment from scalar to vector type
unittest
{
    // A scalar initializer must be broadcast to every lane.
    float4 splatF = 3.0f;
    static immutable float[4] wantF = [3.0f, 3.0f, 3.0f, 3.0f];
    assert(splatF.array == wantF);

    int2 splatI = 42;
    static immutable int[2] wantI = [42, 42];
    assert(splatI.array == wantI);
}