dlib/math.d

1815 lines
33 KiB
D

module dlib.math;
import dlib.aliases;
import dlib.util;
import std.traits;
import std.math.traits;
import std.meta;
public import std.math.rounding : round;
public import core.math : sqrt, cos, sin;
enum PI = 3.14159L;
const u16[128] _RSQRT_TABLE = [
0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
];
union Union64
{
f64 f;
u64 u;
}
union Union32
{
f32 f;
u32 u;
}
auto
AsUint(T)(T v) if(isFloatingPoint!(T))
{
static if(is(T == f32))
{
Union32 u = { f: v };
}
else
{
Union64 u = { f: v };
}
return u.u;
}
auto
AsFloat(T)(T v) if(isIntegral!(T))
{
static if(T.sizeof == 4)
{
Union32 u = { u: v };
}
else
{
Union64 u = { u: v };
}
return u.f;
}
T
_MathInvalid(T)(T v) if(isFloatingPoint!(T))
{
return (v - v) / (v - v);
}
u32
Mul32(T)(T a, T b) if(isIntegral!(T))
{
return cast(u32)(cast(u64)(a*b) >> 32);
}
u64
Mul64(u64 a, u64 b)
{
u64 ahi = a>>32;
u64 alo = a&0xffffffff;
u64 bhi = b>>32;
u64 blo = b&0xffffffff;
return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
}
/*
T
Sqrt(T)(T x) if(isFloatingPoint!(T))
{
u64 ix, top, m;
ix = AsUint(x);
top = ix >> 52;
if (top - 0x001 >= 0x7ff - 0x001) {
if (ix * 2 == 0)
{
return x;
}
if (ix == 0x7ff0000000000000)
{
return x;
}
if (ix > 0x7ff0000000000000)
{
return _MathInvalid(x);
}
ix = AsUint(x * 0x1p52);
top = ix >> 52;
top -= 52;
}
i32 even = top & 1;
m = (ix << 11) | 0x8000000000000000;
if (even)
{
m >>= 1;
}
top = (top + 0x3ff) >> 1;
static const u64 three = 0xc0000000;
u64 r, s, d, u, i;
i = (ix >> 46) % 128;
r = cast(u32)(_RSQRT_TABLE[cast(usize)i] << 16);
s = Mul32(m>>32, r);
d = Mul32(s, r);
u = three - d;
r = Mul32(r, u) << 1;
s = Mul32(s, u) << 1;
d = Mul32(s, r);
u = three - d;
r = Mul32(r, u) << 1;
r = r << 32;
s = Mul64(m, r);
d = Mul64(s, r);
u = (three<<32) - d;
s = Mul64(s, u);
s = (s - 2) >> 9;
u64 d0, d1, d2;
f64 y, t;
d0 = (m << 42) - s*s;
d1 = s - d0;
d2 = d1 + s + 1;
s += d1 >> 63;
s &= 0x000fffffffffffff;
s |= top << 52;
y = AsFloat(s);
static if(true)
{
u64 tiny = d2==0 ? 0 : 0x0010000000000000;
tiny |= (d1^d2) & 0x8000000000000000;
t = AsFloat(tiny);
y = cast(f64)(y + t);
}
return y;
}
*/
T
Abs(T)(T x) if(isSigned!(T))
{
static if(isFloatingPoint!(T))
{
static if(is(T == f32))
{
Union32 f = { f: x };
}
else
{
Union64 f = { f: x };
}
f.u &= cast(typeof(f.u))-1 / 2;
x = f.f;
}
else
{
x = x >= 0 ? x : -x;
}
return x;
}
T
Ceil(T)(T x) if(is(T == f32) || is(T == f64))
{
if(!isNaN(x) && !isInfinity(x) && x != 0.0)
{
x = _Floor(x+1.0);
}
return x;
}
T
Floor(T)(T x) if(is(T == f32) || is(T == f64))
{
if(!isNaN(x) && !isInfinity(x) && x != 0.0)
{
x = _Floor(x);
}
return x;
}
T
_Floor(T)(const T x) @trusted pure nothrow @nogc
{
alias F = floatTraits!(T);
// Take care not to trigger library calls from the compiler,
// while ensuring that we don't get defeated by some optimizers.
union floatBits
{
T rv;
ushort[T.sizeof/2] vu;
// Other kinds of extractors for real formats.
static if (F.realFormat == RealFormat.ieeeSingle)
uint vi;
else static if (F.realFormat == RealFormat.ieeeDouble)
ulong vi;
}
floatBits y = void;
y.rv = x;
// Find the exponent (power of 2)
// Do this by shifting the raw value so that the exponent lies in the low bits,
// then mask out the sign bit, and subtract the bias.
static if (F.realFormat == RealFormat.ieeeSingle)
{
int exp = ((y.vi >> (T.mant_dig - 1)) & 0xff) - 0x7f;
enum mantissa_mask = F.MANTISSAMASK_INT;
enum sign_shift = 31;
}
else static if (F.realFormat == RealFormat.ieeeDouble)
{
long exp = ((y.vi >> (T.mant_dig - 1)) & 0x7ff) - 0x3ff;
enum mantissa_mask = F.MANTISSAMASK_LONG;
enum sign_shift = 63;
}
else static if (F.realFormat == RealFormat.ieeeExtended ||
F.realFormat == RealFormat.ieeeExtended53)
{
int exp = (y.vu[F.EXPPOS_SHORT] & 0x7fff) - 0x3fff;
version (LittleEndian)
int pos = 0;
else
int pos = 4;
}
else static if (F.realFormat == RealFormat.ieeeQuadruple)
{
int exp = (y.vu[F.EXPPOS_SHORT] & 0x7fff) - 0x3fff;
version (LittleEndian)
int pos = 0;
else
int pos = 7;
}
else
static assert(false, "Not implemented for this architecture");
if (exp < 0)
{
if (x < 0.0)
return -1.0;
else
return 0.0;
}
static if (F.realFormat == RealFormat.ieeeSingle ||
F.realFormat == RealFormat.ieeeDouble)
{
if (exp < (T.mant_dig - 1))
{
// Clear all bits representing the fraction part.
// Note: the fraction mask represents the floating point number 0.999999...
// i.e: `2.0 ^^ (exp - T.mant_dig + 1) * (fraction_mask + 1) == 1.0`
const fraction_mask = mantissa_mask >> exp;
if ((y.vi & fraction_mask) != 0)
{
// If 'x' is negative, then first substract (1.0 - T.epsilon) from the value.
if (y.vi >> sign_shift)
y.vi += fraction_mask;
y.vi &= ~fraction_mask;
}
}
}
else
{
static if (F.realFormat == RealFormat.ieeeExtended53)
exp = (T.mant_dig + 11 - 1) - exp; // mant_dig is really 64
else
exp = (T.mant_dig - 1) - exp;
// Zero 16 bits at a time.
while (exp >= 16)
{
version (LittleEndian)
y.vu[pos++] = 0;
else
y.vu[pos--] = 0;
exp -= 16;
}
// Clear the remaining bits.
if (exp > 0)
y.vu[pos] &= 0xffff ^ ((1 << exp) - 1);
if ((x < 0.0) && (x != y.rv))
y.rv -= 1.0;
}
return y.rv;
}
T
_MinOrMaxInner(T, bool max, Args...)(T first, Args args)
{
T value = first;
static foreach(i; 0 .. args.length)
{
static if(max) if(cast(T)(args[i]) > value) value = cast(T)(args[i]);
static if(!max) if(cast(T)(args[i]) < value) value = cast(T)(args[i]);
}
return value;
}
T
Max(T, Args...)(T first, Args args)
{
return _MinOrMaxInner!(T, true, Args)(first, args);
}
T
Min(T, Args...)(T first, Args args)
{
return _MinOrMaxInner!(T, false, Args)(first, args);
}
T
RoundUp(T)(T a, T b)
{
a += b - 1;
a -= a%b;
return a;
}
T AlignPow2(T)(T v, T a)
{
return (v + a - 1) & ~(a - 1);
}
f32 Radians(f32 deg)
{
return deg * (PI / 180.0);
}
struct Vector(T, int N)
{
static assert(N > 1 && N <= 4);
enum _N = N;
alias T _T;
union
{
T[N] v = 0;
struct
{
T x;
alias x r;
T y;
alias y g;
static if(N > 2)
{
T z;
alias z b;
}
static if(N > 3)
{
T w;
alias w a;
}
};
}
this(Vec3, f32)(Vec3 v3, f32 f) if(N == 4 && is(T: f32))
{
x = v3.x;
y = v3.y;
z = v3.z;
w = f;
}
this(Arr)(Arr arr) if(is(Arr: typeof(v)))
{
this.v = arr;
}
this(Args...)(Args args)
{
static if(args.length == 1)
{
enum bool is_array = isArray!(Args[0]);
enum bool is_assignable = isAssignable!(T, Args[0]);
enum bool types_are_numeric = TypesAreNumeric!(T, Args[0]);
static if(is_array || is_assignable || types_are_numeric)
{
opAssign!(Args[0])(args[0]);
}
else static assert(false, "Invalid Vector constructor");
}
else static if(args.length == N)
{
static foreach(i; 0 .. N)
{
v[i] = cast(T)args[i];
}
}
else static if(args.length == 2 && N == 4)
{
v[0] = args[0];
v[1] = args[0];
v[2] = args[0];
v[3] = args[1];
}
else
{
static assert(false, "Invalid Vector constructor");
}
}
ref Vector opAssign(U)(U x) if(is(U: T) || isAssignable!(T, U) || TypesAreNumeric!(T, U))
{
v[] = cast(T)x;
return this;
}
ref Vector opAssign(U)(U u) if(isStaticArray!(U) && U.length == N && isAssignable!(T, typeof(*U.init.ptr)))
{
static foreach(i; 0 .. N)
{
v[i] = cast(T)u[i];
}
return this;
}
ref Vector opAssign(U)(U u) if(is(U : Vector))
{
v[] = u.v[];
return this;
}
inout(T)* ptr() inout @property
{
return v.ptr;
}
bool opEquals(U)(U other) if(is(U: Vector!(T, N)))
{
bool result = true;
foreach(i; 0 .. N)
{
if(Abs(v[i] - other.v[i]) > 0.0000009)
{
result = false;
break;
}
}
return result;
}
int opDollar()
{
return N;
}
T[] opSlice()
{
return v[];
}
Vector opUnary(string op)() if(op == "+" || op == "-" || op == "~" || op == "!")
{
Vector res;
static foreach(i; 0 .. N)
{
mixin("res.v[", i, "] = " ~ op ~ "v[", i, "];\n");
}
return res;
}
ref Vector opOpAssign(string op, U)(U value) if(is(U: Vector))
{
static foreach(i; 0 .. N)
{
mixin("v[", i, "] " ~op~ "= value.v[", i, "];");
}
return this;
}
ref Vector opOpAssign(string op, U)(U value) if(IsConvertible!(U))
{
Vector conv = value;
return opOpAssign!(op)(conv);
}
@property auto opDispatch(string op, U = void)() if(ValidSwizzle!(op) && op.length <= 4)
{
Vector!(T, op.length) result;
enum index_tuple = SwizzleTuple!(op);
static foreach(i, index; index_tuple)
{
result.v[i] = v[index];
}
return result;
}
@property void opDispatch(string op, U)(U x) if((op.length > 1) && ValidUniqueSwizzle!(op) && is(typeof(Vector!(T, op.length)(x))))
{
Vector!(T, op.length) conv = x;
enum index_tuple = SwizzleTuple!(op);
static foreach(i, index; index_tuple)
{
v[index] = conv[i];
}
}
Vector
opBinaryRight(string op, U)(U operand) if(IsConvertible!(U) && (op == "*" || op == "+" || op == "-" || op == "/"))
{
Vector res;
static foreach(i; 0 .. N)
{
mixin("res.v[", i, "] = v[", i, "] " ~op~ " operand;");
}
return res;
}
static if(N == 4)
{
Vector opBinary(string op, U)(U operand) if((is(U: Vector!(f32, 4)) && is(T: f32)) && (op == "*" || op == "+" || op == "-" || op == "/"))
{
Vector result;
version(X86_64)
{
f32* l = &x;
f32* r = &operand.x;
f32* res = &result.x;
asm
{
mov R8, l;
mov R9, r;
mov R10, res;
movups XMM0, x.offsetof[R8];
movups XMM1, operand.x.offsetof[R9];
}
static if(op == "*") asm { mulps XMM0, XMM1; }
else static if(op == "-") asm { subps XMM0, XMM1; }
else static if(op == "+") asm { addps XMM0, XMM1; }
else static if(op == "/") asm { divps XMM0, XMM1; }
asm
{
movups result.x.offsetof[R10], XMM0;
}
}
else
{
static foreach(i; 0 .. N)
{
result.v[i] = mixin("this.v[", i, "] " ~op~ " operand.v[", i, "]");
}
}
return result;
}
Vector opBinary(string op, U)(U operand) if(IsConvertible!(U) && (op == "*" || op == "+" || op == "-" || op == "/"))
{
Vector result;
version(X86_64)
{
Vector other = operand;
f32* l = &x;
f32* r = &other.x;
f32* res = &result.x;
asm
{
mov R8, l;
mov R9, r;
mov R10, res;
movups XMM0, x.offsetof[R8];
movups XMM1, other.x.offsetof[R9];
}
static if(op == "*") asm { mulps XMM0, XMM1; }
else static if(op == "-") asm { subps XMM0, XMM1; }
else static if(op == "+") asm { addps XMM0, XMM1; }
else static if(op == "/") asm { divps XMM0, XMM1; }
asm
{
movups result.x.offsetof[R10], XMM0;
}
}
else
{
static foreach(i; 0 .. N)
{
mixin("result.v[", i, "] = this.v[", i, "] " ~op~ " operand;");
}
}
return result;
}
}
else
{
Vector opBinary(string op, U)(U operand) if(is(U: Vector) && U._N == N && (op == "*" || op == "+" || op == "-" || op == "/"))
{
Vector res;
static foreach(i; 0 .. N)
{
mixin("res.v[", i, "] = v[", i, "] " ~ op ~ " operand.v[", i, "];");
}
return res;
}
Vector opBinary(string op, U)(U operand) if(IsConvertible!(U) && (op == "*" || op == "+" || op == "-" || op == "/"))
{
Vector res;
Vector other = operand;
static foreach(i; 0 .. N)
{
mixin("res.v[", i, "] = v[", i, "] " ~ op ~ " other.v[", i, "];");
}
return res;
}
}
ref T opIndex(size_t i)
{
return v[i];
}
T opIndexAssign(U : T)(U x, size_t i)
{
return v[i] = x;
}
U opCast(U)() if(IsVector!(U) && (U._N == _N))
{
U result;
static foreach(i; 0 .. N)
{
rev.v[i] = cast(U._T)v[i];
}
return result;
}
template IsConvertible(U)
{
enum bool IsConvertible = !is(U == Vector) && __traits(compiles, { U u; T t = cast(T)u; } );
}
template TypesAreNumeric(U, V)
{
enum bool TypesAreNumeric = isNumeric!(U) && isNumeric!(V);
}
template SwizzleIndex(char c)
{
static if((c == 'x' || c == 'r') && N > 0)
enum SwizzleIndex = 0;
else static if((c == 'y' || c == 'g') && N > 1)
enum SwizzleIndex = 1;
else static if((c == 'z' || c == 'b') && N > 2)
enum SwizzleIndex = 2;
else static if((c == 'w' || c == 'a') && N > 3)
enum SwizzleIndex = 3;
else
enum SwizzleIndex = -1;
}
template SwizzleSet(char c)
{
static if(c == 'x' || c == 'y' || c == 'z' || c == 'w')
enum SwizzleSet = 0;
else static if(c == 'r' || c == 'g' || c == 'b' || c == 'a')
enum SwizzleSet = 1;
else
enum SwizzleSet = -1;
}
template SwizzleTuple(string op)
{
enum op_length = op.length;
static if(op.length == 0)
enum SwizzleTuple = [];
else
enum SwizzleTuple = [ SwizzleIndex!(op[0])] ~ SwizzleTuple!(op[1 .. op.length]);
}
template SearchString(char c, string s)
{
static if(s.length == 0)
{
enum bool result = false;
}
else
{
enum string tail = s[1 .. s.length];
enum bool result = (s[0] == c) || SearchString!(c, tail).result;
}
}
template UniqueChars(string s)
{
static if(s.length == 1)
{
enum bool result = true;
}
else
{
enum tail = s[1 .. s.length];
enum bool result = !(SearchString!(s[0], tail).result) && UniqueChars!(tail).result;
}
}
template ValidSwizzle(string op, int last_swizzle = -1)
{
static if(op.length == 0)
{
enum bool ValidSwizzle = true;
}
else
{
enum length = op.length;
enum int swizzle_set = SwizzleSet!(op[0]);
enum bool valid_swizzle_set = (last_swizzle == -1 || (swizzle_set == last_swizzle));
enum bool ValidSwizzle = (SwizzleIndex!(op[0]) != -1) && valid_swizzle_set && ValidSwizzle!(op[1 .. length], swizzle_set);
}
}
template ValidUniqueSwizzle(string op)
{
static if(ValidSwizzle!(op))
{
enum ValidUniqueSwizzle = UniqueChars!(op).result;
}
else
{
enum ValidUniqueSwizzle = false;
}
}
}
// TODO: fix alignment for non mat4s
align(16) struct Matrix(T, int D)
{
static assert(D > 0 && D <= 4);
alias Vector!(T, D) MatrixVec;
alias T _T;
alias T[4] Row;
enum N = D*D;
enum _D = D;
union
{
T[N] v = 0;
Row[D] rows;
MatrixVec[D] vec;
}
// TODO: setup
this(U...)(U values)
{
static if((U.length == N) && allSatisfy!(IsTypeAssignable, U))
{
static foreach(i, x; values)
{
v[i] = x;
}
}
else static if((U.length == 1) && (isAssignable!(U[0])) && (!is(U[0] : Matrix)))
{
v[] = values[0];
}
else static assert(false, "Cannot construct matrix with provided parameters");
}
this(U)(T x)
{
static foreach(i; 0 .. N)
{
v[i] = x;
}
}
this(U)(U x) if(is(U: typeof(this.v)))
{
this.v = x;
}
@property inout(T)* ptr() inout
{
return v.ptr;
}
ref Matrix opAssign(U : T)(U x)
{
static foreach(i; 0 .. N)
{
v[i] = x;
}
return this;
}
ref Matrix opAssign(U : Matrix)(U x)
{
static foreach(i; 0 .. N)
{
v[i] = x.v[i];
}
return this;
}
ref Matrix opAssign(U)(U x) if(IsMatrixInstantiation!(U) && is(U._T : _T) && (!is(U: Matrix) && (U.N != N)))
{
static foreach(i; 0 .. N)
{
v[i] = x.v[i];
}
return this;
}
ref T opIndex(size_t i, size_t j)
{
return v[(i * D) + j];
}
T opIndexAssign(U: T)(U x, size_t i, size_t j)
{
return v[(i * D) + j] = x;
}
bool opEquals(U)(U other) if(is(U: Matrix!(T, D)))
{
bool result = true;
static foreach(i; 0 .. N)
{
if(Abs(this.v[i] - other.v[i]) > 0.0000009)
{
result = false;
}
}
return result;
}
Matrix opBinary(string op)(T scalar) if(op == "*")
{
Matrix result;
static foreach(i; 0 .. N)
{
result.v[i] = v[i] * scalar;
}
return result;
}
static if(D == 4)
{
Vec4 opBinary(string op)(Vec4 x) if(is(T: f32) && (op == "*"))
{
Vec4 result = 0.0;
// TODO: optimize
Mat4MulVec4(&this, &x, &result);
return result;
}
Matrix opBinary(string op, U)(U x) if(is(U: Matrix!(T, D)) && is(T: f32) && D == 4 && (op == "*"))
{
Matrix result;
MatZero(&result);
Mat4Mul(&this, &x, &result);
return result;
}
}
template IsTypeAssignable(U)
{
enum bool IsTypeAssignable = std.traits.isAssignable!(T, U);
}
template IsMatrixInstantiation(U)
{
static void IsMatrix(T, int D)(Matrix!(T, D) x) {}
enum bool IsMatrixInstantiation = is(typeof(IsMatrix(U.init)));
}
}
struct Quat
{
union
{
f32[4] v = 0;
Vec4 vec;
struct
{
f32 x;
f32 y;
f32 z;
f32 w;
};
};
this(f32 w, f32 x, f32 y, f32 z)
{
vec.x = x;
vec.y = y;
vec.z = z;
vec.w = w;
}
U opCast(U)() if(is(U: Mat4))
{
Mat4 result;
// TODO: optimize
Mat4FromQuat(&this, &result);
return result;
}
Quat opBinary(string op, U)(U r) if(op == "*" && is(U: Quat))
{
Quat q;
q.x = this.w * r.x + this.x * r.w + this.y * r.z - this.z * r.y;
q.y = this.w * r.y - this.x * r.z + this.y * r.w + this.z * r.x;
q.z = this.w * r.z + this.x * r.y - this.y * r.x + this.z * r.w;
q.w = this.w * r.w - this.x * r.x - this.y * r.y - this.z * r.z;
return q;
}
}
void
Mat4Mul(Mat4* l, Mat4* r, Mat4* result)
{
version(X86_64)
{
asm @trusted
{
mov R8, l;
mov R9, r;
mov R10, result;
movups XMM0, [R8];
movups XMM1, [R9+00];
movups XMM2, [R9+16];
movups XMM3, [R9+32];
movups XMM4, [R9+48];
movups XMM6, XMM1;
shufps XMM6, XMM6, 0; // XMM5 = vec.xxxx;
mulps XMM6, XMM0; // XMM6 = col1;
movups XMM7, XMM2;
shufps XMM7, XMM7, 0;
mulps XMM7, XMM0; // XMM7 = col2;
movups XMM8, XMM3;
shufps XMM8, XMM8, 0;
mulps XMM8, XMM0; // XMM8 = col3;
movups XMM9, XMM4;
shufps XMM9, XMM9, 0;
mulps XMM9, XMM0; // XMM9 = col4;
movups XMM0, [R8+16];
movups XMM5, XMM1;
shufps XMM5, XMM5, 85; // XMM5 = vec.yyyy;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM9, XMM5;
movups XMM0, [R8+32];
movups XMM5, XMM1;
shufps XMM5, XMM5, 170; // XMM5 = vec.zzzz;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM9, XMM5;
movups XMM0, [R8+48];
movups XMM5, XMM1;
shufps XMM5, XMM5, 255; // XMM5 = vec.wwww;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM9, XMM5;
movups [R10+00], XMM6;
movups [R10+16], XMM7;
movups [R10+32], XMM8;
movups [R10+48], XMM9;
}
}
}
void
Vec4MulAddScale(Vec4* a, f32 s, Vec4* dst) // glm_vec4_muladds
{
(*dst) += (*a)*s;
}
void
Mat4Inv(Mat4* mat, Mat4* dst) // glm_mat4_inv
{
f32 a = mat.rows[0][0], b = mat.rows[0][1], c = mat.rows[0][2], d = mat.rows[0][3],
e = mat.rows[1][0], f = mat.rows[1][1], g = mat.rows[1][2], h = mat.rows[1][3],
i = mat.rows[2][0], j = mat.rows[2][1], k = mat.rows[2][2], l = mat.rows[2][3],
m = mat.rows[3][0], n = mat.rows[3][1], o = mat.rows[3][2], p = mat.rows[3][3],
c1 = k * p - l * o, c2 = c * h - d * g, c3 = i * p - l * m,
c4 = a * h - d * e, c5 = j * p - l * n, c6 = b * h - d * f,
c7 = i * n - j * m, c8 = a * f - b * e, c9 = j * o - k * n,
c10 = b * g - c * f, c11 = i * o - k * m, c12 = a * g - c * e,
idt = 1.0f/(c8*c1+c4*c9+c10*c3+c2*c7-c12*c5-c6*c11), ndt = -idt;
dst.rows[0][0] = (f * c1 - g * c5 + h * c9) * idt;
dst.rows[0][1] = (b * c1 - c * c5 + d * c9) * ndt;
dst.rows[0][2] = (n * c2 - o * c6 + p * c10) * idt;
dst.rows[0][3] = (j * c2 - k * c6 + l * c10) * ndt;
dst.rows[1][0] = (e * c1 - g * c3 + h * c11) * ndt;
dst.rows[1][1] = (a * c1 - c * c3 + d * c11) * idt;
dst.rows[1][2] = (m * c2 - o * c4 + p * c12) * ndt;
dst.rows[1][3] = (i * c2 - k * c4 + l * c12) * idt;
dst.rows[2][0] = (e * c5 - f * c3 + h * c7) * idt;
dst.rows[2][1] = (a * c5 - b * c3 + d * c7) * ndt;
dst.rows[2][2] = (m * c6 - n * c4 + p * c8) * idt;
dst.rows[2][3] = (i * c6 - j * c4 + l * c8) * ndt;
dst.rows[3][0] = (e * c9 - f * c11 + g * c7) * ndt;
dst.rows[3][1] = (a * c9 - b * c11 + c * c7) * idt;
dst.rows[3][2] = (m * c10 - n * c12 + o * c8) * ndt;
dst.rows[3][3] = (i * c10 - j * c12 + k * c8) * idt;
}
void
Mat4MulVec4(Mat4* m, Vec4* v, Vec4* dst) // glm_mat4_mulv
{
dst.v[0] = m.rows[0][0] * v.v[0] + m.rows[1][0] * v.v[1] + m.rows[2][0] * v.v[2] + m.rows[3][0] * v.v[3];
dst.v[1] = m.rows[0][1] * v.v[0] + m.rows[1][1] * v.v[1] + m.rows[2][1] * v.v[2] + m.rows[3][1] * v.v[3];
dst.v[2] = m.rows[0][2] * v.v[0] + m.rows[1][2] * v.v[1] + m.rows[2][2] * v.v[2] + m.rows[3][2] * v.v[3];
dst.v[3] = m.rows[0][3] * v.v[0] + m.rows[1][3] * v.v[1] + m.rows[2][3] * v.v[2] + m.rows[3][3] * v.v[3];
}
void
Mat4FromQuat(Quat* q, Mat4* dst) // glm_quat_mat4
{
f32 w, x, y, z,
xx, yy, zz,
xy, yz, xz,
wx, wy, wz, norm, s;
norm = Norm(q);
s = norm > 0.0f ? 2.0f/norm : 0.0f;
x = q.v[0];
y = q.v[1];
z = q.v[2];
w = q.v[3];
xx = s * x * x; xy = s * x * y; wx = s * w * x;
yy = s * y * y; yz = s * y * z; wy = s * w * y;
zz = s * z * z; xz = s * x * z; wz = s * w * z;
dst.rows[0][0] = 1.0f - yy - zz;
dst.rows[1][1] = 1.0f - xx - zz;
dst.rows[2][2] = 1.0f - xx - yy;
dst.rows[0][1] = xy + wz;
dst.rows[1][2] = yz + wx;
dst.rows[2][0] = xz + wy;
dst.rows[1][0] = xy - wz;
dst.rows[2][1] = yz - wx;
dst.rows[0][2] = xz - wy;
dst.rows[0][3] = 0.0f;
dst.rows[1][3] = 0.0f;
dst.rows[2][3] = 0.0f;
dst.rows[3][0] = 0.0f;
dst.rows[3][1] = 0.0f;
dst.rows[3][2] = 0.0f;
dst.rows[3][3] = 1.0f;
}
pragma(inline) Mat4
Mat4Identity()
{
return Mat4(
1.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0
);
}
pragma(inline) void
Mat4Identity(Mat4* mat)
{
MatZero(mat);
(*mat)[0, 0] = 1.0;
(*mat)[1, 1] = 1.0;
(*mat)[2, 2] = 1.0;
(*mat)[3, 3] = 1.0;
}
pragma(inline) Mat3
Mat3Identity()
{
return Mat3(
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0
);
}
pragma(inline) Mat2
Mat2Identity()
{
return Mat2(
1.0, 0.0,
0.0, 1.0
);
}
pragma(inline) Quat
QuatFromAxis(f32 angle, Vec3 axis) // glm_quatv
{
Quat q;
f32 a = angle * 0.5f;
f32 c = cos(a);
f32 s = sin(a);
Vec3 k = Normalize(axis);
q.x = s * k.x;
q.y = s * k.y;
q.z = s * k.z;
q.w = c;
return q;
}
pragma(inline) f32
Dot(Vec2* l, Vec2* r)
{
return l.x * r.x + l.y * r.y;
}
pragma(inline) f32
Dot(Vec3* l, Vec3* r)
{
return l.x * r.x + l.y * r.y + l.z * r.z;
}
pragma(inline) f32
Dot(Vec4* l, Vec4* r)
{
// TODO: SIMD this
return l.x * r.x + l.y * r.y + l.z * r.z + l.w * r.w;
}
pragma(inline) f32
Norm(Quat* q)
{
return Norm(&q.vec);
}
pragma(inline) f32
Norm(Vec3* v)
{
return sqrt(Dot(v, v));
}
pragma(inline) f32
Norm(Vec4* v)
{
// TODO: SIMD this
return sqrt(Dot(v, v));
}
pragma(inline) void
Normalize(T)(T* vec) if(is(T: Vec2) || is(T: Vec3) || is(T: Vec4))
{
f32 length = Norm(vec);
if(length < f32.epsilon)
{
static foreach(i; 0 .. vec._N)
{
vec.v[i] = 0.0;
}
}
else
{
static foreach(i; 0 .. vec._N)
{
vec.v[i] *= (1.0 / length);
}
}
}
pragma(inline) T
Normalize(T)(T vec) if(is(T: Vec2) || is(T: Vec3) || is(T: Vec4))
{
Normalize(&vec);
return vec;
}
pragma(inline) Quat
Normalize(Quat q)
{
f32 dot = Norm(&q.vec);
if(dot <= 0.0)
{
q = Quat(1.0, 0.0, 0.0, 0.0);
}
q.vec *= 1.0 / sqrt(dot);
return q;
}
pragma(inline) Mat4
Perspective(bool flip_y = true)(f32 fov, f32 aspect, f32 near, f32 far) // glm_perspective_lh_zo
{
Mat4 mat;
f32 f = 1.0f / tanf(fovy * 0.5f);
f32 fn = 1.0f / (nearZ - farZ);
mat.rows[0][0] = f / aspect;
mat.rows[1][1] = f;
mat.rows[2][2] = -far * fn;
mat.rows[2][3] = 1.0f;
mat.rows[3][2] = near * far * fn;
static if(flip_y)
{
mat[1, 1] *= -1.0;
}
return res;
}
void
Ortho(Mat4* mat, f32 left, f32 bottom, f32 right, f32 top, f32 near, f32 far) // glm_ortho_lh_zo
{
MatZero(mat);
f32 rl = 1.0f / (right - left);
f32 tb = 1.0f / (top - bottom);
f32 fn =-1.0f / (far - near);
mat.rows[0][0] = 2.0f * rl;
mat.rows[1][1] = 2.0f * tb;
mat.rows[2][2] =-fn;
mat.rows[3][0] =-(right + left) * rl;
mat.rows[3][1] =-(top + bottom) * tb;
mat.rows[3][2] = near * fn;
mat.rows[3][3] = 1.0f;
}
Mat4
Ortho(f32 left, f32 bottom, f32 right, f32 top, f32 near, f32 far)
{
Mat4 mat;
Ortho(&mat, left, bottom, right, top, near, far);
return mat;
}
T
Mix(T, U)(T a, T b, U t) if((IsVector!(T) || is(T == f32) || is(T == f64)) && (is(U == f32) || is(U == f64)))
{
return a + t * (b - a);
}
pragma(inline) Vec3
Rotate(Quat q, Vec3 vec)
{
Quat p = Normalize(q);
Vec3 i = p.vec.xyz;
f32 r = p.vec.w;
Vec3 v1 = i * (2.0 * Dot(&i, &vec));
Vec3 v2 = i * (r * r - Dot(&i, &i));
v1 += v2;
v2 = i * vec;
v2 = v2 * (2.0 * r);
return v1 + v2;
}
pragma(inline) Mat4
LookAt(Vec3 eye, Vec3 center, Vec3 up) // glm_lookat_lh
{
Mat4 result;
Vec3 f = Normalize(center - eye);
Vec3 s = CrossN(up, f);
Vec3 u = Cross(f, s);
result.rows[0][0] = s[0];
result.rows[0][1] = u[0];
result.rows[0][2] = f[0];
result.rows[1][0] = s[1];
result.rows[1][1] = u[1];
result.rows[1][2] = f[1];
result.rows[2][0] = s[2];
result.rows[2][1] = u[2];
result.rows[2][2] = f[2];
result.rows[3][0] =-Dot(&s, &eye);
result.rows[3][1] =-Dot(&u, &eye);
result.rows[3][2] =-Dot(&f, &eye);
result.rows[0][3] = result.rows[1][3] = result.rows[2][3] = 0.0f;
result.rows[3][3] = 1.0f;
return result;
}
pragma(inline) Mat4
Look(Vec3 eye, Vec3 dir, Vec3 up)
{
return LookAt(eye, eye + dir, up);
}
pragma(inline) Vec3
Cross(Vec3 a, Vec3 b)
{
Vec3 c;
Cross(a, b, &c);
return c;
}
pragma(inline) Vec3
CrossN(Vec3 a, Vec3 b)
{
Vec3 c;
Cross(a, b, &c);
Normalize(&c);
return c;
}
pragma(inline) void
CrossN(Vec3 a, Vec3 b, Vec3* dst)
{
Cross(a, b, dst);
Normalize(dst);
}
pragma(inline) void
Cross(Vec3 a, Vec3 b, Vec3* dst)
{
dst.x = a.y * b.z - a.z * b.y;
dst.y = a.z * b.x - a.x * b.z;
dst.z = a.x * b.y - a.y * b.x;
}
pragma(inline) void
MatZero(Mat4* mat)
{
mat.v[] = 0.0;
}
pragma(inline) void
Translate(Mat4* mat, Vec3 vec)
{
Vec4MulAddScale(&mat.vec[0], vec.x, &mat.vec[3]);
Vec4MulAddScale(&mat.vec[1], vec.y, &mat.vec[3]);
Vec4MulAddScale(&mat.vec[2], vec.z, &mat.vec[3]);
}
pragma(inline) Mat4
Inverse(Mat4 mat)
{
Mat4 res;
MatZero(&res);
// TODO: optimize
Mat4Inv(&mat, &res);
return res;
}
void
Transform(Vec3* vec, Mat4* mat, f32 last = 1.0)
{
Vec4 tvec = Vec4(*vec, last);
tvec = *mat * tvec;
*vec = tvec.xyz;
}
pragma(inline) T
Lerp(T)(T x, T y, T a)
{
return x * (1 - a) + y * a;
}
pragma(inline) T
InverseLerp(T)(T v, T min, T max)
{
return (v - min) / (max - min);
}
pragma(inline) T
Remap(T)(T v, T in_min, T in_max, T out_min, T out_max)
{
T t = InverseLerp(v, in_min, in_max);
return Lerp(out_min, out_max, t);
}
template
IsVector(T)
{
// import std.traits : isInstanceOf;
enum bool IsVector = isInstanceOf!(Vector, T);
}
T
Clamp(T)(T value, T min, T max) if(isNumeric!(T))
{
assert(min <= max, "Clamp assertion failed: min is higher than max");
T x = value < min ? min : value;
return x > max ? max : x;
}
T
Clamp(T)(T value, T min, T max) if(IsVector!(T))
{
static foreach(i; 0 .. value._N)
{
value.v[i] = Clamp(value.v[i], min.v[i], max.v[i]);
}
return value;
}
version(DLIB_TEST) unittest
{
enum FLOAT_MAX = f32.max;
enum FLOAT_MIN = -f32.max;
/*
void PrintMatrix(Mat4 mat)
{
foreach(i; 0 .. mat.N)
{
if(i % 4 == 0)
{
printf("\n");
}
printf("%.08f ", mat.v[i]);
}
printf("\n");
}
srand(cast(u32)time(null));
f32 RandomFloat()
{
return cast(f32)(rand())/cast(f32)(RAND_MAX + 1.0);
}
{ // Vec2 arithmetic
Vec2 v1 = Vec2(5.0, 10.0);
Vec2 v2 = Vec2(2.5, 5.0);
Vec2 result = v1 * v2;
assert(result == Vec2(12.5, 50.0), "Vec2 mul failure");
result = v1 + v2;
assert(result == Vec2(7.5, 15.0), "Vec2 add failure");
result = v1 - v2;
assert(result == Vec2(2.5, 5.0), "Vec2 sub failure");
result = v1 / v2;
assert(result == Vec2(2.0), "Vec2 div failure");
}
{ // Vec3 Arithmetic
Vec3 v1 = Vec3(5.0, 10.0, 15.0);
Vec3 v2 = Vec3(2.5, 5.0, 7.5);
Vec3 result = v1 * v2;
assert(result == Vec3(12.5, 50.0, 112.5), "Vec3 mul failure");
result = v1 + v2;
assert(result == Vec3(7.5, 15.0, 22.5), "Vec3 add failure");
result = v1 - v2;
assert(result == Vec3(2.5, 5.0, 7.5), "Vec3 sub failure");
result = v1 / v2;
assert(result == Vec3(2.0), "Vec3 div failure");
}
{ // Vec3 Arithmetic
Vec4 v1 = Vec4(5.0, 10.0, 15.0, 20.0);
Vec4 v2 = Vec4(2.5, 5.0, 7.5, 10.0);
Vec4 result = v1 * v2;
assert(result == Vec4(12.5, 50.0, 112.5, 200.0), "Vec4 mul failure");
result = v1 + v2;
assert(result == Vec4(7.5, 15.0, 22.5, 30.0), "Vec4 add failure");
result = v1 - v2;
assert(result == Vec4(2.5, 5.0, 7.5, 10.0), "Vec4 sub failure");
result = v1 / v2;
assert(result == Vec4(2.0), "Vec4 div failure");
}
{ // Mat4 Arithmetic
Mat4 m1 = RandomMat4();
Mat4 m2 = RandomMat4();
Mat4 m3 = m1 * m2;
Mat4 m4;
MatZero(&m4);
for(u32 i = 0; i < 4; i += 1)
{
for(u32 j = 0; j < 4; j += 1)
{
for(u32 k = 0; k < 4; k += 1)
{
m4.rows[i][j] += m1.rows[k][j] * m2.rows[i][k];
}
}
}
assert(m3 == m4, "Mat4 mul failure");
}
{ // Translate
Mat4 mat = Mat4Identity();
Vec4 vec = Vec4(1.0, 2.0, 3.0, 1.0);
Translate(&mat, Vec3(13.0, 11.0, 7.0));
Vec4 result = mat * vec;
assert(result == Vec4(14.0, 13.0, 10.0, 1.0));
mat = Mat4Identity();
Translate(&mat, Vec3(1.0, -1.0, -5.0));
result = mat * result;
assert(result == Vec4(15.0, 12.0, 5.0, 1.0));
}
{ // Identity
Mat4 identity = Mat4(
1.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0
);
Mat4 mat = Mat4Identity();
assert(identity == mat);
}
{ // Inverse
foreach(i; 0 .. 1000)
{
Mat4 m1 = RandomMat4();
Mat4 m1_inv = Inverse(m1);
Mat4 m1_reinv = Inverse(m1_inv);
assert(m1 == m1_reinv, "Inverse test failed");
}
}
{ // Cross
Vec3 v1 = Vec3(2.0, -3.0, 4.0);
Vec3 v2 = Vec3(12.0, -31.0, 43.0);
Vec3 v3 = Cross(v1, v2);
Vec3 v4 = Vec3(
v1.y * v2.z - v1.z * v2.y,
v1.z * v2.x - v1.x * v2.z,
v1.x * v2.y - v1.y * v2.x
);
assert(v3 == v4, "Vec3 Cross failure");
v3 = CrossN(v1, v2);
Normalize(&v4);
assert(v3 == v4, "Vec3 CrossN failure");
}
*/
{ // Initializers
u32[4] arr = [1, 2, 3, 4];
Vec4 vec = Vec4(arr);
assert(vec == Vec4(1.0, 2.0, 3.0, 4.0));
Mat2 mat = Mat2(1.0, 0.0, 0.0, 1.0);
f32[16] floats = 22.0;
Mat4 mat4 = Mat4(floats);
assert(mat4.v == floats);
Quat quat = Quat(1.0, 1.0, 1.0, 1.0);
Quat quat2;
}
{
Vec3 vec = Vec3(1.0, 2.0, 3.0);
Mat4 mat = Mat4(
1.0, 1.0, 1.0, 1.0,
2.0, 2.0, 2.0, 2.0,
3.0, 3.0, 3.0, 3.0,
4.0, 4.0, 4.0, 4.0
);
Transform(&vec, &mat, 1.0);
assert(vec == Vec3(18.0));
Vec4 low = Vec4(-5.0);
Vec4 high = Vec4(+5.0);
Vec4 test = Vec4(-500.0);
test = Clamp(test, low, high);
assert(test == low);
test = Vec4(+500.0);
test = Clamp(test, low, high);
assert(test == high);
}
{
Vec4 m0 = Vec4(0.0);
Vec4 m1 = Vec4(10.0);
assert(Mix(m0, m1, 0.0) == m0);
assert(Mix(m0, m1, 1.0) == m1);
assert(Mix(m0, m1, 0.5) == Vec4(5.0));
assert(Mix(m0, m1, 0.75) == Vec4(7.5));
void TestModify(Vec4 v)
{
v.r = 55;
}
}
{
u32 max = Max(50, 33, 123.0);
assert(max == 123);
}
{
Vec2 v0 = 50U;
assert(Abs(v0.x-50.0) < 0.0009 && Abs(v0.y-50.0) < 0.0009);
U8Vec2 v1 = U8Vec2(55U);
assert(v1.x == 55 && v1.y == 55);
}
{
assert(Ceil(10.5) == 11.0);
assert(Floor(100.33) == 100.0);
assert(Abs(-500) == 500);
assert(Abs(-500.0) == 500.0);
assert(Clamp(55, 10, 40) == 40);
assert(Clamp(-20, -5, 55) == -5);
}
}