fix asm mat4 multiply
This commit is contained in:
parent
a97309cb04
commit
f5fdd05336
79
math.d
79
math.d
@ -521,7 +521,7 @@ align(16) struct Matrix(T, int D)
|
||||
Matrix result;
|
||||
MatZero(&result);
|
||||
|
||||
glm_mat4_mul(glm_mat.ptr, x.glm_mat.ptr, result.glm_mat.ptr);
|
||||
Mat4MulASM(&this, &x, &result);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -583,21 +583,14 @@ struct Quat
|
||||
}
|
||||
}
|
||||
|
||||
Mat4
|
||||
Mat4MulASM(Mat4 l, Mat4 r)
|
||||
void
|
||||
Mat4MulASM(Mat4* l, Mat4* r, Mat4* result)
|
||||
{
|
||||
Mat4 result;
|
||||
|
||||
auto lp = &l;
|
||||
auto rp = &r;
|
||||
auto res = &result;
|
||||
|
||||
// TODO: fix this
|
||||
asm @trusted
|
||||
{
|
||||
mov R8, lp;
|
||||
mov R9, rp;
|
||||
mov R10, res;
|
||||
mov R8, l;
|
||||
mov R9, r;
|
||||
mov R10, result;
|
||||
|
||||
movups XMM0, [R8];
|
||||
movups XMM1, [R9+00];
|
||||
@ -607,93 +600,91 @@ Mat4MulASM(Mat4 l, Mat4 r)
|
||||
|
||||
movups XMM6, XMM1;
|
||||
shufps XMM6, XMM6, 0; // XMM6 = vec.xxxx;
|
||||
mulps XMM6, XMM0; // XMM6 = col1;
|
||||
mulps XMM6, XMM0; // XMM6 = col1;
|
||||
|
||||
movups XMM7, XMM2;
|
||||
shufps XMM7, XMM7, 0;
|
||||
mulps XMM7, XMM0; // XMM7 = col2;
|
||||
mulps XMM7, XMM0; // XMM7 = col2;
|
||||
|
||||
movups XMM8, XMM3;
|
||||
shufps XMM8, XMM8, 0;
|
||||
mulps XMM8, XMM0; // XMM8 = col3;
|
||||
mulps XMM8, XMM0; // XMM8 = col3;
|
||||
|
||||
movups XMM9, XMM3;
|
||||
movups XMM9, XMM4;
|
||||
shufps XMM9, XMM9, 0;
|
||||
mulps XMM9, XMM0; // XMM9 = col4;
|
||||
mulps XMM9, XMM0; // XMM9 = col4;
|
||||
|
||||
movups XMM0, [R8+16];
|
||||
|
||||
movups XMM5, XMM1;
|
||||
shufps XMM5, XMM5, 85; // XMM5 = vec.yyyy;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
|
||||
movups XMM5, XMM2;
|
||||
shufps XMM5, XMM5, 85;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
|
||||
movups XMM5, XMM3;
|
||||
shufps XMM5, XMM5, 85;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
|
||||
movups XMM5, XMM4;
|
||||
shufps XMM5, XMM5, 85;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
|
||||
movups XMM0, [R8+32];
|
||||
|
||||
movups XMM5, XMM1;
|
||||
shufps XMM5, XMM5, 170; // XMM5 = vec.zzzz;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
|
||||
movups XMM5, XMM2;
|
||||
shufps XMM5, XMM5, 170;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
|
||||
movups XMM5, XMM3;
|
||||
shufps XMM5, XMM5, 170;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
|
||||
movups XMM5, XMM4;
|
||||
shufps XMM5, XMM5, 170;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
|
||||
movups XMM0, [R8+48];
|
||||
|
||||
movups XMM5, XMM1;
|
||||
shufps XMM5, XMM5, 255; // XMM5 = vec.wwww;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM6, XMM5;
|
||||
|
||||
movups XMM5, XMM2;
|
||||
shufps XMM5, XMM5, 255;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM7, XMM5;
|
||||
|
||||
movups XMM5, XMM3;
|
||||
shufps XMM5, XMM5, 255;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM8, XMM5;
|
||||
|
||||
movups XMM5, XMM4;
|
||||
shufps XMM5, XMM5, 255;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
mulps XMM5, XMM0;
|
||||
addps XMM9, XMM5;
|
||||
|
||||
movups [R10+00], XMM6;
|
||||
movups [R10+16], XMM7;
|
||||
movups [R10+32], XMM8;
|
||||
movups [R10+48], XMM9;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pragma(inline) Mat4
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user