fix asm mat4 multiply

This commit is contained in:
Matthew 2025-10-25 13:21:31 +11:00
parent a97309cb04
commit f5fdd05336

79
math.d
View File

@ -521,7 +521,7 @@ align(16) struct Matrix(T, int D)
Matrix result;
MatZero(&result);
glm_mat4_mul(glm_mat.ptr, x.glm_mat.ptr, result.glm_mat.ptr);
Mat4MulASM(&this, &x, &result);
return result;
}
@ -583,21 +583,14 @@ struct Quat
}
}
// NOTE(review): this listing is a commit diff rendered without +/- markers, so
// removed and added lines sit back to back (two signatures, two sets of
// `mov R8/R9/R10`, repeated `mulps`/`addps` pairs). Read it as a diff, not as
// compilable source.
//
// Mat4MulASM: SSE routine that combines the matrices `l` and `r` and writes
// four 16-byte vectors to the memory addressed by R10.
//   R8  -> l       (read in 16-byte chunks at +0, +16, +32, +48)
//   R9  -> r       (XMM1 is loaded from +0; see the note at the hunk boundary)
//   R10 -> result  (written at +0, +16, +32, +48)
// Each accumulator XMM6..XMM9 is built as the sum, over the four chunks of l,
// of (chunk * one broadcast lane of XMM1..XMM4 respectively).
// The pointer-taking signature writes through `result` and returns nothing;
// the by-value signature declares a local `result` and returns it.
// NOTE(review): XMM6..XMM9 are callee-saved under the Microsoft x64 ABI —
// confirm the compiler preserves them around this asm block if Windows is a target.
Mat4
Mat4MulASM(Mat4 l, Mat4 r)
void
Mat4MulASM(Mat4* l, Mat4* r, Mat4* result)
{
Mat4 result;
auto lp = &l;
auto rp = &r;
auto res = &result;
// TODO: fix this
asm @trusted
{
// Pointer setup: the lp/rp/res form matches the by-value signature and its
// locals; the l/r/result form matches the pointer-taking signature.
mov R8, lp;
mov R9, rp;
mov R10, res;
mov R8, l;
mov R9, r;
mov R10, result;
movups XMM0, [R8];
movups XMM1, [R9+00];
@ -607,93 +600,91 @@ Mat4MulASM(Mat4 l, Mat4 r)
// NOTE(review): diff hunk boundary above — XMM2..XMM4 are not written in any
// visible line; presumably the elided lines load them from [R9+16], [R9+32]
// and [R9+48]. Confirm against the full file.

// Pass 1: lane 0 of each source vector times the first chunk of l (XMM0 = [R8]).
movups XMM6, XMM1;
shufps XMM6, XMM6, 0; // XMM6 = vec.xxxx;
mulps XMM6, XMM0; // XMM6 = col1;
mulps XMM6, XMM0; // XMM6 = col1;
movups XMM7, XMM2;
shufps XMM7, XMM7, 0;
mulps XMM7, XMM0; // XMM7 = col2;
mulps XMM7, XMM0; // XMM7 = col2;
movups XMM8, XMM3;
shufps XMM8, XMM8, 0;
mulps XMM8, XMM0; // XMM8 = col3;
mulps XMM8, XMM0; // XMM8 = col3;
// NOTE(review): the XMM3 copy below looks like the removed line and the XMM4
// copy its replacement; every later pass feeds XMM9 from XMM4.
movups XMM9, XMM3;
movups XMM9, XMM4;
shufps XMM9, XMM9, 0;
mulps XMM9, XMM0; // XMM9 = col4;
mulps XMM9, XMM0; // XMM9 = col4;
// Pass 2: lane 1 (shufps imm 85) times the second chunk of l, added to the accumulators.
movups XMM0, [R8+16];
movups XMM5, XMM1;
shufps XMM5, XMM5, 85; // XMM5 = vec.yyyy;
mulps XMM5, XMM0;
addps XMM6, XMM5;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM7, XMM5;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM8, XMM5;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 85;
mulps XMM5, XMM0;
addps XMM9, XMM5;
mulps XMM5, XMM0;
addps XMM9, XMM5;
// Pass 3: lane 2 (shufps imm 170) times the third chunk of l.
movups XMM0, [R8+32];
movups XMM5, XMM1;
shufps XMM5, XMM5, 170; // XMM5 = vec.zzzz;
mulps XMM5, XMM0;
addps XMM6, XMM5;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM7, XMM5;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM8, XMM5;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 170;
mulps XMM5, XMM0;
addps XMM9, XMM5;
mulps XMM5, XMM0;
addps XMM9, XMM5;
// Pass 4: lane 3 (shufps imm 255) times the fourth chunk of l.
movups XMM0, [R8+48];
movups XMM5, XMM1;
shufps XMM5, XMM5, 255; // XMM5 = vec.wwww;
mulps XMM5, XMM0;
addps XMM6, XMM5;
mulps XMM5, XMM0;
addps XMM6, XMM5;
movups XMM5, XMM2;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM7, XMM5;
mulps XMM5, XMM0;
addps XMM7, XMM5;
movups XMM5, XMM3;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM8, XMM5;
mulps XMM5, XMM0;
addps XMM8, XMM5;
movups XMM5, XMM4;
shufps XMM5, XMM5, 255;
mulps XMM5, XMM0;
addps XMM9, XMM5;
mulps XMM5, XMM0;
addps XMM9, XMM5;
// Write the four accumulators out through R10 (unaligned stores).
movups [R10+00], XMM6;
movups [R10+16], XMM7;
movups [R10+32], XMM8;
movups [R10+48], XMM9;
}
return result;
}
pragma(inline) Mat4