fix asm mat4 multiply
This commit is contained in:
parent
a97309cb04
commit
f5fdd05336
81
math.d
81
math.d
@ -521,7 +521,7 @@ align(16) struct Matrix(T, int D)
|
|||||||
Matrix result;
|
Matrix result;
|
||||||
MatZero(&result);
|
MatZero(&result);
|
||||||
|
|
||||||
glm_mat4_mul(glm_mat.ptr, x.glm_mat.ptr, result.glm_mat.ptr);
|
Mat4MulASM(&this, &x, &result);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -583,21 +583,14 @@ struct Quat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat4
|
void
|
||||||
Mat4MulASM(Mat4 l, Mat4 r)
|
Mat4MulASM(Mat4* l, Mat4* r, Mat4* result)
|
||||||
{
|
{
|
||||||
Mat4 result;
|
|
||||||
|
|
||||||
auto lp = &l;
|
|
||||||
auto rp = &r;
|
|
||||||
auto res = &result;
|
|
||||||
|
|
||||||
// TODO: fix this
|
|
||||||
asm @trusted
|
asm @trusted
|
||||||
{
|
{
|
||||||
mov R8, lp;
|
mov R8, l;
|
||||||
mov R9, rp;
|
mov R9, r;
|
||||||
mov R10, res;
|
mov R10, result;
|
||||||
|
|
||||||
movups XMM0, [R8];
|
movups XMM0, [R8];
|
||||||
movups XMM1, [R9+00];
|
movups XMM1, [R9+00];
|
||||||
@ -607,93 +600,91 @@ Mat4MulASM(Mat4 l, Mat4 r)
|
|||||||
|
|
||||||
movups XMM6, XMM1;
|
movups XMM6, XMM1;
|
||||||
shufps XMM6, XMM6, 0; // XMM6 = vec.xxxx;
|
shufps XMM6, XMM6, 0; // XMM6 = vec.xxxx;
|
||||||
mulps XMM6, XMM0; // XMM6 = col1;
|
mulps XMM6, XMM0; // XMM6 = col1;
|
||||||
|
|
||||||
movups XMM7, XMM2;
|
movups XMM7, XMM2;
|
||||||
shufps XMM7, XMM7, 0;
|
shufps XMM7, XMM7, 0;
|
||||||
mulps XMM7, XMM0; // XMM7 = col2;
|
mulps XMM7, XMM0; // XMM7 = col2;
|
||||||
|
|
||||||
movups XMM8, XMM3;
|
movups XMM8, XMM3;
|
||||||
shufps XMM8, XMM8, 0;
|
shufps XMM8, XMM8, 0;
|
||||||
mulps XMM8, XMM0; // XMM8 = col3;
|
mulps XMM8, XMM0; // XMM8 = col3;
|
||||||
|
|
||||||
movups XMM9, XMM3;
|
movups XMM9, XMM4;
|
||||||
shufps XMM9, XMM9, 0;
|
shufps XMM9, XMM9, 0;
|
||||||
mulps XMM9, XMM0; // XMM9 = col4;
|
mulps XMM9, XMM0; // XMM9 = col4;
|
||||||
|
|
||||||
movups XMM0, [R8+16];
|
movups XMM0, [R8+16];
|
||||||
|
|
||||||
movups XMM5, XMM1;
|
movups XMM5, XMM1;
|
||||||
shufps XMM5, XMM5, 85; // XMM5 = vec.yyyy;
|
shufps XMM5, XMM5, 85; // XMM5 = vec.yyyy;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM6, XMM5;
|
addps XMM6, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM2;
|
movups XMM5, XMM2;
|
||||||
shufps XMM5, XMM5, 85;
|
shufps XMM5, XMM5, 85;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM7, XMM5;
|
addps XMM7, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM3;
|
movups XMM5, XMM3;
|
||||||
shufps XMM5, XMM5, 85;
|
shufps XMM5, XMM5, 85;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM8, XMM5;
|
addps XMM8, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM4;
|
movups XMM5, XMM4;
|
||||||
shufps XMM5, XMM5, 85;
|
shufps XMM5, XMM5, 85;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM9, XMM5;
|
addps XMM9, XMM5;
|
||||||
|
|
||||||
movups XMM0, [R8+32];
|
movups XMM0, [R8+32];
|
||||||
|
|
||||||
movups XMM5, XMM1;
|
movups XMM5, XMM1;
|
||||||
shufps XMM5, XMM5, 170; // XMM5 = vec.zzzz;
|
shufps XMM5, XMM5, 170; // XMM5 = vec.zzzz;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM6, XMM5;
|
addps XMM6, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM2;
|
movups XMM5, XMM2;
|
||||||
shufps XMM5, XMM5, 170;
|
shufps XMM5, XMM5, 170;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM7, XMM5;
|
addps XMM7, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM3;
|
movups XMM5, XMM3;
|
||||||
shufps XMM5, XMM5, 170;
|
shufps XMM5, XMM5, 170;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM8, XMM5;
|
addps XMM8, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM4;
|
movups XMM5, XMM4;
|
||||||
shufps XMM5, XMM5, 170;
|
shufps XMM5, XMM5, 170;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM9, XMM5;
|
addps XMM9, XMM5;
|
||||||
|
|
||||||
movups XMM0, [R8+48];
|
movups XMM0, [R8+48];
|
||||||
|
|
||||||
movups XMM5, XMM1;
|
movups XMM5, XMM1;
|
||||||
shufps XMM5, XMM5, 255; // XMM5 = vec.wwww;
|
shufps XMM5, XMM5, 255; // XMM5 = vec.wwww;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM6, XMM5;
|
addps XMM6, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM2;
|
movups XMM5, XMM2;
|
||||||
shufps XMM5, XMM5, 255;
|
shufps XMM5, XMM5, 255;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM7, XMM5;
|
addps XMM7, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM3;
|
movups XMM5, XMM3;
|
||||||
shufps XMM5, XMM5, 255;
|
shufps XMM5, XMM5, 255;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM8, XMM5;
|
addps XMM8, XMM5;
|
||||||
|
|
||||||
movups XMM5, XMM4;
|
movups XMM5, XMM4;
|
||||||
shufps XMM5, XMM5, 255;
|
shufps XMM5, XMM5, 255;
|
||||||
mulps XMM5, XMM0;
|
mulps XMM5, XMM0;
|
||||||
addps XMM9, XMM5;
|
addps XMM9, XMM5;
|
||||||
|
|
||||||
movups [R10+00], XMM6;
|
movups [R10+00], XMM6;
|
||||||
movups [R10+16], XMM7;
|
movups [R10+16], XMM7;
|
||||||
movups [R10+32], XMM8;
|
movups [R10+32], XMM8;
|
||||||
movups [R10+48], XMM9;
|
movups [R10+48], XMM9;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pragma(inline) Mat4
|
pragma(inline) Mat4
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user