update dependencies, fix up vulkan initialization, start model loading to gpu
This commit is contained in:
parent
25899ff448
commit
bd4e1cc07e
BIN
assets/models/DamagedHelmet.glb
Normal file
BIN
assets/models/DamagedHelmet.glb
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4
dub.json
4
dub.json
@ -7,7 +7,7 @@
|
||||
"targetType": "executable",
|
||||
"targetName": "Gears",
|
||||
"targetPath": "build",
|
||||
"sourceFiles-linux": ["build/libvma.a", "build/libstb.a", "build/libm3d.a", "build/libcglm.a"],
|
||||
"sourceFiles-linux": ["build/libvma.a", "build/libstb.a", "build/libm3d.a", "build/libcglm.a", "build/libcgltf.a"],
|
||||
"sourceFiles-windows": [],
|
||||
"importPaths": ["src/gears", "src/dlib", "src/dlib/external/xxhash", "src/VulkanRenderer"],
|
||||
"sourcePaths": ["src/gears", "src/dlib", "src/dlib/external/xxhash", "src/VulkanRenderer"],
|
||||
@ -17,7 +17,7 @@
|
||||
"preGenerateCommands-linux": ["./build.sh"],
|
||||
"preGenerateCommands-windows": [],
|
||||
"dflags": ["-Xcc=-mno-sse", "-P-I/usr/include/freetype2", "-Jbuild", "-Jassets/fonts"],
|
||||
"dflags-dmd": ["-P=-DSTBI_NO_SIMD"]
|
||||
"dflags-dmd": []
|
||||
},
|
||||
{
|
||||
"name": "packer",
|
||||
|
||||
726
external/inteli/bmi2intrin.d
vendored
726
external/inteli/bmi2intrin.d
vendored
@ -1,363 +1,363 @@
|
||||
/**
|
||||
* BMI2 intrinsics.
|
||||
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=BMI2
|
||||
*
|
||||
* Copyright: Copyright Johan Engelen 2021.
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
|
||||
*/
|
||||
module inteli.bmi2intrin;
|
||||
|
||||
import inteli.internals;
|
||||
|
||||
nothrow @nogc pure @safe:
|
||||
|
||||
/// Copy all bits from unsigned 32-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
|
||||
uint _bzhi_u32 (uint a, uint index)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_bzhi_si(a, index);
|
||||
else
|
||||
return bzhi!uint(a, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
return bzhi!uint(a, index);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
|
||||
assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
|
||||
static assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
|
||||
assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
|
||||
static assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
|
||||
assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
|
||||
}
|
||||
|
||||
/// Copy all bits from unsigned 64-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
|
||||
ulong _bzhi_u64 (ulong a, uint index)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_bzhi_di(a, index);
|
||||
}
|
||||
else
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
else
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
|
||||
assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
|
||||
static assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
|
||||
assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
|
||||
static assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
|
||||
assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
|
||||
static assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
|
||||
assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
|
||||
}
|
||||
|
||||
// Helper function for BZHI
|
||||
private T bzhi(T)(T a, uint index)
|
||||
{
|
||||
/+
|
||||
n := index[7:0]
|
||||
dst := a
|
||||
IF (n < number of bits)
|
||||
dst[MSB:n] := 0
|
||||
FI
|
||||
+/
|
||||
enum numbits = T.sizeof*8;
|
||||
T dst = a;
|
||||
if (index < numbits)
|
||||
{
|
||||
T mask = (T(1) << index) - 1;
|
||||
dst &= mask;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
/// Multiply unsigned 32-bit integers `a` and `b`, store the low 32-bits of the result in dst,
|
||||
/// and store the high 32-bits in `hi`. This does not read or write arithmetic flags.
|
||||
/// Note: the implementation _does_ set arithmetic flags, unlike the instruction semantics say.
|
||||
/// But, those particular semantics don't exist at the level of intrinsics.
|
||||
uint _mulx_u32 (uint a, uint b, uint* hi)
|
||||
{
|
||||
// Note: that does NOT generate mulx with LDC, and there seems to be no way to do that for
|
||||
// some reason, even with LLVM IR.
|
||||
// Also same with GDC.
|
||||
ulong result = cast(ulong) a * b;
|
||||
*hi = cast(uint) (result >>> 32);
|
||||
return cast(uint)result;
|
||||
}
|
||||
@system unittest
|
||||
{
|
||||
uint hi;
|
||||
assert (_mulx_u32(0x1234_5678, 0x1234_5678, &hi) == 0x1DF4_D840);
|
||||
assert (hi == 0x014B_66DC);
|
||||
}
|
||||
|
||||
/// Multiply unsigned 64-bit integers `a` and `b`, store the low 64-bits of the result in dst, and
|
||||
/// store the high 64-bits in `hi`. This does not read or write arithmetic flags.
|
||||
/// Note: the implementation _does_ set arithmetic flags, unlike the instruction semantics say.
|
||||
/// But, those particular semantics don't exist at the level of intrinsics.
|
||||
ulong _mulx_u64 (ulong a, ulong b, ulong* hi)
|
||||
{
|
||||
/+
|
||||
dst[63:0] := (a * b)[63:0]
|
||||
MEM[hi+63:hi] := (a * b)[127:64]
|
||||
+/
|
||||
|
||||
static if (LDC_with_optimizations)
|
||||
{
|
||||
static if (__VERSION__ >= 2094)
|
||||
enum bool withLDCIR = true;
|
||||
else
|
||||
enum bool withLDCIR = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
enum bool withLDCIR = false;
|
||||
}
|
||||
|
||||
static if (withLDCIR)
|
||||
{
|
||||
// LDC x86: Generates mulx from -O0
|
||||
enum ir = `
|
||||
%4 = zext i64 %0 to i128
|
||||
%5 = zext i64 %1 to i128
|
||||
%6 = mul nuw i128 %5, %4
|
||||
%7 = lshr i128 %6, 64
|
||||
%8 = trunc i128 %7 to i64
|
||||
store i64 %8, i64* %2, align 8
|
||||
%9 = trunc i128 %6 to i64
|
||||
ret i64 %9`;
|
||||
return LDCInlineIR!(ir, ulong, ulong, ulong, ulong*)(a, b, hi);
|
||||
}
|
||||
else
|
||||
{
|
||||
/+ Straight-forward implementation with `ucent`:
|
||||
ucent result = cast(ucent) a * b;
|
||||
*hi = cast(ulong) ((result >>> 64) & 0xFFFF_FFFF_FFFF_FFFF);
|
||||
return cast(ulong) (result & 0xFFFF_FFFF_FFFF_FFFF);
|
||||
+/
|
||||
|
||||
/+
|
||||
Implementation using 64bit math is more complex...
|
||||
a * b = (a_high << 32 + a_low) * (b_high << 32 + b_low)
|
||||
= (a_high << 32)*(b_high << 32) + (a_high << 32)*b_low + a_low* (b_high << 32) + a_low*b_low
|
||||
= (a_high*b_high) << 64 + (a_high*b_low) << 32 + (a_low*b_high) << 32 + a_low*b_low
|
||||
= c2 << 64 + c11 << 32 + c12 << 32 + c0
|
||||
= z1 << 64 + z0
|
||||
// The sums may overflow, so we need to carry the carry (from low 64bits to high 64bits). We can do that
|
||||
// by separately creating the sum to get the high 32 bits of z0 using 64bit math. The high 32 bits of that
|
||||
// intermediate result is then the 'carry' that we need to add when calculating z1's sum.
|
||||
z0 = (c0 & 0xFFFF_FFFF) + (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) << 32
|
||||
The carry part from z0's sum = (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) >> 32
|
||||
z1 = c2 + (c11 >> 32 + c12 >> 32 + (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) >> 32
|
||||
+/
|
||||
|
||||
const ulong a_low = a & 0xFFFF_FFFF;
|
||||
const ulong a_high = a >>> 32;
|
||||
const ulong b_low = b & 0xFFFF_FFFF;
|
||||
const ulong b_high = b >>> 32;
|
||||
|
||||
const ulong c2 = a_high*b_high;
|
||||
const ulong c11 = a_high*b_low;
|
||||
const ulong c12 = a_low*b_high;
|
||||
const ulong c0 = a_low*b_low;
|
||||
|
||||
const ulong common_term = (c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF);
|
||||
const ulong z0 = (c0 & 0xFFFF_FFFF) + (common_term << 32);
|
||||
const ulong z1 = c2 + (c11 >> 32) + (c12 >> 32) + (common_term >> 32);
|
||||
|
||||
*hi = z1;
|
||||
return z0;
|
||||
}
|
||||
}
|
||||
@system unittest
|
||||
{
|
||||
ulong hi;
|
||||
// 0x1234_5678_9ABC_DEF0 * 0x1234_5678_9ABC_DEF0 == 0x14b_66dc_33f6_acdc_a5e2_0890_f2a5_2100
|
||||
assert (_mulx_u64(0x1234_5678_9ABC_DEF0, 0x1234_5678_9ABC_DEF0, &hi) == 0xa5e2_0890_f2a5_2100);
|
||||
assert (hi == 0x14b_66dc_33f6_acdc);
|
||||
}
|
||||
|
||||
/// Deposit contiguous low bits from unsigned 32-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
|
||||
uint _pdep_u32 (uint a, uint mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_pdep_si(a, mask);
|
||||
else
|
||||
return pdep!uint(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pdep!uint(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
|
||||
assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
|
||||
}
|
||||
|
||||
/// Deposit contiguous low bits from unsigned 64-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
|
||||
ulong _pdep_u64 (ulong a, ulong mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_pdep_di(a, mask);
|
||||
}
|
||||
else
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
|
||||
assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
|
||||
}
|
||||
|
||||
// Helper function for PDEP
|
||||
private T pdep(T)(T a, T mask)
|
||||
{
|
||||
/+
|
||||
tmp := a
|
||||
dst := 0
|
||||
m := 0
|
||||
k := 0
|
||||
DO WHILE m < 32
|
||||
IF mask[m] == 1
|
||||
dst[m] := tmp[k]
|
||||
k := k + 1
|
||||
FI
|
||||
m := m + 1
|
||||
OD
|
||||
+/
|
||||
T dst;
|
||||
T k_bitpos = 1;
|
||||
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
|
||||
foreach (m; 0..T.sizeof*8)
|
||||
{
|
||||
if (mask & m_bitpos)
|
||||
{
|
||||
dst |= (a & k_bitpos) ? m_bitpos : 0;
|
||||
k_bitpos <<= 1;
|
||||
}
|
||||
m_bitpos <<= 1;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
/// Extract bits from unsigned 32-bit integer `a` at the corresponding bit locations specified by
|
||||
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
|
||||
uint _pext_u32 (uint a, uint mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_pext_si(a, mask);
|
||||
else
|
||||
return pext!uint(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pext!uint(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
|
||||
assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
|
||||
}
|
||||
|
||||
/// Extract bits from unsigned 64-bit integer `a` at the corresponding bit locations specified by
|
||||
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
|
||||
ulong _pext_u64 (ulong a, ulong mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_pext_di(a, mask);
|
||||
}
|
||||
else
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
|
||||
assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
|
||||
}
|
||||
|
||||
// Helper function for PEXT
|
||||
private T pext(T)(T a, T mask)
|
||||
{
|
||||
/+
|
||||
tmp := a
|
||||
dst := 0
|
||||
m := 0
|
||||
k := 0
|
||||
DO WHILE m < number of bits in T
|
||||
IF mask[m] == 1
|
||||
dst[k] := tmp[m]
|
||||
k := k + 1
|
||||
FI
|
||||
m := m + 1
|
||||
OD
|
||||
+/
|
||||
T dst;
|
||||
T k_bitpos = 1;
|
||||
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
|
||||
foreach (m; 0..T.sizeof*8)
|
||||
{
|
||||
if (mask & m_bitpos)
|
||||
{
|
||||
dst |= (a & m_bitpos) ? k_bitpos : 0;
|
||||
k_bitpos <<= 1;
|
||||
}
|
||||
m_bitpos <<= 1;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
/**
|
||||
* BMI2 intrinsics.
|
||||
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=BMI2
|
||||
*
|
||||
* Copyright: Copyright Johan Engelen 2021.
|
||||
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
|
||||
*/
|
||||
module inteli.bmi2intrin;
|
||||
|
||||
import inteli.internals;
|
||||
|
||||
nothrow @nogc pure @safe:
|
||||
|
||||
/// Copy all bits from unsigned 32-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
|
||||
uint _bzhi_u32 (uint a, uint index)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_bzhi_si(a, index);
|
||||
else
|
||||
return bzhi!uint(a, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
return bzhi!uint(a, index);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
|
||||
assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
|
||||
static assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
|
||||
assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
|
||||
static assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
|
||||
assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
|
||||
}
|
||||
|
||||
/// Copy all bits from unsigned 64-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
|
||||
ulong _bzhi_u64 (ulong a, uint index)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_bzhi_di(a, index);
|
||||
}
|
||||
else
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
else
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
return bzhi!ulong(a, index);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
|
||||
assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
|
||||
static assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
|
||||
assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
|
||||
static assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
|
||||
assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
|
||||
static assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
|
||||
assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
|
||||
}
|
||||
|
||||
// Helper function for BZHI
|
||||
private T bzhi(T)(T a, uint index)
|
||||
{
|
||||
/+
|
||||
n := index[7:0]
|
||||
dst := a
|
||||
IF (n < number of bits)
|
||||
dst[MSB:n] := 0
|
||||
FI
|
||||
+/
|
||||
enum numbits = T.sizeof*8;
|
||||
T dst = a;
|
||||
if (index < numbits)
|
||||
{
|
||||
T mask = (T(1) << index) - 1;
|
||||
dst &= mask;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
/// Multiply unsigned 32-bit integers `a` and `b`, store the low 32-bits of the result in dst,
|
||||
/// and store the high 32-bits in `hi`. This does not read or write arithmetic flags.
|
||||
/// Note: the implementation _does_ set arithmetic flags, unlike the instruction semantics say.
|
||||
/// But, those particular semantics don't exist at the level of intrinsics.
|
||||
uint _mulx_u32 (uint a, uint b, uint* hi)
|
||||
{
|
||||
// Note: that does NOT generate mulx with LDC, and there seems to be no way to do that for
|
||||
// some reason, even with LLVM IR.
|
||||
// Also same with GDC.
|
||||
ulong result = cast(ulong) a * b;
|
||||
*hi = cast(uint) (result >>> 32);
|
||||
return cast(uint)result;
|
||||
}
|
||||
@system unittest
|
||||
{
|
||||
uint hi;
|
||||
assert (_mulx_u32(0x1234_5678, 0x1234_5678, &hi) == 0x1DF4_D840);
|
||||
assert (hi == 0x014B_66DC);
|
||||
}
|
||||
|
||||
/// Multiply unsigned 64-bit integers `a` and `b`, store the low 64-bits of the result in dst, and
|
||||
/// store the high 64-bits in `hi`. This does not read or write arithmetic flags.
|
||||
/// Note: the implementation _does_ set arithmetic flags, unlike the instruction semantics say.
|
||||
/// But, those particular semantics don't exist at the level of intrinsics.
|
||||
ulong _mulx_u64 (ulong a, ulong b, ulong* hi)
|
||||
{
|
||||
/+
|
||||
dst[63:0] := (a * b)[63:0]
|
||||
MEM[hi+63:hi] := (a * b)[127:64]
|
||||
+/
|
||||
|
||||
static if (LDC_with_optimizations)
|
||||
{
|
||||
static if (__VERSION__ >= 2094)
|
||||
enum bool withLDCIR = true;
|
||||
else
|
||||
enum bool withLDCIR = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
enum bool withLDCIR = false;
|
||||
}
|
||||
|
||||
static if (withLDCIR)
|
||||
{
|
||||
// LDC x86: Generates mulx from -O0
|
||||
enum ir = `
|
||||
%4 = zext i64 %0 to i128
|
||||
%5 = zext i64 %1 to i128
|
||||
%6 = mul nuw i128 %5, %4
|
||||
%7 = lshr i128 %6, 64
|
||||
%8 = trunc i128 %7 to i64
|
||||
store i64 %8, i64* %2, align 8
|
||||
%9 = trunc i128 %6 to i64
|
||||
ret i64 %9`;
|
||||
return LDCInlineIR!(ir, ulong, ulong, ulong, ulong*)(a, b, hi);
|
||||
}
|
||||
else
|
||||
{
|
||||
/+ Straight-forward implementation with `ucent`:
|
||||
ucent result = cast(ucent) a * b;
|
||||
*hi = cast(ulong) ((result >>> 64) & 0xFFFF_FFFF_FFFF_FFFF);
|
||||
return cast(ulong) (result & 0xFFFF_FFFF_FFFF_FFFF);
|
||||
+/
|
||||
|
||||
/+
|
||||
Implementation using 64bit math is more complex...
|
||||
a * b = (a_high << 32 + a_low) * (b_high << 32 + b_low)
|
||||
= (a_high << 32)*(b_high << 32) + (a_high << 32)*b_low + a_low* (b_high << 32) + a_low*b_low
|
||||
= (a_high*b_high) << 64 + (a_high*b_low) << 32 + (a_low*b_high) << 32 + a_low*b_low
|
||||
= c2 << 64 + c11 << 32 + c12 << 32 + c0
|
||||
= z1 << 64 + z0
|
||||
// The sums may overflow, so we need to carry the carry (from low 64bits to high 64bits). We can do that
|
||||
// by separately creating the sum to get the high 32 bits of z0 using 64bit math. The high 32 bits of that
|
||||
// intermediate result is then the 'carry' that we need to add when calculating z1's sum.
|
||||
z0 = (c0 & 0xFFFF_FFFF) + (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) << 32
|
||||
The carry part from z0's sum = (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) >> 32
|
||||
z1 = c2 + (c11 >> 32 + c12 >> 32 + (c0 >> 32 + c11 & 0xFFFF_FFFF + c12 & 0xFFFF_FFFF ) >> 32
|
||||
+/
|
||||
|
||||
const ulong a_low = a & 0xFFFF_FFFF;
|
||||
const ulong a_high = a >>> 32;
|
||||
const ulong b_low = b & 0xFFFF_FFFF;
|
||||
const ulong b_high = b >>> 32;
|
||||
|
||||
const ulong c2 = a_high*b_high;
|
||||
const ulong c11 = a_high*b_low;
|
||||
const ulong c12 = a_low*b_high;
|
||||
const ulong c0 = a_low*b_low;
|
||||
|
||||
const ulong common_term = (c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF);
|
||||
const ulong z0 = (c0 & 0xFFFF_FFFF) + (common_term << 32);
|
||||
const ulong z1 = c2 + (c11 >> 32) + (c12 >> 32) + (common_term >> 32);
|
||||
|
||||
*hi = z1;
|
||||
return z0;
|
||||
}
|
||||
}
|
||||
@system unittest
|
||||
{
|
||||
ulong hi;
|
||||
// 0x1234_5678_9ABC_DEF0 * 0x1234_5678_9ABC_DEF0 == 0x14b_66dc_33f6_acdc_a5e2_0890_f2a5_2100
|
||||
assert (_mulx_u64(0x1234_5678_9ABC_DEF0, 0x1234_5678_9ABC_DEF0, &hi) == 0xa5e2_0890_f2a5_2100);
|
||||
assert (hi == 0x14b_66dc_33f6_acdc);
|
||||
}
|
||||
|
||||
/// Deposit contiguous low bits from unsigned 32-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
|
||||
uint _pdep_u32 (uint a, uint mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_pdep_si(a, mask);
|
||||
else
|
||||
return pdep!uint(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pdep!uint(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
|
||||
assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
|
||||
}
|
||||
|
||||
/// Deposit contiguous low bits from unsigned 64-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
|
||||
ulong _pdep_u64 (ulong a, ulong mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_pdep_di(a, mask);
|
||||
}
|
||||
else
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pdep!ulong(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
|
||||
assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
|
||||
}
|
||||
|
||||
// Helper function for PDEP
|
||||
private T pdep(T)(T a, T mask)
|
||||
{
|
||||
/+
|
||||
tmp := a
|
||||
dst := 0
|
||||
m := 0
|
||||
k := 0
|
||||
DO WHILE m < 32
|
||||
IF mask[m] == 1
|
||||
dst[m] := tmp[k]
|
||||
k := k + 1
|
||||
FI
|
||||
m := m + 1
|
||||
OD
|
||||
+/
|
||||
T dst;
|
||||
T k_bitpos = 1;
|
||||
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
|
||||
foreach (m; 0..T.sizeof*8)
|
||||
{
|
||||
if (mask & m_bitpos)
|
||||
{
|
||||
dst |= (a & k_bitpos) ? m_bitpos : 0;
|
||||
k_bitpos <<= 1;
|
||||
}
|
||||
m_bitpos <<= 1;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
/// Extract bits from unsigned 32-bit integer `a` at the corresponding bit locations specified by
|
||||
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
|
||||
uint _pext_u32 (uint a, uint mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
return __builtin_ia32_pext_si(a, mask);
|
||||
else
|
||||
return pext!uint(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pext!uint(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
|
||||
assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
|
||||
}
|
||||
|
||||
/// Extract bits from unsigned 64-bit integer `a` at the corresponding bit locations specified by
|
||||
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
|
||||
ulong _pext_u64 (ulong a, ulong mask)
|
||||
{
|
||||
static if (GDC_or_LDC_with_BMI2)
|
||||
{
|
||||
if (!__ctfe)
|
||||
{
|
||||
version(X86_64)
|
||||
{
|
||||
// This instruction not available in 32-bit x86.
|
||||
return __builtin_ia32_pext_di(a, mask);
|
||||
}
|
||||
else
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
return pext!ulong(a, mask);
|
||||
}
|
||||
}
|
||||
unittest
|
||||
{
|
||||
static assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
|
||||
assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
|
||||
}
|
||||
|
||||
// Helper function for PEXT
|
||||
private T pext(T)(T a, T mask)
|
||||
{
|
||||
/+
|
||||
tmp := a
|
||||
dst := 0
|
||||
m := 0
|
||||
k := 0
|
||||
DO WHILE m < number of bits in T
|
||||
IF mask[m] == 1
|
||||
dst[k] := tmp[m]
|
||||
k := k + 1
|
||||
FI
|
||||
m := m + 1
|
||||
OD
|
||||
+/
|
||||
T dst;
|
||||
T k_bitpos = 1;
|
||||
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
|
||||
foreach (m; 0..T.sizeof*8)
|
||||
{
|
||||
if (mask & m_bitpos)
|
||||
{
|
||||
dst |= (a & m_bitpos) ? k_bitpos : 0;
|
||||
k_bitpos <<= 1;
|
||||
}
|
||||
m_bitpos <<= 1;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
4430
external/inteli/smmintrin.d
vendored
4430
external/inteli/smmintrin.d
vendored
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
Subproject commit d46741a48033b5136fa189c1b80a574986e68f64
|
||||
Subproject commit c42238a456f5048c7d1b2d5ebd71ecf13bb10ece
|
||||
2
src/dlib
2
src/dlib
@ -1 +1 @@
|
||||
Subproject commit c83ffabce69071a3e7a0af3f26aa420082eeda1f
|
||||
Subproject commit 493c17cba26952861ae4c5402f1676013b8317c6
|
||||
@ -1,4 +1,5 @@
|
||||
import dlib;
|
||||
/*
|
||||
public import vulkan : PlatformHandles;
|
||||
import vulkan : Destroy;
|
||||
import vulkan;
|
||||
@ -414,6 +415,7 @@ DrawRect(Game* g, f32 p0_x, f32 p0_y, f32 p1_x, f32 p1_y, Vec4 col)
|
||||
AddUIIndices(g);
|
||||
}
|
||||
|
||||
/*
|
||||
// TODO: integrate this with vulkan again
|
||||
Model
|
||||
LoadModel(Game* g, string name)
|
||||
@ -697,3 +699,4 @@ ReadModel(Game* g, string name)
|
||||
const(char)[] mat_name = m3d.material[i].name[0 .. strlen(m3d.material[i].name)];
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
@ -10,8 +10,6 @@ ImageView[256] TEXTURES;
|
||||
Buffer[256] MATERIALS;
|
||||
Buffer[256] MODEL_STATES;
|
||||
|
||||
DescIndices[DESC_SET_MAX] DESC_INDICES];
|
||||
|
||||
struct GameState
|
||||
{
|
||||
RenderState rds;
|
||||
@ -29,6 +27,7 @@ struct RenderState
|
||||
Pipeline[PID.Max] pipelines;
|
||||
DescSetLayout desc_layout_globals;
|
||||
DescSetLayout desc_layout_resources;
|
||||
DescSetLayout desc_layout_state;
|
||||
DescSet[2] desc_set_globals;
|
||||
PipelineLayout pipeline_layout_pbr;
|
||||
|
||||
@ -41,13 +40,6 @@ struct RenderState
|
||||
Buffer globals_buffer;
|
||||
}
|
||||
|
||||
struct DescIndices
|
||||
{
|
||||
u32 tex;
|
||||
u32 mat;
|
||||
u32 state;
|
||||
}
|
||||
|
||||
struct ShaderGlobals
|
||||
{
|
||||
Vec4 ambient;
|
||||
@ -57,41 +49,6 @@ struct ShaderGlobals
|
||||
f32 alpha = 0.0;
|
||||
}
|
||||
|
||||
struct MeshPart
|
||||
{
|
||||
u32 mat;
|
||||
u32 offset;
|
||||
u32 length;
|
||||
PushConst pc;
|
||||
|
||||
alias pc this;
|
||||
}
|
||||
|
||||
struct ModelData
|
||||
{
|
||||
MeshPart[] parts;
|
||||
ModelState state;
|
||||
Vertex[] v;
|
||||
u32[] idx;
|
||||
TextureData[] tex;
|
||||
}
|
||||
|
||||
struct TextureData
|
||||
{
|
||||
u8[] name;
|
||||
u8[] data;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 ch;
|
||||
}
|
||||
|
||||
struct Model
|
||||
{
|
||||
Buffer v_buf;
|
||||
Buffer i_buf;
|
||||
MeshPart[] parts;
|
||||
}
|
||||
|
||||
struct ModelRenderInfo
|
||||
{
|
||||
PushConst pc;
|
||||
@ -105,15 +62,6 @@ struct ModelState
|
||||
Mat4 matrix;
|
||||
}
|
||||
|
||||
struct Material
|
||||
{
|
||||
Vec4 ambient;
|
||||
Vec4 diffuse;
|
||||
Vec4 specular;
|
||||
float shininess;
|
||||
float alpha;
|
||||
}
|
||||
|
||||
enum PBRMod : u32
|
||||
{
|
||||
AlbedoValue = 0x0001,
|
||||
@ -182,17 +130,14 @@ struct PushConst
|
||||
}
|
||||
}
|
||||
|
||||
struct Vertex
|
||||
{
|
||||
Vec4 col;
|
||||
Vec4 tangent;
|
||||
Vec3 pos;
|
||||
Vec3 normal;
|
||||
Vec2 uv;
|
||||
}
|
||||
|
||||
ModelData g_box;
|
||||
|
||||
void
|
||||
RunCycle(GameState* g)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
GameState
|
||||
InitGame(PlatformWindow* window)
|
||||
{
|
||||
@ -225,7 +170,7 @@ Init(RenderState* rds, PlatformWindow* window)
|
||||
{ binding: 2, descriptorType: DT.StorageImage, descriptorCount: 1, stageFlags: SS.All },
|
||||
];
|
||||
|
||||
DescLayoutBinding[3] resource_bindings = [
|
||||
DescLayoutBinding[6] resource_bindings = [
|
||||
{ binding: 0, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
|
||||
{ binding: 1, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
|
||||
{ binding: 2, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
|
||||
@ -234,8 +179,12 @@ Init(RenderState* rds, PlatformWindow* window)
|
||||
{ binding: 5, descriptorType: DT.Uniform, descriptorCount: 1, stageFlags: SS.All },
|
||||
];
|
||||
|
||||
DescLayoutBinding[1] state_bindings = [
|
||||
{ binding: 0, descriptorType: DT.Uniform, descriptorCount: 1, stageFlags: SS.All },
|
||||
];
|
||||
|
||||
Attribute[5] attributes = [
|
||||
{ binding: 0, location: 0, format: FMT.RGBA_F32, offset: Vertex.col.offsetof },
|
||||
{ binding: 0, location: 0, format: FMT.RGBA_F32, offset: Vertex.color.offsetof },
|
||||
{ binding: 0, location: 1, format: FMT.RGBA_F32, offset: Vertex.tangent.offsetof },
|
||||
{ binding: 0, location: 2, format: FMT.RGB_F32, offset: Vertex.pos.offsetof },
|
||||
{ binding: 0, location: 3, format: FMT.RGB_F32, offset: Vertex.normal.offsetof },
|
||||
@ -251,8 +200,9 @@ Init(RenderState* rds, PlatformWindow* window)
|
||||
|
||||
rds.desc_layout_globals = CreateDescSetLayout(&rds.rd, global_bindings);
|
||||
rds.desc_layout_resources = CreateDescSetLayout(&rds.rd, resource_bindings);
|
||||
rds.desc_layout_state = CreateDescSetLayout(&rds.rd, state_bindings);
|
||||
|
||||
rds.pipeline_layout_pbr = CreatePipelineLayout(&rds.rd, [rds.desc_layout_globals, rds.desc_layout_resources], PushConst.sizeof);
|
||||
rds.pipeline_layout_pbr = CreatePipelineLayout(&rds.rd, [rds.desc_layout_globals, rds.desc_layout_resources, rds.desc_layout_state], PushConst.sizeof);
|
||||
|
||||
foreach(i; 0 .. 2)
|
||||
{
|
||||
@ -273,8 +223,8 @@ Init(RenderState* rds, PlatformWindow* window)
|
||||
};
|
||||
|
||||
GfxPipelineInfo pbr_info = {
|
||||
vertex_shader: LoadAssetData(&rds.frame_arenas[0], "shaders/pbr.vert.spv"),
|
||||
frag_shader: LoadAssetData(&rds.frame_arenas[0], "shaders/pbr.frag.spv"),
|
||||
vertex_shader: LoadFile(&rds.frame_arenas[0], "assets/shaders/pbr.vert.spv"),
|
||||
frag_shader: LoadFile(&rds.frame_arenas[0], "assets/shaders/pbr.frag.spv"),
|
||||
input_rate: IR.Vertex,
|
||||
input_rate_stride: Vertex.sizeof,
|
||||
layout: rds.pipeline_layout_pbr,
|
||||
@ -315,7 +265,7 @@ Init(RenderState* rds, PlatformWindow* window)
|
||||
|
||||
CreateBuffer(&rds.rd, &rds.globals_buffer, BT.Uniform, ShaderGlobals.sizeof, false);
|
||||
|
||||
g_box = MakeBox
|
||||
ModelData md = LoadGLTF(&rds.frame_arenas[0], "assets/models/DamagedHelmet.glb");
|
||||
}
|
||||
|
||||
PipelineID
|
||||
@ -381,6 +331,7 @@ GetPBRMod(bool albedo = false, bool ambient = false, bool specular = false, bool
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
ModelData
|
||||
MakeBox(RenderState* rds, f32 width, f32 height, Vec4 col)
|
||||
{
|
||||
@ -428,9 +379,9 @@ Model
|
||||
Upload(RenderState* rds, ModelData* data)
|
||||
{
|
||||
Model model;
|
||||
u32[] tex_idx = Alloc!(&rds.frame_arenas[0], data.text.length);
|
||||
u32[] mat_idx = Alloc!(&rds.frame_arenas[0], data.materials.length);
|
||||
u32[] state_idx = Alloc!(&rds.frame_arenas[0], data.model_states.length);
|
||||
u32[] tex_idx = Alloc!(u32)(&rds.frame_arenas[0], data.text.length);
|
||||
u32[] mat_idx = Alloc!(u32)(&rds.frame_arenas[0], data.materials.length);
|
||||
u32[] state_idx = Alloc!(u32)(&rds.frame_arenas[0], data.model_states.length);
|
||||
|
||||
bool result = true;
|
||||
|
||||
@ -450,20 +401,18 @@ Upload(RenderState* rds, ModelData* data)
|
||||
{
|
||||
Buffer* buf = &rds.materials[rds.imat++];
|
||||
CreateBuffer(&rds.rd, buf, BT.Uniform, Material.sizeof);
|
||||
result = Transfer(&rds.rd, buf, )
|
||||
//result = Transfer(&rds.rd, buf, )
|
||||
}
|
||||
|
||||
for(u64 i = 0; i < data.model_states.length; i += 1)
|
||||
{
|
||||
Buffer* buf = &rds.model_states[rds.istate++];
|
||||
CreateBuffer(&rds)
|
||||
//CreateBuffer(&rds)
|
||||
}
|
||||
|
||||
model.parts = data.parts;
|
||||
}
|
||||
|
||||
DescIndices*
|
||||
GetDescIndices()
|
||||
*/
|
||||
|
||||
unittest
|
||||
{
|
||||
|
||||
@ -44,7 +44,8 @@ layout (set = 2, binding = 4) uniform MaterialData {
|
||||
float shininess;
|
||||
float alpha;
|
||||
} Material;
|
||||
layout (set = 2, binding = 5) uniform ModelState {
|
||||
|
||||
layout (set = 3, binding = 0) uniform ModelState {
|
||||
mat4 model_matrix;
|
||||
} State;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user