update dependencies, fix up vulkan initialization, start model loading to gpu

matthew 2025-11-21 18:45:12 +11:00
parent 25899ff448
commit bd4e1cc07e
12 changed files with 2612 additions and 2659 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.


@ -7,7 +7,7 @@
"targetType": "executable",
"targetName": "Gears",
"targetPath": "build",
"sourceFiles-linux": ["build/libvma.a", "build/libstb.a", "build/libm3d.a", "build/libcglm.a"],
"sourceFiles-linux": ["build/libvma.a", "build/libstb.a", "build/libm3d.a", "build/libcglm.a", "build/libcgltf.a"],
"sourceFiles-windows": [],
"importPaths": ["src/gears", "src/dlib", "src/dlib/external/xxhash", "src/VulkanRenderer"],
"sourcePaths": ["src/gears", "src/dlib", "src/dlib/external/xxhash", "src/VulkanRenderer"],
@ -17,7 +17,7 @@
"preGenerateCommands-linux": ["./build.sh"],
"preGenerateCommands-windows": [],
"dflags": ["-Xcc=-mno-sse", "-P-I/usr/include/freetype2", "-Jbuild", "-Jassets/fonts"],
"dflags-dmd": ["-P=-DSTBI_NO_SIMD"]
"dflags-dmd": []
},
{
"name": "packer",


@ -1,363 +1,363 @@
/**
* BMI2 intrinsics.
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#othertechs=BMI2
*
* Copyright: Copyright Johan Engelen 2021.
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module inteli.bmi2intrin;
import inteli.internals;
nothrow @nogc pure @safe:
/// Copy all bits from unsigned 32-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
uint _bzhi_u32 (uint a, uint index)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
return __builtin_ia32_bzhi_si(a, index);
else
return bzhi!uint(a, index);
}
else
{
return bzhi!uint(a, index);
}
}
unittest
{
static assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
assert (_bzhi_u32(0x1234_5678, 5) == 0x18);
static assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
assert (_bzhi_u32(0x1234_5678, 10) == 0x278);
static assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
assert (_bzhi_u32(0x1234_5678, 21) == 0x14_5678);
}
/// Copy all bits from unsigned 64-bit integer `a` to dst, and reset (set to 0) the high bits in dst starting at index.
ulong _bzhi_u64 (ulong a, uint index)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
{
version(X86_64)
{
// This instruction is not available in 32-bit x86.
return __builtin_ia32_bzhi_di(a, index);
}
else
return bzhi!ulong(a, index);
}
else
return bzhi!ulong(a, index);
}
else
{
return bzhi!ulong(a, index);
}
}
unittest
{
static assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
assert (_bzhi_u64(0x1234_5678, 5) == 0x18);
static assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
assert (_bzhi_u64(0x1234_5678, 10) == 0x278);
static assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
assert (_bzhi_u64(0x1234_5678, 21) == 0x14_5678);
static assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
assert (_bzhi_u64(0x8765_4321_1234_5678, 54) == 0x0025_4321_1234_5678);
}
// Helper function for BZHI
private T bzhi(T)(T a, uint index)
{
/+
n := index[7:0]
dst := a
IF (n < number of bits)
dst[MSB:n] := 0
FI
+/
enum numbits = T.sizeof*8;
T dst = a;
if (index < numbits)
{
T mask = (T(1) << index) - 1;
dst &= mask;
}
return dst;
}
/// Multiply unsigned 32-bit integers `a` and `b`, store the low 32-bits of the result in dst,
/// and store the high 32-bits in `hi`. This does not read or write arithmetic flags.
/// Note: unlike the instruction's documented semantics, this implementation does set arithmetic flags;
/// those flag semantics are not observable at the level of intrinsics.
uint _mulx_u32 (uint a, uint b, uint* hi)
{
// Note: this does NOT generate mulx with LDC; there seems to be no way to force it,
// not even with LLVM IR. The same applies to GDC.
ulong result = cast(ulong) a * b;
*hi = cast(uint) (result >>> 32);
return cast(uint)result;
}
@system unittest
{
uint hi;
assert (_mulx_u32(0x1234_5678, 0x1234_5678, &hi) == 0x1DF4_D840);
assert (hi == 0x014B_66DC);
}
/// Multiply unsigned 64-bit integers `a` and `b`, store the low 64-bits of the result in dst, and
/// store the high 64-bits in `hi`. This does not read or write arithmetic flags.
/// Note: unlike the instruction's documented semantics, this implementation does set arithmetic flags;
/// those flag semantics are not observable at the level of intrinsics.
ulong _mulx_u64 (ulong a, ulong b, ulong* hi)
{
/+
dst[63:0] := (a * b)[63:0]
MEM[hi+63:hi] := (a * b)[127:64]
+/
static if (LDC_with_optimizations)
{
static if (__VERSION__ >= 2094)
enum bool withLDCIR = true;
else
enum bool withLDCIR = false;
}
else
{
enum bool withLDCIR = false;
}
static if (withLDCIR)
{
// LDC x86: Generates mulx from -O0
enum ir = `
%4 = zext i64 %0 to i128
%5 = zext i64 %1 to i128
%6 = mul nuw i128 %5, %4
%7 = lshr i128 %6, 64
%8 = trunc i128 %7 to i64
store i64 %8, i64* %2, align 8
%9 = trunc i128 %6 to i64
ret i64 %9`;
return LDCInlineIR!(ir, ulong, ulong, ulong, ulong*)(a, b, hi);
}
else
{
/+ Straight-forward implementation with `ucent`:
ucent result = cast(ucent) a * b;
*hi = cast(ulong) ((result >>> 64) & 0xFFFF_FFFF_FFFF_FFFF);
return cast(ulong) (result & 0xFFFF_FFFF_FFFF_FFFF);
+/
/+
Implementation using 64bit math is more complex...
a * b = (a_high << 32 + a_low) * (b_high << 32 + b_low)
= (a_high << 32)*(b_high << 32) + (a_high << 32)*b_low + a_low* (b_high << 32) + a_low*b_low
= (a_high*b_high) << 64 + (a_high*b_low) << 32 + (a_low*b_high) << 32 + a_low*b_low
= c2 << 64 + c11 << 32 + c12 << 32 + c0
= z1 << 64 + z0
// The sums may overflow, so we need to carry the carry (from low 64bits to high 64bits). We can do that
// by separately creating the sum to get the high 32 bits of z0 using 64bit math. The high 32 bits of that
// intermediate result is then the 'carry' that we need to add when calculating z1's sum.
z0 = (c0 & 0xFFFF_FFFF) + (((c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF)) << 32)
The carry part from z0's sum = ((c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF)) >> 32
z1 = c2 + (c11 >> 32) + (c12 >> 32) + (((c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF)) >> 32)
+/
const ulong a_low = a & 0xFFFF_FFFF;
const ulong a_high = a >>> 32;
const ulong b_low = b & 0xFFFF_FFFF;
const ulong b_high = b >>> 32;
const ulong c2 = a_high*b_high;
const ulong c11 = a_high*b_low;
const ulong c12 = a_low*b_high;
const ulong c0 = a_low*b_low;
const ulong common_term = (c0 >> 32) + (c11 & 0xFFFF_FFFF) + (c12 & 0xFFFF_FFFF);
const ulong z0 = (c0 & 0xFFFF_FFFF) + (common_term << 32);
const ulong z1 = c2 + (c11 >> 32) + (c12 >> 32) + (common_term >> 32);
*hi = z1;
return z0;
}
}
@system unittest
{
ulong hi;
// 0x1234_5678_9ABC_DEF0 * 0x1234_5678_9ABC_DEF0 == 0x14b_66dc_33f6_acdc_a5e2_0890_f2a5_2100
assert (_mulx_u64(0x1234_5678_9ABC_DEF0, 0x1234_5678_9ABC_DEF0, &hi) == 0xa5e2_0890_f2a5_2100);
assert (hi == 0x14b_66dc_33f6_acdc);
}
/// Deposit contiguous low bits from unsigned 32-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
uint _pdep_u32 (uint a, uint mask)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
return __builtin_ia32_pdep_si(a, mask);
else
return pdep!uint(a, mask);
}
else
{
return pdep!uint(a, mask);
}
}
unittest
{
static assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
assert (_pdep_u32(0x1234_5678, 0x0F0F_0F0F) == 0x0506_0708);
}
/// Deposit contiguous low bits from unsigned 64-bit integer `a` to dst at the corresponding bit locations specified by `mask`; all other bits in dst are set to zero.
ulong _pdep_u64 (ulong a, ulong mask)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
{
version(X86_64)
{
// This instruction is not available in 32-bit x86.
return __builtin_ia32_pdep_di(a, mask);
}
else
return pdep!ulong(a, mask);
}
else
return pdep!ulong(a, mask);
}
else
{
return pdep!ulong(a, mask);
}
}
unittest
{
static assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
assert (_pdep_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x0807_0605_0403_0201);
}
// Helper function for PDEP
private T pdep(T)(T a, T mask)
{
/+
tmp := a
dst := 0
m := 0
k := 0
DO WHILE m < number of bits in T
IF mask[m] == 1
dst[m] := tmp[k]
k := k + 1
FI
m := m + 1
OD
+/
T dst;
T k_bitpos = 1;
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
foreach (m; 0..T.sizeof*8)
{
if (mask & m_bitpos)
{
dst |= (a & k_bitpos) ? m_bitpos : 0;
k_bitpos <<= 1;
}
m_bitpos <<= 1;
}
return dst;
}
/// Extract bits from unsigned 32-bit integer `a` at the corresponding bit locations specified by
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
uint _pext_u32 (uint a, uint mask)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
return __builtin_ia32_pext_si(a, mask);
else
return pext!uint(a, mask);
}
else
{
return pext!uint(a, mask);
}
}
unittest
{
static assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
assert (_pext_u32(0x1234_5678, 0x0F0F_0F0F) == 0x2468);
}
/// Extract bits from unsigned 64-bit integer `a` at the corresponding bit locations specified by
/// `mask` to contiguous low bits in dst; the remaining upper bits in dst are set to zero.
ulong _pext_u64 (ulong a, ulong mask)
{
static if (GDC_or_LDC_with_BMI2)
{
if (!__ctfe)
{
version(X86_64)
{
// This instruction is not available in 32-bit x86.
return __builtin_ia32_pext_di(a, mask);
}
else
return pext!ulong(a, mask);
}
else
return pext!ulong(a, mask);
}
else
{
return pext!ulong(a, mask);
}
}
unittest
{
static assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
assert (_pext_u64(0x1234_5678_8765_4321, 0x0F0F_0F0F_0F0F_0F0F) == 0x2468_7531);
}
// Helper function for PEXT
private T pext(T)(T a, T mask)
{
/+
tmp := a
dst := 0
m := 0
k := 0
DO WHILE m < number of bits in T
IF mask[m] == 1
dst[k] := tmp[m]
k := k + 1
FI
m := m + 1
OD
+/
T dst;
T k_bitpos = 1;
T m_bitpos = 1; // for each iteration, this has one bit set to 1 in the position probed
foreach (m; 0..T.sizeof*8)
{
if (mask & m_bitpos)
{
dst |= (a & m_bitpos) ? k_bitpos : 0;
k_bitpos <<= 1;
}
m_bitpos <<= 1;
}
return dst;
}
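
For orientation, a minimal usage sketch of the intrinsics above, assuming the vendored inteli.bmi2intrin module is importable as-is (the software fallbacks mean this also runs in CTFE and on non-BMI2 targets):

unittest
{
    import inteli.bmi2intrin;

    // _bzhi_u32 keeps only the low `index` bits.
    assert(_bzhi_u32(0xDEAD_BEEF, 8) == 0xEF);

    // _pext_u32 gathers the mask-selected bits down into the low end;
    // _pdep_u32 scatters low bits back out to the mask-selected positions.
    uint packed = _pext_u32(0x1234_5678, 0x0F0F_0F0F); // low nibble of each byte
    assert(packed == 0x2468);
    uint spread = _pdep_u32(packed, 0x0F0F_0F0F);
    assert(spread == 0x0204_0608);
}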

File diff suppressed because it is too large.

@ -1 +1 @@
Subproject commit d46741a48033b5136fa189c1b80a574986e68f64
Subproject commit c42238a456f5048c7d1b2d5ebd71ecf13bb10ece

@ -1 +1 @@
Subproject commit c83ffabce69071a3e7a0af3f26aa420082eeda1f
Subproject commit 493c17cba26952861ae4c5402f1676013b8317c6


@ -1,4 +1,5 @@
import dlib;
/*
public import vulkan : PlatformHandles;
import vulkan : Destroy;
import vulkan;
@ -414,6 +415,7 @@ DrawRect(Game* g, f32 p0_x, f32 p0_y, f32 p1_x, f32 p1_y, Vec4 col)
AddUIIndices(g);
}
/*
// TODO: integrate this with vulkan again
Model
LoadModel(Game* g, string name)
@ -697,3 +699,4 @@ ReadModel(Game* g, string name)
const(char)[] mat_name = m3d.material[i].name[0 .. strlen(m3d.material[i].name)];
}
}
*/


@ -10,8 +10,6 @@ ImageView[256] TEXTURES;
Buffer[256] MATERIALS;
Buffer[256] MODEL_STATES;
DescIndices[DESC_SET_MAX] DESC_INDICES;
struct GameState
{
RenderState rds;
@ -29,6 +27,7 @@ struct RenderState
Pipeline[PID.Max] pipelines;
DescSetLayout desc_layout_globals;
DescSetLayout desc_layout_resources;
DescSetLayout desc_layout_state;
DescSet[2] desc_set_globals;
PipelineLayout pipeline_layout_pbr;
@ -41,13 +40,6 @@ struct RenderState
Buffer globals_buffer;
}
struct DescIndices
{
u32 tex;
u32 mat;
u32 state;
}
struct ShaderGlobals
{
Vec4 ambient;
@ -57,41 +49,6 @@ struct ShaderGlobals
f32 alpha = 0.0;
}
struct MeshPart
{
u32 mat;
u32 offset;
u32 length;
PushConst pc;
alias pc this;
}
struct ModelData
{
MeshPart[] parts;
ModelState state;
Vertex[] v;
u32[] idx;
TextureData[] tex;
}
struct TextureData
{
u8[] name;
u8[] data;
u32 width;
u32 height;
u32 ch;
}
struct Model
{
Buffer v_buf;
Buffer i_buf;
MeshPart[] parts;
}
struct ModelRenderInfo
{
PushConst pc;
@ -105,15 +62,6 @@ struct ModelState
Mat4 matrix;
}
struct Material
{
Vec4 ambient;
Vec4 diffuse;
Vec4 specular;
float shininess;
float alpha;
}
enum PBRMod : u32
{
AlbedoValue = 0x0001,
@ -182,17 +130,14 @@ struct PushConst
}
}
struct Vertex
{
Vec4 col;
Vec4 tangent;
Vec3 pos;
Vec3 normal;
Vec2 uv;
}
ModelData g_box;
void
RunCycle(GameState* g)
{
}
GameState
InitGame(PlatformWindow* window)
{
@ -225,7 +170,7 @@ Init(RenderState* rds, PlatformWindow* window)
{ binding: 2, descriptorType: DT.StorageImage, descriptorCount: 1, stageFlags: SS.All },
];
DescLayoutBinding[3] resource_bindings = [
DescLayoutBinding[6] resource_bindings = [
{ binding: 0, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
{ binding: 1, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
{ binding: 2, descriptorType: DT.Image, descriptorCount: 1, stageFlags: SS.All },
@ -234,8 +179,12 @@ Init(RenderState* rds, PlatformWindow* window)
{ binding: 5, descriptorType: DT.Uniform, descriptorCount: 1, stageFlags: SS.All },
];
DescLayoutBinding[1] state_bindings = [
{ binding: 0, descriptorType: DT.Uniform, descriptorCount: 1, stageFlags: SS.All },
];
Attribute[5] attributes = [
{ binding: 0, location: 0, format: FMT.RGBA_F32, offset: Vertex.col.offsetof },
{ binding: 0, location: 0, format: FMT.RGBA_F32, offset: Vertex.color.offsetof },
{ binding: 0, location: 1, format: FMT.RGBA_F32, offset: Vertex.tangent.offsetof },
{ binding: 0, location: 2, format: FMT.RGB_F32, offset: Vertex.pos.offsetof },
{ binding: 0, location: 3, format: FMT.RGB_F32, offset: Vertex.normal.offsetof },
@ -251,8 +200,9 @@ Init(RenderState* rds, PlatformWindow* window)
rds.desc_layout_globals = CreateDescSetLayout(&rds.rd, global_bindings);
rds.desc_layout_resources = CreateDescSetLayout(&rds.rd, resource_bindings);
rds.desc_layout_state = CreateDescSetLayout(&rds.rd, state_bindings);
rds.pipeline_layout_pbr = CreatePipelineLayout(&rds.rd, [rds.desc_layout_globals, rds.desc_layout_resources], PushConst.sizeof);
rds.pipeline_layout_pbr = CreatePipelineLayout(&rds.rd, [rds.desc_layout_globals, rds.desc_layout_resources, rds.desc_layout_state], PushConst.sizeof);
foreach(i; 0 .. 2)
{
@ -273,8 +223,8 @@ Init(RenderState* rds, PlatformWindow* window)
};
GfxPipelineInfo pbr_info = {
vertex_shader: LoadAssetData(&rds.frame_arenas[0], "shaders/pbr.vert.spv"),
frag_shader: LoadAssetData(&rds.frame_arenas[0], "shaders/pbr.frag.spv"),
vertex_shader: LoadFile(&rds.frame_arenas[0], "assets/shaders/pbr.vert.spv"),
frag_shader: LoadFile(&rds.frame_arenas[0], "assets/shaders/pbr.frag.spv"),
input_rate: IR.Vertex,
input_rate_stride: Vertex.sizeof,
layout: rds.pipeline_layout_pbr,
@ -315,7 +265,7 @@ Init(RenderState* rds, PlatformWindow* window)
CreateBuffer(&rds.rd, &rds.globals_buffer, BT.Uniform, ShaderGlobals.sizeof, false);
g_box = MakeBox
ModelData md = LoadGLTF(&rds.frame_arenas[0], "assets/models/DamagedHelmet.glb");
}
PipelineID
@ -381,6 +331,7 @@ GetPBRMod(bool albedo = false, bool ambient = false, bool specular = false, bool
}
}
/*
ModelData
MakeBox(RenderState* rds, f32 width, f32 height, Vec4 col)
{
@ -428,9 +379,9 @@ Model
Upload(RenderState* rds, ModelData* data)
{
Model model;
u32[] tex_idx = Alloc!(&rds.frame_arenas[0], data.text.length);
u32[] mat_idx = Alloc!(&rds.frame_arenas[0], data.materials.length);
u32[] state_idx = Alloc!(&rds.frame_arenas[0], data.model_states.length);
u32[] tex_idx = Alloc!(u32)(&rds.frame_arenas[0], data.text.length);
u32[] mat_idx = Alloc!(u32)(&rds.frame_arenas[0], data.materials.length);
u32[] state_idx = Alloc!(u32)(&rds.frame_arenas[0], data.model_states.length);
bool result = true;
@ -450,20 +401,18 @@ Upload(RenderState* rds, ModelData* data)
{
Buffer* buf = &rds.materials[rds.imat++];
CreateBuffer(&rds.rd, buf, BT.Uniform, Material.sizeof);
result = Transfer(&rds.rd, buf, )
//result = Transfer(&rds.rd, buf, )
}
for(u64 i = 0; i < data.model_states.length; i += 1)
{
Buffer* buf = &rds.model_states[rds.istate++];
CreateBuffer(&rds)
//CreateBuffer(&rds)
}
model.parts = data.parts;
}
DescIndices*
GetDescIndices()
*/
unittest
{


@ -44,7 +44,8 @@ layout (set = 2, binding = 4) uniform MaterialData {
float shininess;
float alpha;
} Material;
layout (set = 2, binding = 5) uniform ModelState {
layout (set = 3, binding = 0) uniform ModelState {
mat4 model_matrix;
} State;
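
The host side of this move lives earlier in the commit: ModelState leaves the resources set (old set = 2, binding = 5) and gets its own descriptor set layout. A hedged sketch of how the pieces line up, using only names that appear in this diff (the renderer's actual signatures, and how set indices map onto the pipeline layout, are not fully shown here):

// Sketch only, stitched together from the src/gears hunks above; not verified
// against the renderer's implementation.
DescLayoutBinding[1] state_bindings = [
    // pairs with `layout (set = 3, binding = 0) uniform ModelState` in the shader
    { binding: 0, descriptorType: DT.Uniform, descriptorCount: 1, stageFlags: SS.All },
];
rds.desc_layout_state = CreateDescSetLayout(&rds.rd, state_bindings);

// The state layout is appended to the PBR pipeline layout so the shader can
// bind the per-model matrix separately from globals and material resources.
rds.pipeline_layout_pbr = CreatePipelineLayout(&rds.rd,
    [rds.desc_layout_globals, rds.desc_layout_resources, rds.desc_layout_state],
    PushConst.sizeof);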