From c7b4fc463aa38da5131aeceb81caa4a3b6180308 Mon Sep 17 00:00:00 2001
From: Matthew
Date: Sun, 28 Sep 2025 21:17:27 +1000
Subject: [PATCH] syntax parsing fixes

---
 src/editor/parsing.d | 137 +++++++++++++++++++++++++++++++++----------
 1 file changed, 105 insertions(+), 32 deletions(-)

diff --git a/src/editor/parsing.d b/src/editor/parsing.d
index a593e61..a49a125 100644
--- a/src/editor/parsing.d
+++ b/src/editor/parsing.d
@@ -180,7 +180,6 @@ const TT[128] D_STD_TOKEN = [
     ':': TT.Colon,
     '?': TT.Question,
     ',': TT.Comma,
-    '#': TT.Hash,
 ];
 
 const TT[128] D_OP_TOKEN = [
@@ -203,6 +202,52 @@ const TT[128] D_STR_TOKEN = [
     '"' : TT.DoubleQuote,
 ];
 
+u8[][][128] D_TYPES = [
+    'a': [
+        cast(u8[])r"auto",
+    ],
+    'b': [
+        cast(u8[])r"bool",
+        cast(u8[])r"byte",
+    ],
+    'c': [
+        cast(u8[])r"char",
+    ],
+    'd': [
+        cast(u8[])r"delegate",
+        cast(u8[])r"dchar",
+        cast(u8[])r"double",
+    ],
+    'f': [
+        cast(u8[])r"float",
+        cast(u8[])r"function",
+    ],
+    'i': [
+        cast(u8[])r"int",
+    ],
+    'l': [
+        cast(u8[])r"long",
+    ],
+    'r': [
+        cast(u8[])r"real",
+    ],
+    's': [
+        cast(u8[])r"short",
+    ],
+    'u': [
+        cast(u8[])r"ubyte",
+        cast(u8[])r"uint",
+        cast(u8[])r"ulong",
+        cast(u8[])r"ushort",
+    ],
+    'v': [
+        cast(u8[])r"void",
+    ],
+    'w': [
+        cast(u8[])r"wchar",
+    ],
+];
+
 u8[][][128] D_KEYWORDS = [
     '_': [
         cast(u8[])r"__FILE__",
@@ -223,30 +268,23 @@ u8[][][128] D_KEYWORDS = [
         cast(u8[])r"align",
         cast(u8[])r"asm",
         cast(u8[])r"assert",
-        cast(u8[])r"auto",
     ],
     'b': [
-        cast(u8[])r"bool",
         cast(u8[])r"break",
-        cast(u8[])r"byte",
     ],
     'c': [
         cast(u8[])r"case",
         cast(u8[])r"cast",
         cast(u8[])r"catch",
-        cast(u8[])r"char",
         cast(u8[])r"class",
         cast(u8[])r"const",
         cast(u8[])r"continue",
     ],
     'd': [
-        cast(u8[])r"dchar",
         cast(u8[])r"debug",
         cast(u8[])r"default",
-        cast(u8[])r"delegate",
         cast(u8[])r"deprecated",
         cast(u8[])r"do",
-        cast(u8[])r"double",
     ],
     'e': [
         cast(u8[])r"else",
@@ -258,11 +296,9 @@ u8[][][128] D_KEYWORDS = [
         cast(u8[])r"false",
         cast(u8[])r"final",
         cast(u8[])r"finally",
-        cast(u8[])r"float",
         cast(u8[])r"for",
         cast(u8[])r"foreach",
         cast(u8[])r"foreach_reverse",
-        cast(u8[])r"function",
     ],
     'g': [
         cast(u8[])r"goto",
@@ -273,21 +309,19 @@ u8[][][128] D_KEYWORDS = [
         cast(u8[])r"import",
         cast(u8[])r"in",
         cast(u8[])r"inout",
-        cast(u8[])r"int",
         cast(u8[])r"interface",
         cast(u8[])r"invariant",
         cast(u8[])r"is",
     ],
     'l': [
         cast(u8[])r"lazy",
-        cast(u8[])r"long",
     ],
     'm': [
         cast(u8[])r"mixin",
         cast(u8[])r"module",
     ],
     'n': [
-cast(u8[])r"new",
+        cast(u8[])r"new",
         cast(u8[])r"nothrow",
         cast(u8[])r"null",
     ],
@@ -304,14 +338,12 @@ u8[][][128] D_KEYWORDS = [
         cast(u8[])r"pure",
     ],
     'r': [
-        cast(u8[])r"real",
         cast(u8[])r"ref",
         cast(u8[])r"return",
     ],
     's': [
         cast(u8[])r"scope",
         cast(u8[])r"shared",
-        cast(u8[])r"short",
         cast(u8[])r"static",
         cast(u8[])r"struct",
         cast(u8[])r"super",
@@ -328,19 +360,13 @@ u8[][][128] D_KEYWORDS = [
         cast(u8[])r"typeof",
     ],
     'u': [
-        cast(u8[])r"ubyte",
-        cast(u8[])r"uint",
-        cast(u8[])r"ulong",
         cast(u8[])r"union",
         cast(u8[])r"unittest",
-        cast(u8[])r"ushort",
     ],
     'v': [
         cast(u8[])r"version",
-        cast(u8[])r"void",
     ],
     'w': [
-        cast(u8[])r"wchar",
         cast(u8[])r"while",
         cast(u8[])r"with",
     ],
@@ -393,6 +419,14 @@ CreateTokenizer(FlatBuffer* fb)
         }
     }
 
+    for(u64 i = 0; i < D_TYPES.length; i += 1)
+    {
+        if(D_TYPES[i] == null)
+        {
+            D_TYPES[i] = [];
+        }
+    }
+
     return tk;
 }
 
@@ -434,6 +468,7 @@ TokenizeD(FlatBuffer* fb)
     Reset(&tk.arena);
     tk.pos = 0;
+    tk.first = tk.last = g_NIL_TOKEN;
 
     for(; tk.pos < fb.length;)
     {
@@ -527,10 +562,10 @@ TokenizeD(FlatBuffer* fb)
             case '@':
             {
                 Token* t = MakeToken(tk, TT.At, tk.pos, tk.pos+1);
+                tk.pos += 1;
 
-                while (tk.pos < fb.length)
+                for(; tk.pos < fb.length; tk.pos += 1)
                 {
-                    tk.pos += 1;
                     u8 c = fb.data[tk.pos];
                     if(CheckWhiteSpace(c)) break;
@@ -543,10 +578,20 @@ TokenizeD(FlatBuffer* fb)
             {
                 Token* t = MakeToken(tk, TT.Dot, tk.pos, tk.pos+1);
 
-                while (tk.pos < fb.length)
+                for(; tk.pos < fb.length; tk.pos += 1)
                 {
-                    tk.pos += 1;
-                    if(tk.pos != '.') break;
+                    if(fb.data[tk.pos] != '.') break;
+                }
+
+                t.end = tk.pos;
+            } break;
+            case '#':
+            {
+                Token* t = MakeToken(tk, TT.Import, tk.pos, tk.pos+1);
+
+                for(; tk.pos < fb.length; tk.pos += 1)
+                {
+                    if(CheckWhiteSpace(fb.data[tk.pos])) break;
                 }
 
                 t.end = tk.pos;
@@ -561,10 +606,8 @@ TokenizeD(FlatBuffer* fb)
                     tk.pos += 1;
                     t.type = TT.Comment;
 
-                    while (tk.pos < fb.length)
+                    for(; tk.pos < fb.length; tk.pos += 1)
                     {
-                        tk.pos += 1;
-
                         if(CheckEOL(fb.data[tk.pos]))
                         {
                             break;
@@ -573,6 +616,22 @@ TokenizeD(FlatBuffer* fb)
 
                     t.end = tk.pos+1;
                 }
+                else if(next == '*')
+                {
+                    tk.pos += 1;
+                    t.type = TT.Comment;
+
+                    for(; tk.pos < fb.length; tk.pos += 1)
+                    {
+                        if(fb.data[tk.pos] == '/' && fb.data[tk.pos-1] == '*')
+                        {
+                            break;
+                        }
+                    }
+
+                    tk.pos += 1;
+                    t.end = tk.pos;
+                }
                 else
                 {
                     tk.pos += 1;
@@ -713,7 +772,7 @@ CheckFuncOrTemplateSig(FlatBuffer* fb, Token* token)
 {
     bool found = false;
 
-    if(token.type == TT.Identifier || token.type == TT.Type || token.type == TT.Keyword)
+    if(token.type == TT.Identifier || token.type == TT.Type)
     {
         Token* n = Next(token);
         if(n.type == TT.Asterisk)
@@ -743,7 +802,7 @@ CheckFuncOrTemplateSig(FlatBuffer* fb, Token* token)
             {
                 break;
             }
-            else if(next.type == TT.LeftParen || next.type == TT.Comma)
+            else if((next.type == TT.LeftParen || next.type == TT.Comma) && Next(next).type != TT.RightParen)
            {
                 Next(next).type = TT.Type;
             }
@@ -913,9 +972,23 @@ ParseId(FlatBuffer* fb)
         u8 ch = fb.data[t.start];
         if(ch < D_KEYWORDS.length && D_KEYWORDS[ch] != null && D_KEYWORDS[ch].length > 0)
        {
+            bool found = false;
             u8[] id = fb.data[t.start .. t.end];
-            foreach(k; D_KEYWORDS[ch])
+
+            foreach(ref k; D_TYPES[ch])
             {
+                if(id == k)
+                {
+                    t.type = TT.Type;
+                    found = true;
+                    break;
+                }
+            }
+
+            foreach(ref k; D_KEYWORDS[ch])
+            {
+                if(found) break;
+
                 if(id == k)
                 {
                     if(k == r"import")