From 02103dfd518faf327f7edc13695435308ddcead8 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Tue, 19 May 2020 14:02:28 -0700 Subject: feat: added prototype of stringizer & tokenizer macro operators --- sys/cmd/cc/cc.h | 1 + sys/cmd/cc/lex.c | 25 +++++++++++++-- sys/cmd/cc/pp.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 106 insertions(+), 12 deletions(-) (limited to 'sys') diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index f5d3d28..dab351a 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -186,6 +186,7 @@ enum Vun = iota(13), Vfloat = iota(14), Vstr = iota(15), + Vwstr = iota(16), Vmask = Vchar - 1, }; diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index c1ee6a4..c84ea68 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ -180,6 +180,7 @@ oct: continue; } ungetbyte(lx); + break; } if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l); @@ -253,7 +254,8 @@ Dispatch: if (b >= RuneSelf || b == '_') goto Talpha; if (isalpha(b)) { - goto Talpha; + if (b != 'L') + goto Talpha; n = b; b = getbyte(lx); @@ -267,8 +269,12 @@ Dispatch: tok.val.r = v; goto Return; } + if (b == '"') + goto TLstr; ungetbyte(lx); b = n; + + goto Talpha; } if (isdigit(b)) goto Tnum; @@ -321,7 +327,22 @@ Dispatch: intern(&tok.val.s); str·free(s); - break; + goto Return; + + TLstr: + s = str·makecap("", 0, 8); + // NOTE: this violates strict aliasing + for (;;) { + if (escapechar(lx, '"', 1, 0, &v)) + break; + str·appendlen(&s, sizeof(wchar_t), (byte*)&v); + } + tok.kind = Alit | Vwstr; + tok.val.s = s; + intern(&tok.val.s); + + str·free(s); + goto Return; case '.': tok.kind = Adot; diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c index 8de4792..16fac29 100644 --- a/sys/cmd/cc/pp.c +++ b/sys/cmd/cc/pp.c @@ -110,8 +110,10 @@ opand(Lexer *lx) switch (tok.kind & ~Vmask) { case Vint: case Vlong: case Vvlong: return tok.val.i; - case Vint | Vun: case Vlong | Vun: case Vvlong | Vun: + case Vun|Vint : case Vun|Vlong : case Vun|Vvlong: return tok.val.ui; + case Vrune: + return tok.val.r; case Vchar: return tok.val.c; default: @@ -383,7 +385,7 @@ enum { PPbeg = 0x02, PParg = 0x03, - PPtok = 0x04, + PPcat = 0x04, PPstr = 0x05, PPvar = 0x80, @@ -435,9 +437,9 @@ static error ppdef(Lexer *lx) { - byte b; + int b; Sym *sym; - int i, n, dot; + int i, j, f, n, dot; string s, a, buf, args[PPnarg]; s = ident(lx); @@ -503,12 +505,47 @@ ppdef(Lexer *lx) for (i = 0; i < n; i++) { if (strcmp(lx->buf, args[i]) == 0) { - goto Arg; + goto Args; } } str·appendlen(&buf, (lx->b - lx->buf), lx->buf); continue; + Args: + /* Check for argx ## argy OR argx##argy */ + if (isspace(b)) { + b = getnsbyte(lx); + f = 1; + } + if (b == '#') { + b = getbyte(lx); + if (b != '#') { + ungetbyte(lx); + goto Arg; + } + b = getnsbyte(lx); + lx->b = lx->buf; + while (isalnum(b) || b == '_') { + *lx->b++ = b; + b = getbyte(lx); + } + *lx->b = '\0'; + + for (j = 0; j < n; j++) { + if (strcmp(lx->buf, args[j]) == 0) + goto CatArgs; + } + errorat(lx->pos, "macro operator '##' must be terminated by valid variable identifier"); + goto Bad; + CatArgs: + str·appendbyte(&buf, PPcat); + str·appendbyte(&buf, 'a' + i); + str·appendbyte(&buf, 'a' + j); + continue; + } Arg: + if (f) + ungetbyte(lx); + str·appendbyte(&buf, PParg); str·appendbyte(&buf, 'a' + i); continue; @@ -564,8 +601,29 @@ ppdef(Lexer *lx) break; } - if (b == '#' && n > 0) { - panicf("tokenizer needs implementation"); + if (b == '#') { + b = getnsbyte(lx); + if (b == '#') { + errorat(lx->pos, "macro operator '##' must be proceeded by a valid variable identifier"); + goto Bad; + } + lx->b = lx->buf; + while (isalnum(b) || b == '_') { + *lx->b++ = b; + b = getbyte(lx); + } + *lx->b = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(lx->buf, args[i]) == 0) + goto Str; + } + errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier"); + goto Bad; + Str: + str·appendbyte(&buf, PPstr); + str·appendbyte(&buf, 'a' + i); + continue; } str·appendbyte(&buf, b); @@ -578,10 +636,12 @@ ppdef(Lexer *lx) if (dot) *buf |= PPvar; - sym = defmacro(lx, s, buf); + lx->b = lx->buf; + sym = defmacro(lx, s, buf); return 0; Bad: errorat(lx->pos, "failed parse of #define macro '%s'", s); + lx->b = lx->buf; ppend(lx); return 1; } @@ -733,9 +793,21 @@ expandmacro(Lexer *lx, Sym *s, byte *dst) dst += strlen(arg[b]); break; - case PPtok: case PPstr: - panicf("haven't implemented"); + b = *it++; + b -= 'a'; + *dst++ = '"'; + strcpy(dst, arg[b]); + *dst++ = '"'; + break; + + case PPcat: + b = *it++; + b -= 'a'; + strcpy(dst, arg[b]); + b = *it++; + b -= 'a'; + strcpy(dst, arg[b]); break; case '\0': -- cgit v1.2.1