From 57eb0c15a10dfcd9816b84166106a1aa32323325 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 20 May 2020 12:15:24 -0700 Subject: fix: made rune constants unsigned for implicit casting --- include/libn.h | 22 ++++++++++++---------- sys/cmd/cc/cc.c | 2 ++ sys/cmd/cc/cc.h | 4 ++-- sys/cmd/cc/lex.c | 17 ++++++++--------- sys/cmd/cc/pp.c | 5 ++--- 5 files changed, 26 insertions(+), 24 deletions(-) diff --git a/include/libn.h b/include/libn.h index f469c5c..835d2c1 100644 --- a/include/libn.h +++ b/include/libn.h @@ -136,22 +136,24 @@ void str·replace(string s, const byte* from, const byte* to); string* str·split(string s, const byte* tok); string str·join(vlong len, byte** fields, const byte* sep); - /* +/* * UTF-8 functions. * Perhaps break into own unit * TODO: Add to(upper|lower|title) */ typedef uint32 rune; -enum -{ - UTFmax = 4, - RuneSync = 0x80, - RuneSelf = 0x80, - RuneErr = 0xFFFD, - RuneMax = 0x10FFFF, - RuneMask = 0x1FFFFF, -}; +/* + * We have to use the preprocessor to ensure + * we have unsigned constants. Unfortunate... + */ + +#define UTFmax 4 +#define RuneSync 0x80u +#define RuneSelf 0x80u +#define RuneErr 0xFFFDu +#define RuneMax 0x10FFFFu +#define RuneMask 0x1FFFFFu /* utf8 helpers */ int utf8·fullrune(byte *s, int n); diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c index 2a61e89..6338050 100644 --- a/sys/cmd/cc/cc.c +++ b/sys/cmd/cc/cc.c @@ -111,6 +111,8 @@ openio(byte *path) return nil; } + // TODO: See if we have already loaded the file + // printf("OPENING PATH %s\n", path); if ((C.io - C.iostk) >= arrlen(C.iostk)-1) diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 8a9fd80..6ed2caa 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -255,8 +255,8 @@ struct Lexer /* lex.c functions */ Token lex(Lexer *); -byte getbyte(Lexer *); -byte getnsbyte(Lexer *l); +int getbyte(Lexer *); +int getnsbyte(Lexer *l); rune getrune(Lexer *); byte ungetbyte(Lexer *); rune ungetrune(Lexer *, rune r); diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index 6c57b3c..8ce4e79 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ -25,13 +25,13 @@ puttok(Token tok) // ----------------------------------------------------------------------- // simple wrappers -byte +int getbyte(Lexer *lx) { return bufio·getbyte(&lx->io->buf); } -byte +int getnsbyte(Lexer *lx) { int b; @@ -246,7 +246,6 @@ lex(Lexer *lx) { int b, n, f; vlong v, _; - uint u; rune r; string s; double d; @@ -260,7 +259,7 @@ GetByte: Dispatch: tok.pos.beg = lx->pos; - if (b >= RuneSelf || b == '_') + if ((b != EOF && b >= RuneSelf) || b == '_') goto Talpha; if (isalpha(b)) { if (b != 'L') @@ -604,7 +603,6 @@ Dispatch: goto Return; Talpha: - u = b; s = lx->buf; e = lx->buf + arrlen(lx->buf); for (;;) { @@ -612,18 +610,18 @@ Dispatch: errorat(lx->pos, "identifier too long for buffer: %s", s); goto Nospace; } - if (u != EOF && u >= RuneSelf) { + if (b != EOF && b >= RuneSelf) { ungetbyte(lx); r = getrune(lx); if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) { errorat(lx->pos, "invalid identifier character %d", r); } s += utf8·runetochar(s, &r); - } else if (!isalnum(u) && u != '_') + } else if (!isalnum(b) && b != '_') break; else - *s++ = u; - u = getbyte(lx); + *s++ = b; + b = getbyte(lx); } *s = '\0'; ungetbyte(lx); @@ -639,6 +637,7 @@ Dispatch: sym = lookup(&lx->sym, tok.val.s); if (sym) { io = makeio(); + printf("EXPANDING MACRO %s\n", sym->name); io->buf.end += expandmacro(lx, sym, io->b); *io->buf.end++ = EOF; pushio(lx, io); diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c index cd8e28d..57cd5e2 100644 --- a/sys/cmd/cc/pp.c +++ b/sys/cmd/cc/pp.c @@ -40,7 +40,7 @@ identdots(Lexer *lx, int *dots) byte *s; s = ident(lx); - if (s != nil) + if (*s != '\0') return s; c = getnsbyte(lx); @@ -65,7 +65,7 @@ defmacro(Lexer *lx, string name, string macro) { Sym *mac; - printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path)); + // printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path)); mac = define(&lx->sym, name, Smacro); mac->macro = macro; @@ -429,7 +429,6 @@ ppund(Lexer *lx) intern(&s); lx->b = lx->buf; - printf("FORGETTING %s\n", s); err = forget(&lx->sym, s); if (err) warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); -- cgit v1.2.1