about | summary | refs | log | tree | commit | diff
path: root/sys/cmd/cc/lex.c
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-05-19 12:13:05 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-05-19 12:13:05 -0700
commit 732c0f6ed6bc072638a66e8ed6802968e18ceb98 (patch)
tree 8bfc25aff1903f452006f31ff6a2f906303e2017 /sys/cmd/cc/lex.c
parent 5c3114c81477298e9eb78ed3a070752f8d24b8a5 (diff)
feat: cleaned up character lexing
Diffstat (limited to 'sys/cmd/cc/lex.c')
-rw-r--r-- sys/cmd/cc/lex.c 133
1 files changed, 86 insertions, 47 deletions
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index 20c6f9c..03aba4b 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -82,7 +82,7 @@ static uint8 Atoi[256] =
static
error
-escape(Lexer *lx, int x, int *flag, vlong *val)
+escapechar(Lexer *lx, int x, int islong, int esc, vlong *val)
{
int i, u, c;
vlong l;
@@ -90,14 +90,14 @@ escape(Lexer *lx, int x, int *flag, vlong *val)
c = getrune(lx);
switch (c) {
+ case '\\':
+ break;
case EOF:
errorat(lx->pos, "EOF in string");
return 1;
case '\n':
errorat(lx->pos, "newline in string");
return 1;
- case '\\':
- break;
default:
if (c == x)
return 1;
@@ -110,12 +110,11 @@ escape(Lexer *lx, int x, int *flag, vlong *val)
switch(c) {
case 'x':
- i = 2;
- *flag = 1;
+ i = islong ? 4 : 2;
goto hex;
case 'u':
- i = 4;
+ i = islong ? 8 : 4;
u = 1;
goto hex;
@@ -126,17 +125,17 @@ escape(Lexer *lx, int x, int *flag, vlong *val)
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
- *flag = 1;
+ i = islong ? 4 : 2;
goto oct;
- case 'a': c = '\a'; break;
- case 'b': c = '\b'; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'v': c = '\v'; break;
- case '\\': c = '\\'; break;
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = '\f'; break;
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case '\\':c = '\\'; break;
default:
if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
@@ -160,7 +159,6 @@ hex:
l = l*16 + c-'A' + 10;
continue;
}
- errorat(lx->pos, "non-hex character in escape sequence: %c", c);
ungetbyte(lx);
break;
}
@@ -169,22 +167,25 @@ hex:
l = RuneErr;
}
*val = l;
+ if (esc)
+ *val |= RuneMask + 1;
return 0;
oct:
l = c - '0';
- for (i = 2; i > 0; i--) {
+ for (; i > 0; i--) {
c = getbyte(lx);
if (c >= '0' && c <= '7') {
l = l*8 + c-'0';
continue;
}
- errorat(lx->pos, "non-octal character in escape sequence: %c", c);
ungetbyte(lx);
}
if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l);
*val = l;
+ if (esc)
+ *val |= RuneMask + 1;
return 0;
}
@@ -234,7 +235,7 @@ Token
lex(Lexer *lx)
{
int b, n, f;
- vlong v;
+ vlong v, _;
uint u;
rune r;
string s;
@@ -249,10 +250,28 @@ GetByte:
Dispatch:
tok.pos.beg = lx->pos;
- if (b >= RuneSelf || isalpha(b) || b == '_')
- goto TAlpha;
+ if (b >= RuneSelf || b == '_')
+ goto Talpha;
+ if (isalpha(b)) {
+ goto Talpha;
+
+ n = b;
+ b = getbyte(lx);
+ if (b == '\'') {
+ if (escapechar(lx, '\'', 1, 0, &v))
+ b = '\'';
+ if (!escapechar(lx, '\'', 1, 0, &_)) {
+ errorat(lx->pos, "missing ' at end of character constant");
+ }
+ tok.kind = Alit | Vrune;
+ tok.val.r = v;
+ goto Return;
+ }
+ ungetbyte(lx);
+ b = n;
+ }
if (isdigit(b))
- goto TNum;
+ goto Tnum;
switch (b) {
case '\n':
@@ -263,12 +282,13 @@ Dispatch:
lx->pos.line++;
goto Dispatch;
+ Tchar:
case '\'':
- if (escape(lx, '\'', &f, &v)) {
+ if (escapechar(lx, '\'', 0, 0, &v)) {
errorat(lx->pos, "empty literal or escaped ' in char literal");
v = '\'';
}
- if (!escape(lx, '\'', &f, &v)) {
+ if (!escapechar(lx, '\'', 0, 0, &_)) {
errorat(lx->pos, "missing '");
ungetbyte(lx);
}
@@ -279,15 +299,15 @@ Dispatch:
}
tok.kind = Alit | Vchar;
tok.val.c = v;
- break;
+ break;
case '"':
s = str·makecap("", 0, 8);
for (;;) {
- if (escape(lx, '"', &f, &v))
+ if (escapechar(lx, '"', 0, 1, &v))
break;
- if (v < RuneSelf || f)
+ if (v & (RuneMask + 1))
str·appendbyte(&s, v);
else {
r = v;
@@ -301,7 +321,7 @@ Dispatch:
intern(&tok.val.s);
str·free(s);
- break;
+ break;
case '.':
tok.kind = Adot;
@@ -309,7 +329,7 @@ Dispatch:
if (isdigit(b)) {
// *lx->b++ = b;
- goto TFlt;
+ goto Tflt;
} else if (b == '.') {
b = getbyte(lx);
if (b != '.') {
@@ -319,7 +339,7 @@ Dispatch:
}
}
ungetbyte(lx);
- break;
+ break;
case '<':
tok.kind = Alt;
@@ -336,7 +356,7 @@ Dispatch:
tok.kind = Alteq;
else
ungetbyte(lx);
- break;
+ break;
case '>':
tok.kind = Agt;
@@ -353,7 +373,7 @@ Dispatch:
tok.kind = Agteq;
else
ungetbyte(lx);
- break;
+ break;
case '/':
tok.kind = Adiv;
@@ -385,7 +405,7 @@ Dispatch:
goto Dispatch;
} else
ungetbyte(lx);
- break;
+ break;
case '#':
if (domacro(lx)) {
@@ -394,7 +414,6 @@ Dispatch:
return tok;
}
goto GetByte;
- break;
case EOF:
popio(lx);
@@ -432,7 +451,7 @@ Dispatch:
goto Return;
- TNum:
+ Tnum:
e = lx->buf + arrlen(lx->buf);
do {
if (lx->b >= e) {
@@ -443,12 +462,11 @@ Dispatch:
} while (b = getbyte(lx), isdigit(b) || b == '_');
if (b == '.' || tolower(b) == 'e')
- goto TFlt;
- TInt:
+ goto Tflt;
+ Tint:
r = b;
n = 10;
s = lx->buf;
- ungetbyte(lx);
if (*s == '0') {
b = *++s;
switch (b) {
@@ -485,18 +503,39 @@ Dispatch:
v = v * n + f;
}
+
b = r;
- tok.kind = Alit | Vint;
+ tok.kind = Alit;
tok.val.i = v;
- /* TODO: Suffixes!
- if (tolower(b) == 'u') {
- tok.kind |= Vusgn;
+
+ if (b == 'u' || b == 'U') {
+ tok.kind |= Vun;
b = getbyte(lx);
}
- */
- goto Return;
+ if (b == 'l' || b == 'L') {
+ r = getbyte(lx);
+ if (r == 'l' || r == 'L') {
+ if (r != b)
+ errorat(lx->pos, "mismatched case on long long integer suffix");
+ tok.kind |= Vvlong;
+ r = getbyte(lx);
+ } else
+ tok.kind |= Vlong;
+
+ if (r == 'u' || r == 'U') {
+ if (tok.kind & Vun)
+ errorat(lx->pos, "multiple unsigned designators on integer suffix");
+ tok.kind |= Vun;
+ goto Return;
+ }
+ ungetbyte(lx);
+ } else
+ tok.kind |= Vint;
+
+ ungetbyte(lx);
+ goto Return;
- TFlt:
+ Tflt:
if (b == '.') {
*lx->b++ = b;
b = getbyte(lx);
@@ -530,9 +569,9 @@ Dispatch:
tok.kind = Alit | Vfloat;
tok.val.f = d;
- goto Return;
+ goto Return;
- TAlpha:
+ Talpha:
u = b;
s = lx->buf;
e = lx->buf + arrlen(lx->buf);