aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/rc/lex.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2021-10-29 08:44:47 -0700
committerNicholas Noll <nbnoll@eml.cc>2021-10-29 08:44:47 -0700
commitb12609cd69ff39f8a221b12230becadfe3e1fc57 (patch)
treea6d4eeaf4cdeab53d2f924556bf24a8ce4ff8214 /sys/cmd/rc/lex.c
parent079d5d6a214f169fa8b73f2d6f6f741810dbe9fe (diff)
Prototype of unicode readline support
The readline functionality operated on the assumption that 1 byte = 1 character. This is obviously wrong if you input a non-ASCII character. This commit temporarily removes a lot of functionality but parses input bytes in a Unicode-aware manner. The outstanding problem now is that 1 Unicode rune != 1 column: there are double-wide characters, as well as zero-width runes, that further break our assumption that 1 rune = 1 character = 1 column. Handling column widths correctly is left for the next iteration.
Diffstat (limited to 'sys/cmd/rc/lex.c')
-rw-r--r--sys/cmd/rc/lex.c25
1 files changed, 10 insertions, 15 deletions
diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c
index 9a58f21..9ca2453 100644
--- a/sys/cmd/rc/lex.c
+++ b/sys/cmd/rc/lex.c
@@ -142,28 +142,23 @@ putbyte(char *buf, int c)
return buf;
}
-#define onebyte(c) ((c&0x80)==0x00)
-#define twobyte(c) ((c&0xe0)==0xc0)
-#define threebyte(c) ((c&0xf0)==0xe0)
-#define fourbyte(c) ((c&0xf8)==0xf0)
-
static
char *
putrune(char *buf, int c)
{
buf = putbyte(buf, c);
- if(onebyte(c))
+ if(utf8·onebyte(c))
return buf;
- if(twobyte(c))
- return putbyte(buf,c);
- if(threebyte(c)){
- buf = putbyte(buf,c);
- return putbyte(buf,c);
+ if(utf8·twobyte(c))
+ return putbyte(buf,advance());
+ if(utf8·threebyte(c)){
+ buf = putbyte(buf,advance());
+ return putbyte(buf,advance());
}
- if(fourbyte(c)){
- buf = putbyte(buf,c);
- buf = putbyte(buf,c);
- return putbyte(buf,c);
+ if(utf8·fourbyte(c)){
+ buf = putbyte(buf,advance());
+ buf = putbyte(buf,advance());
+ return putbyte(buf,advance());
}
fatal("malformed utf8 stream");