Prototype of Unicode readline support

The readline functionality operated on the assumption that 1 byte = 1 character. This is obviously wrong as soon as you input a non-ASCII character. This commit temporarily removes a lot of functionality, but it now parses input bytes in a Unicode-aware manner. The outstanding problem is that 1 Unicode rune != 1 column: there are double-width characters, as well as zero-width runes, that further break our assumption that 1 rune = 1 character = 1 column. Handling column widths correctly is left for the next iteration.
author: Nicholas Noll <nbnoll@eml.cc> 2021-10-29 08:44:47 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2021-10-29 08:44:47 -0700
commit: b12609cd69ff39f8a221b12230becadfe3e1fc57 (patch)
tree: a6d4eeaf4cdeab53d2f924556bf24a8ce4ff8214 /sys/cmd/rc/lex.c
parent: 079d5d6a214f169fa8b73f2d6f6f741810dbe9fe (diff)
1 files changed, 10 insertions, 15 deletions
diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c
index 9a58f21..9ca2453 100644
--- a/sys/cmd/rc/lex.c
+++ b/sys/cmd/rc/lex.c
@@ -142,28 +142,23 @@ putbyte(char *buf, int c)
     return buf;
 }
 
-#define	onebyte(c)	     ((c&0x80)==0x00)
-#define	twobyte(c)	     ((c&0xe0)==0xc0)
-#define	threebyte(c)	 ((c&0xf0)==0xe0)
-#define	fourbyte(c)	     ((c&0xf8)==0xf0)
-
 static
 char *
 putrune(char *buf, int c)
 {
     buf = putbyte(buf, c);
-    if(onebyte(c))
+    if(utf8·onebyte(c))
         return buf;
-    if(twobyte(c))
-        return putbyte(buf,c);
-    if(threebyte(c)){
-        buf = putbyte(buf,c);
-        return putbyte(buf,c);
+    if(utf8·twobyte(c))
+        return putbyte(buf,advance());
+    if(utf8·threebyte(c)){
+        buf = putbyte(buf,advance());
+        return putbyte(buf,advance());
     }
-    if(fourbyte(c)){
-        buf = putbyte(buf,c);
-        buf = putbyte(buf,c);
-        return putbyte(buf,c);
+    if(utf8·fourbyte(c)){
+        buf = putbyte(buf,advance());
+        buf = putbyte(buf,advance());
+        return putbyte(buf,advance());
     }
     fatal("malformed utf8 stream");
author	Nicholas Noll <nbnoll@eml.cc>	2021-10-29 08:44:47 -0700
committer	Nicholas Noll <nbnoll@eml.cc>	2021-10-29 08:44:47 -0700
commit	b12609cd69ff39f8a221b12230becadfe3e1fc57 (patch)
tree	a6d4eeaf4cdeab53d2f924556bf24a8ce4ff8214 /sys/cmd/rc/lex.c
parent	079d5d6a214f169fa8b73f2d6f6f741810dbe9fe (diff)