From 7ea1cdb7d31f00024f5a1d124b42cd19a03b959a Mon Sep 17 00:00:00 2001
From: Nicholas <nbnoll@eml.cc>
Date: Wed, 10 Nov 2021 20:12:45 -0800
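Subject: chore: libunicode -> libutf

Rename include/libunicode.h to include/libutf.h and sys/libunicode to
sys/libutf. Update the #include lines (dwm, menu, rc, term) and the
link rules (dwm, rc, term) to match.

Also bundled in this commit:
- base: rename the mmap·Reader union members buf/ubuf to b/u
- ic: include <base.h> instead of <libn.h>, link against
  sys/base/base.a, and add ic to sys/cmd/rules.mk
- term: drop hb.c from the source list
- libutf: remove the vendor/ table generators and add
  runetype-14.0.0.c and runewidth-14.0.0.c
- formatting cleanups in mmap.c, ic.c and strlcpy.c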
---
 include/base.h                         |   8 +-
 include/libunicode.h                   |  38 ----
 include/libutf.h                       |  38 ++++
 sys/base/mmap.c                        |   9 +-
 sys/cmd/dwm/dwm.h                      |   2 +-
 sys/cmd/dwm/rules.mk                   |   2 +-
 sys/cmd/ic/ic.c                        |  40 ++--
 sys/cmd/ic/rules.mk                    |   2 +-
 sys/cmd/ic/strlcpy.c                   |  14 +-
 sys/cmd/menu/menu.h                    |   2 +-
 sys/cmd/rc/rc.h                        |   2 +-
 sys/cmd/rc/rules.mk                    |   2 +-
 sys/cmd/rules.mk                       |   3 +
 sys/cmd/term/rules.mk                  |   4 +-
 sys/cmd/term/term.h                    |   2 +-
 sys/libunicode/canfit.c                |  23 --
 sys/libunicode/decode.c                |  98 ---------
 sys/libunicode/decodeprev.c            |  60 -----
 sys/libunicode/encode.c                |  69 ------
 sys/libunicode/find.c                  |  31 ---
 sys/libunicode/findlast.c              |  32 ---
 sys/libunicode/internal.h              |  38 ----
 sys/libunicode/len.c                   |  21 --
 sys/libunicode/rules.mk                |  76 -------
 sys/libunicode/runelen.c               |   8 -
 sys/libunicode/vendor/common.c         | 220 -------------------
 sys/libunicode/vendor/common.h         |  46 ----
 sys/libunicode/vendor/mkgraphemedata.c |  24 --
 sys/libunicode/vendor/mkrunetype.c     | 388 ---------------------------------
 sys/libunicode/vendor/mkrunewidth.c    | 325 ---------------------------
 sys/libutf/canfit.c                    |  23 ++
 sys/libutf/decode.c                    |  98 +++++++++
 sys/libutf/decodeprev.c                |  60 +++++
 sys/libutf/encode.c                    |  69 ++++++
 sys/libutf/find.c                      |  31 +++
 sys/libutf/findlast.c                  |  32 +++
 sys/libutf/internal.h                  |  38 ++++
 sys/libutf/len.c                       |  21 ++
 sys/libutf/rules.mk                    |  76 +++++++
 sys/libutf/runelen.c                   |   8 +
 sys/libutf/runetype-14.0.0.c           | 111 ++++++++++
 sys/libutf/runewidth-14.0.0.c          |  71 ++++++
 sys/rules.mk                           |   2 +-
 43 files changed, 724 insertions(+), 1543 deletions(-)
 delete mode 100644 include/libunicode.h
 create mode 100644 include/libutf.h
 delete mode 100644 sys/libunicode/canfit.c
 delete mode 100644 sys/libunicode/decode.c
 delete mode 100644 sys/libunicode/decodeprev.c
 delete mode 100644 sys/libunicode/encode.c
 delete mode 100644 sys/libunicode/find.c
 delete mode 100644 sys/libunicode/findlast.c
 delete mode 100644 sys/libunicode/internal.h
 delete mode 100644 sys/libunicode/len.c
 delete mode 100644 sys/libunicode/rules.mk
 delete mode 100644 sys/libunicode/runelen.c
 delete mode 100644 sys/libunicode/vendor/common.c
 delete mode 100644 sys/libunicode/vendor/common.h
 delete mode 100644 sys/libunicode/vendor/mkgraphemedata.c
 delete mode 100644 sys/libunicode/vendor/mkrunetype.c
 delete mode 100644 sys/libunicode/vendor/mkrunewidth.c
 create mode 100644 sys/libutf/canfit.c
 create mode 100644 sys/libutf/decode.c
 create mode 100644 sys/libutf/decodeprev.c
 create mode 100644 sys/libutf/encode.c
 create mode 100644 sys/libutf/find.c
 create mode 100644 sys/libutf/findlast.c
 create mode 100644 sys/libutf/internal.h
 create mode 100644 sys/libutf/len.c
 create mode 100644 sys/libutf/rules.mk
 create mode 100644 sys/libutf/runelen.c
 create mode 100644 sys/libutf/runetype-14.0.0.c
 create mode 100644 sys/libutf/runewidth-14.0.0.c

diff --git a/include/base.h b/include/base.h
index a7813e6..b841040 100644
--- a/include/base.h
+++ b/include/base.h
@@ -261,12 +261,12 @@ int   bufio·read(io·Buffer *buf, int sz, int n, void *out);
 // -----------------------------------------------------------------------------
 // memory mapped files
 
-typedef struct mmap·Reader 
+typedef struct mmap·Reader
 {
     vlong len;
-    union {
-        byte   *buf;
-        ubyte *ubuf;
+    union{
+        char  *b;
+        uchar *u;
     };
 } mmap·Reader;
 
diff --git a/include/libunicode.h b/include/libunicode.h
deleted file mode 100644
index 25d6dee..0000000
--- a/include/libunicode.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#pragma once
-
-typedef uint32 rune;
-
-#define UTFmax   4
-#define RuneErr  0xFFFDu
-#define RuneMax  0x10FFFFu
-
-#define utf8·onebyte(c)       (((c)&0x80u)==0x00u)
-#define utf8·twobyte(c)       (((c)&0xE0u)==0xC0u)
-#define utf8·threebyte(c)     (((c)&0xF0u)==0xE0u)
-#define utf8·fourbyte(c)      (((c)&0xF8u)==0xF0u)
-
-/*
- * UTF-8 functions.
- */
-int     utf8·len(char *s);       // returns number of runes for s
-int     utf8·runelen(rune r);    // returns number of bytes for rune
-int     utf8·runewidth(rune r);  // returns number of printed columns for rune
-
-int     utf8·decode(char *, rune *);       // decode 1 rune from char stream, store into rune, return number of bytes
-int     utf8·encode(rune *, char *);       // encode 1 rune from rune stream, store into char, return number of bytes
-int     utf8·decodeprev(char *s, rune *r); // decode 1 rune from char stream, reading backwards, store into rune, return number of bytes
-
-char   *utf8·find(char *s, rune);     // find rune in char stream
-char   *utf8·findlast(char* s, rune); // find last rune in char stream
-
-int     utf8·canfit(char *, int); // XXX: odd function...
-
-int     utf8·isalpha(rune r);
-int     utf8·isdigit(rune r);
-int     utf8·isspace(rune r);
-int     utf8·istitle(rune r);
-int     utf8·ispunct(rune r);
-
-rune    utf8·toupper(rune r);
-rune    utf8·tolower(rune r);
-rune    utf8·totitle(rune r);
diff --git a/include/libutf.h b/include/libutf.h
new file mode 100644
index 0000000..25d6dee
--- /dev/null
+++ b/include/libutf.h
@@ -0,0 +1,38 @@
+#pragma once
+
+typedef uint32 rune;
+
+#define UTFmax   4
+#define RuneErr  0xFFFDu
+#define RuneMax  0x10FFFFu
+
+#define utf8·onebyte(c)       (((c)&0x80u)==0x00u)
+#define utf8·twobyte(c)       (((c)&0xE0u)==0xC0u)
+#define utf8·threebyte(c)     (((c)&0xF0u)==0xE0u)
+#define utf8·fourbyte(c)      (((c)&0xF8u)==0xF0u)
+
+/*
+ * UTF-8 functions.
+ */
+int     utf8·len(char *s);       // returns number of runes for s
+int     utf8·runelen(rune r);    // returns number of bytes for rune
+int     utf8·runewidth(rune r);  // returns number of printed columns for rune
+
+int     utf8·decode(char *, rune *);       // decode 1 rune from char stream, store into rune, return number of bytes
+int     utf8·encode(rune *, char *);       // encode 1 rune from rune stream, store into char, return number of bytes
+int     utf8·decodeprev(char *s, rune *r); // decode 1 rune from char stream, reading backwards, store into rune, return number of bytes
+
+char   *utf8·find(char *s, rune);     // find rune in char stream
+char   *utf8·findlast(char* s, rune); // find last rune in char stream
+
+int     utf8·canfit(char *, int); // XXX: odd function...
+
+int     utf8·isalpha(rune r);
+int     utf8·isdigit(rune r);
+int     utf8·isspace(rune r);
+int     utf8·istitle(rune r);
+int     utf8·ispunct(rune r);
+
+rune    utf8·toupper(rune r);
+rune    utf8·tolower(rune r);
+rune    utf8·totitle(rune r);
diff --git a/sys/base/mmap.c b/sys/base/mmap.c
index 6edfe28..7f3062d 100644
--- a/sys/base/mmap.c
+++ b/sys/base/mmap.c
@@ -15,19 +15,19 @@ mmap·open(byte *filename)
     s   = io·open(filename, "r");
     fd  = io·fd(s);
     err = io·stat(s, &st);
-    if (err) {
+    if(err){
         errorf("file stat: error code %d", err);
         goto ERROR;
     }
 
     buf = mmap(nil, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
-    if (!buf) {
+    if(buf == MAP_FAILED){ // mmap reports failure as MAP_FAILED ((void*)-1), never nil
         errorf("mmap: failed");
         goto ERROR;
     }
     // NOTE: posix systems require that reference kept to mmap file after fd is closed
     io·close(s);
-    return (mmap·Reader){.len=st.st_size, .buf=buf};
+    return (mmap·Reader){.len=st.st_size, .b=buf};
 
 ERROR:
     io·close(s);
@@ -37,7 +37,6 @@ ERROR:
 error
 mmap·close(mmap·Reader rdr)
 {
-    munmap(rdr.buf, rdr.len);
-
+    munmap(rdr.b, rdr.len);
     return 0;
 }
diff --git a/sys/cmd/dwm/dwm.h b/sys/cmd/dwm/dwm.h
index 9b616d1..afec1f2 100644
--- a/sys/cmd/dwm/dwm.h
+++ b/sys/cmd/dwm/dwm.h
@@ -2,7 +2,7 @@
 #pragma once
 #include <u.h>
 #include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
 
 #include <errno.h>
 #include <locale.h>
diff --git a/sys/cmd/dwm/rules.mk b/sys/cmd/dwm/rules.mk
index f7254cd..79c4548 100644
--- a/sys/cmd/dwm/rules.mk
+++ b/sys/cmd/dwm/rules.mk
@@ -22,7 +22,7 @@ $(BINS_$(d)): TCLIBS = \
     `$(PKG) --libs freetype2`  \
 	-lX11 -lXinerama -lXft -lX11-xcb -lxcb -lxcb-res
 
-$(BINS_$(d)): $(OBJS_$(d))  $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a
+$(BINS_$(d)): $(OBJS_$(d))  $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a
 	$(COMPLINK)
 
 include share/pop.mk
diff --git a/sys/cmd/ic/ic.c b/sys/cmd/ic/ic.c
index 19b868d..7fc37d8 100644
--- a/sys/cmd/ic/ic.c
+++ b/sys/cmd/ic/ic.c
@@ -1,6 +1,6 @@
 /* See LICENSE file for license details. */
 #include <u.h>
-#include <libn.h>
+#include <base.h>
 
 #include <sys/select.h>
 #include <sys/socket.h>
@@ -308,7 +308,7 @@ channel_rm(Channel *c)
 	free(c);
 }
 
-static 
+static
 void
 channel_leave(Channel *c)
 {
@@ -321,7 +321,7 @@ channel_leave(Channel *c)
 	channel_rm(c);
 }
 
-static 
+static
 void
 loginkey(int ircfd, const char *key)
 {
@@ -347,13 +347,13 @@ udsopen(const char *uds)
 	size_t len;
 	int fd;
 
-	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+	if((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 		fprintf(stderr, "%s: socket: %s\n", argv0, strerror(errno));
 		exit(1);
 	}
 
 	sun.sun_family = AF_UNIX;
-	if (strlcpy(sun.sun_path, uds, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
+	if(strlcpy(sun.sun_path, uds, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
 		fprintf(stderr, "%s: UNIX domain socket path truncation\n", argv0);
 		exit(1);
 	}
@@ -403,7 +403,7 @@ tcpopen(const char *host, const char *service)
 	return fd;
 }
 
-static 
+static
 int
 isnumeric(const char *s)
 {
@@ -464,7 +464,7 @@ proc_channels_privmsg(int ircfd, Channel *c, char *buf)
 	ewritestr(ircfd, msg);
 }
 
-static 
+static
 void
 proc_channels_input(int ircfd, Channel *c, char *buf)
 {
@@ -700,8 +700,8 @@ handle_channels_input(int ircfd, Channel *c)
 {
 	char buf[IRC_MSG_MAX];
 
-	if (read_line(c->fdin, buf, sizeof(buf)) == -1) {
-		if (channel_reopen(c) == -1)
+	if(read_line(c->fdin, buf, sizeof(buf)) == -1) {
+		if(channel_reopen(c) == -1)
 			channel_rm(c);
 		return;
 	}
@@ -755,7 +755,7 @@ run(int ircfd, const char *host)
 	int r, maxfd;
 
 	snprintf(ping_msg, sizeof(ping_msg), "PING %s\r\n", host);
-	while (isrunning) {
+	while(isrunning) {
 		maxfd = ircfd;
 		FD_ZERO(&rdset);
 		FD_SET(ircfd, &rdset);
@@ -767,12 +767,12 @@ run(int ircfd, const char *host)
 		memset(&tv, 0, sizeof(tv));
 		tv.tv_sec = 120;
 		r = select(maxfd + 1, &rdset, 0, 0, &tv);
-		if (r < 0) {
+		if(r < 0){
 			if (errno == EINTR)
 				continue;
 			fprintf(stderr, "%s: select: %s\n", argv0, strerror(errno));
 			exit(1);
-		} else if (r == 0) {
+		}else if(r == 0){
 			if (time(nil) - last_response >= PING_TIMEOUT) {
 				channel_print(channelmaster, "-!- ii shutting down: ping timeout");
 				exit(2); /* status code 2 for timeout */
@@ -780,11 +780,11 @@ run(int ircfd, const char *host)
 			ewritestr(ircfd, ping_msg);
 			continue;
 		}
-		if (FD_ISSET(ircfd, &rdset)) {
+		if(FD_ISSET(ircfd, &rdset)) {
 			handle_server_output(ircfd);
 			last_response = time(nil);
 		}
-		for (c = channels; c; c = tmp) {
+		for(c = channels; c; c = tmp) {
 			tmp = c->next;
 			if (FD_ISSET(c->fdin, &rdset))
 				handle_channels_input(ircfd, c);
@@ -803,7 +803,7 @@ main(int argc, char *argv[])
 	int ircfd, r;
 
 	/* use nickname and home dir of user by default */
-	if (!(spw = getpwuid(getuid()))) {
+	if(!(spw = getpwuid(getuid()))) {
 		fprintf(stderr, "%s: getpwuid: %s\n", argv0, strerror(errno));
 		exit(1);
 	}
@@ -837,10 +837,10 @@ main(int argc, char *argv[])
 		break;
 	} ARGEND
 
-	if (!*host)
+	if(!*host)
 		usage();
 
-	if (uds)
+	if(uds)
 		ircfd = udsopen(uds);
 	else
 		ircfd = tcpopen(host, service);
@@ -861,15 +861,15 @@ main(int argc, char *argv[])
 	create_dirtree(ircpath);
 
 	channelmaster = channel_add(""); /* master channel */
-	if (key)
+	if(key)
 		loginkey(ircfd, key);
 	loginuser(ircfd, host, fullname && *fullname ? fullname : nick);
 	setup();
 	run(ircfd, host);
-	if (channelmaster)
+	if(channelmaster)
 		channel_leave(channelmaster);
 
-	for (c = channels; c; c = tmp) {
+	for(c = channels; c; c = tmp) {
 		tmp = c->next;
 		channel_leave(c);
 	}
diff --git a/sys/cmd/ic/rules.mk b/sys/cmd/ic/rules.mk
index c373dfb..649c9ac 100644
--- a/sys/cmd/ic/rules.mk
+++ b/sys/cmd/ic/rules.mk
@@ -8,7 +8,7 @@ BINS_$(d) := $(d)/ic
 include share/paths.mk
 
 # Local rules
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/libn/libn.a
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/base/base.a
 	$(COMPLINK)
 
 include share/pop.mk
diff --git a/sys/cmd/ic/strlcpy.c b/sys/cmd/ic/strlcpy.c
index db0e6f0..5af7906 100644
--- a/sys/cmd/ic/strlcpy.c
+++ b/sys/cmd/ic/strlcpy.c
@@ -15,18 +15,18 @@ strlcpy(char *dst, const char *src, size_t siz)
 	size_t n = siz;
 
 	/* Copy as many bytes as will fit */
-	if (n != 0) {
-		while (--n != 0) {
-			if ((*d++ = *s++) == '\0')
+	if(n != 0) {
+		while(--n != 0) {
+			if((*d++ = *s++) == '\0')
 				break;
 		}
 	}
 	/* Not enough room in dst, add NUL and traverse rest of src */
-	if (n == 0) {
-		if (siz != 0)
+	if(n == 0) {
+		if(siz != 0)
 			*d = '\0'; /* NUL-terminate dst */
-		while (*s++)
+		while(*s++)
 			;
 	}
-	return(s - src - 1); /* count does not include NUL */
+	return s - src - 1; /* count does not include NUL */
 }
diff --git a/sys/cmd/menu/menu.h b/sys/cmd/menu/menu.h
index 8f1d6d7..f4345bb 100644
--- a/sys/cmd/menu/menu.h
+++ b/sys/cmd/menu/menu.h
@@ -1,7 +1,7 @@
 /* See LICENSE file for copyright and license details. */
 #include <u.h>
 #include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
 
 #include <time.h>
 #include <locale.h>
diff --git a/sys/cmd/rc/rc.h b/sys/cmd/rc/rc.h
index 83c39e9..9b415fc 100644
--- a/sys/cmd/rc/rc.h
+++ b/sys/cmd/rc/rc.h
@@ -1,7 +1,7 @@
 
 #include <u.h>
 #include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
 
 // -----------------------------------------------------------------------
 // types
diff --git a/sys/cmd/rc/rules.mk b/sys/cmd/rc/rules.mk
index ceec50b..a2fd058 100644
--- a/sys/cmd/rc/rules.mk
+++ b/sys/cmd/rc/rules.mk
@@ -25,7 +25,7 @@ $(d)/parse.h $(d)/parse.c: $(d)/syntax.y
 	yacc --header=$(<D)/parse.h --output=$(<D)/parse.c $(<)
 
 # Local rules
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a $(d)/parse.h
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a $(d)/parse.h
 	$(COMPLINK)
 
 include share/pop.mk
diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk
index 07adaf1..52a059b 100644
--- a/sys/cmd/rules.mk
+++ b/sys/cmd/rules.mk
@@ -17,6 +17,9 @@ include $(DIR)/rules.mk
 # DIR := $(d)/test
 # include $(DIR)/rules.mk
 
+DIR := $(d)/ic
+include $(DIR)/rules.mk
+
 DIR := $(d)/dwm
 include $(DIR)/rules.mk
 
diff --git a/sys/cmd/term/rules.mk b/sys/cmd/term/rules.mk
index ef307c1..4011839 100644
--- a/sys/cmd/term/rules.mk
+++ b/sys/cmd/term/rules.mk
@@ -2,7 +2,7 @@ include share/push.mk
 # Iterate through subdirectory tree
 
 # Local sources
-SRCS_$(d) := $(d)/term.c $(d)/x.c $(d)/hb.c
+SRCS_$(d) := $(d)/term.c $(d)/x.c #$(d)/hb.c
 BINS_$(d) := $(d)/term
 
 include share/paths.mk
@@ -18,7 +18,7 @@ $(BINS_$(d)): TCLIBS = \
     `$(PKG) --libs freetype2`  \
 	-lm -lrt -lX11 -lutil -lXft -lXrender -lharfbuzz
 
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a
 	$(COMPLINK)
 
 include share/pop.mk
diff --git a/sys/cmd/term/term.h b/sys/cmd/term/term.h
index 3ff2852..f779654 100644
--- a/sys/cmd/term/term.h
+++ b/sys/cmd/term/term.h
@@ -3,7 +3,7 @@
 
 #include <u.h>
 #include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
 
 #include <signal.h>
 #include <sys/ioctl.h>
diff --git a/sys/libunicode/canfit.c b/sys/libunicode/canfit.c
deleted file mode 100644
index 4579ab3..0000000
--- a/sys/libunicode/canfit.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "internal.h"
-
-/* returns 1 if string of length n is long enough to be decoded */
-int
-utf8·canfit(byte* s, int n)
-{
-    int  i;
-    rune c;
-
-    if(n <= 0)
-        return 0;
-
-    c = *(ubyte*)s;
-    if(c < TByte1)
-        return 1;
-
-    if(c < TByte3)
-        return n >= 2;
-    if(c < TByte4)
-        return n >= 3;
-
-    return n >= UTFmax;
-}
diff --git a/sys/libunicode/decode.c b/sys/libunicode/decode.c
deleted file mode 100644
index 01797f1..0000000
--- a/sys/libunicode/decode.c
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "internal.h"
-
-#define ACCEPT 0
-#define REJECT 12
-
-static uint8 decode[] = {
-    /*
-     * the first part of the table maps bytes to character classes that
-     * to reduce the size of the transition table and create bitmasks
-     */
-     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-     8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-
-    /*
-     * the second part is a transition table that maps a combination
-     * of a state of the automaton and a character class to a state
-     */
-     0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
-    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
-    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
-    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-    12,36,12,12,12,12,12,12,12,12,12,12,
-};
-
-int
-utf8·decode(char *s, rune *r)
-{
-    int   n;
-    rune  v;
-    uint8 b, t, x=ACCEPT;
-
-    b = ((uint8 *)s)[0];
-    t = decode[b];
-    v = (0xFF >> t) & b;
-    x = decode[256+x+t];
-
-    for(n=1; x > REJECT && n < UTFmax; n++){
-        b = ((uint8 *)s)[n];
-        t = decode[b];
-        v = (v << 6) | (b & TMask);
-        x = decode[256+x+t];
-    }
-
-    if(x != ACCEPT){
-        *r = RuneErr;
-        return 1;
-    }
-
-    *r = v;
-    return n;
-}
-
-#if 0
-int
-utf8·decode(byte *s, rune *r)
-{
-    int c[UTFmax], i;
-    rune l;
-
-    c[0] = *(ubyte*)(s);
-    if(c[0] < Tx){
-        *r = c[0];
-        return 1;
-    }
-
-    l = c[0];
-    for(i = 1; i < UTFmax; i++){
-        c[i]  = *(ubyte*)(s+i);
-        c[i] ^= Tx;
-        if(c[i] & Testx) goto bad;
-
-        l = (l << Bitx) | c[i];
-        if(c[0] < Tbyte(i + 2)){
-            l &= RuneX(i + 1);
-            if(i == 1){
-                if(c[0] < Tbyte(2) || l <= Rune1)
-                    goto bad;
-            }else if(l <= RuneX(i) || l > RuneMax)
-                goto bad;
-
-            if(i == 2 && SurrogateMin <= l && l <= SurrogateMax)
-                goto bad;
-
-            *r = l;
-            return i + 1;
-        }
-    }
-bad:
-    *r = RuneErr;
-    return 1;
-}
-#endif
diff --git a/sys/libunicode/decodeprev.c b/sys/libunicode/decodeprev.c
deleted file mode 100644
index 27dced6..0000000
--- a/sys/libunicode/decodeprev.c
+++ /dev/null
@@ -1,60 +0,0 @@
-#include "internal.h"
-
-#define ACCEPT 0
-#define REJECT 12
-
-static uint8 decode[] = {
-    /*
-     * the first part of the table maps bytes to character classes that
-     * to reduce the size of the transition table and create bitmasks.
-     */
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-         8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-    /*
-     * The second part is a transition table that maps a combination
-     * of a state of the automaton and a character class to a state.
-     */
-    //   0  1  2  3  4  5  6  7  8  9 10 11
-         0,24,12,12,12,12,12,24,12,24,12,12,
-         0,24,12,12,12,12,12,24,12,24,12,12,
-        12,36, 0,12,12,12,12,48,12,36,12,12,
-        12,60,12, 0, 0,12,12,72,12,72,12,12,
-        12,60,12, 0,12,12,12,72,12,72, 0,12,
-        12,12,12,12,12, 0, 0,12,12,12,12,12,
-        12,12,12,12,12,12,12,12,12,12,12, 0
-};
-
-int
-utf8·decodeprev(byte *s, rune *r)
-{
-    int   n;
-    rune  v;
-    uint8 b, t, d, x=ACCEPT;
-
-    v=0, n=0, d=0;
-nextbyte:
-    b = ((uint8 *)s)[-n++];
-    t = decode[b];
-    x = decode[256+x+t];
-
-    if(x > REJECT && n < UTFmax){
-        v = v | ((b & TMask) << d);
-        d += 6;
-        goto nextbyte;
-    }
-
-    if(x != ACCEPT)
-        *r = RuneErr;
-    else{
-        v |= (((0xFFu >> t) & b) << d);
-        *r = v;
-    }
-
-    return n;
-}
diff --git a/sys/libunicode/encode.c b/sys/libunicode/encode.c
deleted file mode 100644
index fa7c93e..0000000
--- a/sys/libunicode/encode.c
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "internal.h"
-
-int
-utf8·encode(rune *r, byte *s)
-{
-    rune c;
-
-    c = *r;
-    if(c < Rune1Byte){ // 7 bits
-        s[0] = (uint8)c;
-        return 1;
-    }
-
-    if(c < Rune2Byte){ // 11 bits
-        s[0] = TByte1 | (c >> 6);
-        s[1] = Tx     | (c & TMask);
-        return 2;
-    }
-
-    if(c < Rune3Byte){ // 16 bits
-        s[0] = TByte2 | ((c >> 12));
-        s[1] = Tx     | ((c >> 6) & TMask);
-        s[2] = Tx     | ((c)      & TMask);
-        return 3;
-    }
-
-    // 22 bits
-    if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax))
-        c = RuneErr;
-
-    s[0] = TByte3 | ((c >> 18));
-    s[1] = Tx     | ((c >> 12) & TMask);
-    s[2] = Tx     | ((c >> 6)  & TMask);
-    s[3] = Tx     | ((c)       & TMask);
-
-    return 4;
-}
-
-#if 0
-int
-utf8·encode(rune* r, byte* s)
-{
-    int i, j;
-    rune c;
-
-    c = *r;
-    if(c <= Rune1) {
-        s[0] = c;
-        return 1;
-    }
-
-    for(i = 2; i < UTFmax + 1; i++){
-        if(i == 3){
-            if(c > RuneMax)
-                c = RuneErr;
-            if(SurrogateMin <= c && c <= SurrogateMax)
-                c = RuneErr;
-        }
-        if(c <= RuneX(i) || i == UTFmax) {
-            s[0] = Tbyte(i) |  (c >> (i - 1)*Bitx);
-            for(j = 1; j < i; j++)
-                s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
-            return i;
-        }
-    }
-
-    return UTFmax;
-}
-#endif
diff --git a/sys/libunicode/find.c b/sys/libunicode/find.c
deleted file mode 100644
index d75feb8..0000000
--- a/sys/libunicode/find.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "internal.h"
-
-byte*
-utf8·find(byte* s, rune c)
-{
-    long c1;
-    rune r;
-    int  n;
-
-    if(c < Tx)
-        return strchr(s, c);
-
-    for(;;){
-        c1 = *(ubyte*)s;
-        if(c1 < Tx){
-            if(c1 == 0) return nil;
-            if(c1 == c) return s;
-            s++;
-            continue;
-        }
-
-        n = utf8·decode(s, &r);
-
-        if(r == c)
-            return s;
-
-        s += n;
-    }
-
-    return nil;
-}
diff --git a/sys/libunicode/findlast.c b/sys/libunicode/findlast.c
deleted file mode 100644
index ab25ab2..0000000
--- a/sys/libunicode/findlast.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "internal.h"
-
-byte*
-utf8·findlast(byte* s, rune c)
-{
-    long c1;
-    rune r;
-    byte *l;
-
-    if(c < Tx)
-        return strrchr(s, c);
-
-    l = nil;
-    for(;;){
-        c1 = *(ubyte*)s;
-        if(c1 < Tx){
-            if(c1 == 0) return l;
-            if(c1 == c) l = s;
-            s++;
-            continue;
-        }
-
-        c1 = utf8·decode(s, &r);
-
-        if(r == c)
-            l = s;
-
-        s += c1;
-    }
-
-    return nil;
-}
diff --git a/sys/libunicode/internal.h b/sys/libunicode/internal.h
deleted file mode 100644
index df69310..0000000
--- a/sys/libunicode/internal.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#pragma once
-
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
-
-/*
- * NOTE: we use the preprocessor to ensure we have unsigned constants.
- * UTF-8 code:
- * 1 byte:
- * 0xxxxxxx
- * 2 byte:
- * 110xxxxx 10xxxxxx
- * 3 byte:
- * 1110xxxx 10xxxxxx 10xxxxxx
- * 4 byte:
- * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- */
-
-#define Tx     0x80u // 0b10000000 transfer header
-#define TMask  0x3Fu // 0b00111111 transfer mask
-
-#define TByte1 0xC0u // 0b11000000
-#define TByte2 0xE0u // 0b11100000
-#define TByte3 0xF0u // 0b11110000
-#define TByte4 0xF8u // 0b11111000
-
-#define RuneMask  0x1FFFFFu
-
-#define Rune1Byte 0x000080u  // 1 << 8   (1 byte)
-#define Rune2Byte 0x001000u  // 1 << 12  (2 bytes)
-#define Rune3Byte 0x020000u  // 1 << 17  (3 bytes)
-#define Rune4Byte 0x400000u  // 1 << 22  (4 bytes)
-
-
-/* UTF-16 nonsense */
-#define RuneSurrogateMin 0x0D8000
-#define RuneSurrogateMax 0x0D8FFF
diff --git a/sys/libunicode/len.c b/sys/libunicode/len.c
deleted file mode 100644
index 8fbd679..0000000
--- a/sys/libunicode/len.c
+++ /dev/null
@@ -1,21 +0,0 @@
-#include "internal.h"
-
-int
-utf8·len(char *s)
-{
-    int c;
-    long n;
-    rune r;
-
-    n = 0;
-    for(;;){
-        c = *(uchar*)s;
-        if(c < Tx){
-            if(c == 0)
-                return n;
-            s++;
-        }else
-            s += utf8·decode(s, &r);
-        n++;
-    }
-}
diff --git a/sys/libunicode/rules.mk b/sys/libunicode/rules.mk
deleted file mode 100644
index 1d714fe..0000000
--- a/sys/libunicode/rules.mk
+++ /dev/null
@@ -1,76 +0,0 @@
-include share/push.mk
-
-UNICODE = 14.0.0
-
-SRCS_$(d) := \
-	$(d)/encode.c \
-	$(d)/decode.c \
-	$(d)/decodeprev.c \
-	$(d)/find.c \
-	$(d)/findlast.c \
-	$(d)/canfit.c \
-	$(d)/runelen.c \
-	$(d)/len.c \
-	$(d)/runetype-$(UNICODE).c \
-	$(d)/runewidth-$(UNICODE).c
-
-LIBS_$(d) := $(d)/libunicode.a
-
-include share/paths.mk
-
-# ========================================================================
-# table generation
-
-$(d)/vendor/common.o: $(d)/vendor/common.c
-	$(COMPILE)
-
-# rune categories
-$(d)/vendor/UnicodeData-$(UNICODE).txt:
-	@echo "GET	UnicodeData.txt";\
-	curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@
-
-$(d)/vendor/mkrunetype: $(d)/vendor/mkrunetype.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
-	$(COMPLINK)
-
-GENS += $(d)/vendor/mkrunetype
-
-$(d)/runetype-$(UNICODE).c: $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/mkrunetype
-	@$(dir $@)vendor/mkrunetype $< > $@
-
-# rune widths
-$(d)/vendor/EastAsianWidth-$(UNICODE).txt:
-	@echo "GET	EastAsianWidth.txt";\
-	curl https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@
-
-$(d)/vendor/EmojiData-$(UNICODE).txt:
-	@echo "GET	EmojiData.txt";\
-	curl https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@
-
-$(d)/vendor/mkrunewidth: $(d)/vendor/mkrunewidth.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
-	$(COMPLINK)
-
-GENS += $(d)/vendor/mkrunewidth
-
-$(d)/runewidth-$(UNICODE).c: $(d)/vendor/mkrunewidth $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/EastAsianWidth-$(UNICODE).txt $(d)/vendor/EmojiData-$(UNICODE).txt
-	@$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@
-
-# grapheme boundaries
-$(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt:
-	@echo "GET	GraphemeBreakProperty.txt";\
-	curl https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@
-
-$(d)/vendor/mkgraphemedata: $(d)/vendor/mkgraphemedata.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
-	$(COMPLINK)
-
-$(d)/graphemedata-$(UNICODE).c: $(d)/vendor/mkgraphemedata $(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt
-	$^ > $@
-
-GENS += $(d)/vendor/mkgraphemedata
-
-# ========================================================================
-# normal operations
-
-$(LIBS_$(d)): $(OBJS_$(d))
-	$(ARCHIVE)
-
-include share/pop.mk
diff --git a/sys/libunicode/runelen.c b/sys/libunicode/runelen.c
deleted file mode 100644
index dac7f15..0000000
--- a/sys/libunicode/runelen.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "internal.h"
-
-int
-utf8·runelen(rune r)
-{
-    byte s[10];
-    return utf8·encode(&r, s);
-}
diff --git a/sys/libunicode/vendor/common.c b/sys/libunicode/vendor/common.c
deleted file mode 100644
index 6b5d1b3..0000000
--- a/sys/libunicode/vendor/common.c
+++ /dev/null
@@ -1,220 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// input functions
-
-int
-parse(io·Stream *io, int nfield, char **field, int len, char *line)
-{
-    int n;
-    if((n=io·readln(io, len, line)) <= 0)
-        return ParseEOF;
-
-    if(n == len)
-        panicf("line too long");
-
-    if(line[n-1] != '\n')
-        panicf("invalid line: expected '\n', found '%c'", line[n]);
-
-    line[n-1] = 0;
-
-    if(line[0] == '#' || line[0] == 0)
-        return ParseSkip;
-
-    /* tokenize line into fields */
-    n = 0;
-    field[n] = line;
-    while(*line){
-        if(*line == ';'){
-            *line = 0;
-            field[++n] = line+1;
-        }
-        line++;
-    }
-
-    if(n != nfield-1)
-        panicf("expected %d number of fields, got %d: %s", nfield, n, line);
-
-    return ParseOK;
-}
-
-int
-codepoint(char *s)
-{
-    int c, b;
-
-    c = 0;
-    while((b=*s++)){
-        c <<= 4;
-        if(b >= '0' && b <= '9')
-            c += b - '0';
-        else if(b >= 'A' && b <= 'F')
-            c += b - 'A' + 10;
-        else
-            panicf("bad codepoint char '%c'", b);
-    }
-
-    return c;
-}
-
-void
-codepointrange(io·Stream *utf8, char *field[NumFields], int *start,  int *stop)
-{
-    int e, c;
-    char *other[NumFields], line[1024];
-
-    // XXX: the stop variable passes in the previous stopping character
-    e = *stop;
-    c = codepoint(field[Fcode]);
-
-    if(c >= NumRunes)
-        panicf("unexpected large codepoint %x", c);
-    if(c <= e)
-        panicf("bad code sequence: %x then %x", e, c);
-    e = c;
-
-    if(strstr(field[Fname], ", First>") != nil){
-        if(!parse(utf8, arrlen(other), other, arrlen(line), line))
-            panicf("range start at end of file");
-        if(strstr(other[Fname], ", Last>") == nil)
-            panicf("range start not followed by range end");
-
-        e = codepoint(other[Fcode]);
-
-        if(e <= c)
-            panicf("bad code sequence: %x then %x", c, e);
-        if(strcmp(field[Fcategory], other[Fcategory]) != 0)
-            panicf("range with mismatched category");
-    }
-
-    *start = c;
-    *stop  = e;
-}
-
-// -----------------------------------------------------------------------
-// output functions
-
-void
-putsearch(void)
-{
-    puts(
-        "#include <u.h>\n"
-        "#include <libunicode.h>\n"
-        "\n"
-        "static\n"
-        "rune*\n"
-        "rangesearch(rune c, rune *t, int n, int ne)\n"
-        "{\n"
-        "   rune *p;\n"
-        "   int m;\n"
-        "   while(n > 1) {\n"
-        "   m = n >> 1;\n"
-        "   p = t + m*ne;\n"
-        "   if(c >= p[0]){\n"
-        "       t = p;\n"
-        "       n = n-m;\n"
-        "   }else\n"
-        "       n = m;\n"
-        "   }\n"
-        "   if(n && c >= t[0])\n"
-        "       return t;\n"
-        "   return 0;\n"
-        "}\n"
-    );
-
-}
-
-int
-putrange(char *ident, char *prop, int force)
-{
-    int l, r, start;
-
-    start = 0;
-    for(l = 0; l < NumRunes;) {
-        if(!prop[l]){
-            l++;
-            continue;
-        }
-
-        for(r = l+1; r < NumRunes; r++){
-            if(!prop[r])
-                break;
-            prop[r] = 0;
-        }
-
-        if(force || r > l + 1){
-            if(!start){
-                printf("static rune %s[] = {\n", ident);
-                start = 1;
-            }
-            prop[l] = 0;
-            printf("\t0x%.4x, 0x%.4x,\n", l, r-1);
-        }
-
-        l = r;
-    }
-
-    if(start)
-        printf("};\n\n");
-
-    return start;
-}
-
-int
-putpair(char *ident, char *prop)
-{
-    int l, r, start;
-
-    start = 0;
-    for(l=0; l+2 < NumRunes; ){
-        if(!prop[l]){
-            l++;
-            continue;
-        }
-
-        for(r = l + 2; r < NumRunes; r += 2){
-            if(!prop[r])
-                break;
-            prop[r] = 0;
-        }
-
-        if(r != l + 2){
-            if(!start){
-                printf("static rune %s[] = {\n", ident);
-                start = 1;
-            }
-            prop[l] = 0;
-            printf("\t0x%.4x, 0x%.4x,\n", l, r - 2);
-        }
-
-        l = r;
-    }
-
-    if(start)
-        printf("};\n\n");
-    return start;
-}
-
-int
-putsingle(char *ident, char *prop)
-{
-    int i, start;
-
-    start = 0;
-    for(i = 0; i < NumRunes; i++) {
-        if(!prop[i])
-            continue;
-
-        if(!start){
-            printf("static rune %s[] = {\n", ident);
-            start = 1;
-        }
-        prop[i] = 0;
-        printf("\t0x%.4x,\n", i);
-    }
-
-    if(start)
-        printf("};\n\n");
-
-    return start;
-}
diff --git a/sys/libunicode/vendor/common.h b/sys/libunicode/vendor/common.h
deleted file mode 100644
index 17cb113..0000000
--- a/sys/libunicode/vendor/common.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
-
-enum
-{
-    // Fields inside UnicodeData.txt
-    Fcode,
-    Fname,
-    Fcategory,
-    Fcombine,
-    Fbidir,
-    Fdecomp,
-    Fdecimal,
-    Fdigit,
-    Fnumeric,
-    Fmirror,
-    Foldname,
-    Fcomment,
-    Fupper,
-    Flower,
-    Ftitle,
-
-    NumFields,
-    NumRunes = 1 << 21,
-};
-
-/* input functions */
-enum
-{
-    ParseEOF,
-    ParseOK,
-    ParseSkip,
-};
-
-int  parse(io·Stream *io, int nfield, char **field, int len, char *line);
-int  codepoint(char *s);
-void codepointrange(io·Stream *utf8, char *field[NumFields], int *start,  int *stop);
-
-/* output functions */
-void putsearch(void);
-int  putrange(char *ident, char *prop, int force);
-int  putpair(char *ident, char *prop);
-int  putsingle(char *ident, char *prop);
diff --git a/sys/libunicode/vendor/mkgraphemedata.c b/sys/libunicode/vendor/mkgraphemedata.c
deleted file mode 100644
index f79d851..0000000
--- a/sys/libunicode/vendor/mkgraphemedata.c
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
-    fprintf(stderr, "usage: mkgraphemedata <GraphemeBreakProperty.txt>\n");
-    exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
-    io·Stream *utf8;
-    char line[1024];
-
-    ARGBEGIN{
-    }ARGEND;
-}
diff --git a/sys/libunicode/vendor/mkrunetype.c b/sys/libunicode/vendor/mkrunetype.c
deleted file mode 100644
index 9f939f4..0000000
--- a/sys/libunicode/vendor/mkrunetype.c
+++ /dev/null
@@ -1,388 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// globals
-
-#define OFFSET (1 << 20)
-#define DELTA(mapx, x)  ((1 << 20) + (mapx) - (x))
-
-// TODO: use bitarrays. will reduce executable size 8x
-struct Table
-{
-    /* properties */
-    char isspace[NumRunes];
-    char isalpha[NumRunes];
-    char ismark[NumRunes];
-    char isdigit[NumRunes];
-    char isupper[NumRunes];
-    char islower[NumRunes];
-    char istitle[NumRunes];
-    char ispunct[NumRunes];
-    char issymbl[NumRunes];
-    char iscntrl[NumRunes];
-
-    char combine[NumRunes];
-
-    /* transformations */
-    int  toupper[NumRunes];
-    int  tolower[NumRunes];
-    int  totitle[NumRunes];
-};
-
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-static
-int
-isrange(char *label, char *prop, int force)
-{
-    char ident[128];
-    if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident))
-        panicf("out of identifier space\n");
-
-    return putrange(ident, prop, force);
-}
-
-static
-int
-ispair(char *label, char *prop)
-{
-    char ident[128];
-    if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident))
-        panicf("out of identifier space\n");
-
-    return putpair(ident, prop);
-}
-
-static
-int
-issingle(char *label, char *prop)
-{
-    char ident[128];
-    if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident))
-        panicf("out of identifier space\n");
-
-    return putsingle(ident, prop);
-}
-
-static
-void
-makeis(char *label, char *table, int pairs, int onlyranges)
-{
-    int hasr, hasp=0, hass=0;
-
-    hasr = isrange(label, table, onlyranges);
-    if(!onlyranges && pairs)
-        hasp = ispair(label, table);
-    if(!onlyranges)
-        hass = issingle(label, table);
-
-    printf(
-        "int\n"
-        "utf8·is%s(rune c)\n"
-        "{\n"
-        "    rune *p;\n"
-        "\n",
-        label);
-
-    if(hasr){
-        printf(
-            "   p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
-            "   if(p && c >= p[0] && c <= p[1])\n"
-            "       return 1;\n",
-            label, label);
-    }
-
-    if(hasp){
-        printf(
-            "   p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
-            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-            "       return 1;\n",
-            label, label);
-    }
-
-    if(hass)
-        printf(
-            "   p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
-            "   if(p && c == p[0])\n"
-            "       return 1;\n",
-            label, label);
-
-    printf(
-        "   return 0;\n"
-        "}\n"
-        "\n");
-}
-
-static
-int
-torange(char *label, int *index, int force)
-{
-    int l, r, d, start = 0;
-
-    for(l = 0; l < NumRunes; ){
-        if(index[l] == l){
-            l++;
-            continue;
-        }
-
-        d = DELTA(index[l], l);
-        if(d != (rune)d)
-            panicf("bad map delta %d", d);
-
-        for(r = l+1; r < NumRunes; r++){
-            if(DELTA(index[r], r) != d)
-                break;
-            index[r] = r;
-        }
-
-        if(force || r != l + 1){
-            if(!start){
-                printf("static rune to%s_range[] = {\n", label);
-                start = 1;
-            }
-            index[l] = l;
-            printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d);
-        }
-        l = r;
-    }
-    if(start)
-        printf("};\n\n");
-
-    return start;
-}
-
-static
-int
-topair(char *label, int *index)
-{
-    int l, r, d, start = 0;
-
-    for(l = 0; l + 2 < NumRunes; ){
-        if(index[l] == l){
-            l++;
-            continue;
-        }
-
-        d = DELTA(index[l], l);
-        if(d != (rune)d)
-            panicf("bad delta %d", d);
-
-        for(r = l+2; r < NumRunes; r += 2){
-            if(DELTA(index[r], r) != d)
-                break;
-            index[r] = r;
-        }
-
-        if(r > l+2){
-            if(!start){
-                printf("static rune to%s_pair[] = {\n", label);
-                start = 1;
-            }
-            index[l] = l;
-            printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d);
-        }
-
-        l = r;
-    }
-    if(start)
-        printf("};\n\n");
-
-    return start;
-}
-
-static
-int
-tosingle(char *label, int *index)
-{
-    int i, d, start = 0;
-
-    for(i=0; i < NumRunes; i++) {
-        if(index[i] == i)
-            continue;
-
-        d = DELTA(index[i], i);
-        if(d != (rune)d)
-            panicf("bad map delta %d", d);
-
-        if(!start){
-            printf("static rune to%s_single[] = {\n", label);
-            start = 1;
-        }
-        index[i] = i;
-        printf("\t0x%.4x, %d,\n", i, d);
-    }
-    if(start)
-        printf("};\n\n");
-
-    return start;
-}
-
-static
-void
-mkto(char *label, int *index, int pairs, int onlyrange)
-{
-    int hasr, hasp=0, hass=0;
-
-    hasr = torange(label, index, !onlyrange);
-    if(!onlyrange && pairs)
-        hasp = topair(label, index);
-    if(!onlyrange)
-        hass = tosingle(label, index);
-
-    printf(
-        "rune\n"
-        "utf8·to%s(rune c)\n"
-        "{\n"
-        "	rune *p;\n"
-        "\n",
-        label);
-
-    if(hasr)
-        printf(
-            "   p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
-            "   if(p && c >= p[0] && c <= p[1])\n"
-            "       return c + p[2] - %d;\n",
-            label, label, OFFSET);
-
-    if(hasp)
-        printf(
-            "   p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
-            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-            "       return c + p[2] - %d;\n",
-            label, label, OFFSET);
-
-    if(hass)
-        printf(
-            "   p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
-            "   if(p && c == p[0])\n"
-            "       return c + p[1] - %d;\n",
-            label, label, OFFSET);
-
-
-    printf(
-        "   return c;\n"
-        "}\n"
-        "\n"
-    );
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
-    fprintf(stderr, "usage: mkrunetype <UnicodeData.txt>\n");
-    exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
-    int i, sc, c, ec;
-    io·Stream *utf8;
-    char *prop, *field[NumFields], line[1024];
-
-    ARGBEGIN{
-    }ARGEND;
-
-    if(argc != 1)
-        usage();
-
-    if(!(utf8 = io·open(argv[0], "r")))
-        panicf("can't open %s\n", argv[0]);
-
-    /* by default each character maps to itself */
-    for(i = 0; i < NumRunes; i++) {
-        table.toupper[i] = i;
-        table.tolower[i] = i;
-        table.totitle[i] = i;
-    }
-
-    /* ensure all C local white space characters pass */
-    table.isspace['\t'] = 1;
-    table.isspace['\n'] = 1;
-    table.isspace['\r'] = 1;
-    table.isspace['\f'] = 1;
-    table.isspace['\v'] = 1;
-    table.isspace[0x85] = 1;
-
-    ec = -1;
-    // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
-    while(parse(utf8, arrlen(field), field, arrlen(line), line)){
-        /* parse unicode range */
-        codepointrange(utf8, field, &sc, &ec);
-        prop = field[Fcategory];
-
-        for(c = sc; c <= ec; c++){
-            /* grab properties */
-            switch(prop[0]){
-            case 'L':
-                table.isalpha[c] = 1;
-                switch(prop[1]){
-                case 'u': table.isupper[c] = 1; break;
-                case 'l': table.islower[c] = 1; break;
-                case 't': table.istitle[c] = 1; break;
-                case 'm': break; // modifier letters
-                case 'o': break; // ideograph letters
-                default:
-                    goto badproperty;
-                }
-                break;
-
-            case 'Z':
-                table.isspace[c] = 1;
-                break;
-
-            case 'M':
-                table.ismark[c] = 1;
-                break;
-
-            case 'N':
-                table.isdigit[c] = 1;
-                break;
-
-            case 'P':
-                table.ispunct[c] = 1;
-                break;
-
-            case 'S':
-                table.issymbl[c] = 1;
-                break;
-
-            case 'C':
-                table.iscntrl[c] = 1;
-                break;
-
-            default: badproperty:
-                panicf("unrecognized category '%s'", prop);
-            }
-            /* grab transformations */
-            if(*field[Fupper])
-                table.toupper[c] = codepoint(field[Fupper]);
-            if(*field[Flower])
-                table.tolower[c] = codepoint(field[Flower]);
-            if(*field[Ftitle])
-                table.totitle[c] = codepoint(field[Ftitle]);
-        }
-    }
-    io·close(utf8);
-
-    putsearch();
-
-    makeis("space", table.isspace, 0, 1);
-    makeis("digit", table.isdigit, 0, 1);
-    makeis("alpha", table.isalpha, 0, 0);
-    makeis("upper", table.isupper, 1, 0);
-    makeis("lower", table.islower, 1, 0);
-    makeis("title", table.istitle, 1, 0);
-    makeis("punct", table.ispunct, 1, 0);
-
-    mkto("upper", table.toupper, 1, 0);
-    mkto("lower", table.tolower, 1, 0);
-    mkto("title", table.totitle, 1, 0);
-}
diff --git a/sys/libunicode/vendor/mkrunewidth.c b/sys/libunicode/vendor/mkrunewidth.c
deleted file mode 100644
index 14e6973..0000000
--- a/sys/libunicode/vendor/mkrunewidth.c
+++ /dev/null
@@ -1,325 +0,0 @@
-#include "common.h"
-
-/*
- * inspired by design choices in utf8proc/charwidths.jl
- * all widths default to 1 unless they fall within the categories:
- *      1. Mn 2. Mc 3. Me 4. Zl
- *      5. Zp 6. Cc 7. Cf 8. Cs
- * these default to zero width
- */
-enum
-{
-    /* width ? */
-    WidthNeutral,   /* (N) practially treated like narrow but unclear ... */
-    WidthAmbiguous, /* (A) sometimes wide and sometimes not... */
-    /* width 1 */
-    WidthHalf,      /* (H) = to narrow (compatability equivalent)  */
-    WidthNarrow,    /* (Na) ASCII width */
-    /* width 2 */
-    WidthWide,      /* (W) 2x width */
-    WidthFull,      /* (F) = to wide (compatability equivalent) */
-};
-
-struct Table
-{
-    char width[3][NumRunes];
-};
-
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-static
-void
-parse_category(char *path)
-{
-    int sc, c, ec, w;
-    io·Stream *utf8;
-    char *prop, *field[NumFields], line[1024];
-
-    if(!(utf8 = io·open(path, "r")))
-        panicf("can't open %s\n", path);
-
-    // NOTE: we don't check for comments here
-    ec = -1;
-    while(parse(utf8, arrlen(field), field, arrlen(line), line)){
-        codepointrange(utf8, field, &sc, &ec);
-
-        prop = field[Fcategory];
-
-        switch(prop[0]){
-        case 'M':
-            switch(prop[1]){
-            case 'n': case 'c': case 'e':
-                w = 0;
-                break;
-            default:
-                w = 1;
-                break;
-            }
-            break;
-        case 'Z':
-            switch(prop[1]){
-                case 'l': case 'p':
-                    w = 0;
-                    break;
-                default:
-                    w = 1;
-                    break;
-            }
-            break;
-        case 'C':
-            switch(prop[1]){
-                case 'c': case 'f': case 's':
-                    w = 0;
-                    break;
-                default:
-                    w = 1;
-                    break;
-            }
-        default:
-            w = 1;
-        }
-
-        for(c = sc; c <= ec; c++)
-            table.width[w][c] = 1;
-    }
-
-    io·close(utf8);
-}
-
-static
-void
-coderange(char *field, int *l, int *r)
-{
-    char *s;
-
-    if(!(s = strstr(field, "..")))
-        *l=*r=codepoint(field);
-    else{
-        *s++ = 0, *s++ = 0;
-        *l=codepoint(field);
-        *r=codepoint(s);
-    }
-}
-
-static
-void
-parse_eawidths(char *path)
-{
-    int at, w;
-    int l, c, r;
-    io·Stream *utf8;
-    char *field[2], line[1024];
-
-    utf8 = io·open(path, "r");
-    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
-        if(at == ParseSkip)
-            continue;
-
-        switch(field[1][0]){
-        case 'A': continue;
-        case 'N':
-            if(field[1][1] != 'a')
-                continue;
-        /* fallthrough */
-        case 'H': w = 1; break;
-
-        case 'W': /* fallthrough */
-        case 'F': w = 2; break;
-
-        default:
-            panicf("malformed east asian width class: %s\n", field[1]);
-        }
-
-        coderange(field[0], &l, &r);
-
-        for(c=l; c <= r; c++){
-            /* ensure it only exists in one table */
-            table.width[w][c]       = 1;
-            table.width[(w+1)%3][c] = 0;
-            table.width[(w+2)%3][c] = 0;
-        }
-    }
-    io·close(utf8);
-}
-
-static
-void
-parse_emoji(char *path)
-{
-    int at, w;
-    int l, c, r;
-    io·Stream *utf8;
-    char *s, *field[2], line[1024];
-
-    utf8 = io·open(path, "r");
-    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
-        if(at == ParseSkip)
-            continue;
-
-        /* only override emoji presentation */
-        if(!strstr(field[1], "Emoji_Presentation"))
-            continue;
-
-        /* trim trailing space */
-        for(s=field[0]; *s; s++){
-            if(*s == ' ')
-                *s = 0;
-        }
-
-        coderange(field[0], &l, &r);
-
-        for(c=l; c <= r; c++){
-            table.width[0][c] = 0;
-            table.width[1][c] = 0;
-            table.width[2][c] = 1;
-        }
-    }
-
-    io·close(utf8);
-}
-
-/* output functions */
-static
-void
-maketable(char *label, char *table, int pairs, int onlyranges)
-{
-    int r, p=0, s=0;
-    char ident[3][128];
-
-    enum
-    {
-        Irange,
-        Ipair,
-        Isingle,
-    };
-
-    /* ranges */
-    if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange]))
-        panicf("out of identifier space\n");
-    r = putrange(ident[Irange], table, onlyranges);
-
-    if(!onlyranges && pairs){
-        if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair]))
-            panicf("out of identifier space\n");
-        p = putpair(ident[Ipair], table);
-    }
-    if(!onlyranges){
-        if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle]))
-            panicf("out of identifier space\n");
-
-        s = putsingle(ident[Isingle], table);
-    }
-
-    printf(
-        "static int\n"
-        "is%s(rune c)\n"
-        "{\n"
-        "    rune *p;\n"
-        "\n",
-        label);
-
-    if(r){
-        printf(
-            "   p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
-            "   if(p && c >= p[0] && c <= p[1])\n"
-            "       return 1;\n",
-            ident[Irange], ident[Irange]);
-    }
-
-    if(p){
-        printf(
-            "   p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
-            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-            "       return 1;\n",
-            ident[Ipair], ident[Ipair]);
-    }
-
-    if(s)
-        printf(
-            "   p = rangesearch(c, %s, arrlen(%s), 1);\n"
-            "   if(p && c == p[0])\n"
-            "       return 1;\n",
-            ident[Isingle], ident[Isingle]);
-
-    printf(
-        "   return 0;\n"
-        "}\n"
-        "\n");
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
-    fprintf(stderr, "usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n");
-    exit(1);
-}
-
-#define SETW0(c) \
-    table.width[0][(c)] = 1, \
-    table.width[1][(c)] = 0, \
-    table.width[2][(c)] = 0;
-
-#define SETW1(c) \
-    table.width[0][(c)] = 0, \
-    table.width[1][(c)] = 1, \
-    table.width[2][(c)] = 0;
-
-#define SETW2(c) \
-    table.width[0][(c)] = 0, \
-    table.width[1][(c)] = 0, \
-    table.width[2][(c)] = 1;
-
-
-int
-main(int argc, char *argv[])
-{
-    int c;
-
-    ARGBEGIN{
-    }ARGEND;
-
-    if(argc != 3)
-        usage();
-
-    parse_category(*argv++);
-    parse_eawidths(*argv++);
-    parse_emoji(*argv);
-
-    /* overrides */
-    SETW0(0x2028);
-    SETW0(0x2029);
-
-    SETW1(0x00AD);
-
-    /* simple checking */
-    for(c=0; c<NumRunes; c++){
-        if(table.width[0][c] + table.width[1][c] + table.width[2][c] > 1)
-            panicf("improper table state");
-    }
-
-    putsearch();
-
-    maketable("width0", table.width[0], 1, 0);
-    maketable("width1", table.width[1], 1, 0);
-    maketable("width2", table.width[2], 1, 0);
-
-    puts(
-        "\n"
-        "int\n"
-        "utf8·runewidth(rune c)\n"
-        "{\n"
-        "   if(iswidth1(c))\n"
-        "       return 1;\n"
-        "   if(iswidth2(c))\n"
-        "       return 2;\n"
-        "   return 0;\n"
-        "}"
-    );
-}
diff --git a/sys/libutf/canfit.c b/sys/libutf/canfit.c
new file mode 100644
index 0000000..4579ab3
--- /dev/null
+++ b/sys/libutf/canfit.c
@@ -0,0 +1,23 @@
+#include "internal.h"
+
+/*
+ * returns 1 if the n bytes at s can hold the whole sequence announced by
+ * the lead byte s[0]; ascii and stray continuation bytes need only 1 byte
+ */
+int
+utf8·canfit(byte* s, int n)
+{
+    rune c;
+
+    if(n <= 0)
+        return 0;
+
+    c = *(ubyte*)s;
+    if(c < TByte1)          /* 0x00-0xBF: stands alone */
+        return 1;
+    if(c < TByte2)          /* 0xC0-0xDF: 2 byte lead */
+        return n >= 2;
+    if(c < TByte3)          /* 0xE0-0xEF: 3 byte lead */
+        return n >= 3;
+    return n >= UTFmax;     /* 0xF0 and up: longest form */
+}
diff --git a/sys/libutf/decode.c b/sys/libutf/decode.c
new file mode 100644
index 0000000..01797f1
--- /dev/null
+++ b/sys/libutf/decode.c
@@ -0,0 +1,98 @@
+#include "internal.h"
+
+#define ACCEPT 0
+#define REJECT 12
+
+static uint8 decode[] = {
+    /*
+     * the first 256 entries map each byte to a character class; classes
+     * shrink the transition table and double as lead byte mask shifts
+     */
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+     8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+    /*
+     * the rest is the transition table, read as decode[256 + state + class];
+     * states are stored as multiples of 12: 0 accepts, 12 rejects
+     */
+     0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+    12,36,12,12,12,12,12,12,12,12,12,12,
+};
+/* decodes one rune from the utf-8 bytes at s into *r; returns the bytes consumed, or 1 with *r = RuneErr if malformed */
+int
+utf8·decode(char *s, rune *r)
+{
+    int   n;
+    rune  v;
+    uint8 b, t, x=ACCEPT;
+
+    b = ((uint8 *)s)[0];
+    t = decode[b];          /* character class of the lead byte */
+    v = (0xFF >> t) & b;    /* the class doubles as a shift that masks off the marker bits */
+    x = decode[256+x+t];
+
+    for(n=1; x > REJECT && n < UTFmax; n++){ /* states above REJECT are mid-sequence */
+        b = ((uint8 *)s)[n];
+        t = decode[b];
+        v = (v << 6) | (b & TMask);          /* fold in the low 6 bits of this byte */
+        x = decode[256+x+t];
+    }
+
+    if(x != ACCEPT){        /* rejected, or still mid-sequence after UTFmax bytes */
+        *r = RuneErr;
+        return 1;
+    }
+
+    *r = v;
+    return n;
+}
+
+#if 0 /* NOTE(review): disabled earlier implementation; it uses Tbyte, RuneX, Testx and Bitx, which internal.h does not define */
+int
+utf8·decode(byte *s, rune *r)
+{
+    int c[UTFmax], i;
+    rune l;
+
+    c[0] = *(ubyte*)(s);
+    if(c[0] < Tx){
+        *r = c[0];
+        return 1;
+    }
+
+    l = c[0];
+    for(i = 1; i < UTFmax; i++){
+        c[i]  = *(ubyte*)(s+i);
+        c[i] ^= Tx;
+        if(c[i] & Testx) goto bad;
+
+        l = (l << Bitx) | c[i];
+        if(c[0] < Tbyte(i + 2)){
+            l &= RuneX(i + 1);
+            if(i == 1){
+                if(c[0] < Tbyte(2) || l <= Rune1)
+                    goto bad;
+            }else if(l <= RuneX(i) || l > RuneMax)
+                goto bad;
+
+            if(i == 2 && SurrogateMin <= l && l <= SurrogateMax)
+                goto bad;
+
+            *r = l;
+            return i + 1;
+        }
+    }
+bad:
+    *r = RuneErr;
+    return 1;
+}
+#endif
diff --git a/sys/libutf/decodeprev.c b/sys/libutf/decodeprev.c
new file mode 100644
index 0000000..27dced6
--- /dev/null
+++ b/sys/libutf/decodeprev.c
@@ -0,0 +1,60 @@
+#include "internal.h"
+
+#define ACCEPT 0
+#define REJECT 12
+
+static uint8 decode[] = {
+    /*
+     * the first 256 entries map each byte to a character class; classes
+     * shrink the transition table and double as lead byte mask shifts.
+     */
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+         8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+    /*
+     * the rest is the transition table, read as decode[256 + state + class];
+     * one row per state (0, 12, 24, ...), one column per character class.
+     */
+    //   0  1  2  3  4  5  6  7  8  9 10 11
+         0,24,12,12,12,12,12,24,12,24,12,12,
+         0,24,12,12,12,12,12,24,12,24,12,12,
+        12,36, 0,12,12,12,12,48,12,36,12,12,
+        12,60,12, 0, 0,12,12,72,12,72,12,12,
+        12,60,12, 0,12,12,12,72,12,72, 0,12,
+        12,12,12,12,12, 0, 0,12,12,12,12,12,
+        12,12,12,12,12,12,12,12,12,12,12, 0
+};
+/* decodes the rune that ends at byte s[0], reading backwards into *r; returns the number of bytes examined */
+int
+utf8·decodeprev(byte *s, rune *r)
+{
+    int   n;
+    rune  v;
+    uint8 b, t, d, x=ACCEPT;
+
+    v=0, n=0, d=0;
+nextbyte:
+    b = ((uint8 *)s)[-n++];          /* s[0], then s[-1], s[-2], ... */
+    t = decode[b];
+    x = decode[256+x+t];
+
+    if(x > REJECT && n < UTFmax){    /* still mid-sequence: b was a trailing byte */
+        v = v | ((b & TMask) << d);  /* the lowest payload bits arrive first */
+        d += 6;
+        goto nextbyte;
+    }
+
+    if(x != ACCEPT)
+        *r = RuneErr;
+    else{
+        v |= (((0xFFu >> t) & b) << d);  /* the last byte read supplies the top bits */
+        *r = v;
+    }
+
+    return n;                        /* NOTE(review): unlike utf8·decode, a failure still returns n rather than 1 */
+}
diff --git a/sys/libutf/encode.c b/sys/libutf/encode.c
new file mode 100644
index 0000000..fa7c93e
--- /dev/null
+++ b/sys/libutf/encode.c
@@ -0,0 +1,69 @@
+#include "internal.h"
+
+/*
+ * encodes *r into s as utf-8, returning the bytes written (1-4).
+ * runes above RuneMax and surrogates are encoded as RuneErr.
+ */
+int
+utf8·encode(rune *r, byte *s)
+{
+    rune c;
+
+    c = *r;
+    /* validate before picking a width: surrogates sit in the 3 byte range */
+    if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax))
+        c = RuneErr;
+    if(c < Rune1Byte){ // 7 bits
+        s[0] = (uint8)c;
+        return 1;
+    }
+    if(c < Rune2Byte){ // 11 bits
+        s[0] = TByte1 | (c >> 6);
+        s[1] = Tx     | (c & TMask);
+        return 2;
+    }
+    if(c < Rune3Byte){ // 16 bits
+        s[0] = TByte2 | ((c >> 12));
+        s[1] = Tx     | ((c >> 6) & TMask);
+        s[2] = Tx     | ((c)      & TMask);
+        return 3;
+    }
+    // 21 bits
+    s[0] = TByte3 | ((c >> 18));
+    s[1] = Tx     | ((c >> 12) & TMask);
+    s[2] = Tx     | ((c >> 6)  & TMask);
+    s[3] = Tx     | ((c)       & TMask);
+    return 4;
+}
+
+#if 0 /* NOTE(review): disabled earlier implementation; it uses Tbyte, RuneX, Bitx and Maskx, which internal.h does not define */
+int
+utf8·encode(rune* r, byte* s)
+{
+    int i, j;
+    rune c;
+
+    c = *r;
+    if(c <= Rune1) {
+        s[0] = c;
+        return 1;
+    }
+
+    for(i = 2; i < UTFmax + 1; i++){
+        if(i == 3){
+            if(c > RuneMax)
+                c = RuneErr;
+            if(SurrogateMin <= c && c <= SurrogateMax)
+                c = RuneErr;
+        }
+        if(c <= RuneX(i) || i == UTFmax) {
+            s[0] = Tbyte(i) |  (c >> (i - 1)*Bitx);
+            for(j = 1; j < i; j++)
+                s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
+            return i;
+        }
+    }
+
+    return UTFmax;
+}
+#endif
diff --git a/sys/libutf/find.c b/sys/libutf/find.c
new file mode 100644
index 0000000..d75feb8
--- /dev/null
+++ b/sys/libutf/find.c
@@ -0,0 +1,31 @@
+#include "internal.h"
+
+/*
+ * returns a pointer to the first occurrence of rune c in the nul-terminated
+ * utf-8 string s, or nil if there is none
+ */
+byte*
+utf8·find(byte* s, rune c)
+{
+    rune got;
+    long lead;
+    int  n;
+
+    /* a needle below Tx is a single byte: plain byte search is enough */
+    if(c < Tx)
+        return strchr(s, c);
+
+    while((lead = *(ubyte*)s) != 0){
+        if(lead < Tx){
+            if(lead == c)
+                return s;
+            s++;
+        }else{
+            n = utf8·decode(s, &got);
+            if(got == c)
+                return s;
+            s += n;
+        }
+    }
+    return nil;
+}
diff --git a/sys/libutf/findlast.c b/sys/libutf/findlast.c
new file mode 100644
index 0000000..ab25ab2
--- /dev/null
+++ b/sys/libutf/findlast.c
@@ -0,0 +1,32 @@
+#include "internal.h"
+
+/*
+ * returns a pointer to the last occurrence of rune c in the nul-terminated
+ * utf-8 string s, or nil if there is none
+ */
+byte*
+utf8·findlast(byte* s, rune c)
+{
+    rune got;
+    long lead, n;
+    byte *last = nil;
+
+    /* a needle below Tx is a single byte: plain byte search is enough */
+    if(c < Tx)
+        return strrchr(s, c);
+
+    while((lead = *(ubyte*)s) != 0){
+        if(lead < Tx){
+            if(lead == c)
+                last = s;
+            s++;
+            continue;
+        }
+        /* multibyte: remember where it starts, then skip the whole sequence */
+        n = utf8·decode(s, &got);
+        if(got == c)
+            last = s;
+        s += n;
+    }
+    return last;
+}
diff --git a/sys/libutf/internal.h b/sys/libutf/internal.h
new file mode 100644
index 0000000..9719977
--- /dev/null
+++ b/sys/libutf/internal.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <u.h>
+#include <base.h>
+#include <libutf.h>
+
+/*
+ * NOTE: we use the preprocessor to ensure we have unsigned constants.
+ * UTF-8 code:
+ * 1 byte:
+ * 0xxxxxxx
+ * 2 byte:
+ * 110xxxxx 10xxxxxx
+ * 3 byte:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 4 byte:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ */
+
+#define Tx     0x80u // 0b10000000 continuation byte header
+#define TMask  0x3Fu // 0b00111111 payload bits of a continuation byte
+
+#define TByte1 0xC0u // 0b11000000 lead byte header, 2 byte sequence
+#define TByte2 0xE0u // 0b11100000 lead byte header, 3 byte sequence
+#define TByte3 0xF0u // 0b11110000 lead byte header, 4 byte sequence
+#define TByte4 0xF8u // 0b11111000 first byte value that is never a lead
+
+#define RuneMask  0x1FFFFFu  // 21 payload bits of the longest form
+
+#define Rune1Byte 0x000080u  // 1 << 7   (smallest rune needing 2 bytes)
+#define Rune2Byte 0x000800u  // 1 << 11  (smallest rune needing 3 bytes)
+#define Rune3Byte 0x010000u  // 1 << 16  (smallest rune needing 4 bytes)
+#define Rune4Byte 0x200000u  // 1 << 21  (one past the 4 byte range)
+
+
+/* UTF-16 surrogate halves: never valid as runes */
+#define RuneSurrogateMin 0x00D800
+#define RuneSurrogateMax 0x00DFFF
diff --git a/sys/libutf/len.c b/sys/libutf/len.c
new file mode 100644
index 0000000..8fbd679
--- /dev/null
+++ b/sys/libutf/len.c
@@ -0,0 +1,21 @@
+#include "internal.h"
+
+/*
+ * counts the runes in the nul-terminated utf-8 string s; a malformed
+ * sequence advances by whatever utf8·decode reports and counts as one rune
+ */
+int
+utf8·len(char *s)
+{
+    rune skip;
+    long count;
+    int  lead;
+
+    for(count = 0; (lead = *(uchar*)s) != 0; count++){
+        if(lead < Tx)
+            s++;
+        else
+            s += utf8·decode(s, &skip);
+    }
+    return count;
+}
diff --git a/sys/libutf/rules.mk b/sys/libutf/rules.mk
new file mode 100644
index 0000000..53ff8cf
--- /dev/null
+++ b/sys/libutf/rules.mk
@@ -0,0 +1,76 @@
+include share/push.mk
+
+UNICODE = 14.0.0
+
+SRCS_$(d) := \
+	$(d)/encode.c \
+	$(d)/decode.c \
+	$(d)/decodeprev.c \
+	$(d)/find.c \
+	$(d)/findlast.c \
+	$(d)/canfit.c \
+	$(d)/runelen.c \
+	$(d)/len.c \
+	$(d)/runetype-$(UNICODE).c \
+	$(d)/runewidth-$(UNICODE).c
+
+LIBS_$(d) := $(d)/libutf.a
+
+include share/paths.mk
+
+# ========================================================================
+# table generation
+
+$(d)/vendor/common.o: $(d)/vendor/common.c
+	$(COMPILE)
+
+# rune categories: fetch UnicodeData.txt and generate runetype-$(UNICODE).c from it
+$(d)/vendor/UnicodeData-$(UNICODE).txt:
+	@echo "GET	UnicodeData.txt";\
+	curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@
+
+$(d)/vendor/mkrunetype: $(d)/vendor/mkrunetype.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
+	$(COMPLINK)
+
+GENS += $(d)/vendor/mkrunetype
+
+$(d)/runetype-$(UNICODE).c: $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/mkrunetype
+	@$(dir $@)vendor/mkrunetype $< > $@
+
+# rune widths: generated from UnicodeData.txt, EastAsianWidth.txt and emoji-data.txt
+$(d)/vendor/EastAsianWidth-$(UNICODE).txt:
+	@echo "GET	EastAsianWidth.txt";\
+	curl https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@
+
+$(d)/vendor/EmojiData-$(UNICODE).txt:
+	@echo "GET	EmojiData.txt";\
+	curl https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@
+
+$(d)/vendor/mkrunewidth: $(d)/vendor/mkrunewidth.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
+	$(COMPLINK)
+
+GENS += $(d)/vendor/mkrunewidth
+
+$(d)/runewidth-$(UNICODE).c: $(d)/vendor/mkrunewidth $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/EastAsianWidth-$(UNICODE).txt $(d)/vendor/EmojiData-$(UNICODE).txt
+	@$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@
+
+# grapheme boundaries (graphemedata-$(UNICODE).c is not listed in SRCS_$(d) above)
+$(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt:
+	@echo "GET	GraphemeBreakProperty.txt";\
+	curl https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@
+
+$(d)/vendor/mkgraphemedata: $(d)/vendor/mkgraphemedata.c $(d)/vendor/common.o $(OBJ_DIR)/sys/base/base.a
+	$(COMPLINK)
+
+$(d)/graphemedata-$(UNICODE).c: $(d)/vendor/mkgraphemedata $(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt
+	$^ > $@
+
+GENS += $(d)/vendor/mkgraphemedata
+
+# ========================================================================
+# normal operations
+
+$(LIBS_$(d)): $(OBJS_$(d))
+	$(ARCHIVE)
+
+include share/pop.mk
diff --git a/sys/libutf/runelen.c b/sys/libutf/runelen.c
new file mode 100644
index 0000000..dac7f15
--- /dev/null
+++ b/sys/libutf/runelen.c
@@ -0,0 +1,8 @@
+#include "internal.h"
+/* returns the number of bytes utf8·encode writes for rune r */
+int
+utf8·runelen(rune r)
+{
+    byte scratch[10]; /* throwaway output: only the length is wanted */
+    return utf8·encode(&r, scratch);
+}
diff --git a/sys/libutf/runetype-14.0.0.c b/sys/libutf/runetype-14.0.0.c
new file mode 100644
index 0000000..6f4469d
--- /dev/null
+++ b/sys/libutf/runetype-14.0.0.c
@@ -0,0 +1,111 @@
+#include <u.h>
+#include <libutf.h>
+
+/* halving search over n entries of ne runes each (assumes ascending first runes); yields an entry whose first rune is <= c, or 0 */
+static rune*
+rangesearch(rune c, rune *t, int n, int ne)
+{
+    rune *mid;
+    int half;
+
+    while(n > 1){
+        half = n >> 1;
+        mid  = t + half*ne;
+        if(c < mid[0]){
+            n = half;       /* c sorts before mid: keep the lower part */
+            continue;
+        }
+        t  = mid;           /* c is at or past mid: keep the upper part */
+        n -= half;
+    }
+    return (n && c >= t[0]) ? t : 0;
+}
+
+/* inclusive [first, last] rune pairs; NOTE(review): U+0020 is in neither pair — confirm the table is complete */
+static rune isspace_range[] = {
+	0x0009, 0x000d,
+	0x0085, 0x0085,
+};
+
+/* utf8·isspace: 1 if c falls inside one of the isspace_range pairs, else 0 */
+int
+utf8·isspace(rune c)
+{
+    rune *pair = rangesearch(c, isspace_range, arrlen(isspace_range)/2, 2);
+    if(!pair)
+        return 0;
+    return c >= pair[0] && c <= pair[1];
+}
+
+/* utf8·isdigit: always answers 0. This file has no digit table,
+ * so c is never examined. */
+int
+utf8·isdigit(rune c)
+{
+    return 0;
+}
+
+/* utf8·isalpha: always answers 0. This file has no alphabetic table,
+ * so c is never examined. */
+int
+utf8·isalpha(rune c)
+{
+    return 0;
+}
+
+/* utf8·isupper: always answers 0. This file has no upper-case table,
+ * so c is never examined. */
+int
+utf8·isupper(rune c)
+{
+    return 0;
+}
+
+/* utf8·islower: always answers 0. This file has no lower-case table,
+ * so c is never examined. */
+int
+utf8·islower(rune c)
+{
+    return 0;
+}
+
+/* utf8·istitle: always answers 0. This file has no title-case table,
+ * so c is never examined. */
+int
+utf8·istitle(rune c)
+{
+    return 0;
+}
+
+/* utf8·ispunct: always answers 0. This file has no punctuation table,
+ * so c is never examined. */
+int
+utf8·ispunct(rune c)
+{
+    return 0;
+}
+
+/* utf8·toupper: identity mapping. This file has no upper-case
+ * conversion table, so c is handed back unchanged. */
+rune
+utf8·toupper(rune c)
+{
+    return c;
+}
+
+/* utf8·tolower: identity mapping. This file has no lower-case
+ * conversion table, so c is handed back unchanged. */
+rune
+utf8·tolower(rune c)
+{
+    return c;
+}
+
+/* utf8·totitle: identity mapping. This file has no title-case
+ * conversion table, so c is handed back unchanged. */
+rune
+utf8·totitle(rune c)
+{
+    return c;
+}
+
diff --git a/sys/libutf/runewidth-14.0.0.c b/sys/libutf/runewidth-14.0.0.c
new file mode 100644
index 0000000..113c35e
--- /dev/null
+++ b/sys/libutf/runewidth-14.0.0.c
@@ -0,0 +1,71 @@
+#include <u.h>
+#include <libutf.h>
+
+/* halving search over n entries of ne runes each (assumes ascending first runes); yields an entry whose first rune is <= c, or 0 */
+static rune*
+rangesearch(rune c, rune *t, int n, int ne)
+{
+    rune *mid;
+    int half;
+
+    while(n > 1){
+        half = n >> 1;
+        mid  = t + half*ne;
+        if(c < mid[0]){
+            n = half;       /* c sorts before mid: keep the lower part */
+            continue;
+        }
+        t  = mid;           /* c is at or past mid: keep the upper part */
+        n -= half;
+    }
+    return (n && c >= t[0]) ? t : 0;
+}
+
+/* inclusive [first, last] rune pairs */
+static rune width0_range[] = {
+	0x2028, 0x2029,
+};
+
+/* iswidth0: 1 if c falls inside one of the width0_range pairs, else 0 */
+static int
+iswidth0(rune c)
+{
+    rune *pair = rangesearch(c, width0_range, arrlen(width0_range)/2, 2);
+    if(!pair)
+        return 0;
+    return c >= pair[0] && c <= pair[1];
+}
+
+/* individual runes, one per entry */
+static rune width1_single[] = {
+	0x00ad,
+};
+
+/* iswidth1: 1 if c is exactly one of the runes listed in width1_single, else 0 */
+static int
+iswidth1(rune c)
+{
+    rune *hit = rangesearch(c, width1_single, arrlen(width1_single), 1);
+    if(hit == 0)
+        return 0;
+    return c == hit[0];
+}
+
+/* iswidth2: always answers 0. This file has no width-2 table,
+ * so c is never examined. */
+static int
+iswidth2(rune c)
+{
+    return 0;
+}
+
+
+/* utf8·runewidth: 1 when iswidth1 matches, else 2 when iswidth2 matches, else 0; iswidth0 is not consulted here */
+int
+utf8·runewidth(rune c)
+{
+    int w = 0;
+    if(iswidth1(c)) w = 1;
+    else if(iswidth2(c)) w = 2;
+    return w;
+}
diff --git a/sys/rules.mk b/sys/rules.mk
index c8c3dd3..cefa4a9 100644
--- a/sys/rules.mk
+++ b/sys/rules.mk
@@ -8,7 +8,7 @@ include $(DIR)/rules.mk
 DIR := $(d)/base
 include $(DIR)/rules.mk
 
-DIR := $(d)/libunicode
+DIR := $(d)/libutf
 include $(DIR)/rules.mk
 
 DIR := $(d)/libmath
-- 
cgit v1.2.1