From 189b9e23edfe60b7e82c4c7b6071a3f98799653a Mon Sep 17 00:00:00 2001
From: Nicholas Noll <nbnoll@eml.cc>
Date: Tue, 26 Oct 2021 21:32:55 -0700
Subject: fix(unicode): renamed functions to be easier to understand their
 functions

---
 sys/cmd/dwm/drw.c             |  2 +-
 sys/cmd/term/term.c           | 10 +++++-----
 sys/cmd/term/x.c              |  2 +-
 sys/cmd/wm/main.c             |  2 +-
 sys/libunicode/bytetorune.c   | 40 ----------------------------------------
 sys/libunicode/canfit.c       | 20 ++++++++++++++++++++
 sys/libunicode/decode.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 sys/libunicode/encode.c       | 31 +++++++++++++++++++++++++++++++
 sys/libunicode/find.c         | 30 ++++++++++++++++++++++++++++++
 sys/libunicode/findlast.c     | 30 ++++++++++++++++++++++++++++++
 sys/libunicode/findlastrune.c | 29 -----------------------------
 sys/libunicode/findrune.c     | 26 --------------------------
 sys/libunicode/fullrune.c     | 18 ------------------
 sys/libunicode/internal.h     | 12 +++++++++++-
 sys/libunicode/len.c          | 21 +++++++++++++++++++++
 sys/libunicode/rules.mk       | 14 ++++++++------
 sys/libunicode/runelen.c      |  2 +-
 sys/libunicode/runetobyte.c   | 31 -------------------------------
 18 files changed, 200 insertions(+), 160 deletions(-)
 delete mode 100644 sys/libunicode/bytetorune.c
 create mode 100644 sys/libunicode/canfit.c
 create mode 100644 sys/libunicode/decode.c
 create mode 100644 sys/libunicode/encode.c
 create mode 100644 sys/libunicode/find.c
 create mode 100644 sys/libunicode/findlast.c
 delete mode 100644 sys/libunicode/findlastrune.c
 delete mode 100644 sys/libunicode/findrune.c
 delete mode 100644 sys/libunicode/fullrune.c
 create mode 100644 sys/libunicode/len.c
 delete mode 100644 sys/libunicode/runetobyte.c

(limited to 'sys')

diff --git a/sys/cmd/dwm/drw.c b/sys/cmd/dwm/drw.c
index 825e2f4..a6d6902 100644
--- a/sys/cmd/dwm/drw.c
+++ b/sys/cmd/dwm/drw.c
@@ -227,7 +227,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
 		utf8str = text;
 		nextfont = NULL;
 		while (*text) {
-			utf8charlen = utf8·bytetorune(&utf8codepoint, text);
+			utf8charlen = utf8·decode(text, &utf8codepoint);
 			for (curfont = drw->fonts; curfont; curfont = curfont->next) {
 				charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
 				if (charexists) {
diff --git a/sys/cmd/term/term.c b/sys/cmd/term/term.c
index 6a73a8b..b08915e 100644
--- a/sys/cmd/term/term.c
+++ b/sys/cmd/term/term.c
@@ -410,7 +410,7 @@ getsel(void)
 			if (gp->mode & Gwdummy)
 				continue;
 
-			ptr += utf8·runetobyte(ptr, &gp->u);
+			ptr += utf8·encode(&gp->u, ptr);
 		}
 
 		/*
@@ -993,7 +993,7 @@ tsetchar(rune u, Letter *attr, int x, int y)
 	 */
 	if (term.trantbl[term.charset] == CSgfx0 &&
 	   BETWEEN(u, 0x41, 0x7e) && vt100_0[u - 0x41])
-		utf8·bytetorune(&u, vt100_0[u - 0x41]);
+		utf8·decode(vt100_0[u - 0x41], &u);
 
 	if (term.line[y][x].mode & Gwide) {
 		if (x+1 < term.col) {
@@ -1801,7 +1801,7 @@ tdumpline(int n)
 	end = &bp[MIN(tlinelen(n), term.col) - 1];
 	if (bp != end || bp->u != ' ') {
 		for ( ; bp <= end; ++bp)
-			tprinter(buf, utf8·runetobyte(buf, &bp->u));
+			tprinter(buf, utf8·encode(&bp->u, buf));
 	}
 	tprinter("\n", 1);
 }
@@ -2090,7 +2090,7 @@ tputc(rune u)
 		c[0] = u;
 		width = len = 1;
 	} else {
-		len = utf8·runetobyte(c, &u);
+		len = utf8·encode(&u, c);
 		if(!control && (width = wcwidth(u)) == -1)
 			width = 1;
 	}
@@ -2257,7 +2257,7 @@ twrite(char *buf, int buflen, int show_ctrl)
 	for (n = 0; n < buflen; n += charsize) {
 		if(IS_SET(Tutf8) && !IS_SET(Tsixel)) {
 			/* process a complete utf8 char */
-			charsize = utf8·bytetorune(&u, buf + n);
+			charsize = utf8·decode(buf + n, &u);
 			if(charsize == 0)
 				break;
 		} else {
diff --git a/sys/cmd/term/x.c b/sys/cmd/term/x.c
index 3079f38..ea64bff 100644
--- a/sys/cmd/term/x.c
+++ b/sys/cmd/term/x.c
@@ -1824,7 +1824,7 @@ kpress(XEvent *ev)
 		if (IS_SET(W8bit)) {
 			if (*buf < 0177) {
 				c = *buf | 0x80;
-				len = utf8·runetobyte(buf, &c);
+				len = utf8·encode(&c, buf);
 			}
 		} else {
 			buf[1] = buf[0];
diff --git a/sys/cmd/wm/main.c b/sys/cmd/wm/main.c
index a447d80..2607801 100644
--- a/sys/cmd/wm/main.c
+++ b/sys/cmd/wm/main.c
@@ -133,7 +133,7 @@ main(int argc, char *argv[])
 {
     char *socket, *cmd=nil;
 
-    ARGBEGIN {
+    ARGBEGIN{
     case 's':
         cmd = ARGF();
         break;
diff --git a/sys/libunicode/bytetorune.c b/sys/libunicode/bytetorune.c
deleted file mode 100644
index fe41e7a..0000000
--- a/sys/libunicode/bytetorune.c
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "internal.h"
-
-int
-utf8·bytetorune(rune* r, byte* s)
-{
-    int c[UTFmax], i;
-    rune l;
-
-    c[0] = *(ubyte*)(s);
-    if(c[0] < Tx) {
-        *r = c[0];
-        return 1;
-    }
-
-    l = c[0];
-    for(i = 1; i < UTFmax; i++) {
-        c[i]  = *(ubyte*)(s+i);
-        c[i] ^= Tx;
-        if (c[i] & Testx) goto bad;
-
-        l = (l << Bitx) | c[i];
-        if(c[0] < Tbyte(i + 2)) {
-            l &= RuneX(i + 1);
-            if (i == 1) {
-                if (c[0] < Tbyte(2) || l <= Rune1)
-                    goto bad;
-            } else if (l <= RuneX(i) || l > RuneMax)
-                goto bad;
-            if (i == 2 && SurrogateMin <= l && l <= SurrogateMax)
-                goto bad;
-
-            *r = l;
-            return i + 1;
-        }
-    }
-bad:
-    *r = RuneErr;
-    return 1;
-}
-
diff --git a/sys/libunicode/canfit.c b/sys/libunicode/canfit.c
new file mode 100644
index 0000000..d44c9e6
--- /dev/null
+++ b/sys/libunicode/canfit.c
@@ -0,0 +1,20 @@
+#include "internal.h"
+
+/* returns 1 if n bytes suffice for the rune whose length is implied by the lead byte s[0], else 0; trailing bytes are not validated */
+int
+utf8·canfit(byte* s, int n)
+{
+    int  i;
+    rune c;
+
+    if(n <= 0) return 0;    /* nothing available to inspect */
+    c = *(ubyte*) s;    /* only the lead byte is ever read */
+    if(c < Tx) return 1;    /* single-byte rune */
+
+    for(i = 3; i < UTFmax + 1; i++){
+        if(c < Tbyte(i))    /* lead byte sorts below the i-byte marker ... */
+            return n >= i - 1;    /* ... so i-1 bytes are required */
+    }
+
+    return n >= UTFmax;    /* lead byte is at or above Tbyte(UTFmax): require UTFmax bytes */
+}
diff --git a/sys/libunicode/decode.c b/sys/libunicode/decode.c
new file mode 100644
index 0000000..79271f2
--- /dev/null
+++ b/sys/libunicode/decode.c
@@ -0,0 +1,40 @@
+#include "internal.h"
+/* decodes the rune at the start of s into *r; returns bytes consumed (1, with *r = RuneErr, on malformed input) */
+int
+utf8·decode(byte *s, rune* r)
+{
+    int c[UTFmax], i;
+    rune l;
+
+    c[0] = *(ubyte*)(s);    /* lead byte */
+    if(c[0] < Tx) {    /* single-byte rune: the value is the byte itself */
+        *r = c[0];
+        return 1;
+    }
+
+    l = c[0];    /* accumulator; the lead byte's marker bits are masked off below */
+    for(i = 1; i < UTFmax; i++) {    /* i indexes the bytes following the lead byte */
+        c[i]  = *(ubyte*)(s+i);    /* no length parameter: s[i] is read unconditionally */
+        c[i] ^= Tx;    /* flip the Tx marker bits */
+        if (c[i] & Testx) goto bad;    /* Testx bits still set: not a continuation byte */
+
+        l = (l << Bitx) | c[i];    /* append this byte's payload bits */
+        if(c[0] < Tbyte(i + 2)) {    /* lead byte is below the (i+2)-byte marker: the sequence has i+1 bytes */
+            l &= RuneX(i + 1);    /* keep only the bits an (i+1)-byte rune can carry */
+            if (i == 1) {
+                if (c[0] < Tbyte(2) || l <= Rune1)    /* lead byte below the 2-byte marker, or value fits in one byte (overlong) */
+                    goto bad;
+            } else if (l <= RuneX(i) || l > RuneMax)    /* value fits in i bytes (overlong), or exceeds RuneMax */
+                goto bad;
+            if (i == 2 && SurrogateMin <= l && l <= SurrogateMax)    /* surrogate range is rejected in the 3-byte form */
+                goto bad;
+
+            *r = l;
+            return i + 1;    /* lead byte plus i continuation bytes */
+        }
+    }
+bad:    /* also reached when the loop ends without the lead byte matching any supported length */
+    *r = RuneErr;
+    return 1;    /* report a single byte consumed */
+}
+
diff --git a/sys/libunicode/encode.c b/sys/libunicode/encode.c
new file mode 100644
index 0000000..8f4d212
--- /dev/null
+++ b/sys/libunicode/encode.c
@@ -0,0 +1,31 @@
+#include "internal.h"
+/* encodes *r as UTF-8 into s (no terminator is written); returns the number of bytes stored */
+int
+utf8·encode(rune* r, byte* s)
+{
+    int i, j;
+    rune c;
+
+    c = *r;    /* work on a copy: *r itself is never modified */
+    if(c <= Rune1) {    /* fits in a single byte */
+        s[0] = c;
+        return 1;
+    }
+
+    for(i = 2; i < UTFmax + 1; i++){    /* try each multi-byte width i in turn */
+        if(i == 3){    /* reached only when c did not fit in 2 bytes */
+            if(c > RuneMax)    /* out of range: encode RuneErr instead */
+                c = RuneErr;
+            if(SurrogateMin <= c && c <= SurrogateMax)    /* surrogates are likewise replaced by RuneErr */
+                c = RuneErr;
+        }
+        if(c <= RuneX(i) || i == UTFmax) {    /* c fits in i bytes, or i is the widest form available */
+            s[0] = Tbyte(i) |  (c >> (i - 1)*Bitx);    /* length marker plus the topmost payload bits */
+            for(j = 1; j < i; j++)
+                s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);    /* Tx marker plus the next Bitx payload bits */
+            return i;
+        }
+    }
+
+    return UTFmax;    /* not reached when UTFmax >= 2: the i == UTFmax pass always returns */
+}
diff --git a/sys/libunicode/find.c b/sys/libunicode/find.c
new file mode 100644
index 0000000..659ab5b
--- /dev/null
+++ b/sys/libunicode/find.c
@@ -0,0 +1,30 @@
+#include "internal.h"
+/* returns a pointer to the first occurrence of rune c in the NUL-terminated string s, or nil if there is none */
+byte*
+utf8·find(byte* s, rune c)
+{
+    long c1;
+    rune r;
+    int  n;
+
+    if(c < RuneSync)    /* c below RuneSync: delegate to a plain byte search */
+        return strchr(s, c);
+
+    for(;;){
+        c1 = *(ubyte*)s;    /* peek at the next byte */
+        if(c1 < RuneSelf) {    /* byte below RuneSelf: handled without decoding */
+            if(c1 == 0) return nil;    /* end of string: not found */
+            if(c1 == c) return s;
+            s++;
+            continue;
+        }
+        n = utf8·decode(s, &r);    /* n = number of bytes the decoder consumed */
+
+        if(r == c)
+            return s;    /* points at the first byte of the matching rune */
+
+        s += n;    /* step over the whole rune */
+    }
+
+    return nil;    /* not reached: the loop above only leaves via return */
+}
diff --git a/sys/libunicode/findlast.c b/sys/libunicode/findlast.c
new file mode 100644
index 0000000..3a4ed38
--- /dev/null
+++ b/sys/libunicode/findlast.c
@@ -0,0 +1,30 @@
+#include "internal.h"
+/* returns a pointer to the last occurrence of rune c in the NUL-terminated string s, or nil if there is none */
+byte*
+utf8·findlast(byte* s, rune c)
+{
+    long c1;
+    rune r;
+    byte *l;
+
+    if(c < RuneSync)    /* c below RuneSync: delegate to a plain byte search */
+        return strrchr(s, c);
+
+    l = nil;    /* most recent match so far */
+    for(;;){
+        c1 = *(ubyte*)s;    /* peek at the next byte */
+        if(c1 < RuneSelf) {    /* byte below RuneSelf: handled without decoding */
+            if(c1 == 0) return l;    /* end of string: last match seen, or nil */
+            if(c1 == c) l = s;
+            s++;
+            continue;
+        }
+        c1 = utf8·decode(s, &r);    /* c1 is reused: it now holds the bytes consumed */
+        if(r == c)
+            l = s;    /* remember this match and keep scanning */
+
+        s += c1;    /* step over the whole rune */
+    }
+
+    return nil;    /* not reached: the loop above only leaves via return */
+}
diff --git a/sys/libunicode/findlastrune.c b/sys/libunicode/findlastrune.c
deleted file mode 100644
index 0dc5032..0000000
--- a/sys/libunicode/findlastrune.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "internal.h"
-
-byte*
-utf8·findlastrune(byte* s, long c)
-{
-    long c1;
-    rune r;
-    byte *l;
-
-    if (c < RuneSync)
-        return strrchr(s, c);
-
-    l = nil;
-    for(;;){
-        c1 = *(ubyte*)s;
-        if (c1 < RuneSelf) {
-            if (c1 == 0) return l;
-            if (c1 == c) l = s;
-            s++;
-            continue;
-        }
-        c1 = utf8·bytetorune(&r, s);
-        if (r == c) 
-            l = s;
-        s += c1;
-    }
-
-    return nil;
-}
diff --git a/sys/libunicode/findrune.c b/sys/libunicode/findrune.c
deleted file mode 100644
index 97edc3c..0000000
--- a/sys/libunicode/findrune.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "internal.h"
-
-byte*
-utf8·findrune(byte* s, long c)
-{
-    long c1;
-    rune r;
-    int  n;
-
-    if (c < RuneSync) return strchr(s, c);
-
-    for (;;) {
-        c1 = *(ubyte*)s;
-        if (c1 < RuneSelf) {
-            if (c1 == 0) return nil;
-            if (c1 == c) return s;
-            s++;
-            continue;
-        }
-        n = utf8·bytetorune(&r, s);
-        if (r == c) return s;
-        s += n;
-    }
-
-    return nil;
-}
diff --git a/sys/libunicode/fullrune.c b/sys/libunicode/fullrune.c
deleted file mode 100644
index e5cf314..0000000
--- a/sys/libunicode/fullrune.c
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "internal.h"
-
-int
-utf8·fullrune(byte* s, int n)
-{
-    int  i;
-    rune c;
-
-    if (n <= 0) return 0;
-    c = *(ubyte*) s;
-    if (c < Tx) return 1;
-
-    for (i = 3; i < UTFmax + 1; i++) {
-        if (c < Tbyte(i)) return n >= i - 1;
-    }
-
-    return n >= UTFmax;
-}
diff --git a/sys/libunicode/internal.h b/sys/libunicode/internal.h
index bf376b2..31ed2ae 100644
--- a/sys/libunicode/internal.h
+++ b/sys/libunicode/internal.h
@@ -4,8 +4,18 @@
 #include <base.h>
 #include <libunicode.h>
 
+/* UTF-8 code
+ * 1 byte:
+ * 0xxxxxxx
+ * 2 byte:
+ * 110xxxxx 10xxxxxx
+ * 3 byte:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 4 byte:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ */
 #define Bit(i) (7-(i))
-/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */
+/* i 1's followed by 0's, e.g. Tbyte(2) is 1100 0000 */
 #define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
 /* 0000 0000 0000 0111 1111 1111 */
 #define	RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
diff --git a/sys/libunicode/len.c b/sys/libunicode/len.c
new file mode 100644
index 0000000..c461bf3
--- /dev/null
+++ b/sys/libunicode/len.c
@@ -0,0 +1,21 @@
+#include "internal.h"
+/* returns the number of runes in the NUL-terminated string s; the terminator is not counted */
+int
+utf8·len(char *s)
+{
+    int c;
+    long n;    /* NOTE(review): counter is long but the return type is int, so it is narrowed on return */
+    rune r;    /* decoded value is discarded; only the consumed length matters */
+
+    n = 0;
+    for(;;){
+        c = *(uchar*)s;    /* peek at the next byte */
+        if(c < RuneSelf) {    /* byte below RuneSelf: counts as one rune without decoding */
+            if(c == 0)
+                return n;    /* end of string */
+            s++;
+        } else
+            s += utf8·decode(s, &r);    /* malformed input advances one byte and still counts as one rune */
+        n++;
+    }
+}
diff --git a/sys/libunicode/rules.mk b/sys/libunicode/rules.mk
index fb83819..100697b 100644
--- a/sys/libunicode/rules.mk
+++ b/sys/libunicode/rules.mk
@@ -3,12 +3,13 @@ include share/push.mk
 UNICODE = 14.0.0
 
 SRCS_$(d) := \
-	$(d)/runetobyte.c \
-	$(d)/bytetorune.c \
-	$(d)/findrune.c \
-	$(d)/findlastrune.c \
-	$(d)/fullrune.c \
+	$(d)/encode.c \
+	$(d)/decode.c \
+	$(d)/find.c \
+	$(d)/findlast.c \
+	$(d)/canfit.c \
 	$(d)/runelen.c \
+	$(d)/len.c \
 	$(d)/runetype-$(UNICODE).c
 
 LIBS_$(d) := $(d)/libunicode.a
@@ -16,13 +17,14 @@ LIBS_$(d) := $(d)/libunicode.a
 include share/paths.mk
 
 $(d)/vendor/UnicodeData-$(UNICODE).txt:
+	@echo "GET	UnicodeData.txt";\
 	curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@
 
 $(d)/vendor/mkrunetype: $(d)/vendor/mkrunetype.c $(OBJ_DIR)/sys/base/base.a
 	$(COMPLINK)
 
 $(d)/runetype-$(UNICODE).c: $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/mkrunetype
-	$(dir $@)/vendor/mkrunetype $< > $@
+	@$(dir $@)vendor/mkrunetype $< > $@
 
 GENS += $(d)/vendor/mkrunetype
 
diff --git a/sys/libunicode/runelen.c b/sys/libunicode/runelen.c
index 682400c..dac7f15 100644
--- a/sys/libunicode/runelen.c
+++ b/sys/libunicode/runelen.c
@@ -4,5 +4,5 @@ int
 utf8·runelen(rune r)
 {
     byte s[10];
-    return utf8·runetobyte(s, &r);
+    return utf8·encode(&r, s);
 }
diff --git a/sys/libunicode/runetobyte.c b/sys/libunicode/runetobyte.c
deleted file mode 100644
index 27f252b..0000000
--- a/sys/libunicode/runetobyte.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "internal.h"
-
-int
-utf8·runetobyte(byte* s, rune* r)
-{
-    int i, j;
-    rune c;
-
-    c = *r;
-    if(c <= Rune1) {
-        s[0] = c;
-        return 1;
-    }
-
-    for(i = 2; i < UTFmax + 1; i++){
-        if(i == 3){
-            if(c > RuneMax)
-                c = RuneErr;
-            if(SurrogateMin <= c && c <= SurrogateMax)
-                c = RuneErr;
-        }
-        if(c <= RuneX(i) || i == UTFmax) {
-            s[0] = Tbyte(i) |  (c >> (i - 1)*Bitx);
-            for(j = 1; j < i; j++)
-                s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
-            return i;
-        }
-    }
-
-    return UTFmax;
-}
-- 
cgit v1.2.1