aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2021-10-26 21:32:55 -0700
committerNicholas Noll <nbnoll@eml.cc>2021-10-26 21:51:49 -0700
commit189b9e23edfe60b7e82c4c7b6071a3f98799653a (patch)
tree77c1f32726446deb298a0d2e6389358bfe16bd32
parent29b56ef4e4113bcd091b19d6926f18814162ca53 (diff)
fix(unicode): renamed functions so that their purpose is easier to understand
-rw-r--r--include/libunicode.h23
-rw-r--r--sys/cmd/dwm/drw.c2
-rw-r--r--sys/cmd/term/term.c10
-rw-r--r--sys/cmd/term/x.c2
-rw-r--r--sys/cmd/wm/main.c2
-rw-r--r--sys/libunicode/canfit.c20
-rw-r--r--sys/libunicode/decode.c (renamed from sys/libunicode/bytetorune.c)2
-rw-r--r--sys/libunicode/encode.c (renamed from sys/libunicode/runetobyte.c)2
-rw-r--r--sys/libunicode/find.c30
-rw-r--r--sys/libunicode/findlast.c (renamed from sys/libunicode/findlastrune.c)15
-rw-r--r--sys/libunicode/findrune.c26
-rw-r--r--sys/libunicode/fullrune.c18
-rw-r--r--sys/libunicode/internal.h12
-rw-r--r--sys/libunicode/len.c21
-rw-r--r--sys/libunicode/rules.mk14
-rw-r--r--sys/libunicode/runelen.c2
16 files changed, 121 insertions, 80 deletions
diff --git a/include/libunicode.h b/include/libunicode.h
index e017ac5..5e69fd1 100644
--- a/include/libunicode.h
+++ b/include/libunicode.h
@@ -2,9 +2,8 @@
typedef uint32 rune;
-/*
- * We have to use the preprocessor to ensure
- * we have unsigned constants. Unfortunate...
+/*
+ * we have to use the preprocessor to ensure we have unsigned constants.
*/
#define UTFmax 4
@@ -17,14 +16,16 @@ typedef uint32 rune;
/*
* UTF-8 functions.
*/
-int utf8·len(char *s);
-int utf8·runelen(rune r);
-
-int utf8·fullrune(char *s, int n);
-char *utf8·findrune(char *s, long i);
-char *utf8·findrrune(char* s, long c);
-int utf8·bytetorune(rune *r, char *s);
-int utf8·runetobyte(char *s, rune *r);
+int utf8·len(char *s); // returns number of runes
+int utf8·runelen(rune r); // returns number of bytes for rune
+
+int utf8·decode(char *, rune *); // decode 1 rune from char stream, store into rune, return number of bytes
+int utf8·encode(rune *, char *); // encode 1 rune from rune stream, store into char, return number of bytes
+
+char *utf8·find(char *s, rune); // find rune in char stream
+char *utf8·findlast(char* s, rune); // find last occurrence of rune in char stream
+
+int utf8·canfit(char *, int); // XXX: odd function...
int utf8·isletter(rune r);
int utf8·isdigit(rune r);
diff --git a/sys/cmd/dwm/drw.c b/sys/cmd/dwm/drw.c
index 825e2f4..a6d6902 100644
--- a/sys/cmd/dwm/drw.c
+++ b/sys/cmd/dwm/drw.c
@@ -227,7 +227,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
utf8str = text;
nextfont = NULL;
while (*text) {
- utf8charlen = utf8·bytetorune(&utf8codepoint, text);
+ utf8charlen = utf8·decode(text, &utf8codepoint);
for (curfont = drw->fonts; curfont; curfont = curfont->next) {
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
if (charexists) {
diff --git a/sys/cmd/term/term.c b/sys/cmd/term/term.c
index 6a73a8b..b08915e 100644
--- a/sys/cmd/term/term.c
+++ b/sys/cmd/term/term.c
@@ -410,7 +410,7 @@ getsel(void)
if (gp->mode & Gwdummy)
continue;
- ptr += utf8·runetobyte(ptr, &gp->u);
+ ptr += utf8·encode(&gp->u, ptr);
}
/*
@@ -993,7 +993,7 @@ tsetchar(rune u, Letter *attr, int x, int y)
*/
if (term.trantbl[term.charset] == CSgfx0 &&
BETWEEN(u, 0x41, 0x7e) && vt100_0[u - 0x41])
- utf8·bytetorune(&u, vt100_0[u - 0x41]);
+ utf8·decode(vt100_0[u - 0x41], &u);
if (term.line[y][x].mode & Gwide) {
if (x+1 < term.col) {
@@ -1801,7 +1801,7 @@ tdumpline(int n)
end = &bp[MIN(tlinelen(n), term.col) - 1];
if (bp != end || bp->u != ' ') {
for ( ; bp <= end; ++bp)
- tprinter(buf, utf8·runetobyte(buf, &bp->u));
+ tprinter(buf, utf8·encode(&bp->u, buf));
}
tprinter("\n", 1);
}
@@ -2090,7 +2090,7 @@ tputc(rune u)
c[0] = u;
width = len = 1;
} else {
- len = utf8·runetobyte(c, &u);
+ len = utf8·encode(&u, c);
if(!control && (width = wcwidth(u)) == -1)
width = 1;
}
@@ -2257,7 +2257,7 @@ twrite(char *buf, int buflen, int show_ctrl)
for (n = 0; n < buflen; n += charsize) {
if(IS_SET(Tutf8) && !IS_SET(Tsixel)) {
/* process a complete utf8 char */
- charsize = utf8·bytetorune(&u, buf + n);
+ charsize = utf8·decode(buf + n, &u);
if(charsize == 0)
break;
} else {
diff --git a/sys/cmd/term/x.c b/sys/cmd/term/x.c
index 3079f38..ea64bff 100644
--- a/sys/cmd/term/x.c
+++ b/sys/cmd/term/x.c
@@ -1824,7 +1824,7 @@ kpress(XEvent *ev)
if (IS_SET(W8bit)) {
if (*buf < 0177) {
c = *buf | 0x80;
- len = utf8·runetobyte(buf, &c);
+ len = utf8·encode(&c, buf);
}
} else {
buf[1] = buf[0];
diff --git a/sys/cmd/wm/main.c b/sys/cmd/wm/main.c
index a447d80..2607801 100644
--- a/sys/cmd/wm/main.c
+++ b/sys/cmd/wm/main.c
@@ -133,7 +133,7 @@ main(int argc, char *argv[])
{
char *socket, *cmd=nil;
- ARGBEGIN {
+ ARGBEGIN{
case 's':
cmd = ARGF();
break;
diff --git a/sys/libunicode/canfit.c b/sys/libunicode/canfit.c
new file mode 100644
index 0000000..d44c9e6
--- /dev/null
+++ b/sys/libunicode/canfit.c
@@ -0,0 +1,20 @@
+#include "internal.h"
+
+/* returns 1 if string of length n is long enough to be decoded */
+int
+utf8·canfit(byte* s, int n)
+{
+ int i;
+ rune c;
+
+ if(n <= 0) return 0;
+ c = *(ubyte*) s;
+ if(c < Tx) return 1;
+
+ for(i = 3; i < UTFmax + 1; i++){
+ if(c < Tbyte(i))
+ return n >= i - 1;
+ }
+
+ return n >= UTFmax;
+}
diff --git a/sys/libunicode/bytetorune.c b/sys/libunicode/decode.c
index fe41e7a..79271f2 100644
--- a/sys/libunicode/bytetorune.c
+++ b/sys/libunicode/decode.c
@@ -1,7 +1,7 @@
#include "internal.h"
int
-utf8·bytetorune(rune* r, byte* s)
+utf8·decode(byte *s, rune* r)
{
int c[UTFmax], i;
rune l;
diff --git a/sys/libunicode/runetobyte.c b/sys/libunicode/encode.c
index 27f252b..8f4d212 100644
--- a/sys/libunicode/runetobyte.c
+++ b/sys/libunicode/encode.c
@@ -1,7 +1,7 @@
#include "internal.h"
int
-utf8·runetobyte(byte* s, rune* r)
+utf8·encode(rune* r, byte* s)
{
int i, j;
rune c;
diff --git a/sys/libunicode/find.c b/sys/libunicode/find.c
new file mode 100644
index 0000000..659ab5b
--- /dev/null
+++ b/sys/libunicode/find.c
@@ -0,0 +1,30 @@
+#include "internal.h"
+
+byte*
+utf8·find(byte* s, rune c)
+{
+ long c1;
+ rune r;
+ int n;
+
+ if(c < RuneSync)
+ return strchr(s, c);
+
+ for(;;){
+ c1 = *(ubyte*)s;
+ if(c1 < RuneSelf) {
+ if(c1 == 0) return nil;
+ if(c1 == c) return s;
+ s++;
+ continue;
+ }
+ n = utf8·decode(s, &r);
+
+ if(r == c)
+ return s;
+
+ s += n;
+ }
+
+ return nil;
+}
diff --git a/sys/libunicode/findlastrune.c b/sys/libunicode/findlast.c
index 0dc5032..3a4ed38 100644
--- a/sys/libunicode/findlastrune.c
+++ b/sys/libunicode/findlast.c
@@ -1,27 +1,28 @@
#include "internal.h"
byte*
-utf8·findlastrune(byte* s, long c)
+utf8·findlast(byte* s, rune c)
{
long c1;
rune r;
byte *l;
- if (c < RuneSync)
+ if(c < RuneSync)
return strrchr(s, c);
l = nil;
for(;;){
c1 = *(ubyte*)s;
- if (c1 < RuneSelf) {
- if (c1 == 0) return l;
- if (c1 == c) l = s;
+ if(c1 < RuneSelf) {
+ if(c1 == 0) return l;
+ if(c1 == c) l = s;
s++;
continue;
}
- c1 = utf8·bytetorune(&r, s);
- if (r == c)
+ c1 = utf8·decode(s, &r);
+ if(r == c)
l = s;
+
s += c1;
}
diff --git a/sys/libunicode/findrune.c b/sys/libunicode/findrune.c
deleted file mode 100644
index 97edc3c..0000000
--- a/sys/libunicode/findrune.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "internal.h"
-
-byte*
-utf8·findrune(byte* s, long c)
-{
- long c1;
- rune r;
- int n;
-
- if (c < RuneSync) return strchr(s, c);
-
- for (;;) {
- c1 = *(ubyte*)s;
- if (c1 < RuneSelf) {
- if (c1 == 0) return nil;
- if (c1 == c) return s;
- s++;
- continue;
- }
- n = utf8·bytetorune(&r, s);
- if (r == c) return s;
- s += n;
- }
-
- return nil;
-}
diff --git a/sys/libunicode/fullrune.c b/sys/libunicode/fullrune.c
deleted file mode 100644
index e5cf314..0000000
--- a/sys/libunicode/fullrune.c
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "internal.h"
-
-int
-utf8·fullrune(byte* s, int n)
-{
- int i;
- rune c;
-
- if (n <= 0) return 0;
- c = *(ubyte*) s;
- if (c < Tx) return 1;
-
- for (i = 3; i < UTFmax + 1; i++) {
- if (c < Tbyte(i)) return n >= i - 1;
- }
-
- return n >= UTFmax;
-}
diff --git a/sys/libunicode/internal.h b/sys/libunicode/internal.h
index bf376b2..31ed2ae 100644
--- a/sys/libunicode/internal.h
+++ b/sys/libunicode/internal.h
@@ -4,8 +4,18 @@
#include <base.h>
#include <libunicode.h>
+/* UTF-8 code
+ * 1 byte:
+ * 0xxxxxxx
+ * 2 byte:
+ * 110xxxxx 10xxxxxx
+ * 3 byte:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 4 byte:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ */
#define Bit(i) (7-(i))
-/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */
+/* i 1's followed by 0's, e.g. Tbyte(2) is 1100 0000 */
#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
/* 0000 0000 0000 0111 1111 1111 */
#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
diff --git a/sys/libunicode/len.c b/sys/libunicode/len.c
new file mode 100644
index 0000000..c461bf3
--- /dev/null
+++ b/sys/libunicode/len.c
@@ -0,0 +1,21 @@
+#include "internal.h"
+
+int
+utf8·len(char *s)
+{
+ int c;
+ long n;
+ rune r;
+
+ n = 0;
+ for(;;){
+ c = *(uchar*)s;
+ if(c < RuneSelf) {
+ if(c == 0)
+ return n;
+ s++;
+ } else
+ s += utf8·decode(s, &r);
+ n++;
+ }
+}
diff --git a/sys/libunicode/rules.mk b/sys/libunicode/rules.mk
index fb83819..100697b 100644
--- a/sys/libunicode/rules.mk
+++ b/sys/libunicode/rules.mk
@@ -3,12 +3,13 @@ include share/push.mk
UNICODE = 14.0.0
SRCS_$(d) := \
- $(d)/runetobyte.c \
- $(d)/bytetorune.c \
- $(d)/findrune.c \
- $(d)/findlastrune.c \
- $(d)/fullrune.c \
+ $(d)/encode.c \
+ $(d)/decode.c \
+ $(d)/find.c \
+ $(d)/findlast.c \
+ $(d)/canfit.c \
$(d)/runelen.c \
+ $(d)/len.c \
$(d)/runetype-$(UNICODE).c
LIBS_$(d) := $(d)/libunicode.a
@@ -16,13 +17,14 @@ LIBS_$(d) := $(d)/libunicode.a
include share/paths.mk
$(d)/vendor/UnicodeData-$(UNICODE).txt:
+ @echo "GET UnicodeData.txt";\
curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@
$(d)/vendor/mkrunetype: $(d)/vendor/mkrunetype.c $(OBJ_DIR)/sys/base/base.a
$(COMPLINK)
$(d)/runetype-$(UNICODE).c: $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/mkrunetype
- $(dir $@)/vendor/mkrunetype $< > $@
+ @$(dir $@)vendor/mkrunetype $< > $@
GENS += $(d)/vendor/mkrunetype
diff --git a/sys/libunicode/runelen.c b/sys/libunicode/runelen.c
index 682400c..dac7f15 100644
--- a/sys/libunicode/runelen.c
+++ b/sys/libunicode/runelen.c
@@ -4,5 +4,5 @@ int
utf8·runelen(rune r)
{
byte s[10];
- return utf8·runetobyte(s, &r);
+ return utf8·encode(&r, s);
}