aboutsummaryrefslogtreecommitdiff
path: root/sys/libunicode/decode.c
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2021-10-26 21:32:55 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2021-10-26 21:51:49 -0700
commit 189b9e23edfe60b7e82c4c7b6071a3f98799653a (patch)
tree 77c1f32726446deb298a0d2e6389358bfe16bd32 /sys/libunicode/decode.c
parent 29b56ef4e4113bcd091b19d6926f18814162ca53 (diff)
fix(unicode): renamed functions to be easier to understand their functions
Diffstat (limited to 'sys/libunicode/decode.c')
-rw-r--r-- sys/libunicode/decode.c 40
1 files changed, 40 insertions, 0 deletions
diff --git a/sys/libunicode/decode.c b/sys/libunicode/decode.c
new file mode 100644
index 0000000..79271f2
--- /dev/null
+++ b/sys/libunicode/decode.c
@@ -0,0 +1,40 @@
+#include "internal.h"
+
+int /* returns the number of bytes consumed from s: 1 for a single-byte rune or on error, i + 1 for a multi-byte sequence */
+utf8·decode(byte *s, rune* r) /* decodes one rune starting at s and stores it in *r; stores RuneErr when the bytes are rejected */
+{
+ int c[UTFmax], i; /* c[] keeps the bytes read so far; i is the index of the trailing byte being examined */
+ rune l; /* accumulator for the value being assembled */
+
+ c[0] = *(ubyte*)(s); /* lead byte, read through ubyte* (presumably an unsigned byte type from internal.h, so the value is non-negative; confirm) */
+ if(c[0] < Tx) { /* lead byte below Tx: it is the rune itself, one byte consumed */
+ *r = c[0];
+ return 1;
+ }
+
+ l = c[0]; /* seed with the whole lead byte; its marker bits are masked off later with RuneX */
+ for(i = 1; i < UTFmax; i++) { /* examine at most UTFmax - 1 trailing bytes */
+ c[i] = *(ubyte*)(s+i); /* no length is passed in: the caller must keep s readable until the sequence completes or a byte is rejected below */
+ c[i] ^= Tx; /* toggle the Tx bits so that a well-formed trailing byte leaves only payload bits (bit layout is defined in internal.h) */
+ if (c[i] & Testx) goto bad; /* any Testx bit still set: not an acceptable trailing byte */
+
+ l = (l << Bitx) | c[i]; /* append this byte's bits below what has been accumulated so far */
+ if(c[0] < Tbyte(i + 2)) { /* lead byte is below the Tbyte(i + 2) threshold: the sequence ends here at i + 1 bytes */
+ l &= RuneX(i + 1); /* presumably strips the lead-byte marker bits that were shifted up inside l; confirm against RuneX in internal.h */
+ if (i == 1) { /* two-byte sequence */
+ if (c[0] < Tbyte(2) || l <= Rune1) /* lead byte below Tbyte(2), or value no greater than Rune1 (presumably an overlong form; confirm) */
+ goto bad;
+ } else if (l <= RuneX(i) || l > RuneMax) /* longer sequences: value must exceed RuneX(i) and must not exceed RuneMax */
+ goto bad;
+ if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) /* three-byte sequence whose value lies in [SurrogateMin, SurrogateMax] is rejected */
+ goto bad;
+
+ *r = l; /* accepted: hand the decoded value back */
+ return i + 1; /* lead byte plus i trailing bytes */
+ }
+ } /* loop ran to UTFmax without completing a sequence: falls through to bad */
+bad:
+ *r = RuneErr; /* report the error rune */
+ return 1; /* exactly one byte is reported as consumed on failure */
+}
+