aboutsummaryrefslogtreecommitdiff
path: root/sys/base/arg.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2021-10-26 21:01:41 -0700
committerNicholas Noll <nbnoll@eml.cc>2021-10-26 21:01:41 -0700
commit29b56ef4e4113bcd091b19d6926f18814162ca53 (patch)
treea4888a16927576592af13928bb805f1f2b1159d6 /sys/base/arg.c
parente34a4791b72e426b02f33496fe03be1ad81819a6 (diff)
Feat(libunicode): Added an explicit unicode library
Refactored code to pull out utf8 functions from base into a standalone library. Also left the required function inside arg.c so that code that calls ARG_BEGIN doesn't have to link to libunicode.
Diffstat (limited to 'sys/base/arg.c')
-rw-r--r--sys/base/arg.c70
1 files changed, 70 insertions, 0 deletions
diff --git a/sys/base/arg.c b/sys/base/arg.c
index 64e4dd6..269043e 100644
--- a/sys/base/arg.c
+++ b/sys/base/arg.c
@@ -1 +1,71 @@
+#include <u.h>
+#include <base.h>
+
+// NOTE: this utf8 bit is copied from libunicode to remove the hard dependency just for ARG_BEGIN.
+
+#define UTFmax 4 /* most bytes arg·bytetorune examines for one encoded rune */
+#define RuneSync 0x80u /* not referenced by the decoder in this file's visible code */
+#define RuneSelf 0x80u /* not referenced by the decoder in this file's visible code */
+#define RuneErr 0xFFFDu /* value stored in place of a malformed sequence */
+#define RuneMax 0x10FFFFu /* largest decoded value accepted as a rune */
+#define RuneMask 0x1FFFFFu /* not referenced by the decoder in this file's visible code */
+
+#define Bit(i) (7-(i)) /* payload bits in a byte that starts with i 1-bits and then a 0 */
+/* Tbyte(i): a byte whose top i bits are 1 and whose remaining bits are 0,
+ * e.g. Tbyte(2) is 1100 0000 and Tbyte(3) is 1110 0000 */
+#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
+/* RuneX(i): all payload bits of an i-byte sequence set, i.e. the largest
+ * value i bytes can carry; RuneX(2) is 0000 0000 0000 0111 1111 1111.
+ * Expands to an expression that uses Bitx, so Bitx must be declared
+ * before the point of use. */
+#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
+enum /* bit-layout constants for the UTF-8 decoder arg·bytetorune */
+{
+ Bitx = Bit(1), /* 6: payload bits carried by one continuation byte */
+ Tx = Tbyte(1), /* 1000 0000: continuation marker; lead bytes below it are plain ASCII */
+ Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, /* 0111 1111: largest value a one-byte sequence can hold */
+
+ Maskx = (1 << Bitx) - 1, /* 0011 1111 */
+ Testx = Maskx ^ 0xff, /* 1100 0000: bits that must be clear once a continuation byte is XORed with Tx */
+
+ SurrogateMin = 0xD800, /* first value of the range rejected for three-byte sequences */
+ SurrogateMax = 0xDFFF, /* last value of the range rejected for three-byte sequences */
+ Bad = RuneErr, /* same value as RuneErr; the decoder itself stores RuneErr directly */
+};
+
+
+/*
+ * Decode the UTF-8 sequence that starts at s and store its value in *r.
+ * Returns the number of bytes the sequence occupies (1 to UTFmax).
+ *
+ * Malformed input -- a continuation byte used as a lead byte, a missing
+ * continuation byte, an overlong encoding, a surrogate, a value above
+ * RuneMax, or a lead byte announcing more than UTFmax bytes -- stores
+ * RuneErr in *r and returns 1.
+ *
+ * Bytes are read one at a time and the scan stops at the first byte that
+ * is not of the form 10xxxxxx, so a 0 byte inside a sequence ends it.
+ */
+int
+arg·bytetorune(uint32* r, byte* s)
+{
+    int lead, cont, n;
+    uint32 acc;
+
+    lead = *(ubyte*)(s);
+    if(lead < Tx) {
+        /* plain ASCII: the byte is the value */
+        *r = lead;
+        return 1;
+    }
+
+    acc = lead;
+    /* n is the number of bytes consumed once this pass has read its byte */
+    for(n = 2; n <= UTFmax; n++) {
+        cont = *(ubyte*)(s + (n - 1));
+        cont ^= Tx; /* a valid 10xxxxxx byte becomes 00xxxxxx */
+        if((cont & Testx) != 0)
+            break;
+
+        acc = (acc << Bitx) | cont;
+        if(lead >= Tbyte(n + 1))
+            continue; /* the lead byte announces a longer sequence */
+
+        /* sequence is complete: keep only the payload bits */
+        acc &= RuneX(n);
+
+        if(n == 2) {
+            /* stray continuation byte as lead, or overlong two-byte form */
+            if(lead < Tbyte(2) || acc <= Rune1)
+                break;
+        } else if(acc <= RuneX(n - 1) || acc > RuneMax) {
+            /* overlong form, or beyond the last valid rune */
+            break;
+        }
+
+        if(n == 3 && acc >= SurrogateMin && acc <= SurrogateMax)
+            break;
+
+        *r = acc;
+        return n;
+    }
+
+    /* every failure path lands here */
+    *r = RuneErr;
+    return 1;
+}
+
char *argv0;