aboutsummaryrefslogtreecommitdiff
path: root/src/base/arg.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/base/arg.c')
-rw-r--r--src/base/arg.c71
1 files changed, 71 insertions, 0 deletions
diff --git a/src/base/arg.c b/src/base/arg.c
new file mode 100644
index 0000000..269043e
--- /dev/null
+++ b/src/base/arg.c
@@ -0,0 +1,71 @@
+#include <u.h>
+#include <base.h>
+
+// NOTE: this UTF-8 decoder is copied from libunicode so that ARG_BEGIN alone does not force a hard dependency on that library.
+
+#define UTFmax 4 /* loop bound in arg·bytetorune: at most this many bytes are read per rune */
+#define RuneSync 0x80u /* not used by arg·bytetorune */
+#define RuneSelf 0x80u /* not used by arg·bytetorune; same value as Tx below */
+#define RuneErr 0xFFFDu /* stored in *r by arg·bytetorune when decoding fails */
+#define RuneMax 0x10FFFFu /* largest value arg·bytetorune accepts */
+#define RuneMask 0x1FFFFFu /* not used by arg·bytetorune; same value as RuneX(4) */
+
+#define Bit(i) (7-(i)) /* 7, 6, 5, ... for i = 0, 1, 2, ... */
+/* A byte whose top i bits are 1 and whose remaining bits are 0, e.g. Tbyte(2) is 1100 0000 */
+#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
+/* Mask of the low Bit(i) + (i-1)*Bitx bits, e.g. RuneX(2) is 0000 0000 0000 0111 1111 1111 */
+#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
+enum
+{
+ Bitx = Bit(1), /* 6: number of bits arg·bytetorune takes from each byte after the first */
+ Tx = Tbyte(1), /* 1000 0000 */
+ Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, /* 0111 1111 */
+
+ Maskx = (1 << Bitx) - 1, /* 0011 1111 */
+ Testx = Maskx ^ 0xff, /* 1100 0000 */
+
+ SurrogateMin = 0xD800, /* values in SurrogateMin..SurrogateMax are rejected by arg·bytetorune */
+ SurrogateMax = 0xDFFF,
+ Bad = RuneErr, /* same value as RuneErr; arg·bytetorune uses RuneErr directly */
+};
+/* Decode the UTF-8 sequence starting at s into *r; return the number of bytes it occupies (1 to UTFmax). */
+/* Malformed, overlong, surrogate or above-RuneMax input stores RuneErr in *r and returns 1. */
+int
+arg·bytetorune(uint32* r, byte* s)
+{
+ int c[4], i; /* c[k] holds byte k of the sequence; for k >= 1 it is kept after the ^= Tx below */
+ uint32 l; /* value accumulated from the bytes read so far */
+ /* A first byte below Tx (1000 0000) is a complete one-byte rune. */
+ c[0] = *(ubyte*)(s);
+ if(c[0] < Tx) {
+ *r = c[0];
+ return 1;
+ }
+ /* Otherwise fold in Bitx bits from each following byte until c[0] says the sequence is complete. */
+ l = c[0];
+ for(i = 1; i < UTFmax; i++) {
+ c[i] = *(ubyte*)(s+i);
+ c[i] ^= Tx; /* a 10xx xxxx byte becomes 00xx xxxx */
+ if (c[i] & Testx) goto bad; /* byte was not 10xx xxxx (a 0 byte fails here too, so no later byte is read) */
+ /* l still carries the marker bits of c[0]; they are masked off once the length is known. */
+ l = (l << Bitx) | c[i];
+ if(c[0] < Tbyte(i + 2)) { /* c[0] is below the (i+2)-byte lead pattern: the sequence ends at byte i */
+ l &= RuneX(i + 1); /* keep only the value bits of an (i+1)-byte sequence */
+ if (i == 1) {
+ if (c[0] < Tbyte(2) || l <= Rune1) /* c[0] was itself 10xx xxxx, or the value fits in one byte */
+ goto bad;
+ } else if (l <= RuneX(i) || l > RuneMax) /* value fits in a shorter sequence, or exceeds RuneMax */
+ goto bad;
+ if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) /* only a 3-byte sequence can land in this range */
+ goto bad;
+ /* Accepted: report the rune and the i+1 bytes it used. */
+ *r = l;
+ return i + 1;
+ }
+ }
+bad: /* also reached by falling out of the loop, i.e. c[0] >= Tbyte(5) (1111 1xxx) */
+ *r = RuneErr;
+ return 1;
+}
+
+char *argv0; /* definition only; never assigned in this file. Presumably set by ARG_BEGIN to the program name - confirm in base.h */