aboutsummaryrefslogtreecommitdiff
path: root/sys/base/arg.c
blob: 269043e9f70dd6ab9d989b4809aa27076bfe3186 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <u.h>
#include <base.h>

// NOTE: this UTF-8 decoding code is copied from libunicode so that ARG_BEGIN alone does not force a hard dependency on that library.

/* Maximum number of bytes the decoder below reads for one rune. */
#define UTFmax   4
/* RuneSync, RuneSelf and RuneMask are not referenced by the decoder in this part of the file. */
#define RuneSync 0x80u
#define RuneSelf 0x80u
/* Value the decoder stores when the input is not a well-formed sequence. */
#define RuneErr  0xFFFDu
/* Largest decoded value the decoder accepts. */
#define RuneMax  0x10FFFFu
#define RuneMask 0x1FFFFFu

/*
 * Bit(i) is 7-i: the number of payload bits left in a byte whose top
 * i bits are 1s followed by a single 0 (Bit(0) = 7, Bit(1) = 6, ...).
 */
#define Bit(i) (7-(i))
/*
 * Byte with its top i bits set and the remaining bits clear, e.g.
 * Tbyte(1) is 1000 0000, Tbyte(2) is 1100 0000, Tbyte(3) is 1110 0000.
 */
#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
/*
 * Mask covering every payload bit of an i-byte sequence, e.g.
 * RuneX(2) is 0000 0000 0000 0111 1111 1111 (0x7FF), RuneX(3) is 0xFFFF
 * and RuneX(4) is 0x1FFFFF.  Expands to a use of Bitx, which is defined
 * in the enum that follows.
 */
#define	RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
/* Derived constants used by the UTF-8 decoder. */
enum
{
    Bitx  = Bit(1),                      /* 6: payload bits carried by each continuation byte */
    Tx    = Tbyte(1),                    /* 1000 0000: tag of a continuation byte; bytes below it decode as themselves */
    Rune1 = (1 << (Bit(0)+0*Bitx)) - 1,  /* 0111 1111: largest value that fits in a single byte */

    Maskx = (1 << Bitx) - 1, /* 0011 1111: payload bits of a continuation byte */
    Testx = Maskx ^ 0xff,    /* 1100 0000: tag bits of a continuation byte */

    /* Inclusive range of surrogate values, which the decoder rejects. */
    SurrogateMin = 0xD800,
    SurrogateMax = 0xDFFF,
    Bad = RuneErr,           /* alias of RuneErr; not referenced in this part of the file */
};


/*
 * Decode the single UTF-8 sequence that starts at s and store the
 * resulting code point in *r.
 *
 * Returns the number of bytes consumed (1 to UTFmax).  When the bytes at
 * s do not form an acceptable sequence, *r is set to RuneErr and 1 is
 * returned, so a caller can step over the offending byte and continue.
 *
 * Rejected input: a continuation byte or a byte >= Tbyte(5) in lead
 * position, a following byte that is not of the form 10xx xxxx, an
 * overlong encoding, a surrogate (SurrogateMin..SurrogateMax), and any
 * value above RuneMax.
 *
 * Bytes after the first are only read one at a time, and decoding stops
 * at the first one that is not a continuation byte.
 *
 * NOTE(review): the two characters following "arg" in the function name
 * look like a mis-decoded middle dot (U+00B7); confirm against the callers
 * before relying on the spelling shown here.
 */
int
argยทbytetorune(uint32* r, byte* s)
{
    ubyte  *p = (ubyte*)s;
    int     lead, cont, n;
    uint32  acc;

    lead = p[0];
    if(lead < Tx) {
        /* single byte: the value is the byte itself */
        *r = lead;
        return 1;
    }

    /* n is the sequence length being tried; p[n-1] is its last byte */
    acc = lead;
    for(n = 2; n <= UTFmax; n++) {
        /* clearing the 10 tag must leave only the six payload bits */
        cont = p[n-1] ^ Tx;
        if(cont & Testx)
            break;

        acc = (acc << Bitx) | cont;

        /* lead byte announces a longer sequence: fetch another byte */
        if(lead >= Tbyte(n + 1))
            continue;

        /* drop the lead byte's tag bits, keeping only the payload */
        acc &= RuneX(n);
        if(n == 2) {
            /* stray continuation byte used as lead, or overlong form */
            if(lead < Tbyte(2) || acc <= Rune1)
                break;
        } else {
            /* overlong form, or beyond the accepted range */
            if(acc <= RuneX(n - 1) || acc > RuneMax)
                break;
            /* surrogates can only be spelled with three bytes */
            if(n == 3 && acc >= SurrogateMin && acc <= SurrogateMax)
                break;
        }

        *r = acc;
        return n;
    }

    /* malformed input: report the error rune and consume one byte */
    *r = RuneErr;
    return 1;
}

/*
 * Global string pointer with external linkage, defined here.
 * NOTE(review): presumably holds the program name recorded during argument
 * parsing (ARG_BEGIN) -- confirm against base.h.
 */
char *argv0;