blob: 269043e9f70dd6ab9d989b4809aa27076bfe3186 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#include <u.h>
#include <base.h>
// NOTE: this utf8 bit is copied from libunicode to remove the hard dependency just for ARG_BEGIN.
/*
 * UTF-8 decoding parameters used by the decoder below.
 */
/* Upper bound on the number of bytes the decoder examines for one sequence. */
#define UTFmax 4
/* NOTE(review): RuneSync, RuneSelf and RuneMask are not referenced in the
 * code visible here; presumably carried over with the copied snippet. */
#define RuneSync 0x80u
#define RuneSelf 0x80u
/* Value the decoder stores in its output when the input is malformed
 * (U+FFFD, the Unicode replacement character). */
#define RuneErr 0xFFFDu
/* Largest code point the decoder accepts. */
#define RuneMax 0x10FFFFu
#define RuneMask 0x1FFFFFu
/* Bit(i) is 7-i: the number of low bits left in a byte once the top i bits
 * and one further bit are set aside. */
#define Bit(i) (7-(i))
/* Byte whose top i bits are 1 and remaining bits are 0,
 * e.g. Tbyte(1) is 1000 0000, Tbyte(2) is 1100 0000, Tbyte(3) is 1110 0000. */
#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
/* Mask with the low Bit(i) + (i-1)*Bitx bits set (Bitx comes from the enum
 * that follows), e.g. RuneX(2) is 0000 0000 0000 0111 1111 1111 (0x7FF),
 * RuneX(3) is 0xFFFF and RuneX(4) is 0x1FFFFF.  Note RuneX(1) is 0x3F, not
 * 0x7F; the one-byte limit is the separate constant Rune1. */
#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
/*
 * Constants derived from the Bit/Tbyte macros, plus the UTF-16 surrogate
 * range that the decoder refuses to produce.
 */
enum
{
/* Number of payload bits in a continuation byte: Bit(1) == 6. */
Bitx = Bit(1),
/* Tbyte(1) == 0x80: tag of a continuation byte (10xx xxxx); also the first
 * byte value that is not decoded as a single-byte rune. */
Tx = Tbyte(1),
/* (1 << 7) - 1 == 0x7F: largest value a one-byte sequence can hold. */
Rune1 = (1 << (Bit(0)+0*Bitx)) - 1,
/* Payload bits of a continuation byte. */
Maskx = (1 << Bitx) - 1, /* 0011 1111 */
/* Tag bits of a continuation byte; after XOR with Tx these must all be 0. */
Testx = Maskx ^ 0xff, /* 1100 0000 */
/* Inclusive range of surrogate code points, rejected for 3-byte sequences. */
SurrogateMin = 0xD800,
SurrogateMax = 0xDFFF,
/* Alias for RuneErr; not referenced in the code visible here. */
Bad = RuneErr,
};
/*
 * Decode the UTF-8 sequence that starts at s and store the resulting code
 * point in *r.
 *
 * Returns the number of bytes consumed (1 to UTFmax).  If the input is not a
 * well-formed sequence, *r is set to RuneErr and 1 is returned so the caller
 * can step over the offending byte.  Input is treated as malformed when:
 *   - a continuation byte is not of the form 10xx xxxx (this includes the
 *     terminating NUL, so bytes past the end of a NUL-terminated string are
 *     never read);
 *   - the first byte is itself a continuation byte;
 *   - the encoding is overlong for the value it carries;
 *   - a 3-byte sequence encodes a surrogate (SurrogateMin..SurrogateMax);
 *   - the value exceeds RuneMax;
 *   - the first byte announces more than UTFmax bytes.
 *
 * NOTE(review): the two non-ASCII characters in the function name look like
 * a mis-decoded middle dot; confirm the file's encoding against the
 * declaration callers use.
 */
int
argยทbytetorune(uint32* r, byte* s)
{
	ubyte *src = (ubyte*)s;
	int lead, cont, len;
	uint32 acc;

	lead = src[0];
	if(lead < Tx) {
		/* single-byte (ASCII) rune */
		*r = lead;
		return 1;
	}

	acc = lead;
	/* len is the total sequence length being tried on this pass */
	for(len = 2; len <= UTFmax; len++) {
		/* flip the 10 tag; any remaining high bit means "not a continuation" */
		cont = src[len - 1] ^ Tx;
		if((cont & Testx) != 0)
			break;
		acc = (acc << Bitx) | cont;

		/* first byte announces a longer sequence: fetch another byte */
		if(lead >= Tbyte(len + 1))
			continue;

		/* drop the length tag bits contributed by the first byte */
		acc &= RuneX(len);
		if(len == 2) {
			/* stray continuation as first byte, or overlong encoding */
			if(lead < Tbyte(2) || acc <= Rune1)
				break;
		} else {
			/* overlong encoding, or beyond the largest code point */
			if(acc <= RuneX(len - 1) || acc > RuneMax)
				break;
			/* surrogates can only show up in 3-byte form */
			if(len == 3 && acc >= SurrogateMin && acc <= SurrogateMax)
				break;
		}

		*r = acc;
		return len;
	}

	/* malformed input: report the error rune and consume one byte */
	*r = RuneErr;
	return 1;
}
/*
 * File-scope definition with external linkage; with no initializer it starts
 * out as a null pointer.
 * NOTE(review): presumably set to the program name by ARG_BEGIN; confirm
 * against the macro's definition in base.h.
 */
char *argv0;
|