#include "internal.h"

/*
 * States of the decoding automaton. Every state value stored in the
 * transition table below is a multiple of 12 (the number of character
 * classes), so a state can be added directly to a class to index the table.
 * ACCEPT means a complete, valid sequence has been read; REJECT means the
 * input is malformed. All remaining states (24 ... 96) are greater than
 * REJECT and mean "in the middle of a multi-byte sequence".
 */
#define ACCEPT 0
#define REJECT 12

/*
 * Lookup table driving the decoder. It is never written to, hence const.
 *
 * Layout:
 *   decode[0 .. 255]  maps a byte to its character class (0 .. 11)
 *   decode[256 + state + class]  gives the next state
 */
static const uint8 decode[] = {
    /*
     * the first part of the table maps bytes to character classes, which
     * reduces the size of the transition table; for lead bytes the class
     * is also used as a shift count to build the payload bitmask
     */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x00 - 0x0F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x10 - 0x1F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x20 - 0x2F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x30 - 0x3F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x40 - 0x4F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x50 - 0x5F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x60 - 0x6F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x70 - 0x7F */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0x80 - 0x8F */
    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,    /* 0x90 - 0x9F */
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 0xA0 - 0xAF */
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 0xB0 - 0xBF */
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    /* 0xC0 - 0xCF */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    /* 0xD0 - 0xDF */
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3,   /* 0xE0 - 0xEF */
    11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,   /* 0xF0 - 0xFF */

    /*
     * the second part is a transition table that maps a combination
     * of a state of the automaton and a character class to a state;
     * one row of 12 classes per state
     */
    0,12,24,36,60,96,84,12,12,12,48,72,     /* state  0 (ACCEPT) */
    12,12,12,12,12,12,12,12,12,12,12,12,    /* state 12 (REJECT) */
    12, 0,12,12,12,12,12, 0,12, 0,12,12,    /* state 24 */
    12,24,12,12,12,12,12,24,12,24,12,12,    /* state 36 */
    12,12,12,12,12,12,12,24,12,12,12,12,    /* state 48 */
    12,24,12,12,12,12,12,12,12,24,12,12,    /* state 60 */
    12,12,12,12,12,12,12,36,12,36,12,12,    /* state 72 */
    12,36,12,12,12,12,12,36,12,36,12,12,    /* state 84 */
    12,36,12,12,12,12,12,12,12,12,12,12,    /* state 96 */
};

/*
 * Decode the single UTF-8 sequence that starts at s.
 *
 * s: input bytes; at most UTFmax bytes are examined, and reading stops as
 *    soon as the automaton accepts or rejects.
 * r: receives the decoded code point, or RuneErr if the input is malformed.
 *
 * Returns the number of bytes consumed: the length of the sequence on
 * success, 1 on failure (so the caller can skip one byte and resynchronise).
 */
int
utf8·decode(char *s, rune *r)
{
    int n;
    rune v;
    uint8 b, t, x=ACCEPT;

    /*
     * lead byte: shifting 0xFF right by the class yields a mask that
     * strips the length-marker bits and leaves only the payload
     */
    b = ((uint8 *)s)[0];
    t = decode[b];
    v = (0xFF >> t) & b;
    x = decode[256+x+t];

    /*
     * following bytes: x > REJECT holds only for the intermediate states.
     * NOTE(review): TMask is presumably the 6-bit continuation payload
     * mask (0x3F) from internal.h -- confirm.
     */
    for(n=1; x > REJECT && n < UTFmax; n++){
        b = ((uint8 *)s)[n];
        t = decode[b];
        v = (v << 6) | (b & TMask);
        x = decode[256+x+t];
    }

    /* rejected, or still incomplete after UTFmax bytes */
    if(x != ACCEPT){
        *r = RuneErr;
        return 1;
    }

    *r = v;
    return n;
}

/*
 * Disabled with #if 0 and therefore never compiled. It appears to be an
 * earlier hand-written decoder with the same contract, kept for reference.
 */
#if 0
int
utf8·decode(byte *s, rune *r)
{
    int c[UTFmax], i;
    rune l;

    c[0] = *(ubyte*)(s);
    if(c[0] < Tx){
        *r = c[0];
        return 1;
    }

    l = c[0];
    for(i = 1; i < UTFmax; i++){
        c[i] = *(ubyte*)(s+i);
        c[i] ^= Tx;
        if(c[i] & Testx) goto bad;

        l = (l << Bitx) | c[i];
        if(c[0] < Tbyte(i + 2)){
            l &= RuneX(i + 1);
            if(i == 1){
                if(c[0] < Tbyte(2) || l <= Rune1)
                    goto bad;
            }else if(l <= RuneX(i) || l > RuneMax)
                goto bad;

            if(i == 2 && SurrogateMin <= l && l <= SurrogateMax)
                goto bad;

            *r = l;
            return i + 1;
        }
    }
bad:
    *r = RuneErr;
    return 1;
}
#endif