From 6318ddb69ad8380694bbca35ca5a72230c2ee694 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 28 Oct 2021 11:55:47 -0700 Subject: Feat(libunicode): cleaned up decode/encode Additionally, decode can now apply backwards on a byte string. --- sys/libunicode/decodeprev.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 sys/libunicode/decodeprev.c (limited to 'sys/libunicode/decodeprev.c') diff --git a/sys/libunicode/decodeprev.c b/sys/libunicode/decodeprev.c new file mode 100644 index 0000000..27dced6 --- /dev/null +++ b/sys/libunicode/decodeprev.c @@ -0,0 +1,60 @@ +#include "internal.h" + +#define ACCEPT 0 +#define REJECT 12 + +static uint8 decode[] = { + /* + * the first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. + */ + // 0 1 2 3 4 5 6 7 8 9 10 11 + 0,24,12,12,12,12,12,24,12,24,12,12, + 0,24,12,12,12,12,12,24,12,24,12,12, + 12,36, 0,12,12,12,12,48,12,36,12,12, + 12,60,12, 0, 0,12,12,72,12,72,12,12, + 12,60,12, 0,12,12,12,72,12,72, 0,12, + 12,12,12,12,12, 0, 0,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12, 0 +}; + +int +utf8·decodeprev(byte *s, rune *r) +{ + int n; + rune v; + uint8 b, t, d, x=ACCEPT; + + v=0, n=0, d=0; +nextbyte: + b = ((uint8 *)s)[-n++]; + t = decode[b]; + x = decode[256+x+t]; + + if(x > REJECT && n < UTFmax){ + v = v | ((b & TMask) << d); + d += 6; + goto nextbyte; + } + + if(x != ACCEPT) + *r = RuneErr; + else{ + v |= (((0xFFu >> t) & b) << d); + *r = v; + } + + return n; +} -- cgit v1.2.1