diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2021-10-26 21:01:41 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2021-10-26 21:01:41 -0700 |
commit | 29b56ef4e4113bcd091b19d6926f18814162ca53 (patch) | |
tree | a4888a16927576592af13928bb805f1f2b1159d6 /sys/libunicode/bytetorune.c | |
parent | e34a4791b72e426b02f33496fe03be1ad81819a6 (diff) |
Feat(libunicode): Added an explicit unicode library
Refactored code to pull out utf8 functions from base into a standalone
library. Also left the required function inside arg.c so that code that
calls ARG_BEGIN doesn't have to link to libunicode.
Diffstat (limited to 'sys/libunicode/bytetorune.c')
-rw-r--r-- | sys/libunicode/bytetorune.c | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/sys/libunicode/bytetorune.c b/sys/libunicode/bytetorune.c new file mode 100644 index 0000000..fe41e7a --- /dev/null +++ b/sys/libunicode/bytetorune.c @@ -0,0 +1,40 @@ +#include "internal.h" + +int +utf8·bytetorune(rune* r, byte* s) +{ + int c[UTFmax], i; + rune l; + + c[0] = *(ubyte*)(s); + if(c[0] < Tx) { + *r = c[0]; + return 1; + } + + l = c[0]; + for(i = 1; i < UTFmax; i++) { + c[i] = *(ubyte*)(s+i); + c[i] ^= Tx; + if (c[i] & Testx) goto bad; + + l = (l << Bitx) | c[i]; + if(c[0] < Tbyte(i + 2)) { + l &= RuneX(i + 1); + if (i == 1) { + if (c[0] < Tbyte(2) || l <= Rune1) + goto bad; + } else if (l <= RuneX(i) || l > RuneMax) + goto bad; + if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) + goto bad; + + *r = l; + return i + 1; + } + } +bad: + *r = RuneErr; + return 1; +} + |