From 6318ddb69ad8380694bbca35ca5a72230c2ee694 Mon Sep 17 00:00:00 2001
From: Nicholas Noll
Date: Thu, 28 Oct 2021 11:55:47 -0700
Subject: Feat(libunicode): cleaned up decode/encode

Additionally, decode can now apply backwards on a byte string.
---
Review note: utf8·encode now substitutes RuneErr before choosing between
the 3 and 4 byte forms. The commit id and index hashes in this patch were
not regenerated for that revision.

 sys/libunicode/encode.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'sys/libunicode/encode.c')

diff --git a/sys/libunicode/encode.c b/sys/libunicode/encode.c
index 8f4d212..fa7c93e 100644
--- a/sys/libunicode/encode.c
+++ b/sys/libunicode/encode.c
@@ -1,5 +1,56 @@
 #include "internal.h"
 
+/*
+ * Encode the rune *r as UTF-8 into s and return the number of bytes
+ * written (1 to 4); s must have room for 4 bytes.  A rune above RuneMax
+ * or inside the surrogate range is encoded as RuneErr instead.
+ *
+ * NOTE(review): if rune is a signed type, a negative value takes the
+ * 1 byte path below; confirm rune is unsigned in internal.h.
+ */
+int
+utf8·encode(rune *r, byte *s)
+{
+	rune c;
+
+	c = *r;
+	if(c < Rune1Byte){ // 7 bits
+		s[0] = (uint8)c;
+		return 1;
+	}
+
+	if(c < Rune2Byte){ // 11 bits
+		s[0] = TByte1 | (c >> 6);
+		s[1] = Tx | (c & TMask);
+		return 2;
+	}
+
+	/*
+	 * Substitute RuneErr before picking the 3 or 4 byte form: surrogates
+	 * are 16 bit values and would otherwise be emitted by the 3 byte
+	 * branch, and RuneErr must not be padded out to an overlong 4 byte
+	 * sequence (assumes RuneErr < Rune3Byte, i.e. U+FFFD; confirm).
+	 */
+	if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax))
+		c = RuneErr;
+
+	if(c < Rune3Byte){ // 16 bits
+		s[0] = TByte2 | (c >> 12);
+		s[1] = Tx | ((c >> 6) & TMask);
+		s[2] = Tx | (c & TMask);
+		return 3;
+	}
+
+	// 21 bits
+	s[0] = TByte3 | (c >> 18);
+	s[1] = Tx | ((c >> 12) & TMask);
+	s[2] = Tx | ((c >> 6) & TMask);
+	s[3] = Tx | (c & TMask);
+
+	return 4;
+}
+
+#if 0
 int
 utf8·encode(rune* r, byte* s)
 {
@@ -29,3 +80,4 @@ utf8·encode(rune* r, byte* s)
 
 	return UTFmax;
 }
+#endif
-- 
cgit v1.2.1