diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2021-10-28 11:55:47 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2021-10-28 11:55:47 -0700 |
commit | 6318ddb69ad8380694bbca35ca5a72230c2ee694 (patch) | |
tree | e9ef65d79c264d88c2d02402a587c2561b5b1299 /sys/libunicode/encode.c | |
parent | 189b9e23edfe60b7e82c4c7b6071a3f98799653a (diff) |
Feat(libunicode): cleaned up decode/encode
Additionally, decode can now apply backwards on a byte string.
Diffstat (limited to 'sys/libunicode/encode.c')
-rw-r--r-- | sys/libunicode/encode.c | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/sys/libunicode/encode.c b/sys/libunicode/encode.c index 8f4d212..fa7c93e 100644 --- a/sys/libunicode/encode.c +++ b/sys/libunicode/encode.c @@ -1,6 +1,43 @@ #include "internal.h" int +utf8·encode(rune *r, byte *s) +{ + rune c; + + c = *r; + if(c < Rune1Byte){ // 7 bits + s[0] = (uint8)c; + return 1; + } + + if(c < Rune2Byte){ // 11 bits + s[0] = TByte1 | (c >> 6); + s[1] = Tx | (c & TMask); + return 2; + } + + if(c < Rune3Byte){ // 16 bits + s[0] = TByte2 | ((c >> 12)); + s[1] = Tx | ((c >> 6) & TMask); + s[2] = Tx | ((c) & TMask); + return 3; + } + + // 22 bits + if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax)) + c = RuneErr; + + s[0] = TByte3 | ((c >> 18)); + s[1] = Tx | ((c >> 12) & TMask); + s[2] = Tx | ((c >> 6) & TMask); + s[3] = Tx | ((c) & TMask); + + return 4; +} + +#if 0 +int utf8·encode(rune* r, byte* s) { int i, j; @@ -29,3 +66,4 @@ utf8·encode(rune* r, byte* s) return UTFmax; } +#endif |