From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libutf/encode.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/libutf/encode.c (limited to 'src/libutf/encode.c') diff --git a/src/libutf/encode.c b/src/libutf/encode.c new file mode 100644 index 0000000..fa7c93e --- /dev/null +++ b/src/libutf/encode.c @@ -0,0 +1,69 @@ +#include "internal.h" + +int +utf8·encode(rune *r, byte *s) +{ + rune c; + + c = *r; + if(c < Rune1Byte){ // 7 bits + s[0] = (uint8)c; + return 1; + } + + if(c < Rune2Byte){ // 11 bits + s[0] = TByte1 | (c >> 6); + s[1] = Tx | (c & TMask); + return 2; + } + + if(c < Rune3Byte){ // 16 bits + s[0] = TByte2 | ((c >> 12)); + s[1] = Tx | ((c >> 6) & TMask); + s[2] = Tx | ((c) & TMask); + return 3; + } + + // 22 bits + if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax)) + c = RuneErr; + + s[0] = TByte3 | ((c >> 18)); + s[1] = Tx | ((c >> 12) & TMask); + s[2] = Tx | ((c >> 6) & TMask); + s[3] = Tx | ((c) & TMask); + + return 4; +} + +#if 0 +int +utf8·encode(rune* r, byte* s) +{ + int i, j; + rune c; + + c = *r; + if(c <= Rune1) { + s[0] = c; + return 1; + } + + for(i = 2; i < UTFmax + 1; i++){ + if(i == 3){ + if(c > RuneMax) + c = RuneErr; + if(SurrogateMin <= c && c <= SurrogateMax) + c = RuneErr; + } + if(c <= RuneX(i) || i == UTFmax) { + s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); + for(j = 1; j < i; j++) + s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); + return i; + } + } + + return UTFmax; +} +#endif -- cgit v1.2.1