aboutsummaryrefslogtreecommitdiff
path: root/sys/libunicode/internal.h
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2021-10-28 11:55:47 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2021-10-28 11:55:47 -0700
commit 6318ddb69ad8380694bbca35ca5a72230c2ee694 (patch)
tree e9ef65d79c264d88c2d02402a587c2561b5b1299 /sys/libunicode/internal.h
parent 189b9e23edfe60b7e82c4c7b6071a3f98799653a (diff)
Feat(libunicode): cleaned up decode/encode
Additionally, decode can now apply backwards on a byte string.
Diffstat (limited to 'sys/libunicode/internal.h')
-rw-r--r-- sys/libunicode/internal.h 37
1 files changed, 20 insertions, 17 deletions
diff --git a/sys/libunicode/internal.h b/sys/libunicode/internal.h
index 31ed2ae..6b0657b 100644
--- a/sys/libunicode/internal.h
+++ b/sys/libunicode/internal.h
@@ -4,7 +4,9 @@
#include <base.h>
#include <libunicode.h>
-/* UTF-8 code
+/*
+ * NOTE: we use the preprocessor to ensure we have unsigned constants.
+ * UTF-8 code:
* 1 byte:
* 0xxxxxxx
* 2 byte:
@@ -14,22 +16,23 @@
* 4 byte:
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
-#define Bit(i) (7-(i))
-/* 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */
-#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
-/* 0000 0000 0000 0111 1111 1111 */
-#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
-enum
-{
- Bitx = Bit(1),
- Tx = Tbyte(1),
- Rune1 = (1 << (Bit(0)+0*Bitx)) - 1,
+#define Tx 0x80u // 0b10000000 header bits of a continuation ("transfer") byte: 10xxxxxx
+#define TMask 0x3Fu // 0b00111111 selects the 6 payload bits (xxxxxx) of a continuation byte
- Maskx = (1 << Bitx) - 1, /* 0011 1111 */
- Testx = Maskx ^ 0xff, /* 1100 0000 */
+#define TByte1 0xC0u // 0b11000000 lead-byte pattern 110xxxxx (presumably: 1 transfer byte follows; confirm in encode/decode)
+#define TByte2 0xE0u // 0b11100000 lead-byte pattern 1110xxxx (presumably: 2 transfer bytes follow)
+#define TByte3 0xF0u // 0b11110000 lead-byte pattern 11110xxx (presumably: 3 transfer bytes follow)
+#define TByte4 0xF8u // 0b11111000 one bit wider than the 4-byte lead 11110xxx; NOTE(review): likely a mask/upper bound only, confirm
- SurrogateMin = 0xD800,
- SurrogateMax = 0xDFFF,
- Bad = RuneErr,
-};
+#define Rune1Byte 0x000080u // 1 << 7:  first rune that no longer fits in 1 byte  ( 7 payload bits)
+#define Rune2Byte 0x000800u // 1 << 11: first rune that no longer fits in 2 bytes (11 payload bits)
+#define Rune3Byte 0x010000u // 1 << 16: first rune that no longer fits in 3 bytes (16 payload bits)
+#define Rune4Byte 0x200000u // 1 << 21: first rune that no longer fits in 4 bytes (21 payload bits)
+
+#define RuneMax 0x10FFFFu // largest Unicode code point (U+10FFFF)
+#define RuneMask 0x1FFFFFu // low 21 bits set: payload width of the 4-byte form (3 + 3*6 bits)
+
+/* UTF-16 surrogate range U+D800..U+DFFF (inclusive); RFC 3629 forbids these code points in UTF-8 */
+#define RuneSurrogateMin 0x00D800u
+#define RuneSurrogateMax 0x00DFFFu