/*
 * sys/libutf/internal.h
 *
 * Private constants shared by the libutf implementation.
 *
 * Provenance: added by commit b5de2d7bd1e08d27f7780434e601d6d3a0750583
 * (Nicholas, Wed, 10 Nov 2021 20:12:45 -0800, "chore: libunicode -> libutf").
 */
#pragma once

/*
 * NOTE(review): the original header had three #include directives at this
 * point. Their targets were lost when this text was extracted, so they are
 * not reproduced here -- restore them from the repository before use.
 */

/*
 * NOTE: we use the preprocessor to ensure we have unsigned constants.
 * UTF-8 code:
 * 1 byte:
 *      0xxxxxxx
 * 2 byte:
 *      110xxxxx 10xxxxxx
 * 3 byte:
 *      1110xxxx 10xxxxxx 10xxxxxx
 * 4 byte:
 *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 */

/* Trailing ("transfer") bytes have the form 10xxxxxx. */
#define Tx      0x80u   // 0b10000000 transfer header
#define TMask   0x3Fu   // 0b00111111 transfer mask (low 6 payload bits)

/* High-bit patterns found in the leading byte of a multi-byte sequence. */
#define TByte1  0xC0u   // 0b11000000
#define TByte2  0xE0u   // 0b11100000
#define TByte3  0xF0u   // 0b11110000
#define TByte4  0xF8u   // 0b11111000

/* 21 bits: the payload width of the 4-byte form shown above (3 + 6 + 6 + 6). */
#define RuneMask 0x1FFFFFu

/*
 * NOTE(review): going by the layout above, the payload widths are 7, 11, 16
 * and 21 bits, i.e. limits of 0x80, 0x800, 0x10000 and 0x200000. Only
 * Rune1Byte matches; the remaining values step by 5 bits instead. They are
 * left unchanged because their use is not visible from this header --
 * confirm against the encoder/decoder before relying on them.
 */
#define Rune1Byte 0x000080u // 1 << 7  (1 byte)
#define Rune2Byte 0x001000u // 1 << 12 (2 bytes)
#define Rune3Byte 0x020000u // 1 << 17 (3 bytes)
#define Rune4Byte 0x400000u // 1 << 22 (4 bytes)


/*
 * UTF-16 nonsense: code points U+D800..U+DFFF are reserved for surrogate
 * pairs and are not valid scalar values. Both bounds are inclusive and,
 * like every other constant in this file, unsigned.
 */
#define RuneSurrogateMin 0xD800u
#define RuneSurrogateMax 0xDFFFu