From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libutf/internal.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/libutf/internal.h (limited to 'src/libutf/internal.h') diff --git a/src/libutf/internal.h b/src/libutf/internal.h new file mode 100644 index 0000000..9719977 --- /dev/null +++ b/src/libutf/internal.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include + +/* + * NOTE: we use the preprocessor to ensure we have unsigned constants. + * UTF-8 code: + * 1 byte: + * 0xxxxxxx + * 2 byte: + * 110xxxxx 10xxxxxx + * 3 byte: + * 1110xxxx 10xxxxxx 10xxxxxx + * 4 byte: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + +#define Tx 0x80u // 0b10000000 transfer header +#define TMask 0x3Fu // 0b00111111 transfer mask + +#define TByte1 0xC0u // 0b11000000 +#define TByte2 0xE0u // 0b11100000 +#define TByte3 0xF0u // 0b11110000 +#define TByte4 0xF8u // 0b11111000 + +#define RuneMask 0x1FFFFFu + +#define Rune1Byte 0x000080u // 1 << 8 (1 byte) +#define Rune2Byte 0x001000u // 1 << 12 (2 bytes) +#define Rune3Byte 0x020000u // 1 << 17 (3 bytes) +#define Rune4Byte 0x400000u // 1 << 22 (4 bytes) + + +/* UTF-16 nonsense */ +#define RuneSurrogateMin 0x0D8000 +#define RuneSurrogateMax 0x0D8FFF -- cgit v1.2.1