diff options
author | Nicholas <nbnoll@eml.cc> | 2021-11-10 20:12:45 -0800 |
---|---|---|
committer | Nicholas <nbnoll@eml.cc> | 2021-11-11 08:16:47 -0800 |
commit | 7ea1cdb7d31f00024f5a1d124b42cd19a03b959a (patch) | |
tree | f65abf9fa32856287de586129d4ed7c1c473864a /sys/libutf/internal.h | |
parent | 43688fe7190d0350349d47727c3663421d5618dc (diff) |
chore: libunicode -> libutf
Diffstat (limited to 'sys/libutf/internal.h')
-rw-r--r-- | sys/libutf/internal.h | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/sys/libutf/internal.h b/sys/libutf/internal.h new file mode 100644 index 0000000..9719977 --- /dev/null +++ b/sys/libutf/internal.h @@ -0,0 +1,38 @@ +#pragma once + +#include <u.h> +#include <base.h> +#include <libutf.h> + +/* + * NOTE: we use the preprocessor to ensure we have unsigned constants. + * UTF-8 code: + * 1 byte: + * 0xxxxxxx + * 2 byte: + * 110xxxxx 10xxxxxx + * 3 byte: + * 1110xxxx 10xxxxxx 10xxxxxx + * 4 byte: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + +#define Tx 0x80u // 0b10000000 transfer header +#define TMask 0x3Fu // 0b00111111 transfer mask + +#define TByte1 0xC0u // 0b11000000 +#define TByte2 0xE0u // 0b11100000 +#define TByte3 0xF0u // 0b11110000 +#define TByte4 0xF8u // 0b11111000 + +#define RuneMask 0x1FFFFFu + +#define Rune1Byte 0x000080u // 1 << 8 (1 byte) +#define Rune2Byte 0x001000u // 1 << 12 (2 bytes) +#define Rune3Byte 0x020000u // 1 << 17 (3 bytes) +#define Rune4Byte 0x400000u // 1 << 22 (4 bytes) + + +/* UTF-16 nonsense */ +#define RuneSurrogateMin 0x0D8000 +#define RuneSurrogateMax 0x0D8FFF |