From 29b56ef4e4113bcd091b19d6926f18814162ca53 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Tue, 26 Oct 2021 21:01:41 -0700 Subject: Feat(libunicode): Added an explicit unicode library Refactored code to pull out utf8 functions from base into a standalone library. Also left the required function inside arg.c so that code that calls ARGBEGIN doesn't have to link to libunicode. --- include/base.h | 43 +++++-------------------------------------- include/libunicode.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 38 deletions(-) create mode 100644 include/libunicode.h (limited to 'include') diff --git a/include/base.h b/include/base.h index 699786f..a7813e6 100644 --- a/include/base.h +++ b/include/base.h @@ -118,38 +118,6 @@ void str·replace(string s, const byte* from, const byte* to); string* str·split(string s, const byte* tok); string str·join(vlong len, byte** fields, const byte* sep); -/* - * UTF-8 functions. - * Perhaps break into own unit - * TODO: Add to(upper|lower|title) - */ -typedef uint32 rune; - -/* - * We have to use the preprocessor to ensure - * we have unsigned constants. Unfortunate... 
- */ - -#define UTFmax 4 -#define RuneSync 0x80u -#define RuneSelf 0x80u -#define RuneErr 0xFFFDu -#define RuneMax 0x10FFFFu -#define RuneMask 0x1FFFFFu - -/* utf8 helpers */ -int utf8·fullrune(byte *s, int n); -byte *utf8·findrune(byte *s, long i); -byte *utf8·findrrune(byte* s, long c); -int utf8·bytetorune(rune *r, byte *s); -int utf8·runetobyte(byte *s, rune *r); -int utf8·len(byte *s); -int utf8·runelen(rune r); -int utf8·isletter(rune r); -int utf8·isdigit(rune r); -int utf8·isspace(rune r); -int utf8·istitle(rune r); - // ----------------------------------------------------------------------------- // i/o @@ -270,7 +238,7 @@ enum bufio·end = 1 << 3, }; -struct io·Buffer +struct io·Buffer { int state; int runesize; @@ -288,8 +256,6 @@ error bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h); void bufio·finireader(io·Buffer *buf); int bufio·getbyte(io·Buffer *buf); error bufio·ungetbyte(io·Buffer *buf, byte c); -rune bufio·getrune(io·Buffer *buf); -error bufio·ungetrune(io·Buffer *buf, rune r); int bufio·read(io·Buffer *buf, int sz, int n, void *out); // ----------------------------------------------------------------------------- @@ -432,7 +398,7 @@ uint64 rng·poisson(double mean); /* from plan9 libc */ -#define ERRMAX 128 /* max length of error string */ +#define ERRMAX 128 /* max length of error string */ #define SET(x) ((x)=0) #define USED(x) if(x){}else{} @@ -444,17 +410,18 @@ uint64 rng·poisson(double mean); #endif extern char *argv0; +int arg·bytetorune(uint32*, byte *); #define ARGBEGIN for((argv0?0:(argv0=*argv)),argv++,argc--; \ argv[0] && argv[0][0]=='-' && argv[0][1]; \ argc--, argv++) { \ byte *_args, *_argt; \ - rune _argc; \ + uint32 _argc; \ _args = &argv[0][1]; \ if(_args[0]=='-' && _args[1]==0){ \ argc--; argv++; break; \ } \ _argc = 0; \ - while(*_args && (_args += utf8·bytetorune(&_argc, _args)))\ + while(*_args && (_args += arg·bytetorune(&_argc, _args)))\ switch(_argc) #define ARGEND 
SET(_argt);USED(_argt);USED(_argc);USED(_args);}USED(argv);USED(argc); #define ARGF() (_argt=_args, _args="",\ diff --git a/include/libunicode.h b/include/libunicode.h new file mode 100644 index 0000000..e017ac5 --- /dev/null +++ b/include/libunicode.h @@ -0,0 +1,36 @@ +#pragma once + +typedef uint32 rune; + +/* + * We have to use the preprocessor to ensure + * we have unsigned constants. Unfortunate... + */ + +#define UTFmax 4 +#define RuneSync 0x80u +#define RuneSelf 0x80u +#define RuneErr 0xFFFDu +#define RuneMax 0x10FFFFu +#define RuneMask 0x1FFFFFu + +/* + * UTF-8 functions. + */ +int utf8·len(char *s); +int utf8·runelen(rune r); + +int utf8·fullrune(char *s, int n); +char *utf8·findrune(char *s, long i); +char *utf8·findrrune(char* s, long c); +int utf8·bytetorune(rune *r, char *s); +int utf8·runetobyte(char *s, rune *r); + +int utf8·isletter(rune r); +int utf8·isdigit(rune r); +int utf8·isspace(rune r); +int utf8·istitle(rune r); + +rune utf8·toupper(rune r); +rune utf8·tolower(rune r); +rune utf8·totitle(rune r); -- cgit v1.2.1