aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Nicholas Noll <nbnoll@eml.cc> 2021-10-26 21:01:41 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2021-10-26 21:01:41 -0700
commit: 29b56ef4e4113bcd091b19d6926f18814162ca53 (patch)
tree: a4888a16927576592af13928bb805f1f2b1159d6 /include
parent: e34a4791b72e426b02f33496fe03be1ad81819a6 (diff)
Feat(libunicode): Added an explicit unicode library
Refactored the code to pull the UTF-8 functions out of base into a standalone library. The one decoding function that the ARGBEGIN macro requires (arg·bytetorune) is kept inside arg.c, so that code that uses ARGBEGIN doesn't have to link against libunicode.
Diffstat (limited to 'include')
-rw-r--r--  include/base.h        43
-rw-r--r--  include/libunicode.h  36
2 files changed, 41 insertions, 38 deletions
diff --git a/include/base.h b/include/base.h
index 699786f..a7813e6 100644
--- a/include/base.h
+++ b/include/base.h
@@ -118,38 +118,6 @@ void str·replace(string s, const byte* from, const byte* to);
string* str·split(string s, const byte* tok);
string str·join(vlong len, byte** fields, const byte* sep);
-/*
- * UTF-8 functions.
- * Perhaps break into own unit
- * TODO: Add to(upper|lower|title)
- */
-typedef uint32 rune;
-
-/*
- * We have to use the preprocessor to ensure
- * we have unsigned constants. Unfortunate...
- */
-
-#define UTFmax 4
-#define RuneSync 0x80u
-#define RuneSelf 0x80u
-#define RuneErr 0xFFFDu
-#define RuneMax 0x10FFFFu
-#define RuneMask 0x1FFFFFu
-
-/* utf8 helpers */
-int utf8·fullrune(byte *s, int n);
-byte *utf8·findrune(byte *s, long i);
-byte *utf8·findrrune(byte* s, long c);
-int utf8·bytetorune(rune *r, byte *s);
-int utf8·runetobyte(byte *s, rune *r);
-int utf8·len(byte *s);
-int utf8·runelen(rune r);
-int utf8·isletter(rune r);
-int utf8·isdigit(rune r);
-int utf8·isspace(rune r);
-int utf8·istitle(rune r);
-
// -----------------------------------------------------------------------------
// i/o
@@ -270,7 +238,7 @@ enum
bufio·end = 1 << 3,
};
-struct io·Buffer
+struct io·Buffer
{
int state;
int runesize;
@@ -288,8 +256,6 @@ error bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h);
void bufio·finireader(io·Buffer *buf);
int bufio·getbyte(io·Buffer *buf);
error bufio·ungetbyte(io·Buffer *buf, byte c);
-rune bufio·getrune(io·Buffer *buf);
-error bufio·ungetrune(io·Buffer *buf, rune r);
int bufio·read(io·Buffer *buf, int sz, int n, void *out);
// -----------------------------------------------------------------------------
@@ -432,7 +398,7 @@ uint64 rng·poisson(double mean);
/* from plan9 libc */
-#define ERRMAX 128 /* max length of error string */
+#define ERRMAX 128 /* max length of error string */
#define SET(x) ((x)=0)
#define USED(x) if(x){}else{}
@@ -444,17 +410,18 @@ uint64 rng·poisson(double mean);
#endif
extern char *argv0;
+int arg·bytetorune(uint32*, byte *);
#define ARGBEGIN for((argv0?0:(argv0=*argv)),argv++,argc--; \
argv[0] && argv[0][0]=='-' && argv[0][1]; \
argc--, argv++) { \
byte *_args, *_argt; \
- rune _argc; \
+ uint32 _argc; \
_args = &argv[0][1]; \
if(_args[0]=='-' && _args[1]==0){ \
argc--; argv++; break; \
} \
_argc = 0; \
- while(*_args && (_args += utf8·bytetorune(&_argc, _args)))\
+ while(*_args && (_args += arg·bytetorune(&_argc, _args)))\
switch(_argc)
#define ARGEND SET(_argt);USED(_argt);USED(_argc);USED(_args);}USED(argv);USED(argc);
#define ARGF() (_argt=_args, _args="",\
diff --git a/include/libunicode.h b/include/libunicode.h
new file mode 100644
index 0000000..e017ac5
--- /dev/null
+++ b/include/libunicode.h
@@ -0,0 +1,36 @@
+#pragma once
+
+typedef uint32 rune;
+
+/*
+ * We have to use the preprocessor to ensure
+ * we have unsigned constants. Unfortunate...
+ */
+
+#define UTFmax 4
+#define RuneSync 0x80u
+#define RuneSelf 0x80u
+#define RuneErr 0xFFFDu
+#define RuneMax 0x10FFFFu
+#define RuneMask 0x1FFFFFu
+
+/*
+ * UTF-8 functions.
+ */
+int utf8·len(char *s);
+int utf8·runelen(rune r);
+
+int utf8·fullrune(char *s, int n);
+char *utf8·findrune(char *s, long i);
+char *utf8·findrrune(char* s, long c);
+int utf8·bytetorune(rune *r, char *s);
+int utf8·runetobyte(char *s, rune *r);
+
+int utf8·isletter(rune r);
+int utf8·isdigit(rune r);
+int utf8·isspace(rune r);
+int utf8·istitle(rune r);
+
+rune utf8·toupper(rune r);
+rune utf8·tolower(rune r);
+rune utf8·totitle(rune r);