diff options
author | Nicholas <nbnoll@eml.cc> | 2021-11-20 10:53:19 -0800 |
---|---|---|
committer | Nicholas <nbnoll@eml.cc> | 2021-11-20 10:53:19 -0800 |
commit | a9bfe650038afea8b751175cac16f6027345e45f (patch) | |
tree | 9a7f9feb76a64bb3efe573036d80b7bdbf8a59a5 /src/base/utf/vendor/common.c | |
parent | 1c8d4e69205fd875f6bec3fa3bd929c2e7f52f62 (diff) |
Chore: reorganize libutf and libfmt into base
I found the split to be arbitrary. Better to include the functionality
in the standard library. I also split the headers to allow for more
granular inclusion (but the library is still monolithic). The only
ugliness is the circular dependency introduced with libutf's generated
functions. We put explicit prereqs with the necessary object files
instead.
Diffstat (limited to 'src/base/utf/vendor/common.c')
-rw-r--r-- | src/base/utf/vendor/common.c | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c new file mode 100644 index 0000000..fcf1177 --- /dev/null +++ b/src/base/utf/vendor/common.c @@ -0,0 +1,220 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// input functions + +int +parse(io·Stream *io, int nfield, char **field, int len, char *line) +{ + int n; + if((n=io·readln(io, len, line)) <= 0) + return ParseEOF; + + if(n == len) + panicf("line too long"); + + if(line[n-1] != '\n') + panicf("invalid line: expected '\n', found '%c'", line[n]); + + line[n-1] = 0; + + if(line[0] == '#' || line[0] == 0) + return ParseSkip; + + /* tokenize line into fields */ + n = 0; + field[n] = line; + while(*line){ + if(*line == ';'){ + *line = 0; + field[++n] = line+1; + } + line++; + } + + if(n != nfield-1) + panicf("expected %d number of fields, got %d: %s", nfield, n, line); + + return ParseOK; +} + +int +codepoint(char *s) +{ + int c, b; + + c = 0; + while((b=*s++)){ + c <<= 4; + if(b >= '0' && b <= '9') + c += b - '0'; + else if(b >= 'A' && b <= 'F') + c += b - 'A' + 10; + else + panicf("bad codepoint char '%c'", b); + } + + return c; +} + +void +codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop) +{ + int e, c; + char *other[NumFields], line[1024]; + + // XXX: the stop variable passes in the previous stopping character + e = *stop; + c = codepoint(field[Fcode]); + + if(c >= NumRunes) + panicf("unexpected large codepoint %x", c); + if(c <= e) + panicf("bad code sequence: %x then %x", e, c); + e = c; + + if(strstr(field[Fname], ", First>") != nil){ + if(!parse(utf8, arrlen(other), other, arrlen(line), line)) + panicf("range start at end of file"); + if(strstr(other[Fname], ", Last>") == nil) + panicf("range start not followed by range end"); + + e = codepoint(other[Fcode]); + + if(e <= c) + panicf("bad code sequence: %x then %x", c, e); + if(strcmp(field[Fcategory], other[Fcategory]) != 0) + panicf("range with mismatched category"); + } + + *start = c; + *stop = e; +} + +// ----------------------------------------------------------------------- +// output functions + +void +putsearch(void) +{ + puts( + "#include <u.h>\n" + "#include <base/utf.h>\n" + "\n" + "static\n" + "rune*\n" + "rangesearch(rune c, rune *t, int n, int ne)\n" + "{\n" + " rune *p;\n" + " int m;\n" + " while(n > 1) {\n" + " m = n >> 1;\n" + " p = t + m*ne;\n" + " if(c >= p[0]){\n" + " t = p;\n" + " n = n-m;\n" + " }else\n" + " n = m;\n" + " }\n" + " if(n && c >= t[0])\n" + " return t;\n" + " return 0;\n" + "}\n" + ); + +} + +int +putrange(char *ident, char *prop, int force) +{ + int l, r, start; + + start = 0; + for(l = 0; l < NumRunes;) { + if(!prop[l]){ + l++; + continue; + } + + for(r = l+1; r < NumRunes; r++){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(force || r > l + 1){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r-1); + } + + l = r; + } + + if(start) + printf("};\n\n"); + + return start; +} + +int +putpair(char *ident, char *prop) +{ + int l, r, start; + + start = 0; + for(l=0; l+2 < NumRunes; ){ + if(!prop[l]){ + l++; + continue; + } + + for(r = l + 2; r < NumRunes; r += 2){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(r != l + 2){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r - 2); + } + + l = r; + } + + if(start) + printf("};\n\n"); + return start; +} + +int +putsingle(char *ident, char *prop) +{ + int i, start; + + start = 0; + for(i = 0; i < NumRunes; i++) { + if(!prop[i]) + continue; + + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[i] = 0; + printf("\t0x%.4x,\n", i); + } + + if(start) + printf("};\n\n"); + + return start; +} |