From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libutf/vendor/common.c | 220 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/libutf/vendor/common.c (limited to 'src/libutf/vendor/common.c') diff --git a/src/libutf/vendor/common.c b/src/libutf/vendor/common.c new file mode 100644 index 0000000..5a03a50 --- /dev/null +++ b/src/libutf/vendor/common.c @@ -0,0 +1,220 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// input functions + +int +parse(io·Stream *io, int nfield, char **field, int len, char *line) +{ + int n; + if((n=io·readln(io, len, line)) <= 0) + return ParseEOF; + + if(n == len) + panicf("line too long"); + + if(line[n-1] != '\n') + panicf("invalid line: expected '\n', found '%c'", line[n]); + + line[n-1] = 0; + + if(line[0] == '#' || line[0] == 0) + return ParseSkip; + + /* tokenize line into fields */ + n = 0; + field[n] = line; + while(*line){ + if(*line == ';'){ + *line = 0; + field[++n] = line+1; + } + line++; + } + + if(n != nfield-1) + panicf("expected %d number of fields, got %d: %s", nfield, n, line); + + return ParseOK; +} + +int +codepoint(char *s) +{ + int c, b; + + c = 0; + while((b=*s++)){ + c <<= 4; + if(b >= '0' && b <= '9') + c += b - '0'; + else if(b >= 'A' && b <= 'F') + c += b - 'A' + 10; + else + panicf("bad codepoint char '%c'", b); + } + + return c; +} + +void +codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop) +{ + int e, c; + char *other[NumFields], line[1024]; + + // XXX: the stop variable passes in the previous stopping character + e = *stop; + c = codepoint(field[Fcode]); + + if(c >= NumRunes) + panicf("unexpected large codepoint %x", c); + if(c <= e) + panicf("bad code sequence: %x then %x", e, c); + e = c; + + if(strstr(field[Fname], ", First>") != nil){ + if(!parse(utf8, arrlen(other), other, arrlen(line), line)) + panicf("range start at end of file"); + if(strstr(other[Fname], ", Last>") == nil) + panicf("range start not followed by range end"); + + e = codepoint(other[Fcode]); + + if(e <= c) + panicf("bad code sequence: %x then %x", c, e); + if(strcmp(field[Fcategory], other[Fcategory]) != 0) + panicf("range with mismatched category"); + } + + *start = c; + *stop = e; +} + +// ----------------------------------------------------------------------- +// output functions + +void +putsearch(void) +{ + puts( + "#include \n" + "#include \n" + "\n" + "static\n" + "rune*\n" + "rangesearch(rune c, rune *t, int n, int ne)\n" + "{\n" + " rune *p;\n" + " int m;\n" + " while(n > 1) {\n" + " m = n >> 1;\n" + " p = t + m*ne;\n" + " if(c >= p[0]){\n" + " t = p;\n" + " n = n-m;\n" + " }else\n" + " n = m;\n" + " }\n" + " if(n && c >= t[0])\n" + " return t;\n" + " return 0;\n" + "}\n" + ); + +} + +int +putrange(char *ident, char *prop, int force) +{ + int l, r, start; + + start = 0; + for(l = 0; l < NumRunes;) { + if(!prop[l]){ + l++; + continue; + } + + for(r = l+1; r < NumRunes; r++){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(force || r > l + 1){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r-1); + } + + l = r; + } + + if(start) + printf("};\n\n"); + + return start; +} + +int +putpair(char *ident, char *prop) +{ + int l, r, start; + + start = 0; + for(l=0; l+2 < NumRunes; ){ + if(!prop[l]){ + l++; + continue; + } + + for(r = l + 2; r < NumRunes; r += 2){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(r != l + 2){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r - 2); + } + + l = r; + } + + if(start) + printf("};\n\n"); + return start; +} + +int +putsingle(char *ident, char *prop) +{ + int i, start; + + start = 0; + for(i = 0; i < NumRunes; i++) { + if(!prop[i]) + continue; + + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[i] = 0; + printf("\t0x%.4x,\n", i); + } + + if(start) + printf("};\n\n"); + + return start; +} -- cgit v1.2.1