From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/libutf/vendor/mkrunetype.c | 388 ----------------------------------------- 1 file changed, 388 deletions(-) delete mode 100644 sys/libutf/vendor/mkrunetype.c (limited to 'sys/libutf/vendor/mkrunetype.c') diff --git a/sys/libutf/vendor/mkrunetype.c b/sys/libutf/vendor/mkrunetype.c deleted file mode 100644 index 9f939f4..0000000 --- a/sys/libutf/vendor/mkrunetype.c +++ /dev/null @@ -1,388 +0,0 @@ -#include "common.h" - -// ----------------------------------------------------------------------- -// globals - -#define OFFSET (1 << 20) -#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x)) - -// TODO: use bitarrays. will reduce executable size 8x -struct Table -{ - /* properties */ - char isspace[NumRunes]; - char isalpha[NumRunes]; - char ismark[NumRunes]; - char isdigit[NumRunes]; - char isupper[NumRunes]; - char islower[NumRunes]; - char istitle[NumRunes]; - char ispunct[NumRunes]; - char issymbl[NumRunes]; - char iscntrl[NumRunes]; - - char combine[NumRunes]; - - /* transformations */ - int toupper[NumRunes]; - int tolower[NumRunes]; - int totitle[NumRunes]; -}; - -static struct Table table; - -// ----------------------------------------------------------------------- -// internal functions - -static -int -isrange(char *label, char *prop, int force) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putrange(ident, prop, force); -} - -static -int -ispair(char *label, char *prop) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putpair(ident, prop); -} - -static -int -issingle(char *label, char *prop) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putsingle(ident, prop); -} - -static -void -makeis(char *label, char *table, int pairs, int onlyranges) -{ - int hasr, hasp=0, hass=0; - - hasr = isrange(label, table, onlyranges); - if(!onlyranges && pairs) - hasp = ispair(label, table); - if(!onlyranges) - hass = issingle(label, table); - - printf( - "int\n" - "utf8·is%s(rune c)\n" - "{\n" - " rune *p;\n" - "\n", - label); - - if(hasr){ - printf( - " p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1])\n" - " return 1;\n", - label, label); - } - - if(hasp){ - printf( - " p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return 1;\n", - label, label); - } - - if(hass) - printf( - " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n" - " if(p && c == p[0])\n" - " return 1;\n", - label, label); - - printf( - " return 0;\n" - "}\n" - "\n"); -} - -static -int -torange(char *label, int *index, int force) -{ - int l, r, d, start = 0; - - for(l = 0; l < NumRunes; ){ - if(index[l] == l){ - l++; - continue; - } - - d = DELTA(index[l], l); - if(d != (rune)d) - panicf("bad map delta %d", d); - - for(r = l+1; r < NumRunes; r++){ - if(DELTA(index[r], r) != d) - break; - index[r] = r; - } - - if(force || r != l + 1){ - if(!start){ - printf("static rune to%s_range[] = {\n", label); - start = 1; - } - index[l] = l; - printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d); - } - l = r; - } - if(start) - printf("};\n\n"); - - return start; -} - -static -int -topair(char *label, int *index) -{ - int l, r, d, start = 0; - - for(l = 0; l + 2 < NumRunes; ){ - if(index[l] == l){ - l++; - continue; - } - - d = DELTA(index[l], l); - if(d != (rune)d) - panicf("bad delta %d", d); - - for(r = l+2; r < NumRunes; r += 2){ - if(DELTA(index[r], r) != d) - break; - index[r] = r; - } - - if(r > l+2){ - if(!start){ - printf("static rune to%s_pair[] = {\n", label); - start = 1; - } - index[l] = l; - printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d); - } - - l = r; - } - if(start) - printf("};\n\n"); - - return start; -} - -static -int -tosingle(char *label, int *index) -{ - int i, d, start = 0; - - for(i=0; i < NumRunes; i++) { - if(index[i] == i) - continue; - - d = DELTA(index[i], i); - if(d != (rune)d) - panicf("bad map delta %d", d); - - if(!start){ - printf("static rune to%s_single[] = {\n", label); - start = 1; - } - index[i] = i; - printf("\t0x%.4x, %d,\n", i, d); - } - if(start) - printf("};\n\n"); - - return start; -} - -static -void -mkto(char *label, int *index, int pairs, int onlyrange) -{ - int hasr, hasp=0, hass=0; - - hasr = torange(label, index, !onlyrange); - if(!onlyrange && pairs) - hasp = topair(label, index); - if(!onlyrange) - hass = tosingle(label, index); - - printf( - "rune\n" - "utf8·to%s(rune c)\n" - "{\n" - " rune *p;\n" - "\n", - label); - - if(hasr) - printf( - " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n" - " if(p && c >= p[0] && c <= p[1])\n" - " return c + p[2] - %d;\n", - label, label, OFFSET); - - if(hasp) - printf( - " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n" - " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return c + p[2] - %d;\n", - label, label, OFFSET); - - if(hass) - printf( - " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n" - " if(p && c == p[0])\n" - " return c + p[1] - %d;\n", - label, label, OFFSET); - - - printf( - " return c;\n" - "}\n" - "\n" - ); -} - -// ----------------------------------------------------------------------- -// main point of entry - -static -void -usage(void) -{ - fprintf(stderr, "usage: mkrunetype \n"); - exit(1); -} - -int -main(int argc, char *argv[]) -{ - int i, sc, c, ec; - io·Stream *utf8; - char *prop, *field[NumFields], line[1024]; - - ARGBEGIN{ - }ARGEND; - - if(argc != 1) - usage(); - - if(!(utf8 = io·open(argv[0], "r"))) - panicf("can't open %s\n", argv[0]); - - /* by default each character maps to itself */ - for(i = 0; i < NumRunes; i++) { - table.toupper[i] = i; - table.tolower[i] = i; - table.totitle[i] = i; - } - - /* ensure all C local white space characters pass */ - table.isspace['\t'] = 1; - table.isspace['\n'] = 1; - table.isspace['\r'] = 1; - table.isspace['\f'] = 1; - table.isspace['\v'] = 1; - table.isspace[0x85] = 1; - - ec = -1; - // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any - while(parse(utf8, arrlen(field), field, arrlen(line), line)){ - /* parse unicode range */ - codepointrange(utf8, field, &sc, &ec); - prop = field[Fcategory]; - - for(c = sc; c <= ec; c++){ - /* grab properties */ - switch(prop[0]){ - case 'L': - table.isalpha[c] = 1; - switch(prop[1]){ - case 'u': table.isupper[c] = 1; break; - case 'l': table.islower[c] = 1; break; - case 't': table.istitle[c] = 1; break; - case 'm': break; // modifier letters - case 'o': break; // ideograph letters - default: - goto badproperty; - } - break; - - case 'Z': - table.isspace[c] = 1; - break; - - case 'M': - table.ismark[c] = 1; - break; - - case 'N': - table.isdigit[c] = 1; - break; - - case 'P': - table.ispunct[c] = 1; - break; - - case 'S': - table.issymbl[c] = 1; - break; - - case 'C': - table.iscntrl[c] = 1; - break; - - default: badproperty: - panicf("unrecognized category '%s'", prop); - } - /* grab transformations */ - if(*field[Fupper]) - table.toupper[c] = codepoint(field[Fupper]); - if(*field[Flower]) - table.tolower[c] = codepoint(field[Flower]); - if(*field[Ftitle]) - table.totitle[c] = codepoint(field[Ftitle]); - } - } - io·close(utf8); - - putsearch(); - - makeis("space", table.isspace, 0, 1); - makeis("digit", table.isdigit, 0, 1); - makeis("alpha", table.isalpha, 0, 0); - makeis("upper", table.isupper, 1, 0); - makeis("lower", table.islower, 1, 0); - makeis("title", table.istitle, 1, 0); - makeis("punct", table.ispunct, 1, 0); - - mkto("upper", table.toupper, 1, 0); - mkto("lower", table.tolower, 1, 0); - mkto("title", table.totitle, 1, 0); -} -- cgit v1.2.1