aboutsummaryrefslogtreecommitdiff
path: root/src/libutf/vendor/mkrunetype.c
diff options
context:
space:
mode:
authorNicholas <nbnoll@eml.cc>2021-11-20 10:53:19 -0800
committerNicholas <nbnoll@eml.cc>2021-11-20 10:53:19 -0800
commita9bfe650038afea8b751175cac16f6027345e45f (patch)
tree9a7f9feb76a64bb3efe573036d80b7bdbf8a59a5 /src/libutf/vendor/mkrunetype.c
parent1c8d4e69205fd875f6bec3fa3bd929c2e7f52f62 (diff)
Chore: reorganize libutf and libfmt into base
I found the split to be arbitrary. Better to include the functionality in the standard library. I also split the headers to allow for more granular inclusion (but the library is still monolithic). The only ugliness is the circular dependency introduced with libutf's generated functions. We put explicit prereqs with the necessary object files instead.
Diffstat (limited to 'src/libutf/vendor/mkrunetype.c')
-rw-r--r--src/libutf/vendor/mkrunetype.c390
1 files changed, 0 insertions, 390 deletions
diff --git a/src/libutf/vendor/mkrunetype.c b/src/libutf/vendor/mkrunetype.c
deleted file mode 100644
index b33df32..0000000
--- a/src/libutf/vendor/mkrunetype.c
+++ /dev/null
@@ -1,390 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// globals
-
-#define OFFSET (1 << 20)
-#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x))
-
-// TODO: use bitarrays. will reduce executable size 8x
-struct Table
-{
- /* properties */
- char isspace[NumRunes];
- char isalpha[NumRunes];
- char ismark[NumRunes];
- char isdigit[NumRunes];
- char isupper[NumRunes];
- char islower[NumRunes];
- char istitle[NumRunes];
- char ispunct[NumRunes];
- char issymbl[NumRunes];
- char iscntrl[NumRunes];
-
- char combine[NumRunes];
-
- /* transformations */
- int toupper[NumRunes];
- int tolower[NumRunes];
- int totitle[NumRunes];
-};
-
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-static
-int
-isrange(char *label, char *prop, int force)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putrange(ident, prop, force);
-}
-
-static
-int
-ispair(char *label, char *prop)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putpair(ident, prop);
-}
-
-static
-int
-issingle(char *label, char *prop)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putsingle(ident, prop);
-}
-
-static
-void
-makeis(char *label, char *table, int pairs, int onlyranges)
-{
- int hasr, hasp=0, hass=0;
-
- hasr = isrange(label, table, onlyranges);
- if(!onlyranges && pairs)
- hasp = ispair(label, table);
- if(!onlyranges)
- hass = issingle(label, table);
-
- printf(
- "int\n"
- "utf8·is%s(rune c)\n"
- "{\n"
- " rune *p;\n"
- "\n",
- label);
-
- if(hasr){
- printf(
- " p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1])\n"
- " return 1;\n",
- label, label);
- }
-
- if(hasp){
- printf(
- " p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
- " return 1;\n",
- label, label);
- }
-
- if(hass)
- printf(
- " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
- " if(p && c == p[0])\n"
- " return 1;\n",
- label, label);
-
- printf(
- " return 0;\n"
- "}\n"
- "\n");
-}
-
-static
-int
-torange(char *label, int *index, int force)
-{
- int l, r, d, start = 0;
-
- for(l = 0; l < NumRunes; ){
- if(index[l] == l){
- l++;
- continue;
- }
-
- d = DELTA(index[l], l);
- if(d != (rune)d)
- panicf("bad map delta %d", d);
-
- for(r = l+1; r < NumRunes; r++){
- if(DELTA(index[r], r) != d)
- break;
- index[r] = r;
- }
-
- if(force || r != l + 1){
- if(!start){
- printf("static rune to%s_range[] = {\n", label);
- start = 1;
- }
- index[l] = l;
- printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d);
- }
- l = r;
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-int
-topair(char *label, int *index)
-{
- int l, r, d, start = 0;
-
- for(l = 0; l + 2 < NumRunes; ){
- if(index[l] == l){
- l++;
- continue;
- }
-
- d = DELTA(index[l], l);
- if(d != (rune)d)
- panicf("bad delta %d", d);
-
- for(r = l+2; r < NumRunes; r += 2){
- if(DELTA(index[r], r) != d)
- break;
- index[r] = r;
- }
-
- if(r > l+2){
- if(!start){
- printf("static rune to%s_pair[] = {\n", label);
- start = 1;
- }
- index[l] = l;
- printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d);
- }
-
- l = r;
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-int
-tosingle(char *label, int *index)
-{
- int i, d, start = 0;
-
- for(i=0; i < NumRunes; i++) {
- if(index[i] == i)
- continue;
-
- d = DELTA(index[i], i);
- if(d != (rune)d)
- panicf("bad map delta %d", d);
-
- if(!start){
- printf("static rune to%s_single[] = {\n", label);
- start = 1;
- }
- index[i] = i;
- printf("\t0x%.4x, %d,\n", i, d);
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-void
-mkto(char *label, int *index, int pairs, int onlyrange)
-{
- int hasr, hasp=0, hass=0;
-
- hasr = torange(label, index, !onlyrange);
- if(!onlyrange && pairs)
- hasp = topair(label, index);
- if(!onlyrange)
- hass = tosingle(label, index);
-
- printf(
- "rune\n"
- "utf8·to%s(rune c)\n"
- "{\n"
- " rune *p;\n"
- "\n",
- label);
-
- if(hasr)
- printf(
- " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
- " if(p && c >= p[0] && c <= p[1])\n"
- " return c + p[2] - %d;\n",
- label, label, OFFSET);
-
- if(hasp)
- printf(
- " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
- " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
- " return c + p[2] - %d;\n",
- label, label, OFFSET);
-
- if(hass)
- printf(
- " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
- " if(p && c == p[0])\n"
- " return c + p[1] - %d;\n",
- label, label, OFFSET);
-
-
- printf(
- " return c;\n"
- "}\n"
- "\n"
- );
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
- fprintf(stderr, "usage: mkrunetype <UnicodeData.txt>\n");
- exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
- int i, sc, c, ec;
- io·Stream *utf8;
- char *prop, *field[NumFields], line[1024];
-
- ARGBEGIN{
- }ARGEND;
-
- if(argc != 1)
- usage();
-
- if(!(utf8 = io·open(argv[0], "r")))
- panicf("can't open %s\n", argv[0]);
-
- /* by default each character maps to itself */
- for(i = 0; i < NumRunes; i++) {
- table.toupper[i] = i;
- table.tolower[i] = i;
- table.totitle[i] = i;
- }
-
- /* ensure all C local white space characters pass */
- table.isspace['\t'] = 1;
- table.isspace['\n'] = 1;
- table.isspace['\r'] = 1;
- table.isspace['\f'] = 1;
- table.isspace['\v'] = 1;
- table.isspace[0x85] = 1;
-
- ec = -1;
- // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
- while(parse(utf8, arrlen(field), field, arrlen(line), line)){
- /* parse unicode range */
- codepointrange(utf8, field, &sc, &ec);
- prop = field[Fcategory];
-
- for(c = sc; c <= ec; c++){
- /* grab properties */
- switch(prop[0]){
- case 'L':
- table.isalpha[c] = 1;
- switch(prop[1]){
- case 'u': table.isupper[c] = 1; break;
- case 'l': table.islower[c] = 1; break;
- case 't': table.istitle[c] = 1; break;
- case 'm': break; // modifier letters
- case 'o': break; // ideograph letters
- default:
- goto badproperty;
- }
- break;
-
- case 'Z':
- table.isspace[c] = 1;
- break;
-
- case 'M':
- table.ismark[c] = 1;
- break;
-
- case 'N':
- table.isdigit[c] = 1;
- break;
-
- case 'P':
- table.ispunct[c] = 1;
- break;
-
- case 'S':
- table.issymbl[c] = 1;
- break;
-
- case 'C':
- table.iscntrl[c] = 1;
- break;
-
- default: badproperty:
- panicf("unrecognized category '%s'", prop);
- }
- /* grab transformations */
- if(*field[Fupper])
- table.toupper[c] = codepoint(field[Fupper]);
- if(*field[Flower])
- table.tolower[c] = codepoint(field[Flower]);
- if(*field[Ftitle])
- table.totitle[c] = codepoint(field[Ftitle]);
- }
- }
- io·close(utf8);
-
- putsearch();
-
- makeis("space", table.isspace, 0, 1);
- makeis("digit", table.isdigit, 0, 1);
- makeis("alpha", table.isalpha, 0, 0);
- makeis("upper", table.isupper, 1, 0);
- makeis("lower", table.islower, 1, 0);
- makeis("title", table.istitle, 1, 0);
- makeis("punct", table.ispunct, 1, 0);
-
- mkto("upper", table.toupper, 1, 0);
- mkto("lower", table.tolower, 1, 0);
- mkto("title", table.totitle, 1, 0);
-
- return 0;
-}