about | summary | refs | log | tree | commit | diff
path: root/sys/libunicode/vendor/mkrunetype.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libunicode/vendor/mkrunetype.c')
-rw-r--r-- sys/libunicode/vendor/mkrunetype.c 388
1 files changed, 0 insertions, 388 deletions
diff --git a/sys/libunicode/vendor/mkrunetype.c b/sys/libunicode/vendor/mkrunetype.c
deleted file mode 100644
index 9f939f4..0000000
--- a/sys/libunicode/vendor/mkrunetype.c
+++ /dev/null
@@ -1,388 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// globals
-
-// Bias added to every mapping delta written into the generated to*_ tables.
-// The generated lookup functions subtract the same value again, so DELTA and
-// the emitted code must agree on it.
-#define OFFSET (1 << 20)
-// Biased distance from rune x to the rune it maps to (mapx).
-#define DELTA(mapx, x) (OFFSET + (mapx) - (x))
-
-// TODO: pack the property flags into bit arrays; each flag currently takes a
-// whole char, so this should shrink the tables roughly 8x
-struct Table
-{
-	/* per-rune property flags; main() sets an entry to 1 when the rune has the property */
-	char isspace[NumRunes], isalpha[NumRunes], ismark[NumRunes];
-	char isdigit[NumRunes], isupper[NumRunes], islower[NumRunes];
-	char istitle[NumRunes], ispunct[NumRunes], issymbl[NumRunes];
-	char iscntrl[NumRunes];
-
-	char combine[NumRunes];
-
-	/* per-rune case mappings; main() initialises each entry to the rune itself */
-	int toupper[NumRunes], tolower[NumRunes], totitle[NumRunes];
-};
-
-/* single working copy; static storage, so every flag starts out as 0 */
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-/*
- * Builds the identifier "is<label>_range" and hands it, together with the
- * property array and the force flag, to putrange().
- * Returns putrange()'s result; makeis() treats nonzero as "a table was emitted".
- * Panics if the identifier does not fit in the local buffer.
- */
-static
-int
-isrange(char *label, char *prop, int force)
-{
-	char ident[128];
-	int n;
-
-	/* snprintf returns the length it WANTED to write: anything >= the
-	 * buffer size means truncation, a negative value means an error */
-	n = snprintf(ident, arrlen(ident), "is%s_range", label);
-	if(n < 0 || n >= (int)arrlen(ident))
-		panicf("out of identifier space\n");
-
-	return putrange(ident, prop, force);
-}
-
-/*
- * Builds the identifier "is<label>_pair" and hands it, together with the
- * property array, to putpair().
- * Returns putpair()'s result; makeis() treats nonzero as "a table was emitted".
- * Panics if the identifier does not fit in the local buffer.
- */
-static
-int
-ispair(char *label, char *prop)
-{
-	char ident[128];
-	int n;
-
-	/* snprintf returns the length it WANTED to write: anything >= the
-	 * buffer size means truncation, a negative value means an error */
-	n = snprintf(ident, arrlen(ident), "is%s_pair", label);
-	if(n < 0 || n >= (int)arrlen(ident))
-		panicf("out of identifier space\n");
-
-	return putpair(ident, prop);
-}
-
-/*
- * Builds the identifier "is<label>_single" and hands it, together with the
- * property array, to putsingle().
- * Returns putsingle()'s result; makeis() treats nonzero as "a table was emitted".
- * Panics if the identifier does not fit in the local buffer.
- */
-static
-int
-issingle(char *label, char *prop)
-{
-	char ident[128];
-	int n;
-
-	/* snprintf returns the length it WANTED to write: anything >= the
-	 * buffer size means truncation, a negative value means an error */
-	n = snprintf(ident, arrlen(ident), "is%s_single", label);
-	if(n < 0 || n >= (int)arrlen(ident))
-		panicf("out of identifier space\n");
-
-	return putsingle(ident, prop);
-}
-
-/*
- * Writes to stdout the lookup tables for one property plus the C source of
- * the predicate utf8·is<label>(rune) that searches them.
- *
- *   label      - suffix used in the generated identifiers
- *   table      - per-rune flag array for the property
- *   pairs      - nonzero: also try to emit a pair table
- *   onlyranges - nonzero: emit the range table only (the flag is passed on
- *                to isrange() as its force argument)
- *
- * A lookup stanza is generated only for tables that were actually emitted.
- */
-static
-void
-makeis(char *label, char *table, int pairs, int onlyranges)
-{
-	int gotrange, gotpair, gotsingle;
-
-	/* tables go out first, in the fixed order range, pair, single */
-	gotrange = isrange(label, table, onlyranges);
-	gotpair = 0;
-	gotsingle = 0;
-	if(!onlyranges){
-		if(pairs)
-			gotpair = ispair(label, table);
-		gotsingle = issingle(label, table);
-	}
-
-	/* function header */
-	printf(
-		"int\n"
-		"utf8·is%s(rune c)\n"
-		"{\n"
-		" rune *p;\n"
-		"\n",
-		label);
-
-	/* range table: entries are [first, last] */
-	if(gotrange)
-		printf(
-			" p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
-			" if(p && c >= p[0] && c <= p[1])\n"
-			" return 1;\n",
-			label, label);
-
-	/* pair table: entries are [first, last], matching every second rune */
-	if(gotpair)
-		printf(
-			" p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
-			" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-			" return 1;\n",
-			label, label);
-
-	/* single table: one rune per entry */
-	if(gotsingle)
-		printf(
-			" p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
-			" if(p && c == p[0])\n"
-			" return 1;\n",
-			label, label);
-
-	/* function trailer: nothing matched */
-	fputs(
-		" return 0;\n"
-		"}\n"
-		"\n",
-		stdout);
-}
-
-/*
- * Emits the table to<label>_range[] on stdout: one "first, last, delta" row
- * for every run of consecutive runes that share the same biased mapping delta.
- *
- *   index - per-rune mapping; an entry equal to its own position means
- *           "no mapping". Entries covered by an emitted row are reset to
- *           identity so later passes skip them.
- *   force - nonzero: emit a row even for a run of length one.
- *
- * Returns 1 if the table was emitted, 0 if there was nothing to emit.
- * Panics if a delta does not survive conversion to rune.
- */
-static
-int
-torange(char *label, int *index, int force)
-{
-	int lo, hi, delta, opened;
-
-	opened = 0;
-	lo = 0;
-	while(lo < NumRunes){
-		/* identity entry: nothing to record */
-		if(index[lo] == lo){
-			lo++;
-			continue;
-		}
-
-		delta = DELTA(index[lo], lo);
-		if(delta != (rune)delta)
-			panicf("bad map delta %d", delta);
-
-		/* extend the run while the delta stays the same; followers are
-		 * cleared as soon as they are seen */
-		hi = lo + 1;
-		while(hi < NumRunes && DELTA(index[hi], hi) == delta){
-			index[hi] = hi;
-			hi++;
-		}
-
-		/* a run of one is kept only on request; otherwise index[lo]
-		 * stays set for the caller's later passes */
-		if(force || hi - lo > 1){
-			if(!opened){
-				printf("static rune to%s_range[] = {\n", label);
-				opened = 1;
-			}
-			index[lo] = lo;
-			printf("\t0x%.4x, 0x%.4x, %d,\n", lo, hi-1, delta);
-		}
-		lo = hi;
-	}
-
-	if(opened)
-		printf("};\n\n");
-
-	return opened;
-}
-
-/*
- * Emits the table to<label>_pair[] on stdout: one "first, last, delta" row
- * for every run of runes spaced two apart that share the same biased
- * mapping delta.
- *
- *   index - per-rune mapping; an entry equal to its own position means
- *           "no mapping". Entries covered by an emitted row are reset to
- *           identity.
- *
- * Returns 1 if the table was emitted, 0 if there was nothing to emit.
- * Panics if a delta does not survive conversion to rune.
- */
-static
-int
-topair(char *label, int *index)
-{
-	int lo, hi, delta, opened;
-
-	opened = 0;
-	lo = 0;
-	while(lo + 2 < NumRunes){
-		/* identity entry: nothing to record */
-		if(index[lo] == lo){
-			lo++;
-			continue;
-		}
-
-		delta = DELTA(index[lo], lo);
-		if(delta != (rune)delta)
-			panicf("bad delta %d", delta);
-
-		/* step two runes at a time while the delta stays the same */
-		hi = lo + 2;
-		while(hi < NumRunes && DELTA(index[hi], hi) == delta){
-			index[hi] = hi;
-			hi += 2;
-		}
-
-		/* only runs with at least one follower become a row; a lone
-		 * candidate keeps its mapping in index[] */
-		if(hi > lo + 2){
-			if(!opened){
-				printf("static rune to%s_pair[] = {\n", label);
-				opened = 1;
-			}
-			index[lo] = lo;
-			printf("\t0x%.4x, 0x%.4x, %d,\n", lo, hi-2, delta);
-		}
-
-		/* resume at the rune that ended the run */
-		lo = hi;
-	}
-
-	if(opened)
-		printf("};\n\n");
-
-	return opened;
-}
-
-/*
- * Emits the table to<label>_single[] on stdout: one "rune, delta" row for
- * every entry of index that still differs from its own position. Each
- * emitted entry is reset to identity.
- *
- * Returns 1 if the table was emitted, 0 if there was nothing to emit.
- * Panics if a delta does not survive conversion to rune.
- */
-static
-int
-tosingle(char *label, int *index)
-{
-	int cp, delta, opened;
-
-	opened = 0;
-	for(cp = 0; cp < NumRunes; cp++){
-		if(index[cp] != cp){
-			delta = DELTA(index[cp], cp);
-			if(delta != (rune)delta)
-				panicf("bad map delta %d", delta);
-
-			/* the array header is written lazily, before the first row */
-			if(!opened){
-				printf("static rune to%s_single[] = {\n", label);
-				opened = 1;
-			}
-			index[cp] = cp;
-			printf("\t0x%.4x, %d,\n", cp, delta);
-		}
-	}
-
-	if(opened)
-		printf("};\n\n");
-
-	return opened;
-}
-
-/*
- * Writes to stdout the mapping tables for one case conversion plus the C
- * source of utf8·to<label>(rune) that searches them.
- *
- *   label     - suffix used in the generated identifiers
- *   index     - per-rune mapping; consumed (reset to identity) by the
- *               table emitters
- *   pairs     - nonzero: also try to emit a pair table
- *   onlyrange - nonzero: emit the range table only
- *
- * A lookup stanza is generated only for tables that were actually emitted.
- * Stored deltas are biased by OFFSET, so the generated code subtracts it.
- */
-static
-void
-mkto(char *label, int *index, int pairs, int onlyrange)
-{
-	int hasr, hasp=0, hass=0;
-
-	/* Isolated mappings must be forced into the range table only when no
-	 * pair/single table will follow. This mirrors makeis(), which passes
-	 * onlyranges as the force flag. Passing the negation pushed every
-	 * mapping into the range table, leaving the pair and single passes
-	 * with nothing to emit, and would drop isolated mappings entirely
-	 * when onlyrange is set. */
-	hasr = torange(label, index, onlyrange);
-	if(!onlyrange && pairs)
-		hasp = topair(label, index);
-	if(!onlyrange)
-		hass = tosingle(label, index);
-
-	printf(
-		"rune\n"
-		"utf8·to%s(rune c)\n"
-		"{\n"
-		" rune *p;\n"
-		"\n",
-		label);
-
-	/* range rows are [first, last, delta] */
-	if(hasr)
-		printf(
-			" p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
-			" if(p && c >= p[0] && c <= p[1])\n"
-			" return c + p[2] - %d;\n",
-			label, label, OFFSET);
-
-	/* pair rows are [first, last, delta], matching every second rune */
-	if(hasp)
-		printf(
-			" p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
-			" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-			" return c + p[2] - %d;\n",
-			label, label, OFFSET);
-
-	/* single rows are [rune, delta] */
-	if(hass)
-		printf(
-			" p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
-			" if(p && c == p[0])\n"
-			" return c + p[1] - %d;\n",
-			label, label, OFFSET);
-
-	/* unmapped runes are returned unchanged */
-	printf(
-		" return c;\n"
-		"}\n"
-		"\n"
-	);
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-/* Prints the command-line synopsis to stderr and exits with status 1. */
-static
-void
-usage(void)
-{
-	fputs("usage: mkrunetype <UnicodeData.txt>\n", stderr);
-	exit(1);
-}
-
-/*
- * Reads the UnicodeData.txt file named by the single command-line argument,
- * fills the global property and case-mapping tables from each record's
- * general category and upper/lower/title fields, then writes the generated
- * C source (search helper, is* predicates, to* converters) to stdout.
- *
- * Panics if the file cannot be opened, a record names an unknown category,
- * or a record's codepoint range falls outside the tables.
- */
-int
-main(int argc, char *argv[])
-{
-	int i, sc, c, ec;
-	io·Stream *utf8;
-	char *prop, *field[NumFields], line[1024];
-
-	/* no options are accepted; NOTE(review): the ARG macros presumably
-	 * strip the program name, leaving the input path in argv[0] */
-	ARGBEGIN{
-	}ARGEND;
-
-	if(argc != 1)
-		usage();
-
-	if(!(utf8 = io·open(argv[0], "r")))
-		panicf("can't open %s\n", argv[0]);
-
-	/* by default each character maps to itself */
-	for(i = 0; i < NumRunes; i++) {
-		table.toupper[i] = i;
-		table.tolower[i] = i;
-		table.totitle[i] = i;
-	}
-
-	/* ensure all C locale white space characters pass */
-	table.isspace['\t'] = 1;
-	table.isspace['\n'] = 1;
-	table.isspace['\r'] = 1;
-	table.isspace['\f'] = 1;
-	table.isspace['\v'] = 1;
-	table.isspace[0x85] = 1;
-
-	ec = -1;
-	// NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
-	while(parse(utf8, arrlen(field), field, arrlen(line), line)){
-		/* parse unicode range */
-		codepointrange(utf8, field, &sc, &ec);
-		prop = field[Fcategory];
-
-		/* the tables hold NumRunes entries each: refuse a non-empty
-		 * range that would index outside them */
-		if(sc <= ec && (sc < 0 || ec >= NumRunes))
-			panicf("codepoint range %x-%x outside rune tables\n", (unsigned)sc, (unsigned)ec);
-
-		for(c = sc; c <= ec; c++){
-			/* grab properties: the first category letter selects the
-			 * table; only letters look at the second character */
-			switch(prop[0]){
-			case 'L':
-				table.isalpha[c] = 1;
-				switch(prop[1]){
-				case 'u': table.isupper[c] = 1; break;
-				case 'l': table.islower[c] = 1; break;
-				case 't': table.istitle[c] = 1; break;
-				case 'm': break; // modifier letters
-				case 'o': break; // ideograph letters
-				default:
-					goto badproperty;
-				}
-				break;
-
-			case 'Z':
-				table.isspace[c] = 1;
-				break;
-
-			case 'M':
-				table.ismark[c] = 1;
-				break;
-
-			case 'N':
-				table.isdigit[c] = 1;
-				break;
-
-			case 'P':
-				table.ispunct[c] = 1;
-				break;
-
-			case 'S':
-				table.issymbl[c] = 1;
-				break;
-
-			case 'C':
-				table.iscntrl[c] = 1;
-				break;
-
-			default: badproperty:
-				panicf("unrecognized category '%s'", prop);
-			}
-			/* grab transformations: an empty field keeps the identity mapping */
-			if(*field[Fupper])
-				table.toupper[c] = codepoint(field[Fupper]);
-			if(*field[Flower])
-				table.tolower[c] = codepoint(field[Flower]);
-			if(*field[Ftitle])
-				table.totitle[c] = codepoint(field[Ftitle]);
-		}
-	}
-	io·close(utf8);
-
-	putsearch();
-
-	/* space and digit are emitted as range tables only; the mark, symbl
-	 * and cntrl flags are collected above but get no predicate here */
-	makeis("space", table.isspace, 0, 1);
-	makeis("digit", table.isdigit, 0, 1);
-	makeis("alpha", table.isalpha, 0, 0);
-	makeis("upper", table.isupper, 1, 0);
-	makeis("lower", table.islower, 1, 0);
-	makeis("title", table.istitle, 1, 0);
-	makeis("punct", table.ispunct, 1, 0);
-
-	mkto("upper", table.toupper, 1, 0);
-	mkto("lower", table.tolower, 1, 0);
-	mkto("title", table.totitle, 1, 0);
-
-	return 0;
-}