From d584897456ef419f066be962f2c2628eeaef75bc Mon Sep 17 00:00:00 2001 From: Nicholas Date: Thu, 11 Nov 2021 08:21:23 -0800 Subject: chore: forgot to move old generators --- sys/libutf/vendor/mkrunewidth.c | 280 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 sys/libutf/vendor/mkrunewidth.c (limited to 'sys/libutf/vendor/mkrunewidth.c') diff --git a/sys/libutf/vendor/mkrunewidth.c b/sys/libutf/vendor/mkrunewidth.c new file mode 100644 index 0000000..43feb5e --- /dev/null +++ b/sys/libutf/vendor/mkrunewidth.c @@ -0,0 +1,280 @@ +#include "common.h" + +/* + * inspired by design choices in utf8proc/charwidths.jl + * all widths default to 1 unless they fall within the categories: + * 1. Mn 2. Mc 3. Me 4. Zl + * 5. Zp 6. Cc 7. Cf 8. Cs + * these default to zero width + */ +enum +{ + /* width ? */ + WidthNeutral, /* (N) practially treated like narrow but unclear ... */ + WidthAmbiguous, /* (A) sometimes wide and sometimes not... */ + /* width 1 */ + WidthHalf, /* (H) = to narrow (compatability equivalent) */ + WidthNarrow, /* (Na) ASCII width */ + /* width 2 */ + WidthWide, /* (W) 2x width */ + WidthFull, /* (F) = to wide (compatability equivalent) */ +}; + +struct Table +{ + char width[3][NumRunes]; +}; + +static struct Table table; + +// ----------------------------------------------------------------------- +// internal functions + +static +void +parse_category(char *path) +{ + int sc, c, ec, w; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + + // NOTE: we don't check for comments here + ec = -1; + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + codepointrange(utf8, field, &sc, &ec); + + prop = field[Fcategory]; + + switch(prop[0]){ + case 'M': + switch(prop[1]){ + case 'n': case 'c': case 'e': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'Z': + switch(prop[1]){ + case 'l': case 'p': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'C': + switch(prop[1]){ + case 'c': case 'f': case 's': + w = 0; + break; + default: + w = 1; + break; + } + default: + w = 1; + } + + for(c = sc; c <= ec; c++) + table.width[w][c] = 1; + } + + io·close(utf8); +} + +static +void +parse_eawidths(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *s, *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + switch(field[1][0]){ + case 'A': continue; + case 'N': + if(field[1][1] != 'a') + continue; + /* fallthrough */ + case 'H': w = 1; break; + + case 'W': /* fallthrough */ + case 'F': w = 2; break; + + default: + panicf("malformed east asian width class: %s\n", field[1]); + } + + if(!(s = strstr(field[0], ".."))) + l=codepoint(field[0]), r=l; + else{ + *s++ = 0, *s++ = 0; + l=codepoint(field[0]); + r=codepoint(s); + } + + for(c=l; c <= r; c++){ + /* ensure it only exists in one table */ + table.width[w][c] = 1; + table.width[(w+1)%3][c] = 0; + table.width[(w+2)%3][c] = 0; + } + } + io·close(utf8); +} + +static +void +maketable(char *label, char *table, int pairs, int onlyranges) +{ + int r, p=0, s=0; + char ident[3][128]; + + enum + { + Irange, + Ipair, + Isingle, + }; + + /* ranges */ + if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange])) + panicf("out of identifier space\n"); + r = putrange(ident[Irange], table, onlyranges); + + if(!onlyranges && pairs){ + if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair])) + panicf("out of identifier space\n"); + p = putpair(ident[Ipair], table); + } + if(!onlyranges){ + if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle])) + panicf("out of identifier space\n"); + + s = putsingle(ident[Isingle], table); + } + + printf( + "static int\n" + "is%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(r){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return 1;\n", + ident[Irange], ident[Irange]); + } + + if(p){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return 1;\n", + ident[Ipair], ident[Ipair]); + } + + if(s) + printf( + " p = rangesearch(c, %s, arrlen(%s), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + ident[Isingle], ident[Isingle]); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + + + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunewidth \n"); + exit(1); +} + +#define SETW0(c) \ + table.width[0][(c)] = 1, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 0; + +#define SETW1(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 1, \ + table.width[2][(c)] = 0; + +#define SETW2(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 1; + + +int +main(int argc, char *argv[]) +{ + int c; + + ARGBEGIN{ + }ARGEND; + + if(argc != 2) + usage(); + + parse_category(*argv++); + parse_eawidths(*argv); + + /* overrides */ + SETW0(0x2028); + SETW0(0x2029); + + SETW1(0x00AD); + + /* simple checking */ + for(c=0; c 1) + panicf("improper table state"); + } + + putsearch(); + + maketable("width0", table.width[0], 1, 0); + maketable("width1", table.width[1], 1, 0); + maketable("width2", table.width[2], 1, 0); + + puts( + "\n" + "int\n" + "utf8·runewidth(rune c)\n" + "{\n" + " if(iswidth1(c))\n" + " return 1;\n" + " if(iswidth2(c))\n" + " return 2;\n" + " return 0;\n" + "}" + ); +} -- cgit v1.2.1 From da8ac7ebf766051245a14da1542634c6ab788457 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Thu, 11 Nov 2021 08:31:46 -0800 Subject: chore: updated old copy of mkrunewidth --- sys/libutf/vendor/mkrunewidth.c | 71 +++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 13 deletions(-) (limited to 'sys/libutf/vendor/mkrunewidth.c') diff --git a/sys/libutf/vendor/mkrunewidth.c b/sys/libutf/vendor/mkrunewidth.c index 43feb5e..14e6973 100644 --- a/sys/libutf/vendor/mkrunewidth.c +++ b/sys/libutf/vendor/mkrunewidth.c @@ -89,6 +89,21 @@ parse_category(char *path) io·close(utf8); } +static +void +coderange(char *field, int *l, int *r) +{ + char *s; + + if(!(s = strstr(field, ".."))) + *l=*r=codepoint(field); + else{ + *s++ = 0, *s++ = 0; + *l=codepoint(field); + *r=codepoint(s); + } +} + static void parse_eawidths(char *path) @@ -96,7 +111,7 @@ parse_eawidths(char *path) int at, w; int l, c, r; io·Stream *utf8; - char *s, *field[2], line[1024]; + char *field[2], line[1024]; utf8 = io·open(path, "r"); while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ @@ -118,13 +133,7 @@ parse_eawidths(char *path) panicf("malformed east asian width class: %s\n", field[1]); } - if(!(s = strstr(field[0], ".."))) - l=codepoint(field[0]), r=l; - else{ - *s++ = 0, *s++ = 0; - l=codepoint(field[0]); - r=codepoint(s); - } + coderange(field[0], &l, &r); for(c=l; c <= r; c++){ /* ensure it only exists in one table */ @@ -136,6 +145,43 @@ parse_eawidths(char *path) io·close(utf8); } +static +void +parse_emoji(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *s, *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + /* only override emoji presentation */ + if(!strstr(field[1], "Emoji_Presentation")) + continue; + + /* trim trailing space */ + for(s=field[0]; *s; s++){ + if(*s == ' ') + *s = 0; + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + table.width[0][c] = 0; + table.width[1][c] = 0; + table.width[2][c] = 1; + } + } + + io·close(utf8); +} + +/* output functions */ static void maketable(char *label, char *table, int pairs, int onlyranges) @@ -204,8 +250,6 @@ maketable(char *label, char *table, int pairs, int onlyranges) "\n"); } - - // ----------------------------------------------------------------------- // main point of entry @@ -213,7 +257,7 @@ static void usage(void) { - fprintf(stderr, "usage: mkrunewidth \n"); + fprintf(stderr, "usage: mkrunewidth \n"); exit(1); } @@ -241,11 +285,12 @@ main(int argc, char *argv[]) ARGBEGIN{ }ARGEND; - if(argc != 2) + if(argc != 3) usage(); parse_category(*argv++); - parse_eawidths(*argv); + parse_eawidths(*argv++); + parse_emoji(*argv); /* overrides */ SETW0(0x2028); -- cgit v1.2.1