From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libutf/vendor/mkrunewidth.c | 325 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 src/libutf/vendor/mkrunewidth.c (limited to 'src/libutf/vendor/mkrunewidth.c') diff --git a/src/libutf/vendor/mkrunewidth.c b/src/libutf/vendor/mkrunewidth.c new file mode 100644 index 0000000..14e6973 --- /dev/null +++ b/src/libutf/vendor/mkrunewidth.c @@ -0,0 +1,325 @@ +#include "common.h" + +/* + * inspired by design choices in utf8proc/charwidths.jl + * all widths default to 1 unless they fall within the categories: + * 1. Mn 2. Mc 3. Me 4. Zl + * 5. Zp 6. Cc 7. Cf 8. Cs + * these default to zero width + */ +enum +{ + /* width ? */ + WidthNeutral, /* (N) practially treated like narrow but unclear ... */ + WidthAmbiguous, /* (A) sometimes wide and sometimes not... */ + /* width 1 */ + WidthHalf, /* (H) = to narrow (compatability equivalent) */ + WidthNarrow, /* (Na) ASCII width */ + /* width 2 */ + WidthWide, /* (W) 2x width */ + WidthFull, /* (F) = to wide (compatability equivalent) */ +}; + +struct Table +{ + char width[3][NumRunes]; +}; + +static struct Table table; + +// ----------------------------------------------------------------------- +// internal functions + +static +void +parse_category(char *path) +{ + int sc, c, ec, w; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + + // NOTE: we don't check for comments here + ec = -1; + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + codepointrange(utf8, field, &sc, &ec); + + prop = field[Fcategory]; + + switch(prop[0]){ + case 'M': + switch(prop[1]){ + case 'n': case 'c': case 'e': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'Z': + switch(prop[1]){ + case 'l': case 'p': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'C': + switch(prop[1]){ + case 'c': case 'f': case 's': + w = 0; + break; + default: + w = 1; + break; + } + default: + w = 1; + } + + for(c = sc; c <= ec; c++) + table.width[w][c] = 1; + } + + io·close(utf8); +} + +static +void +coderange(char *field, int *l, int *r) +{ + char *s; + + if(!(s = strstr(field, ".."))) + *l=*r=codepoint(field); + else{ + *s++ = 0, *s++ = 0; + *l=codepoint(field); + *r=codepoint(s); + } +} + +static +void +parse_eawidths(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + switch(field[1][0]){ + case 'A': continue; + case 'N': + if(field[1][1] != 'a') + continue; + /* fallthrough */ + case 'H': w = 1; break; + + case 'W': /* fallthrough */ + case 'F': w = 2; break; + + default: + panicf("malformed east asian width class: %s\n", field[1]); + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + /* ensure it only exists in one table */ + table.width[w][c] = 1; + table.width[(w+1)%3][c] = 0; + table.width[(w+2)%3][c] = 0; + } + } + io·close(utf8); +} + +static +void +parse_emoji(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *s, *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + /* only override emoji presentation */ + if(!strstr(field[1], "Emoji_Presentation")) + continue; + + /* trim trailing space */ + for(s=field[0]; *s; s++){ + if(*s == ' ') + *s = 0; + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + table.width[0][c] = 0; + table.width[1][c] = 0; + table.width[2][c] = 1; + } + } + + io·close(utf8); +} + +/* output functions */ +static +void +maketable(char *label, char *table, int pairs, int onlyranges) +{ + int r, p=0, s=0; + char ident[3][128]; + + enum + { + Irange, + Ipair, + Isingle, + }; + + /* ranges */ + if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange])) + panicf("out of identifier space\n"); + r = putrange(ident[Irange], table, onlyranges); + + if(!onlyranges && pairs){ + if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair])) + panicf("out of identifier space\n"); + p = putpair(ident[Ipair], table); + } + if(!onlyranges){ + if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle])) + panicf("out of identifier space\n"); + + s = putsingle(ident[Isingle], table); + } + + printf( + "static int\n" + "is%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(r){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return 1;\n", + ident[Irange], ident[Irange]); + } + + if(p){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return 1;\n", + ident[Ipair], ident[Ipair]); + } + + if(s) + printf( + " p = rangesearch(c, %s, arrlen(%s), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + ident[Isingle], ident[Isingle]); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunewidth \n"); + exit(1); +} + +#define SETW0(c) \ + table.width[0][(c)] = 1, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 0; + +#define SETW1(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 1, \ + table.width[2][(c)] = 0; + +#define SETW2(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 1; + + +int +main(int argc, char *argv[]) +{ + int c; + + ARGBEGIN{ + }ARGEND; + + if(argc != 3) + usage(); + + parse_category(*argv++); + parse_eawidths(*argv++); + parse_emoji(*argv); + + /* overrides */ + SETW0(0x2028); + SETW0(0x2029); + + SETW1(0x00AD); + + /* simple checking */ + for(c=0; c 1) + panicf("improper table state"); + } + + putsearch(); + + maketable("width0", table.width[0], 1, 0); + maketable("width1", table.width[1], 1, 0); + maketable("width2", table.width[2], 1, 0); + + puts( + "\n" + "int\n" + "utf8·runewidth(rune c)\n" + "{\n" + " if(iswidth1(c))\n" + " return 1;\n" + " if(iswidth2(c))\n" + " return 2;\n" + " return 0;\n" + "}" + ); +} -- cgit v1.2.1