aboutsummaryrefslogtreecommitdiff
path: root/sys/libunicode/vendor/mkrunewidth.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libunicode/vendor/mkrunewidth.c')
-rw-r--r--sys/libunicode/vendor/mkrunewidth.c325
1 files changed, 0 insertions, 325 deletions
diff --git a/sys/libunicode/vendor/mkrunewidth.c b/sys/libunicode/vendor/mkrunewidth.c
deleted file mode 100644
index 14e6973..0000000
--- a/sys/libunicode/vendor/mkrunewidth.c
+++ /dev/null
@@ -1,325 +0,0 @@
-#include "common.h"
-
-/*
- * inspired by design choices in utf8proc/charwidths.jl
- * all widths default to 1 unless they fall within the categories:
- * 1. Mn 2. Mc 3. Me 4. Zl
- * 5. Zp 6. Cc 7. Cf 8. Cs
- * these default to zero width
- */
-/*
- * Width classes, tagged with the abbreviation each one stands for.
- * NOTE(review): these look like the East Asian Width property values
- * (N, A, H, Na, W, F); none of the constants is referenced in this part of
- * the file -- confirm whether they are still needed.
- */
-enum
-{
- /* width ? */
- WidthNeutral, /* (N) practically treated like narrow but unclear ... */
- WidthAmbiguous, /* (A) sometimes wide and sometimes not... */
- /* width 1 */
- WidthHalf, /* (H) = to narrow (compatibility equivalent) */
- WidthNarrow, /* (Na) ASCII width */
- /* width 2 */
- WidthWide, /* (W) 2x width */
- WidthFull, /* (F) = to wide (compatibility equivalent) */
-};
-
-/*
- * Per-width membership flags: width[w][c] is set to 1 when rune c has been
- * assigned display width w (0, 1 or 2) and is 0 otherwise.
- */
-struct Table
-{
- char width[3][NumRunes];
-};
-
-/* the one table every parser below writes into; static storage, so it starts zeroed */
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-/*
- * Map a general category abbreviation (e.g. "Mn", "Lu") to its default width.
- * Mn, Mc, Me, Zl, Zp, Cc, Cf and Cs are zero width; every other category is 1.
- */
-static
-int
-category_width(char *prop)
-{
-    switch(prop[0]){
-    case 'M':
-        return (prop[1] == 'n' || prop[1] == 'c' || prop[1] == 'e') ? 0 : 1;
-    case 'Z':
-        return (prop[1] == 'l' || prop[1] == 'p') ? 0 : 1;
-    case 'C':
-        /*
-         * the nested-switch version of this had no break after the 'C'
-         * case, so it fell into the default and Cc/Cf/Cs ended up width 1
-         */
-        return (prop[1] == 'c' || prop[1] == 'f' || prop[1] == 's') ? 0 : 1;
-    default:
-        return 1;
-    }
-}
-
-/*
- * Read the file at path record by record and seed the width tables from the
- * category field of each record: every rune in the record's code point
- * range is flagged in table.width[w], where w is the category's default
- * width. Panics when the file cannot be opened.
- *
- * Only the chosen table is set; the other two are not cleared, so this is
- * meant to run first, on a zeroed table.
- */
-static
-void
-parse_category(char *path)
-{
-    int sc, c, ec, w;
-    io·Stream *utf8;
-    char *field[NumFields], line[1024];
-
-    if(!(utf8 = io·open(path, "r")))
-        panicf("can't open %s\n", path);
-
-    // NOTE: we don't check for comments here
-    ec = -1;
-    while(parse(utf8, arrlen(field), field, arrlen(line), line)){
-        /* ec is handed back in on every record, so it carries over between them */
-        codepointrange(utf8, field, &sc, &ec);
-
-        w = category_width(field[Fcategory]);
-
-        for(c = sc; c <= ec; c++)
-            table.width[w][c] = 1;
-    }
-
-    io·close(utf8);
-}
-
-/*
- * Split a field holding either one code point or an inclusive range written
- * as "first..last" and store the bounds in *l and *r. A lone code point
- * gives *l == *r.
- *
- * The field is modified in place: both bytes of the ".." separator are
- * overwritten with NUL so each half can be handed to codepoint().
- */
-static
-void
-coderange(char *field, int *l, int *r)
-{
-    char *sep;
-
-    sep = strstr(field, "..");
-    if(!sep){
-        *l = *r = codepoint(field);
-        return;
-    }
-
-    sep[0] = 0;
-    sep[1] = 0;
-    *l = codepoint(field);
-    *r = codepoint(sep + 2);
-}
-
-/*
- * Read the East Asian Width file at path and override the width tables.
- *
- * Each record is "<code point or range>;<class>". Classes are handled as:
- *   A, N   -> skipped (the width already in the table is kept)
- *   Na, H  -> width 1
- *   W, F   -> width 2
- * anything else is a panic. Panics as well when the file cannot be opened.
- */
-static
-void
-parse_eawidths(char *path)
-{
-    int at, w;
-    int l, c, r;
-    io·Stream *utf8;
-    char *field[2], line[1024];
-
-    /* same failure handling as parse_category; the stream was previously used unchecked */
-    if(!(utf8 = io·open(path, "r")))
-        panicf("can't open %s\n", path);
-
-    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
-        if(at == ParseSkip)
-            continue;
-
-        switch(field[1][0]){
-        case 'A': continue;
-        case 'N':
-            /* plain "N" is neutral and skipped; only "Na" is narrow */
-            if(field[1][1] != 'a')
-                continue;
-            /* fallthrough */
-        case 'H': w = 1; break;
-
-        case 'W': /* fallthrough */
-        case 'F': w = 2; break;
-
-        default:
-            panicf("malformed east asian width class: %s\n", field[1]);
-        }
-
-        coderange(field[0], &l, &r);
-
-        for(c=l; c <= r; c++){
-            /* ensure it only exists in one table */
-            table.width[w][c] = 1;
-            table.width[(w+1)%3][c] = 0;
-            table.width[(w+2)%3][c] = 0;
-        }
-    }
-    io·close(utf8);
-}
-
-/*
- * Read the emoji data file at path and force every rune whose property
- * field contains "Emoji_Presentation" to width 2, clearing it from the
- * width 0 and width 1 tables. Records with other properties are ignored.
- * Panics when the file cannot be opened.
- */
-static
-void
-parse_emoji(char *path)
-{
-    int at;
-    int l, c, r;
-    io·Stream *utf8;
-    char *s, *field[2], line[1024];
-
-    /* same failure handling as parse_category; the stream was previously used unchecked */
-    if(!(utf8 = io·open(path, "r")))
-        panicf("can't open %s\n", path);
-
-    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
-        if(at == ParseSkip)
-            continue;
-
-        /* only override emoji presentation */
-        if(!strstr(field[1], "Emoji_Presentation"))
-            continue;
-
-        /* trim trailing space: every blank in the code point field becomes NUL */
-        for(s=field[0]; *s; s++){
-            if(*s == ' ')
-                *s = 0;
-        }
-
-        coderange(field[0], &l, &r);
-
-        for(c=l; c <= r; c++){
-            table.width[0][c] = 0;
-            table.width[1][c] = 0;
-            table.width[2][c] = 1;
-        }
-    }
-
-    io·close(utf8);
-}
-
-/* output functions */
-/*
- * Format "<label>_<suffix>" into dst (capacity size) and panic when it does
- * not fit. snprintf returns the length the complete string would have had,
- * so truncation is any result >= size; a negative result is an error.
- */
-static
-void
-makeident(char *dst, size_t size, char *label, char *suffix)
-{
-    int n;
-
-    n = snprintf(dst, size, "%s_%s", label, suffix);
-    if(n < 0 || (size_t)n >= size)
-        panicf("out of identifier space\n");
-}
-
-/*
- * Emit the lookup arrays for one flag table plus a static predicate
- * "is<label>(rune c)" that searches them.
- *
- *   label      - stem for the generated identifiers (<label>_range,
- *                <label>_pair, <label>_single, is<label>)
- *   table      - flag array handed to putrange/putpair/putsingle
- *   pairs      - non-zero to also emit the pair array
- *   onlyranges - non-zero to emit the range array only; also forwarded to
- *                putrange
- *
- * The put* helpers' return values are used only as "was anything emitted"
- * flags: the predicate contains a search for an array only when its helper
- * returned non-zero.
- */
-static
-void
-maketable(char *label, char *table, int pairs, int onlyranges)
-{
-    int r, p=0, s=0;
-    char ident[3][128];
-
-    enum
-    {
-        Irange,
-        Ipair,
-        Isingle,
-    };
-
-    /* ranges */
-    makeident(ident[Irange], arrlen(ident[Irange]), label, "range");
-    r = putrange(ident[Irange], table, onlyranges);
-
-    if(!onlyranges && pairs){
-        makeident(ident[Ipair], arrlen(ident[Ipair]), label, "pair");
-        p = putpair(ident[Ipair], table);
-    }
-    if(!onlyranges){
-        makeident(ident[Isingle], arrlen(ident[Isingle]), label, "single");
-        s = putsingle(ident[Isingle], table);
-    }
-
-    printf(
-        "static int\n"
-        "is%s(rune c)\n"
-        "{\n"
-        " rune *p;\n"
-        "\n",
-        label);
-
-    if(r){
-        printf(
-            " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
-            " if(p && c >= p[0] && c <= p[1])\n"
-            " return 1;\n",
-            ident[Irange], ident[Irange]);
-    }
-
-    if(p){
-        /* pair entries match only at even offsets from the start of the span */
-        printf(
-            " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
-            " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
-            " return 1;\n",
-            ident[Ipair], ident[Ipair]);
-    }
-
-    if(s)
-        printf(
-            " p = rangesearch(c, %s, arrlen(%s), 1);\n"
-            " if(p && c == p[0])\n"
-            " return 1;\n",
-            ident[Isingle], ident[Isingle]);
-
-    printf(
-        " return 0;\n"
-        "}\n"
-        "\n");
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-/* print the expected command line to stderr and exit with status 1 */
-static
-void
-usage(void)
-{
-    fputs("usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n", stderr);
-    exit(1);
-}
-
-/*
- * SETWn(c): flag rune c as width n and clear it from the other two tables.
- *
- * Each macro is a do/while(0) block so that "SETWn(x);" is exactly one
- * statement and stays correct as the body of an unbraced if/else.
- * Note that c is evaluated three times; pass an expression without side
- * effects.
- */
-#define SETW0(c) \
-    do{ \
-        table.width[0][(c)] = 1; \
-        table.width[1][(c)] = 0; \
-        table.width[2][(c)] = 0; \
-    }while(0)
-
-#define SETW1(c) \
-    do{ \
-        table.width[0][(c)] = 0; \
-        table.width[1][(c)] = 1; \
-        table.width[2][(c)] = 0; \
-    }while(0)
-
-#define SETW2(c) \
-    do{ \
-        table.width[0][(c)] = 0; \
-        table.width[1][(c)] = 0; \
-        table.width[2][(c)] = 1; \
-    }while(0)
-
-
-/*
- * mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>
- *
- * Fills the width tables from the three data files, applies a few manual
- * overrides, checks that no rune landed in more than one table, and then
- * emits the lookup tables, the is<width> predicates and utf8·runewidth.
- */
-int
-main(int argc, char *argv[])
-{
-    int r, hits;
-
-    ARGBEGIN{
-    }ARGEND;
-
-    if(argc != 3)
-        usage();
-
-    /* order matters: the later two parsers clear whatever the earlier ones set */
-    parse_category(argv[0]);
-    parse_eawidths(argv[1]);
-    parse_emoji(argv[2]);
-
-    /* overrides */
-    SETW0(0x2028);
-    SETW0(0x2029);
-
-    SETW1(0x00AD);
-
-    /* simple checking: a rune may be flagged in at most one width table */
-    for(r = 0; r < NumRunes; r++){
-        hits = table.width[0][r] + table.width[1][r] + table.width[2][r];
-        if(hits > 1)
-            panicf("improper table state");
-    }
-
-    putsearch();
-
-    maketable("width0", table.width[0], 1, 0);
-    maketable("width1", table.width[1], 1, 0);
-    maketable("width2", table.width[2], 1, 0);
-
-    /* anything in neither the width 1 nor the width 2 table reports 0 */
-    puts(
-        "\n"
-        "int\n"
-        "utf8·runewidth(rune c)\n"
-        "{\n"
-        " if(iswidth1(c))\n"
-        " return 1;\n"
-        " if(iswidth2(c))\n"
-        " return 2;\n"
-        " return 0;\n"
-        "}"
-    );
-}