aboutsummaryrefslogtreecommitdiff
path: root/src/base/utf/vendor
diff options
context:
space:
mode:
authorNicholas <nbnoll@eml.cc>2021-11-20 10:53:19 -0800
committerNicholas <nbnoll@eml.cc>2021-11-20 10:53:19 -0800
commita9bfe650038afea8b751175cac16f6027345e45f (patch)
tree9a7f9feb76a64bb3efe573036d80b7bdbf8a59a5 /src/base/utf/vendor
parent1c8d4e69205fd875f6bec3fa3bd929c2e7f52f62 (diff)
Chore: reorganize libutf and libfmt into base
I found the split to be arbitrary. Better to include the functionality in the standard library. I also split the headers to allow for more granular inclusion (but the library is still monolithic). The only ugliness is the circular dependency introduced with libutf's generated functions. We put explicit prereqs with the necessary object files instead.
Diffstat (limited to 'src/base/utf/vendor')
-rw-r--r--src/base/utf/vendor/common.c220
-rw-r--r--src/base/utf/vendor/common.h45
-rw-r--r--src/base/utf/vendor/mkgraphemedata.c24
-rw-r--r--src/base/utf/vendor/mkrunetype.c390
-rw-r--r--src/base/utf/vendor/mkrunewidth.c325
5 files changed, 1004 insertions, 0 deletions
diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c
new file mode 100644
index 0000000..fcf1177
--- /dev/null
+++ b/src/base/utf/vendor/common.c
@@ -0,0 +1,220 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// input functions
+
+/*
+ * parse reads one line from io into line (capacity len) and splits it in
+ * place on ';' into exactly nfield fields, storing a pointer to the start
+ * of each field in field[0..nfield-1]. The field pointers alias line, so
+ * they are only valid until the buffer is reused.
+ *
+ * returns ParseEOF when io·readln yields nothing, ParseSkip for an empty
+ * line or one starting with '#' (field is left untouched), and ParseOK
+ * otherwise. Malformed input (overlong line, missing newline, wrong number
+ * of fields) is reported through panicf.
+ *
+ * NOTE(review): assumes io·readln returns the number of bytes stored,
+ * newline included — confirm against its definition.
+ */
+int
+parse(io·Stream *io, int nfield, char **field, int len, char *line)
+{
+	int n;
+
+	if((n=io·readln(io, len, line)) <= 0)
+		return ParseEOF;
+
+	if(n == len)
+		panicf("line too long");
+
+	/* the last byte read is line[n-1]; line[n] is past the data */
+	if(line[n-1] != '\n')
+		panicf("invalid line: expected '\\n', found '%c'", line[n-1]);
+
+	line[n-1] = 0;
+
+	if(line[0] == '#' || line[0] == 0)
+		return ParseSkip;
+
+	/* tokenize line into fields */
+	n = 0;
+	field[n] = line;
+	for(; *line; line++){
+		if(*line != ';')
+			continue;
+		/* refuse to write past the caller's field array */
+		if(n+1 >= nfield)
+			panicf("expected %d number of fields, got more: %s", nfield, field[0]);
+		*line = 0;
+		field[++n] = line+1;
+	}
+
+	/* n counts separators, so the number of fields seen is n+1 */
+	if(n != nfield-1)
+		panicf("expected %d number of fields, got %d: %s", nfield, n+1, field[0]);
+
+	return ParseOK;
+}
+
+/*
+ * codepoint converts s, a NUL terminated string of hexadecimal digits
+ * (0-9 and upper-case A-F only), into its integer value. An empty string
+ * yields 0. Any other character is reported through panicf.
+ */
+int
+codepoint(char *s)
+{
+	int value, digit;
+
+	for(value = 0; (digit = *s) != 0; s++){
+		value <<= 4;
+		if('0' <= digit && digit <= '9')
+			value += digit - '0';
+		else if('A' <= digit && digit <= 'F')
+			value += digit - 'A' + 10;
+		else
+			panicf("bad codepoint char '%c'", digit);
+	}
+
+	return value;
+}
+
+/*
+ * codepointrange works out the inclusive codepoint range [*start, *stop]
+ * described by an already tokenized UnicodeData.txt line (field).
+ *
+ * on entry *stop must hold the last codepoint of the previous record (or
+ * -1 before the first one); it is used to verify that codepoints are
+ * strictly increasing. When the name field ends a ", First>" marker the
+ * next line is read from utf8 and must be the matching ", Last>" record
+ * with the same category; otherwise the range is the single codepoint.
+ *
+ * both ends are checked against NumRunes so callers can index their
+ * NumRunes sized tables with every value in the range.
+ */
+void
+codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
+{
+	int e, c;
+	char *other[NumFields], line[1024];
+
+	// XXX: the stop variable passes in the previous stopping character
+	e = *stop;
+	c = codepoint(field[Fcode]);
+
+	if(c >= NumRunes)
+		panicf("unexpected large codepoint %x", c);
+	if(c <= e)
+		panicf("bad code sequence: %x then %x", e, c);
+	e = c;
+
+	if(strstr(field[Fname], ", First>") != nil){
+		/* only ParseOK fills other[]; a skipped line is not a range end */
+		switch(parse(utf8, arrlen(other), other, arrlen(line), line)){
+		case ParseOK:
+			break;
+		case ParseEOF:
+			panicf("range start at end of file");
+			break;
+		default:
+			panicf("range start not followed by range end");
+			break;
+		}
+		if(strstr(other[Fname], ", Last>") == nil)
+			panicf("range start not followed by range end");
+
+		e = codepoint(other[Fcode]);
+
+		if(e >= NumRunes)
+			panicf("unexpected large codepoint %x", e);
+		if(e <= c)
+			panicf("bad code sequence: %x then %x", c, e);
+		if(strcmp(field[Fcategory], other[Fcategory]) != 0)
+			panicf("range with mismatched category");
+	}
+
+	*start = c;
+	*stop = e;
+}
+
+// -----------------------------------------------------------------------
+// output functions
+
+/*
+ * putsearch writes to stdout the prelude shared by every generated file:
+ * the two includes and a static rangesearch helper. The emitted helper
+ * bisects a table t of n entries, each ne runes wide, and returns the last
+ * entry whose first rune is <= c, or 0 when there is none.
+ */
+void
+putsearch(void)
+{
+	static char prelude[] =
+		"#include <u.h>\n"
+		"#include <base/utf.h>\n"
+		"\n"
+		"static\n"
+		"rune*\n"
+		"rangesearch(rune c, rune *t, int n, int ne)\n"
+		"{\n"
+		" rune *p;\n"
+		" int m;\n"
+		" while(n > 1) {\n"
+		" m = n >> 1;\n"
+		" p = t + m*ne;\n"
+		" if(c >= p[0]){\n"
+		" t = p;\n"
+		" n = n-m;\n"
+		" }else\n"
+		" n = m;\n"
+		" }\n"
+		" if(n && c >= t[0])\n"
+		" return t;\n"
+		" return 0;\n"
+		"}\n";
+
+	/* puts appends the trailing newline that separates the prelude from the tables */
+	puts(prelude);
+}
+
+/*
+ * putrange prints, as a static rune array named ident, one "first, last"
+ * pair for every run of consecutive set entries in prop[0..NumRunes).
+ * Runs of length one are only printed when force is non-zero; otherwise
+ * they are left set in prop for a later pass. Every entry that is printed
+ * is cleared from prop.
+ *
+ * returns 1 if the array was emitted, 0 if nothing qualified.
+ */
+int
+putrange(char *ident, char *prop, int force)
+{
+	int lo, hi, opened;
+
+	opened = 0;
+	lo = 0;
+	while(lo < NumRunes){
+		if(!prop[lo]){
+			lo++;
+			continue;
+		}
+
+		/* swallow the tail of the run; hi ends one past its last member */
+		hi = lo + 1;
+		while(hi < NumRunes && prop[hi])
+			prop[hi++] = 0;
+
+		if(force || hi - lo > 1){
+			if(!opened){
+				printf("static rune %s[] = {\n", ident);
+				opened = 1;
+			}
+			prop[lo] = 0;
+			printf("\t0x%.4x, 0x%.4x,\n", lo, hi-1);
+		}
+
+		lo = hi;
+	}
+
+	if(opened)
+		printf("};\n\n");
+
+	return opened;
+}
+
+/*
+ * putpair prints, as a static rune array named ident, one "first, last"
+ * pair for every run of set entries in prop that are spaced two apart
+ * (l, l+2, l+4, ...). Only runs with at least two members are printed and
+ * cleared; an isolated entry stays set in prop.
+ *
+ * returns 1 if the array was emitted, 0 otherwise.
+ */
+int
+putpair(char *ident, char *prop)
+{
+	int lo, hi, opened;
+
+	opened = 0;
+	lo = 0;
+	while(lo + 2 < NumRunes){
+		if(!prop[lo]){
+			lo++;
+			continue;
+		}
+
+		/* walk every other entry; hi ends on the first miss */
+		hi = lo + 2;
+		while(hi < NumRunes && prop[hi]){
+			prop[hi] = 0;
+			hi += 2;
+		}
+
+		if(hi != lo + 2){
+			if(!opened){
+				printf("static rune %s[] = {\n", ident);
+				opened = 1;
+			}
+			prop[lo] = 0;
+			printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 2);
+		}
+
+		lo = hi;
+	}
+
+	if(opened)
+		printf("};\n\n");
+	return opened;
+}
+
+/*
+ * putsingle prints every entry still set in prop[0..NumRunes) as one
+ * element of a static rune array named ident, clearing each as it goes.
+ *
+ * returns 1 if the array was emitted, 0 if prop was empty.
+ */
+int
+putsingle(char *ident, char *prop)
+{
+	int r, opened;
+
+	opened = 0;
+	for(r = 0; r < NumRunes; r++){
+		if(prop[r]){
+			if(!opened){
+				printf("static rune %s[] = {\n", ident);
+				opened = 1;
+			}
+			prop[r] = 0;
+			printf("\t0x%.4x,\n", r);
+		}
+	}
+
+	if(opened)
+		printf("};\n\n");
+
+	return opened;
+}
diff --git a/src/base/utf/vendor/common.h b/src/base/utf/vendor/common.h
new file mode 100644
index 0000000..51a53bd
--- /dev/null
+++ b/src/base/utf/vendor/common.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <u.h>
+#include <base.h>
+
+/*
+ * indices into the field array produced by parse for one line of
+ * UnicodeData.txt, in file order, followed by the table dimensions.
+ */
+enum
+{
+ // Fields inside UnicodeData.txt
+ Fcode, // hexadecimal codepoint, decoded with codepoint()
+ Fname, // character name; ", First>" / ", Last>" mark a range
+ Fcategory, // general category, e.g. "Lu"; its letters select the tables
+ Fcombine,
+ Fbidir,
+ Fdecomp,
+ Fdecimal,
+ Fdigit,
+ Fnumeric,
+ Fmirror,
+ Foldname,
+ Fcomment,
+ Fupper, // upper case mapping as a hexadecimal codepoint, may be empty
+ Flower, // lower case mapping, same format
+ Ftitle, // title case mapping, same format
+
+ NumFields, // number of fields expected on each line
+ NumRunes = 1 << 21, // size of every per-codepoint table; codepoints must be below it
+};
+
+/* input functions */
+/*
+ * results of parse. ParseEOF is deliberately the zero value: some callers
+ * test the result for truth to detect the end of input.
+ */
+enum
+{
+ ParseEOF, // nothing could be read
+ ParseOK, // line was split into fields
+ ParseSkip, // empty line or '#' comment; fields were not filled in
+};
+
+// read one line into line (capacity len) and split it on ';' into nfield fields; returns a Parse* code
+int parse(io·Stream *io, int nfield, char **field, int len, char *line);
+// decode a string of hexadecimal digits (0-9, A-F)
+int codepoint(char *s);
+// compute the inclusive range described by field; *stop carries the previous range end in
+void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop);
+
+/* output functions */
+// print the includes and the static rangesearch helper used by the generated lookups
+void putsearch(void);
+// the three table writers print a static rune array named ident from the
+// entries set in prop, clear what they print, and return 1 if anything was printed
+int putrange(char *ident, char *prop, int force);
+int putpair(char *ident, char *prop);
+int putsingle(char *ident, char *prop);
diff --git a/src/base/utf/vendor/mkgraphemedata.c b/src/base/utf/vendor/mkgraphemedata.c
new file mode 100644
index 0000000..ce5a952
--- /dev/null
+++ b/src/base/utf/vendor/mkgraphemedata.c
@@ -0,0 +1,24 @@
+#include <u.h>
+#include <base.h>
+#include <libutf.h>
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+/* usage prints the command line synopsis to stderr and exits with status 1 */
+static
+void
+usage(void)
+{
+	fputs("usage: mkgraphemedata <GraphemeBreakProperty.txt>\n", stderr);
+	exit(1);
+}
+
+/*
+ * main is currently a stub: it consumes the option block and checks that
+ * exactly one argument (the data file named in usage) remains, but does
+ * not read it yet.
+ */
+int
+main(int argc, char *argv[])
+{
+	ARGBEGIN{
+	}ARGEND;
+
+	if(argc != 1)
+		usage();
+
+	// TODO: parse the grapheme break properties and emit the tables
+	return 0;
+}
diff --git a/src/base/utf/vendor/mkrunetype.c b/src/base/utf/vendor/mkrunetype.c
new file mode 100644
index 0000000..b33df32
--- /dev/null
+++ b/src/base/utf/vendor/mkrunetype.c
@@ -0,0 +1,390 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// globals
+
+/*
+ * case mappings are stored as biased differences: DELTA(mapx, x) is the
+ * distance from x to its mapping mapx, shifted up by OFFSET. The generated
+ * lookup functions subtract OFFSET again when applying the entry.
+ */
+#define OFFSET (1 << 20)
+#define DELTA(mapx, x) (OFFSET + (mapx) - (x))
+
+// TODO: use bitarrays. will reduce executable size 8x
+/*
+ * Table holds one slot per codepoint for every property and mapping this
+ * tool extracts. The single static instance below starts zeroed; main
+ * fills it from the data file and the table writers clear entries as they
+ * print them.
+ */
+struct Table
+{
+ /* properties */
+ // non-zero when the codepoint has the property
+ char isspace[NumRunes];
+ char isalpha[NumRunes];
+ char ismark[NumRunes];
+ char isdigit[NumRunes];
+ char isupper[NumRunes];
+ char islower[NumRunes];
+ char istitle[NumRunes];
+ char ispunct[NumRunes];
+ char issymbl[NumRunes];
+ char iscntrl[NumRunes];
+
+ // NOTE(review): combine is not read or written anywhere in this file
+ char combine[NumRunes];
+
+ /* transformations */
+ // mapped codepoint for each codepoint; main initialises these to the identity
+ int toupper[NumRunes];
+ int tolower[NumRunes];
+ int totitle[NumRunes];
+};
+
+static struct Table table;
+
+// -----------------------------------------------------------------------
+// internal functions
+
+/*
+ * isrange prints the "is<label>_range" table for prop via putrange and
+ * returns its result (1 if a table was printed). force is handed through
+ * unchanged.
+ */
+static
+int
+isrange(char *label, char *prop, int force)
+{
+	int n;
+	char ident[128];
+
+	/* snprintf reports the untruncated length, so any n >= size means truncation */
+	n = snprintf(ident, arrlen(ident), "is%s_range", label);
+	if(n < 0 || n >= (int)arrlen(ident))
+		panicf("out of identifier space\n");
+
+	return putrange(ident, prop, force);
+}
+
+/*
+ * ispair prints the "is<label>_pair" table for prop via putpair and
+ * returns its result (1 if a table was printed).
+ */
+static
+int
+ispair(char *label, char *prop)
+{
+	int n;
+	char ident[128];
+
+	/* snprintf reports the untruncated length, so any n >= size means truncation */
+	n = snprintf(ident, arrlen(ident), "is%s_pair", label);
+	if(n < 0 || n >= (int)arrlen(ident))
+		panicf("out of identifier space\n");
+
+	return putpair(ident, prop);
+}
+
+/*
+ * issingle prints the "is<label>_single" table for prop via putsingle and
+ * returns its result (1 if a table was printed).
+ */
+static
+int
+issingle(char *label, char *prop)
+{
+	int n;
+	char ident[128];
+
+	/* snprintf reports the untruncated length, so any n >= size means truncation */
+	n = snprintf(ident, arrlen(ident), "is%s_single", label);
+	if(n < 0 || n >= (int)arrlen(ident))
+		panicf("out of identifier space\n");
+
+	return putsingle(ident, prop);
+}
+
+/*
+ * makeis prints the lookup tables for one property followed by the
+ * generated predicate utf8·is<label>.
+ *
+ * with onlyranges set, every run (even a single codepoint) goes into the
+ * range table and nothing else is produced. Otherwise runs of two or more
+ * go into the range table, every-other-codepoint runs into the pair table
+ * (only if pairs is set) and whatever is left into the single table. The
+ * predicate only consults the tables that were actually printed.
+ */
+static
+void
+makeis(char *label, char *table, int pairs, int onlyranges)
+{
+	int ranged, paired, singled;
+
+	paired = 0;
+	singled = 0;
+
+	/* order matters: each writer removes what it printed from table */
+	ranged = isrange(label, table, onlyranges);
+	if(!onlyranges){
+		if(pairs)
+			paired = ispair(label, table);
+		singled = issingle(label, table);
+	}
+
+	printf(
+		"int\n"
+		"utf8·is%s(rune c)\n"
+		"{\n"
+		" rune *p;\n"
+		"\n",
+		label);
+
+	if(ranged)
+		printf(
+			" p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
+			" if(p && c >= p[0] && c <= p[1])\n"
+			" return 1;\n",
+			label, label);
+
+	if(paired)
+		printf(
+			" p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
+			" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+			" return 1;\n",
+			label, label);
+
+	if(singled)
+		printf(
+			" p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
+			" if(p && c == p[0])\n"
+			" return 1;\n",
+			label, label);
+
+	printf(
+		" return 0;\n"
+		"}\n"
+		"\n");
+}
+
+/*
+ * torange prints the "to<label>_range" table: one "first, last, delta"
+ * triple for every run of consecutive codepoints in index that map with
+ * the same biased delta. Runs of length one are only printed when force is
+ * set. Printed entries are reset to the identity mapping in index.
+ *
+ * returns 1 if the table was emitted, 0 otherwise.
+ */
+static
+int
+torange(char *label, int *index, int force)
+{
+	int lo, hi, delta, opened;
+
+	opened = 0;
+	lo = 0;
+	while(lo < NumRunes){
+		if(index[lo] == lo){
+			lo++;
+			continue;
+		}
+
+		delta = DELTA(index[lo], lo);
+		/* the delta is stored in a rune table, so it has to survive the conversion */
+		if(delta != (rune)delta)
+			panicf("bad map delta %d", delta);
+
+		/* extend over neighbours sharing the delta; hi ends one past the run */
+		hi = lo + 1;
+		while(hi < NumRunes && DELTA(index[hi], hi) == delta){
+			index[hi] = hi;
+			hi++;
+		}
+
+		if(force || hi != lo + 1){
+			if(!opened){
+				printf("static rune to%s_range[] = {\n", label);
+				opened = 1;
+			}
+			index[lo] = lo;
+			printf("\t0x%.4x, 0x%.4x, %d,\n", lo, hi-1, delta);
+		}
+		lo = hi;
+	}
+	if(opened)
+		printf("};\n\n");
+
+	return opened;
+}
+
+/*
+ * topair prints the "to<label>_pair" table: one "first, last, delta"
+ * triple for every run of codepoints spaced two apart that map with the
+ * same biased delta. Only runs with at least two members are printed and
+ * reset to the identity in index.
+ *
+ * returns 1 if the table was emitted, 0 otherwise.
+ */
+static
+int
+topair(char *label, int *index)
+{
+	int lo, hi, delta, opened;
+
+	opened = 0;
+	lo = 0;
+	while(lo + 2 < NumRunes){
+		if(index[lo] == lo){
+			lo++;
+			continue;
+		}
+
+		delta = DELTA(index[lo], lo);
+		if(delta != (rune)delta)
+			panicf("bad delta %d", delta);
+
+		/* step over every other codepoint; hi ends on the first mismatch */
+		hi = lo + 2;
+		while(hi < NumRunes && DELTA(index[hi], hi) == delta){
+			index[hi] = hi;
+			hi += 2;
+		}
+
+		if(hi > lo + 2){
+			if(!opened){
+				printf("static rune to%s_pair[] = {\n", label);
+				opened = 1;
+			}
+			index[lo] = lo;
+			printf("\t0x%.4x, 0x%.4x, %d,\n", lo, hi-2, delta);
+		}
+
+		lo = hi;
+	}
+	if(opened)
+		printf("};\n\n");
+
+	return opened;
+}
+
+/*
+ * tosingle prints the "to<label>_single" table: a "codepoint, delta" pair
+ * for every codepoint in index that still maps away from itself, resetting
+ * each to the identity once printed.
+ *
+ * returns 1 if the table was emitted, 0 otherwise.
+ */
+static
+int
+tosingle(char *label, int *index)
+{
+	int r, delta, opened;
+
+	opened = 0;
+	for(r = 0; r < NumRunes; r++){
+		if(index[r] != r){
+			delta = DELTA(index[r], r);
+			if(delta != (rune)delta)
+				panicf("bad map delta %d", delta);
+
+			if(!opened){
+				printf("static rune to%s_single[] = {\n", label);
+				opened = 1;
+			}
+			index[r] = r;
+			printf("\t0x%.4x, %d,\n", r, delta);
+		}
+	}
+	if(opened)
+		printf("};\n\n");
+
+	return opened;
+}
+
+/*
+ * mkto prints the mapping tables for one case transformation followed by
+ * the generated function utf8·to<label>.
+ *
+ * with onlyrange set, every run (even a single codepoint) is forced into
+ * the range table and nothing else is produced. Otherwise runs of two or
+ * more go into the range table, every-other-codepoint runs into the pair
+ * table (only if pairs is set) and the remainder into the single table.
+ * This mirrors makeis; forcing single codepoints into the range table when
+ * the pair and single tables are wanted would leave those tables empty.
+ *
+ * the generated function returns c unchanged when no table has an entry.
+ */
+static
+void
+mkto(char *label, int *index, int pairs, int onlyrange)
+{
+	int hasr, hasp=0, hass=0;
+
+	/* order matters: each writer resets what it printed in index */
+	hasr = torange(label, index, onlyrange);
+	if(!onlyrange && pairs)
+		hasp = topair(label, index);
+	if(!onlyrange)
+		hass = tosingle(label, index);
+
+	printf(
+		"rune\n"
+		"utf8·to%s(rune c)\n"
+		"{\n"
+		" rune *p;\n"
+		"\n",
+		label);
+
+	/* stored deltas are biased by OFFSET, so the lookups subtract it again */
+	if(hasr)
+		printf(
+			" p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
+			" if(p && c >= p[0] && c <= p[1])\n"
+			" return c + p[2] - %d;\n",
+			label, label, OFFSET);
+
+	if(hasp)
+		printf(
+			" p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
+			" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+			" return c + p[2] - %d;\n",
+			label, label, OFFSET);
+
+	if(hass)
+		printf(
+			" p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
+			" if(p && c == p[0])\n"
+			" return c + p[1] - %d;\n",
+			label, label, OFFSET);
+
+	printf(
+		" return c;\n"
+		"}\n"
+		"\n"
+	);
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+/* usage prints the command line synopsis to stderr and exits with status 1 */
+static
+void
+usage(void)
+{
+	fputs("usage: mkrunetype <UnicodeData.txt>\n", stderr);
+	exit(1);
+}
+
+/*
+ * main reads the UnicodeData.txt file named by the single argument,
+ * records each codepoint's properties and case mappings in table, and
+ * writes the generated lookup source to stdout: the search helper, the
+ * utf8·is* predicates and the utf8·to* mappings.
+ */
+int
+main(int argc, char *argv[])
+{
+	int i, at, sc, c, ec;
+	io·Stream *utf8;
+	char *prop, *field[NumFields], line[1024];
+
+	ARGBEGIN{
+	}ARGEND;
+
+	if(argc != 1)
+		usage();
+
+	if(!(utf8 = io·open(argv[0], "r")))
+		panicf("can't open %s\n", argv[0]);
+
+	/* by default each character maps to itself */
+	for(i = 0; i < NumRunes; i++) {
+		table.toupper[i] = i;
+		table.tolower[i] = i;
+		table.totitle[i] = i;
+	}
+
+	/* ensure all C locale white space characters pass */
+	table.isspace['\t'] = 1;
+	table.isspace['\n'] = 1;
+	table.isspace['\r'] = 1;
+	table.isspace['\f'] = 1;
+	table.isspace['\v'] = 1;
+	table.isspace[0x85] = 1;
+
+	/* ec carries the end of the previous range into codepointrange */
+	ec = -1;
+	while((at = parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+		/* blank and comment lines leave field untouched: never interpret them */
+		if(at == ParseSkip)
+			continue;
+
+		/* parse unicode range */
+		codepointrange(utf8, field, &sc, &ec);
+		prop = field[Fcategory];
+
+		for(c = sc; c <= ec; c++){
+			/* grab properties */
+			switch(prop[0]){
+			case 'L':
+				table.isalpha[c] = 1;
+				switch(prop[1]){
+				case 'u': table.isupper[c] = 1; break;
+				case 'l': table.islower[c] = 1; break;
+				case 't': table.istitle[c] = 1; break;
+				case 'm': break; // modifier letters
+				case 'o': break; // ideograph letters
+				default:
+					goto badproperty;
+				}
+				break;
+
+			case 'Z':
+				table.isspace[c] = 1;
+				break;
+
+			case 'M':
+				table.ismark[c] = 1;
+				break;
+
+			case 'N':
+				table.isdigit[c] = 1;
+				break;
+
+			case 'P':
+				table.ispunct[c] = 1;
+				break;
+
+			case 'S':
+				table.issymbl[c] = 1;
+				break;
+
+			case 'C':
+				table.iscntrl[c] = 1;
+				break;
+
+			default: badproperty:
+				panicf("unrecognized category '%s'", prop);
+			}
+			/* grab transformations */
+			if(*field[Fupper])
+				table.toupper[c] = codepoint(field[Fupper]);
+			if(*field[Flower])
+				table.tolower[c] = codepoint(field[Flower]);
+			if(*field[Ftitle])
+				table.totitle[c] = codepoint(field[Ftitle]);
+		}
+	}
+	io·close(utf8);
+
+	putsearch();
+
+	makeis("space", table.isspace, 0, 1);
+	makeis("digit", table.isdigit, 0, 1);
+	makeis("alpha", table.isalpha, 0, 0);
+	makeis("upper", table.isupper, 1, 0);
+	makeis("lower", table.islower, 1, 0);
+	makeis("title", table.istitle, 1, 0);
+	makeis("punct", table.ispunct, 1, 0);
+
+	mkto("upper", table.toupper, 1, 0);
+	mkto("lower", table.tolower, 1, 0);
+	mkto("title", table.totitle, 1, 0);
+
+	return 0;
+}
diff --git a/src/base/utf/vendor/mkrunewidth.c b/src/base/utf/vendor/mkrunewidth.c
new file mode 100644
index 0000000..14e6973
--- /dev/null
+++ b/src/base/utf/vendor/mkrunewidth.c
@@ -0,0 +1,325 @@
+#include "common.h"
+
+/*
+ * inspired by design choices in utf8proc/charwidths.jl
+ * all widths default to 1 unless they fall within the categories:
+ * 1. Mn 2. Mc 3. Me 4. Zl
+ * 5. Zp 6. Cc 7. Cf 8. Cs
+ * these default to zero width
+ */
+/*
+ * names for the East Asian Width classes, grouped by the column width
+ * they are given. NOTE(review): parse_eawidths matches the class letters
+ * directly and does not reference these constants.
+ */
+enum
+{
+ /* width ? */
+ WidthNeutral, /* (N) practically treated like narrow but unclear ... */
+ WidthAmbiguous, /* (A) sometimes wide and sometimes not... */
+ /* width 1 */
+ WidthHalf, /* (H) = to narrow (compatibility equivalent) */
+ WidthNarrow, /* (Na) ASCII width */
+ /* width 2 */
+ WidthWide, /* (W) 2x width */
+ WidthFull, /* (F) = to wide (compatibility equivalent) */
+};
+
+/*
+ * width[w][c] is non-zero when codepoint c has been assigned a width of
+ * w columns (w = 0, 1 or 2). main checks that no codepoint is set in more
+ * than one row before the tables are printed.
+ */
+struct Table
+{
+ char width[3][NumRunes];
+};
+
+static struct Table table;
+
+// -----------------------------------------------------------------------
+// internal functions
+
+/*
+ * parse_category reads the UnicodeData.txt file at path and gives every
+ * listed codepoint its default width from its general category: 0 for
+ * Mn, Mc, Me, Zl, Zp, Cc, Cf and Cs, and 1 for everything else.
+ * Codepoints that do not appear in the file are left without a width.
+ */
+static
+void
+parse_category(char *path)
+{
+	int at, sc, c, ec, w;
+	io·Stream *utf8;
+	char *prop, *field[NumFields], line[1024];
+
+	if(!(utf8 = io·open(path, "r")))
+		panicf("can't open %s\n", path);
+
+	/* ec carries the end of the previous range into codepointrange */
+	ec = -1;
+	while((at = parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+		/* blank and comment lines leave field untouched: never interpret them */
+		if(at == ParseSkip)
+			continue;
+
+		codepointrange(utf8, field, &sc, &ec);
+
+		prop = field[Fcategory];
+
+		/* width 1 unless the category is one of the zero width ones */
+		w = 1;
+		switch(prop[0]){
+		case 'M':
+			if(prop[1] == 'n' || prop[1] == 'c' || prop[1] == 'e')
+				w = 0;
+			break;
+		case 'Z':
+			if(prop[1] == 'l' || prop[1] == 'p')
+				w = 0;
+			break;
+		case 'C':
+			/* must not fall into the default case, that would undo w = 0 */
+			if(prop[1] == 'c' || prop[1] == 'f' || prop[1] == 's')
+				w = 0;
+			break;
+		default:
+			break;
+		}
+
+		for(c = sc; c <= ec; c++)
+			table.width[w][c] = 1;
+	}
+
+	io·close(utf8);
+}
+
+/*
+ * coderange decodes field, which holds either one hexadecimal codepoint
+ * or two joined by "..", into the inclusive range [*l, *r]. A single
+ * codepoint gives *l == *r. The ".." separator is overwritten with NULs,
+ * so field is modified.
+ */
+static
+void
+coderange(char *field, int *l, int *r)
+{
+	char *sep;
+
+	sep = strstr(field, "..");
+	if(!sep){
+		*l = *r = codepoint(field);
+		return;
+	}
+
+	/* cut the string in two at the separator */
+	sep[0] = 0;
+	sep[1] = 0;
+	*l = codepoint(field);
+	*r = codepoint(sep + 2);
+}
+
+/*
+ * parse_eawidths reads the east asian width file at path (lines of the
+ * form "<code or range>;<class>...") and overrides the category based
+ * widths: classes Na and H become width 1, W and F become width 2.
+ * Classes A and N are left as they were. An affected codepoint is removed
+ * from the other two width rows so it stays in exactly one.
+ */
+static
+void
+parse_eawidths(char *path)
+{
+	int at, w;
+	int l, c, r;
+	io·Stream *utf8;
+	char *field[2], line[1024];
+
+	if(!(utf8 = io·open(path, "r")))
+		panicf("can't open %s\n", path);
+
+	while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+		if(at == ParseSkip)
+			continue;
+
+		switch(field[1][0]){
+		case 'A': continue;
+		case 'N':
+			/* plain N is neutral, only Na is narrow */
+			if(field[1][1] != 'a')
+				continue;
+			/* fallthrough */
+		case 'H': w = 1; break;
+
+		case 'W': /* fallthrough */
+		case 'F': w = 2; break;
+
+		default:
+			panicf("malformed east asian width class: %s\n", field[1]);
+		}
+
+		coderange(field[0], &l, &r);
+
+		for(c=l; c <= r; c++){
+			/* ensure it only exists in one table */
+			table.width[w][c] = 1;
+			table.width[(w+1)%3][c] = 0;
+			table.width[(w+2)%3][c] = 0;
+		}
+	}
+	io·close(utf8);
+}
+
+/*
+ * parse_emoji reads the emoji data file at path (lines of the form
+ * "<code or range> ; <property> ...") and forces every codepoint whose
+ * property field mentions Emoji_Presentation to width 2, removing it from
+ * the width 0 and width 1 rows.
+ */
+static
+void
+parse_emoji(char *path)
+{
+	int at;
+	int l, c, r;
+	io·Stream *utf8;
+	char *s, *field[2], line[1024];
+
+	if(!(utf8 = io·open(path, "r")))
+		panicf("can't open %s\n", path);
+
+	while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+		if(at == ParseSkip)
+			continue;
+
+		/* only override emoji presentation */
+		if(!strstr(field[1], "Emoji_Presentation"))
+			continue;
+
+		/* trim trailing space */
+		for(s=field[0]; *s; s++){
+			if(*s == ' ')
+				*s = 0;
+		}
+
+		coderange(field[0], &l, &r);
+
+		for(c=l; c <= r; c++){
+			table.width[0][c] = 0;
+			table.width[1][c] = 0;
+			table.width[2][c] = 1;
+		}
+	}
+
+	io·close(utf8);
+}
+
+/* output functions */
+/*
+ * maketable prints the lookup tables for one width row followed by a
+ * generated static predicate is<label>.
+ *
+ * with onlyranges set, every run (even a single codepoint) goes into the
+ * "<label>_range" table and nothing else is produced. Otherwise runs of
+ * two or more go into the range table, every-other-codepoint runs into
+ * "<label>_pair" (only if pairs is set) and the remainder into
+ * "<label>_single". The predicate only consults tables that were printed.
+ */
+static
+void
+maketable(char *label, char *table, int pairs, int onlyranges)
+{
+	int n, r, p=0, s=0;
+	char ident[3][128];
+
+	enum
+	{
+		Irange,
+		Ipair,
+		Isingle,
+	};
+
+	/*
+	 * snprintf reports the untruncated length, so any n >= size means the
+	 * identifier did not fit. Each writer removes what it printed from
+	 * table, so the order range, pair, single matters.
+	 */
+
+	/* ranges */
+	n = snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label);
+	if(n < 0 || n >= (int)arrlen(ident[Irange]))
+		panicf("out of identifier space\n");
+	r = putrange(ident[Irange], table, onlyranges);
+
+	if(!onlyranges && pairs){
+		n = snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label);
+		if(n < 0 || n >= (int)arrlen(ident[Ipair]))
+			panicf("out of identifier space\n");
+		p = putpair(ident[Ipair], table);
+	}
+	if(!onlyranges){
+		n = snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label);
+		if(n < 0 || n >= (int)arrlen(ident[Isingle]))
+			panicf("out of identifier space\n");
+
+		s = putsingle(ident[Isingle], table);
+	}
+
+	printf(
+		"static int\n"
+		"is%s(rune c)\n"
+		"{\n"
+		" rune *p;\n"
+		"\n",
+		label);
+
+	if(r){
+		printf(
+			" p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
+			" if(p && c >= p[0] && c <= p[1])\n"
+			" return 1;\n",
+			ident[Irange], ident[Irange]);
+	}
+
+	if(p){
+		printf(
+			" p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
+			" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+			" return 1;\n",
+			ident[Ipair], ident[Ipair]);
+	}
+
+	if(s)
+		printf(
+			" p = rangesearch(c, %s, arrlen(%s), 1);\n"
+			" if(p && c == p[0])\n"
+			" return 1;\n",
+			ident[Isingle], ident[Isingle]);
+
+	printf(
+		" return 0;\n"
+		"}\n"
+		"\n");
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+/* usage prints the command line synopsis to stderr and exits with status 1 */
+static
+void
+usage(void)
+{
+	fputs("usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n", stderr);
+	exit(1);
+}
+
+/*
+ * SETWn(c) forces codepoint c to width n by setting it in row n of
+ * table.width and clearing it in the other two rows. Each macro expands
+ * to a single statement (the caller supplies the semicolon), so it is safe
+ * inside an unbraced if/else. c is evaluated more than once.
+ */
+#define SETW0(c) \
+	do{ \
+		table.width[0][(c)] = 1; \
+		table.width[1][(c)] = 0; \
+		table.width[2][(c)] = 0; \
+	}while(0)
+
+#define SETW1(c) \
+	do{ \
+		table.width[0][(c)] = 0; \
+		table.width[1][(c)] = 1; \
+		table.width[2][(c)] = 0; \
+	}while(0)
+
+#define SETW2(c) \
+	do{ \
+		table.width[0][(c)] = 0; \
+		table.width[1][(c)] = 0; \
+		table.width[2][(c)] = 1; \
+	}while(0)
+
+
+/*
+ * main builds the width rows from its three arguments, in order the
+ * category data, the east asian width data and the emoji data, with each
+ * later file overriding the earlier ones. It then applies a few
+ * hard-coded overrides, verifies that no codepoint ended up with two
+ * widths, and writes the generated source to stdout: the search helper,
+ * the iswidth0/1/2 predicates and utf8·runewidth.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c, claims;
+	static char runewidth[] =
+		"\n"
+		"int\n"
+		"utf8·runewidth(rune c)\n"
+		"{\n"
+		" if(iswidth1(c))\n"
+		" return 1;\n"
+		" if(iswidth2(c))\n"
+		" return 2;\n"
+		" return 0;\n"
+		"}";
+
+	ARGBEGIN{
+	}ARGEND;
+
+	if(argc != 3)
+		usage();
+
+	parse_category(argv[0]);
+	parse_eawidths(argv[1]);
+	parse_emoji(argv[2]);
+
+	/* overrides */
+	SETW0(0x2028);
+	SETW0(0x2029);
+
+	SETW1(0x00AD);
+
+	/* simple checking */
+	for(c = 0; c < NumRunes; c++){
+		claims = table.width[0][c] + table.width[1][c] + table.width[2][c];
+		if(claims > 1)
+			panicf("improper table state");
+	}
+
+	putsearch();
+
+	maketable("width0", table.width[0], 1, 0);
+	maketable("width1", table.width[1], 1, 0);
+	maketable("width2", table.width[2], 1, 0);
+
+	puts(runewidth);
+}