diff options
Diffstat (limited to 'src/base')
40 files changed, 3719 insertions, 80 deletions
diff --git a/src/base/arg.c b/src/base/arg.c index 269043e..64e4dd6 100644 --- a/src/base/arg.c +++ b/src/base/arg.c @@ -1,71 +1 @@ -#include <u.h> -#include <base.h> - -// NOTE: this utf8 bit is copied from libunicode to remove the hard dependency just for ARG_BEGIN. - -#define UTFmax 4 -#define RuneSync 0x80u -#define RuneSelf 0x80u -#define RuneErr 0xFFFDu -#define RuneMax 0x10FFFFu -#define RuneMask 0x1FFFFFu - -#define Bit(i) (7-(i)) -/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */ -#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) -/* 0000 0000 0000 0111 1111 1111 */ -#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) -enum -{ - Bitx = Bit(1), - Tx = Tbyte(1), - Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, - - Maskx = (1 << Bitx) - 1, /* 0011 1111 */ - Testx = Maskx ^ 0xff, /* 1100 0000 */ - - SurrogateMin = 0xD800, - SurrogateMax = 0xDFFF, - Bad = RuneErr, -}; - - -int -arg·bytetorune(uint32* r, byte* s) -{ - int c[4], i; - uint32 l; - - c[0] = *(ubyte*)(s); - if(c[0] < Tx) { - *r = c[0]; - return 1; - } - - l = c[0]; - for(i = 1; i < UTFmax; i++) { - c[i] = *(ubyte*)(s+i); - c[i] ^= Tx; - if (c[i] & Testx) goto bad; - - l = (l << Bitx) | c[i]; - if(c[0] < Tbyte(i + 2)) { - l &= RuneX(i + 1); - if (i == 1) { - if (c[0] < Tbyte(2) || l <= Rune1) - goto bad; - } else if (l <= RuneX(i) || l > RuneMax) - goto bad; - if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) - goto bad; - - *r = l; - return i + 1; - } - } -bad: - *r = RuneErr; - return 1; -} - char *argv0; diff --git a/src/base/fmt/buffer.c b/src/base/fmt/buffer.c new file mode 100644 index 0000000..0099e72 --- /dev/null +++ b/src/base/fmt/buffer.c @@ -0,0 +1,60 @@ +#include "internal.h" + +static int +flush(fmt·State *io) +{ + int n; + char *s; + + void *heap = io->heap; + mem·Reallocator mem = io->mem; + + if(!io->buffer.beg) + return 0; + + n = 2*(uintptr)io->file; + s = io->buffer.beg; + + io->buffer.beg = mem.realloc(heap, io->buffer.beg, n, 1); + if(!io->buffer.beg){ + io->file = io->buffer.cur 
= io->buffer.end = nil; + mem.free(heap, s); + return 0; + } + io->file = (void*)(uintptr)n; + io->buffer.cur = io->buffer.beg + (io->buffer.cur - s); + io->buffer.end = io->buffer.beg + n - 1; + + return 1; +} + +int +fmt·make(mem·Reallocator mem, void *heap, fmt·State *io) +{ + int n; + + memset(io, 0, sizeof(*io)); + + n = 32; + io->buffer.beg = io->buffer.cur = mem.alloc(heap, n, 1); + if(!io->buffer.beg) + return -1; + io->buffer.end = io->buffer.beg + n - 1; + + io->flush = flush; + io->file = (void*)(uintptr)n; + io->n = 0; + + fmt·setlocale(io, nil, nil, nil); + return 0; +} + +void +fmt·free(fmt·State *io) +{ + void *heap = io->heap; + mem·Reallocator mem = io->mem; + + mem.free(heap, io->buffer.beg); + io->buffer.beg = io->buffer.cur = io->buffer.end = nil; +} diff --git a/src/base/fmt/do.c b/src/base/fmt/do.c new file mode 100644 index 0000000..bd2e65c --- /dev/null +++ b/src/base/fmt/do.c @@ -0,0 +1,728 @@ +#include "internal.h" +#include <arch/atomic.h> + +#define MaxFmt 128 +#define atomic·load(p) (*(p)) + +// ----------------------------------------------------------------------- +// globals + +/* built in verbs */ +static int fmtflag(fmt·State *); +static int fmtpercent(fmt·State *); +static int fmtrune(fmt·State *); +static int fmtfloat(fmt·State *); +static int fmtutf8(fmt·State *); +static int fmtint(fmt·State *); +static int fmtchar(fmt·State *); +static int fmtcount(fmt·State *); +static int fmtstring(fmt·State *); +static int fmterror(fmt·State *); + +static int badfmt(fmt·State *); + +static struct +{ + volatile int len; + Verb verb[MaxFmt]; +} formatter = +{ + 30, + { + {' ', fmtflag}, + {'#', fmtflag}, + {'%', fmtpercent}, + {'\'',fmtflag}, + {'+', fmtflag}, + {',', fmtflag}, + {'-', fmtflag}, + {'C', fmtrune}, + {'E', fmtfloat}, + {'F', fmtfloat}, + {'G', fmtfloat}, + {'L', fmtflag}, + {'S', fmtutf8}, + {'X', fmtint}, + {'b', fmtint}, + {'c', fmtchar}, + {'d', fmtint}, + {'e', fmtfloat}, + {'f', fmtfloat}, + {'g', fmtfloat}, + {'h', 
fmtflag}, + {'i', fmtint}, + {'l', fmtflag}, + {'n', fmtcount}, + {'o', fmtint}, + {'p', fmtint}, + {'r', fmterror}, + {'s', fmtstring}, + {'U', fmtflag}, + {'u', fmtint}, + {'x', fmtint}, + } +}; + +// ----------------------------------------------------------------------- +// internal functions + +static Formatter +format(int c) +{ + Verb *v, *e; + e = &formatter.verb[atomic·load(&formatter.len)]; + for(v=e; v > formatter.verb; --v){ + if(v->c == c) + return v->fmt; + } + + return badfmt; +} + +static char * +dispatch(fmt·State *io, char *fmt) +{ + rune r; + int i, n; + + io->flag = 0; + io->width = io->prec = 0; + + /* + * the form of each print verb: + * % [flags] verb + * + the verb is a single character + * + each flag is either + * - a single character + * - a decimal numeric string + * - up to 2 decimal strings can be used + * - [width|*].[prec|*] + * - if missing, set to 0 + * - if *, grab from varargs + */ + for(;;){ + fmt += utf8·decode(fmt, &r); + io->verb = r; + switch(r){ + case 0: + return nil; + case '.': + io->flag |= fmt·Width|fmt·Prec; + continue; + case '0': + if(!(io->flag & fmt·Width)){ + io->flag |= fmt·Zero; + continue; + } + /* fallthrough */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + i = 0; + while('0' <= r && r <= '9'){ + i = 10*i + (r-'0'); + r = *fmt++; + } + fmt--; + number: + if(io->flag & fmt·Width){ + io->flag |= fmt·Prec; + io->prec = i; + }else{ + io->flag |= fmt·Width; + io->width = i; + } + continue; + case '*': + i = va_arg(io->args, int); + if(i < 0){ + if(io->flag&fmt·Prec){ + io->flag &= ~fmt·Prec; + io->prec = 0; + continue; + } + i = -i; + io->flag |= fmt·Left; + } + goto number; + } + n = format(r)(io); + if(n < 0) + return nil; + if(!n) + return fmt; + } +} + +static char * +flush(fmt·State *io, char *b, int len) +{ + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(!io->flush || !(*io->flush)(io) || io->buffer.cur + len >= io->buffer.end) { + io->buffer.end = 
io->buffer.cur; + return nil; + } + return io->buffer.cur; +} + +static int +pad(fmt·State *io, int n) +{ + int i; + char *b=io->buffer.cur, *e=io->buffer.end; + + for(i=0; i<n; i++){ + if(b>=e){ + if(!(b=flush(io, b, 1))) + return -1; + e = io->buffer.end; + } + *b++ = ' '; + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + return 0; +} + +static int +copy(fmt·State *io, char *m, int sz, int n) +{ + ulong f; + rune r; + int nc, w, nb; + char *b, *e, *me; + + w = 0; + f = io->flag; + me = m + sz; + + if(f&fmt·Width) + w = io->width; + if(f&fmt·Prec && n > io->prec) + n = io->prec; + if(!(f&fmt·Left) && pad(io, w-n)<0) + return -1; + + b = io->buffer.cur; + e = io->buffer.end; + + for(nc=n; nc>0; nc--){ + r = *(uchar *)m; + if(utf8·onebyte(r)){ + nb=1; + m++; + }else if((me-m) >= UTFmax || utf8·canfit(m, me-m)){ + nb=utf8·decode(m, &r); + m+=n; + }else + break; + + if(b+n>e){ + if(!(b=flush(io, b, nb))) + return -1; + e = io->buffer.end; + } + b += utf8·encode(&r, b); + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(f&fmt·Left && pad(io, w-n)<0) + return -1; + + return 0; +} + +static int +copyrune(fmt·State *io, rune *m, int n) +{ + ulong f; + rune r, *me; + int w, nb; + char *b, *e; + + w = 0; + f = io->flag; + + if(f&fmt·Width) + w = io->width; + if(f&fmt·Prec && n > io->prec) + n = io->prec; + + if(!(f&fmt·Left) && pad(io, w-n)<0) + return -1; + + b = io->buffer.cur; + e = io->buffer.end; + + for(me=m+n; m < me; m++){ + r = *m; + nb = utf8·runelen(r); + if(b + nb > e){ + if(!(b=flush(io, b, nb))) + return -1; + e = io->buffer.end; + } + b += utf8·encode(&r, b); + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(f&fmt·Left && pad(io, w-n)<0) + return -1; + + return 0; +} + +static int +copystring(fmt·State *io, char *s) +{ + rune r; + int i,j; + + if(!s) + return copy(io, "<nil>", 5, 5); + + if(io->flag&fmt·Prec){ + i = 0; + for(j=0; j < io->prec && s[i]; j++) + i += utf8·decode(s+i, &r); + + return copy(io, s, i, j); + } + 
return copy(io, s, strlen(s), utf8·len(s)); +} + +static int +copyutf8(fmt·State *io, rune *s) +{ + rune *e; + int n,p; + + if(!s) + return copy(io, "<nil>", 5, 5); + + if(io->flag & fmt·Prec){ + p = io->prec; + for(n=0; n<p; n++) + if(!s[n]) + break; + }else{ + for(e=s; *e; e++) + ; + n = e - s; + } + + return copyrune(io, s, n); +} + +// ----------------------------------------------------------------------- +// format helpers + +static int +needseperate(int *digits, char **groups) +{ + int group; + + (*digits)++; + group = *(uchar *)*groups; + + if(group == 0xFF || group == 0x7f || group == 0x00) + return 0; + if(*digits > group){ + if((*groups)[1] != 0) + (*groups)++; + *digits = 1; + return 1; + } + return 0; +} + +// ----------------------------------------------------------------------- +// formatters + +static int +fmtchar(fmt·State *io) +{ + char x[1]; + x[0] = va_arg(io->args, int); + io->prec = 1; + + return copy(io, x, 1, 1); +} + +static int +fmtstring(fmt·State *io) +{ + char *s; + s = va_arg(io->args, char *); + return copystring(io, s); +} + +static int +fmterror(fmt·State *io) +{ + char *s; + s = strerror(errno); + return copystring(io, s); +} + +static int +fmtrune(fmt·State *io) +{ + rune x[1]; + + x[0] = va_arg(io->args, int); + return copyrune(io, x, 1); +} + +static int +fmtutf8(fmt·State *io) +{ + rune *s; + + s = va_arg(io->args, rune *); + return copyutf8(io, s); +} + +static int +fmtpercent(fmt·State *io) +{ + rune x[1]; + + x[0] = io->verb; + io->prec = 1; + return copyrune(io, x, 1); +} + +static int +fmtint(fmt·State *io) +{ + union{ + ulong u; + uvlong v; + } val; + int neg, base, i, n, f, w, isv; + int digits, bytes, runes, excess; + char *groups, *thousands; + char *p, *conv, buf[140]; + + f = io->flag; + neg = 0; + isv = 0; + val.u = 0; + + switch(io->verb){ + case 'o': case 'p': case 'u': case 'x': case 'X': + f |= fmt·Unsigned; + f &= ~(fmt·Sign|fmt·Space); + } + + /* set flags */ + if(io->verb=='p'){ + val.u = 
(ulong)va_arg(io->args, void*); + io->verb = 'x'; + f |= fmt·Unsigned; + }else if(f&fmt·Vlong){ + isv=1; + if(f&fmt·Unsigned) + val.v = va_arg(io->args, uvlong); + else + val.v = va_arg(io->args, vlong); + }else if(f&fmt·Long){ + if(f&fmt·Unsigned) + val.u = va_arg(io->args, ulong); + else + val.u = va_arg(io->args, long); + }else if(f&fmt·Byte){ + if(f&fmt·Unsigned) + val.u = (uchar)va_arg(io->args, int); + else + val.u = (char)va_arg(io->args, int); + }else if(f&fmt·Short){ + if(f&fmt·Unsigned) + val.u = (ushort)va_arg(io->args, int); + else + val.u = (short)va_arg(io->args, int); + }else{ + if(f&fmt·Unsigned) + val.u = va_arg(io->args, uint); + else + val.u = va_arg(io->args, int); + } + + conv = "0123456789abcdef"; + groups = "\4"; + thousands = io->thousands; + /* get base */ + switch(io->verb){ + case 'd': case 'i': case 'u': + base = 10; + groups = io->groups; + break; + case 'X': + conv = "0123456789ABCDEF"; + /*fallthrough*/ + case 'x': + base = 16; + thousands = ":"; + break; + case 'b': + base = 2; + thousands = ":"; + break; + case 'o': + base = 8; + break; + default: + return -1; + } + + /* check for negativity */ + if(!(f&fmt·Unsigned)){ + if(isv && (vlong)val.v < 0){ + val.v = -(vlong)val.v; + neg = 1; + }else if(!isv && (long)val.u < 0){ + val.u = -(long)val.u; + neg = 1; + } + } + + p = buf + sizeof(buf) - 1; + n = 0; + digits = 0; + excess = 0; + runes = utf8·len(thousands); + bytes = strlen(thousands); + +#define PARSE(VALUE) \ + while((VALUE)){ \ + i = (VALUE) % base; \ + (VALUE) /= base; \ + if((f&fmt·Comma) && n%4 == 3){ \ + *p-- = ','; \ + n++; \ + } \ + if((f&fmt·Apost) && needseperate(&digits, &groups)){ \ + n += runes; \ + excess += bytes - runes; \ + p -= bytes; \ + memmove(p+1, thousands, bytes); \ + } \ + *p-- = conv[i]; \ + n++; \ + } + if(isv) + PARSE(val.v) + else + PARSE(val.u) +#undef PARSE + + if(!n){ + if(!(f&fmt·Prec) || io->prec != 0 || (io->verb == 'o' && (f&fmt·Sharp))){ + *p-- = '0'; + n = 1; + if(f&fmt·Apost) + 
needseperate(&digits,&groups); + } + + if(io->verb == 'x' || io->verb == 'X') + f &= ~fmt·Sharp; + } + + for(w = io->prec; n < w && p > buf+3; n++){ + if((f&fmt·Apost) && needseperate(&digits, &groups)){ + n += runes; + excess += bytes - runes; + p -= bytes; + memmove(p+1, thousands, bytes); + } + *p-- = '0'; + } + + if(neg || (f&(fmt·Sign|fmt·Space))) + n++; + + if(f&fmt·Sharp){ + if(base==16) + n += 2; + else if(base == 8){ + if(p[1] == '0') + f &= ~fmt·Sharp; + else + n++; + } + } + + if(f&fmt·Zero && !(f & (fmt·Left|fmt·Prec))){ + w = 0; + if(f & fmt·Width) + w = io->width; + for(; n < w && p > buf+3; n++){ + if((f & fmt·Apost) && needseperate(&digits, &groups)){ + n += runes; + excess += bytes - runes; + p -= bytes; + memmove(p+1, thousands, bytes); + } + *p-- = '0'; + } + io->flag &= ~fmt·Width; + } + + if(f&fmt·Sharp){ + if(base==16) + *p-- = io->verb; + if(base==16 || base == 8) + *p-- = '0'; + } + + if(neg) + *p-- = '-'; + else if(f & fmt·Sign) + *p-- = '+'; + else if (f & fmt·Space) + *p-- = ' '; + + io->flag &= ~fmt·Prec; + return copy(io, p+1, n+excess, n); +} + +static int +fmtcount(fmt·State *io) +{ + void *p; + ulong f; + + f = io->flag; + p = va_arg(io->args, void*); + + if(f&fmt·Vlong) + *(vlong*)p = io->n; + else if(f&fmt·Long) + *(long*)p = io->n; + else if(f&fmt·Byte) + *(char*)p = io->n; + else if(f&fmt·Short) + *(short*)p = io->n; + else + *(int*)p = io->n; + + return 0; +} + +static int +fmtflag(fmt·State *io) +{ + switch(io->verb){ + case ',': io->flag |= fmt·Comma; break; + case '-': io->flag |= fmt·Left; break; + case '+': io->flag |= fmt·Sign; break; + case '#': io->flag |= fmt·Sharp; break; + case '\'': io->flag |= fmt·Apost; break; + case ' ': io->flag |= fmt·Space; break; + case 'u': io->flag |= fmt·Unsigned; break; + case 'L': io->flag |= fmt·Ldouble; break; + case 'h': + if(io->flag&fmt·Short) + io->flag |= fmt·Byte; + io->flag |= fmt·Short; + break; + case 'l': + if(io->flag&fmt·Long) + io->flag |= fmt·Vlong; + io->flag |= fmt·Long; 
+ break; + } + return 1; +} + +static int +badfmt(fmt·State *io) +{ + int n; + char x[UTFmax+2]; + + x[0] = '%'; + n = 1 + utf8·encode(&io->verb, x+1); + x[n++] = '%'; + io->prec = n; + copy(io, x, n, n); + + return 0; +} + +#include "float.c" + +// ----------------------------------------------------------------------- +// exports + +int +fmt·do(fmt·State *io, char *fmt) +{ + rune r; + int c, n; + char *b, *e; + + for(;;){ + b = io->buffer.cur; + e = io->buffer.end; + while((c = *(uchar *)fmt) && c != '%'){ + if(utf8·onebyte(c)){ + if(b >= e){ + if(!(b=flush(io, b, 1))) + return -1; + e = io->buffer.end; + } + *b++ = *fmt++; + }else{ + n = utf8·decode(fmt, &r); + if(b + n > e){ + if(!(b=flush(io, b, n))) + return -1; + e = io->buffer.end; + } + while(n--) + *b++ = *fmt++; + } + } + fmt++; + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(!c) /* we hit our nul terminator */ + return io->n - n; + io->buffer.end = e; + + if(!(fmt=dispatch(io, fmt))) + return -1; + } +} + +int +fmt·install(int verb, Formatter func) +{ + Verb *v; + int i, ret; + +lock: + if(verb <= 0 || verb >= 65536){ + ret = -1; + goto unlock; + } + if(!func) + func = badfmt; + + if((i = atomic·load(&formatter.len))==MaxFmt) + return -1; + + v = &formatter.verb[i]; + v->c = verb; + v->fmt = func; + + atomic·store(&formatter.len, i+1); + ret = 0; +unlock: + return ret; +} diff --git a/src/base/fmt/esprint.c b/src/base/fmt/esprint.c new file mode 100644 index 0000000..6d97340 --- /dev/null +++ b/src/base/fmt/esprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +char * +fmt·esprint(char *buf, char *end, char *fmt, ...) 
+{ + char *p; + va_list args; + + va_start(args, fmt); + p = fmt·vesprint(buf, end, fmt, args); + va_end(args); + + return p; +} diff --git a/src/base/fmt/float.c b/src/base/fmt/float.c new file mode 100644 index 0000000..63ea80f --- /dev/null +++ b/src/base/fmt/float.c @@ -0,0 +1,1077 @@ +#define FDIGIT 30 +#define FDEFLT 6 +#define NSIGNIF 17 + +static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; +static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; +static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; + +static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; + +static int +isNaN(double val) +{ + union{ + uvlong i; + double f; + }x; + + x.f = val; + return (x.i&uvinf) == uvinf && (x.i&~uvneginf) != 0; +} + +static double +NaN(void) +{ + union{ + uvlong i; + double f; + }x; + x.i = uvnan; + return x.f; +} + +static int +isInf(double val, int sign) +{ + union{ + uvlong i; + double f; + }x; + + x.f = val; + if(sign == 0) + return x.i == uvinf || x.i == uvneginf; + else if(sign == 1) + return x.i == uvinf; + else + return x.i == uvneginf; +} + +static double pows10[] = +{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 
1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, +}; + +static double +fpow10(int n) +{ + double d; + int neg; + + neg = 0; + if(n < 0){ + neg = 1; + n = -n; + } + + if(n<arrlen(pows10)) + d = pows10[n]; + else{ + d = pows10[arrlen(pows10)-1]; + for(;;){ + n -= arrlen(pows10)- 1; + if(n < arrlen(pows10)){ + d *= pows10[n]; + break; + } + d *= pows10[arrlen(pows10)- 1]; + } + } + if(neg) + return 1./d; + return d; +} + +static int +add1(char *a, int n) +{ + int c; + char *b; + + if(n < 0 || n > NSIGNIF) + return 0; + + for(b = a+n-1; b >= a; b--){ + c = *b + 1; + if(c <= '9'){ + *b = c; + return 0; + } + *b = '0'; + } + /* + * need to overflow adding digit. + * shift number down and insert 1 at beginning. + * decimal is known to be 0s or we wouldn't + * have gotten this far. (e.g., 99999+1 => 00000) + */ + a[0] = '1'; + return 1; +} + +static int +sub1(char *a, int n) +{ + int c; + char *b; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--){ + c = *b - 1; + if(c >= '0'){ + if(c == '0' && b == a){ + /* + * just zeroed the top digit; shift everyone up. + * decimal is known to be 9s or we wouldn't + * have gotten this far. (e.g., 10000-1 => 09999) + */ + *b = '9'; + return 1; + } + *b = c; + return 0; + } + *b = '9'; + } + /* + * can't get here. the number a is always normalized + * so that it has a nonzero first digit. + */ + abort(); +} + +// ----------------------------------------------------------------------- +// strtod + +#define Nbits 28 +#define Nmant 53 +#define Prec ((Nmant+Nbits+1)/Nbits) + +#define Sigbit (1<<(Prec*Nbits-Nmant)) /* first significant bit of Prec-th word */ +#define Ndig 1500 +#define One (ulong)(1<<Nbits) +#define Half (ulong)(One>>1) +#define Maxe 310 + +#define Fsign (1<<0) /* found - */ +#define Fesign (1<<1) /* found e- */ +#define Fdpoint (1<<2) /* found . 
/*
 * divby divides the decimal digit string a by 2^b in place.
 *
 * Quotient digits are written back starting at a[0]; the remainder is
 * expanded into further digits until it reaches zero, so the result can
 * be longer than the input and a needs room for it.  The result is NUL
 * terminated.
 *
 * *na is decremented for each input digit consumed before the running
 * value first reaches 2^b, and incremented for the digits produced
 * afterwards, mirroring the digit count kept by the caller.
 */
static void
divby(char *a, int *na, int b)
{
	char *src, *dst;
	int rem, d, ranout;

	src = dst = a;
	rem = 0;
	ranout = 0;

	/* gather leading digits until the running value reaches 2^b */
	while((rem >> b) == 0){
		d = *src++;
		if(d == '\0'){
			/* input exhausted first: scale by ten while still below 2^b */
			while(rem != 0 && ((rem*10) >> b) == 0)
				rem *= 10;
			ranout = 1;
			break;
		}
		rem = rem*10 + (d - '0');
		--*na;
	}

	if(!ranout){
		/* long division over the remaining input digits */
		for(;;){
			d = rem >> b;
			rem -= d << b;
			*dst++ = d + '0';
			d = *src++;
			if(d == '\0')
				break;
			rem = rem*10 + (d - '0');
		}
		++*na;
	}

	/* turn what is left of the remainder into trailing digits */
	while(rem != 0){
		rem *= 10;
		d = rem >> b;
		rem -= d << b;
		*dst++ = d + '0';
		++*na;
	}
	*dst = '\0';
}
/*
 * cmp checks whether a begins with the string b, ignoring the case of
 * the characters in a (b is compared as given, so it should be lower
 * case).
 *
 * Returns 0 when all of b matched, 1 otherwise.
 *
 * Bytes are read as unsigned char before they reach isupper/tolower;
 * handing those functions a negative char value is undefined behaviour.
 */
static int
cmp(char *a, char *b)
{
	int c1, c2;

	while((c1 = (unsigned char)*b++) != '\0'){
		c2 = (unsigned char)*a++;
		if(isupper(c2))
			c2 = tolower(c2);
		if(c1 != c2)
			return 1;
	}
	return 0;
}
if(state == S0) + continue; + break; + case '-': + if(state == S0) + flag |= Fsign; + else + flag |= Fesign; + case '+': + if(state == S0) + state = S1; + else + if(state == S5) + state = S6; + else + break; /* syntax */ + continue; + case '.': + flag |= Fdpoint; + dp = na; + if(state == S0 || state == S1){ + state = S3; + continue; + } + if(state == S2){ + state = S4; + continue; + } + break; + case 'e': case 'E': + if(state == S2 || state == S4){ + state = S5; + continue; + } + break; + } + break; + } + + /* clean up return char-pointer */ + switch(state) { + case S0: + if(cmp(s, "nan") == 0){ + if(aas != nil) + *aas = s+3; + goto retnan; + } + case S1: + if(cmp(s, "infinity") == 0){ + if(aas != nil) + *aas = s+8; + goto retinf; + } + if(cmp(s, "inf") == 0){ + if(aas != nil) + *aas = s+3; + goto retinf; + } + case S3: + if(aas != nil) + *aas = as; + goto ret0; /* no digits found */ + case S6: + s--; /* back over +- */ + case S5: + s--; /* back over e */ + break; + } + if(aas != nil) + *aas = s; + + if(flag & Fdpoint) + while(na > 0 && a[na-1] == '0') + na--; + if(na == 0) + goto ret0; /* zero */ + a[na] = 0; + if(!(flag & Fdpoint)) + dp = na; + if(flag & Fesign) + ex = -ex; + dp += ex; + if(dp < -Maxe){ + errno = ERANGE; + goto ret0; /* underflow by exp */ + } else + if(dp > +Maxe) + goto retinf; /* overflow by exp */ + + /* + * normalize the decimal ascii number + * to range .[5-9][0-9]* e0 + */ + bp = 0; /* binary exponent */ + while(dp > 0) + divascii(a, &na, &dp, &bp); + while(dp < 0 || a[0] < '5') + mulascii(a, &na, &dp, &bp); + + /* close approx by naive conversion */ + mid[0] = 0; + mid[1] = 1; + for(i=0; (c=a[i]) != '\0'; i++) { + mid[0] = mid[0]*10 + (c-'0'); + mid[1] = mid[1]*10; + if(i >= 8) + break; + } + low[0] = umuldiv(mid[0], One, mid[1]); + hig[0] = umuldiv(mid[0]+1, One, mid[1]); + for(i=1; i<Prec; i++) { + low[i] = 0; + hig[i] = One-1; + } + + /* binary search for closest mantissa */ + for(;;) { + /* mid = (hig + low) / 2 */ + c = 0; + for(i=0; 
/*
 * fmtexp writes an exponent suffix for e at p: the letter 'E' or 'e'
 * (chosen by ucase), a sign that is always present, and the magnitude
 * in decimal using at least two digits.  The text is NUL terminated.
 *
 *	fmtexp(p,   5, 0)  =>  "e+05"
 *	fmtexp(p, -12, 1)  =>  "E-12"
 */
static void
fmtexp(char *p, int e, int ucase)
{
	char rev[9];	/* magnitude digits, least significant first */
	int nd;

	*p++ = ucase ? 'E' : 'e';
	if(e < 0){
		*p++ = '-';
		e = -e;
	}else
		*p++ = '+';

	for(nd = 0; e != 0; e /= 10)
		rev[nd++] = '0' + e % 10;
	for(; nd < 2; nd++)
		rev[nd] = '0';

	/* emit most significant digit first */
	while(nd-- > 0)
		*p++ = rev[nd];
	*p = '\0';
}
+ */ +static void +dtoa(double f, char *s, int *exp, int *neg, int *len) +{ + int c, d, e2, e, ee, i, ndigit, oerrno; + char buf[NSIGNIF+10]; + double g; + + oerrno = errno; + + *neg = 0; + if(f < 0){ + f = -f; + *neg = 1; + } + + if(f == 0){ + *exp = 0; + s[0] = '0'; + s[1] = 0; + *len = 1; + return; + } + + frexp(f, &e2); + e = (int)(e2 * .301029995664); + g = f * fpow10(-e); + while(g < 1) { + e--; + g = f * fpow10(-e); + } + while(g >= 10){ + e++; + g = f * fpow10(-e); + } + + /* convert nsignif digits as a first approximation */ + for(i=0; i<NSIGNIF; i++){ + d = (int)g; + s[i] = d+'0'; + g = (g-d)*10; + } + s[i] = 0; + + e -= NSIGNIF-1; + fmtexp(s+NSIGNIF, e, 0); + + for(i=0; i<10; i++) { + g=fmtstrtod(s, nil); + if(f > g) { + if(add1(s, NSIGNIF)){ + /* gained a digit */ + e--; + fmtexp(s+NSIGNIF, e, 0); + } + continue; + } + if(f < g){ + if(sub1(s, NSIGNIF)){ + /* lost a digit */ + e++; + fmtexp(s+NSIGNIF, e, 0); + } + continue; + } + break; + } + + /* + * bump last few digits down to 0 as we can. + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--){ + c = s[i]; + if(c != '0'){ + s[i] = '0'; + g=fmtstrtod(s, nil); + if(g != f){ + s[i] = c; + break; + } + } + } + + /* + * remove trailing zeros. 
+ */ + ndigit = NSIGNIF; + while(ndigit > 1 && s[ndigit-1] == '0'){ + e++; + --ndigit; + } + s[ndigit] = 0; + *exp = e; + *len = ndigit; + + errno = oerrno; +} + + +static int +fmtfloat(fmt·State *io) +{ + char buf[NSIGNIF+10], *dot, *digits, *p, *end, suf[10], *cur; + double val; + int c, verb, ndot, e, exp, f, ndigits, neg, newndigits; + int npad, pt, prec, realverb, sign, nsuf, ucase, n, z1, z2; + + if(io->flag&fmt·Long) + val = va_arg(io->args, long double); + else + val = va_arg(io->args, double); + + /* extract formatting flags */ + f = io->flag; + io->flag = 0; + prec = FDEFLT; + if(f & fmt·Prec) + prec = io->prec; + + verb = io->verb; + ucase = 0; + switch(verb) { + case 'A': + case 'E': + case 'F': + case 'G': + verb += 'a'-'A'; + ucase = 1; + break; + } + + /* pick off special numbers. */ + if(isNaN(val)) { + end = special[0+ucase]; + special: + io->flag = f & (fmt·Width|fmt·Left); + return copy(io, end, strlen(end), strlen(end)); + } + if(isInf(val, 1)) { + end = special[2+ucase]; + goto special; + } + if(isInf(val, -1)) { + end = special[4+ucase]; + goto special; + } + + /* get exact representation. */ + digits = buf; + dtoa(val, digits, &exp, &neg, &ndigits); + + /* get locale's decimal point. */ + dot = io->decimal; + if(dot == nil) + dot = "."; + ndot = utf8·len(dot); + + /* + * now the formatting fun begins. + * compute parameters for actual fmt: + * + * pad: number of spaces to insert before/after field. + * z1: number of zeros to insert before digits + * z2: number of zeros to insert after digits + * point: number of digits to print before decimal point + * ndigits: number of digits to use from digits[] + * suf: trailing suffix, like "e-5" + */ + realverb = verb; + switch(verb){ + case 'g': + /* convert to at most prec significant digits. 
(prec=0 means 1) */ + if(prec == 0) + prec = 1; + if(ndigits > prec) { + if(digits[prec] >= '5' && add1(digits, prec)) + exp++; + exp += ndigits-prec; + ndigits = prec; + } + + /* + * extra rules for %g (implemented below): + * trailing zeros removed after decimal unless FmtSharp. + * decimal point only if digit follows. + */ + + /* fall through to %e */ + default: + case 'e': + /* one significant digit before decimal, no leading zeros. */ + pt = 1; + z1 = 0; + + /* + * decimal point is after ndigits digits right now. + * slide to be after first. + */ + e = exp + (ndigits-1); + + /* if this is %g, check exponent and convert prec */ + if(realverb == 'g') { + if(-4 <= e && e < prec) + goto casef; + prec--; /* one digit before decimal; rest after */ + } + + /* compute trailing zero padding or truncate digits. */ + if(1+prec >= ndigits) + z2 = 1+prec - ndigits; + else { + /* truncate digits */ + assert(realverb != 'g'); + newndigits = 1+prec; + if(digits[newndigits] >= '5' && add1(digits, newndigits)) { + /* had 999e4, now have 100e5 */ + e++; + } + ndigits = newndigits; + z2 = 0; + } + fmtexp(suf, e, ucase); + nsuf = strlen(suf); + break; + + casef: + case 'f': + /* determine where digits go with respect to decimal point */ + if(ndigits+exp > 0) { + pt = ndigits+exp; + z1 = 0; + } else { + pt = 1; + z1 = 1 + -(ndigits+exp); + } + + /* + * %g specifies prec = number of significant digits + * convert to number of digits after decimal point + */ + if(realverb == 'g') + prec += z1 - pt; + + /* compute trailing zero padding or truncate digits. 
*/ + if(pt+prec >= z1+ndigits) + z2 = pt+prec - (z1+ndigits); + else{ + /* truncate digits */ + assert(realverb != 'g'); + newndigits = pt+prec - z1; + if(newndigits < 0){ + z1 += newndigits; + newndigits = 0; + }else if(newndigits == 0){ + /* perhaps round up */ + if(digits[0] >= '5'){ + digits[0] = '1'; + newndigits = 1; + goto newdigit; + } + }else if(digits[newndigits] >= '5' && add1(digits, newndigits)){ + /* digits was 999, is now 100; make it 1000 */ + digits[newndigits++] = '0'; + newdigit: + /* account for new digit */ + if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ + z1--; + else /* 9.99 => 10.00 */ + pt++; + } + z2 = 0; + ndigits = newndigits; + } + nsuf = 0; + break; + } + + /* + * if %g is given without FmtSharp, remove trailing zeros. + * must do after truncation, so that e.g. print %.3g 1.001 + * produces 1, not 1.00. sorry, but them's the rules. + */ + if(realverb == 'g' && !(f & fmt·Sharp)) { + if(z1+ndigits+z2 >= pt) { + if(z1+ndigits < pt) + z2 = pt - (z1+ndigits); + else{ + z2 = 0; + while(z1+ndigits > pt && digits[ndigits-1] == '0') + ndigits--; + } + } + } + + /* + * compute width of all digits and decimal point and suffix if any + */ + n = z1+ndigits+z2; + if(n > pt) + n += ndot; + else if(n == pt){ + if(f & fmt·Sharp) + n += ndot; + else + pt++; /* do not print any decimal point */ + } + n += nsuf; + + /* + * determine sign + */ + sign = 0; + if(neg) + sign = '-'; + else if(f & fmt·Sign) + sign = '+'; + else if(f & fmt·Space) + sign = ' '; + if(sign) + n++; + + /* compute padding */ + npad = 0; + if((f & fmt·Width) && io->width > n) + npad = io->width - n; + if(npad && !(f & fmt·Left) && (f & fmt·Zero)){ + z1 += npad; + pt += npad; + npad = 0; + } + + /* format the actual field. too bad about doing this twice. 
*/ + if(npad && !(f & fmt·Left) && pad(io, npad < 0)) + return -1; + + cur = io->buffer.cur; + end = io->buffer.end; + + if(sign){ + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = sign; + } + + while(z1>0 || ndigits>0 || z2>0){ + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = c; + + if(--pt == 0) + for(p=dot; *p; p++){ + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = *p; + } + } + io->n += cur - (char*)io->buffer.cur; + io->buffer.cur = cur; + if(nsuf && copy(io, suf, nsuf, nsuf) < 0) + return -1; + if(npad && (f & fmt·Left) && pad(io, npad < 0)) + return -1; + + return 0; +} diff --git a/src/base/fmt/fprint.c b/src/base/fmt/fprint.c new file mode 100644 index 0000000..5077359 --- /dev/null +++ b/src/base/fmt/fprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +int +fmt·fprint(int fd, char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vfprint(fd, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/internal.h b/src/base/fmt/internal.h new file mode 100644 index 0000000..7bf47af --- /dev/null +++ b/src/base/fmt/internal.h @@ -0,0 +1,15 @@ +#pragma once + +#include <u.h> +#include <base.h> + +typedef int (*Formatter)(fmt·State *io); +typedef struct Verb Verb; + +struct Verb +{ + int c; + Formatter fmt; +}; + +void fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups); diff --git a/src/base/fmt/locale.c b/src/base/fmt/locale.c new file mode 100644 index 0000000..437c61e --- /dev/null +++ b/src/base/fmt/locale.c @@ -0,0 +1,16 @@ +#include "internal.h" + +void +fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups) +{ + if(decimal == nil || decimal[0] == '\0') + decimal = "."; + if(thousands == nil) + thousands = ","; + if(groups == nil) + groups = "\3"; + + io->groups = groups; + io->decimal = decimal; + io->thousands = thousands; +} diff --git a/src/base/fmt/nsprint.c b/src/base/fmt/nsprint.c new file mode 100644 index 0000000..90489e0 --- /dev/null +++ b/src/base/fmt/nsprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +int +fmt·nsprint(int len, char *buf, char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vnsprint(len, buf, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/open.c b/src/base/fmt/open.c new file mode 100644 index 0000000..8aadef5 --- /dev/null +++ b/src/base/fmt/open.c @@ -0,0 +1,34 @@ +#include "internal.h" + +static int +flush(fmt·State *io) +{ + int n, fd; + + fd = (uintptr)io->file; + n = io->buffer.cur - io->buffer.beg; + if(n && write(fd, io->buffer.beg, n) != n) + return -1; + + io->buffer.cur = io->buffer.beg; + return io->n; +} + +int +fmt·open(int fd, int len, char *buf, fmt·State *io) +{ + io->buffer.beg = buf; + io->buffer.cur = buf; + io->buffer.end = buf+len; + io->flush = flush; + io->file = (void*)(uintptr)fd; + io->flag = 0; + io->n = 0; + /* no heap needed */ + io->heap = nil; + io->mem = (mem·Reallocator){ 0 }; + + fmt·setlocale(io, nil, nil, nil); + + return 0; +} diff --git a/src/base/fmt/panic.c b/src/base/fmt/panic.c new file mode 100644 index 0000000..25ee277 --- /dev/null +++ b/src/base/fmt/panic.c @@ -0,0 +1,15 @@ +#include "internal.h" + +void +fmt·panic(char *fmt, ...) +{ + char buf[256]; + va_list arg; + + va_start(arg, fmt); + fmt·vesprint(buf, arrend(buf), fmt, arg); + va_end(arg); + + fmt·fprint(2, "%s: %s\n", argv0 ? argv0 : "<prog>", buf); + exits("fatal"); +} diff --git a/src/base/fmt/print.c b/src/base/fmt/print.c new file mode 100644 index 0000000..20b8e00 --- /dev/null +++ b/src/base/fmt/print.c @@ -0,0 +1,13 @@ +#include "internal.h" + +int +fmt·print(char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vfprint(1, fmt, args); + va_end(args); + return n; +} diff --git a/src/base/fmt/rules.mk b/src/base/fmt/rules.mk new file mode 100644 index 0000000..fdfdac0 --- /dev/null +++ b/src/base/fmt/rules.mk @@ -0,0 +1,21 @@ +# Local sources +SRCS_$(d)+=\ + $(d)/fmt/buffer.c\ + $(d)/fmt/do.c\ + $(d)/fmt/esprint.c\ + $(d)/fmt/fprint.c\ + $(d)/fmt/locale.c\ + $(d)/fmt/nsprint.c\ + $(d)/fmt/open.c\ + $(d)/fmt/print.c\ + $(d)/fmt/sprint.c\ + $(d)/fmt/vesprint.c\ + $(d)/fmt/vfprint.c\ + $(d)/fmt/vnsprint.c\ + $(d)/fmt/vprint.c\ + $(d)/fmt/vwrite.c\ + $(d)/fmt/panic.c\ + $(d)/fmt/write.c + +CHECK_$(d)+=\ + $(d)/fmt/test.c diff --git a/src/base/fmt/sprint.c b/src/base/fmt/sprint.c new file mode 100644 index 0000000..f1be6dd --- /dev/null +++ b/src/base/fmt/sprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·sprint(char *buf, char *fmt, ...) +{ + int n; + uint len; + va_list args; + + len = 1 << 30; + if(buf+len < buf) + len = -(uintptr)buf-1; + + va_start(args, fmt); + n = fmt·vnsprint(len, buf, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/test.c b/src/base/fmt/test.c new file mode 100644 index 0000000..d81a62e --- /dev/null +++ b/src/base/fmt/test.c @@ -0,0 +1,72 @@ +#include <u.h> +#include <base.h> +#include <libutf.h> +#include <libfmt.h> + +typedef struct Complex +{ + double r, i; +} Complex; + +int +Xfmt(fmt·State *io) +{ + Complex c; + c = va_arg(io->args, Complex); + + return fmt·write(io, "(real=%g,imag=%g)", c.r, c.i); +} + +int +main(int argc, char *argv[]) +{ + fmt·print("basic tests\n"); + fmt·print("\tx: %x\n", 0x87654321); + fmt·print("\tu: %u\n", 0x87654321); + fmt·print("\td: %d\n", 0x87654321); + fmt·print("\ts: %s\n", "hi there"); + fmt·print("\tc: %c\n", '!'); + fmt·print("\tg: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10); + fmt·print("\te: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10); + fmt·print("\tf: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10); + 
fmt·print("\tsmiley: %C\n", (rune)0x263a); + fmt·print("\t%g %.18g\n", 2e25, 2e25); + fmt·print("\t%2.18g\n", 1.0); + fmt·print("\t%2.18f\n", 1.0); + fmt·print("\t%f\n", 3.1415927/4); + fmt·print("\t%d\n", 23); + fmt·print("\t%i\n", 23); + fmt·print("\t%0.10d\n", 12345); + + fmt·print("%%4%%d tests\n"); + fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + fmt·print("\t%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); + fmt·print("\t%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); + fmt·print("\t%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); + + /* test %'d formats */ + fmt·print("%%'%%d tests\n"); + fmt·print("\t%'d %'d %'d\n", 1, 2222, 33333333); + fmt·print("\t%'019d\n", 0); + fmt·print("\t%08d %08d %08d\n", 1, 2222, 33333333); + fmt·print("\t%'08d %'08d %'08d\n", 1, 2222, 33333333); + fmt·print("\t%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); + fmt·print("\t%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); + + /* test precision */ + fmt·print("precision tests\n"); + fmt·print("%020.10d\n", 100); + + /* test install */ + fmt·install('X', Xfmt); + Complex c = { 1.5, -2.3 }; + fmt·print("x = %X\n", c); + + return 0; + +} diff --git a/src/base/fmt/vesprint.c b/src/base/fmt/vesprint.c new file mode 100644 index 0000000..18f4dd2 --- /dev/null +++ b/src/base/fmt/vesprint.c @@ -0,0 +1,26 @@ +#include "internal.h" + +char* +fmt·vesprint(char *buf, char *end, char *fmt, va_list args) +{ + fmt·State io; + + if(end <= buf) + return nil; + + io.n = 0; + io.buffer.beg = io.buffer.cur = buf; + io.buffer.end = end-1; + io.flush = nil; + io.file 
= nil; + + va_copy(io.args, args); + + fmt·setlocale(&io, nil, nil, nil); + fmt·do(&io, fmt); + + va_end(io.args); + + *(io.buffer.cur) = 0; + return io.buffer.cur; +} diff --git a/src/base/fmt/vfprint.c b/src/base/fmt/vfprint.c new file mode 100644 index 0000000..4306ea7 --- /dev/null +++ b/src/base/fmt/vfprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·vfprint(int fd, char *fmt, va_list args) +{ + int n; + fmt·State io; + char buf[256]; + + fmt·open(fd, sizeof(buf), buf, &io); + + va_copy(io.args, args); + n = fmt·do(&io, fmt); + va_end(io.args); + + if(n > 0 && io.flush(&io) < 0) + return -1; + return n; +} diff --git a/src/base/fmt/vnsprint.c b/src/base/fmt/vnsprint.c new file mode 100644 index 0000000..7ded908 --- /dev/null +++ b/src/base/fmt/vnsprint.c @@ -0,0 +1,26 @@ +#include "internal.h" + +int +fmt·vnsprint(int len, char *buf, char *fmt, va_list args) +{ + fmt·State io; + + if(len <= 0) + return -1; + + io.n = 0; + io.buffer.beg = io.buffer.cur = buf; + io.buffer.end = buf+len-1; + io.flush = nil; + io.file = nil; + + va_copy(io.args, args); + + fmt·setlocale(&io, nil, nil, nil); + fmt·do(&io, fmt); + + va_end(io.args); + + *(io.buffer.cur) = 0; + return io.buffer.cur - io.buffer.beg; +} diff --git a/src/base/fmt/vprint.c b/src/base/fmt/vprint.c new file mode 100644 index 0000000..bb3076b --- /dev/null +++ b/src/base/fmt/vprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·vprint(char *fmt, va_list args) +{ + fmt·State io; + int n; + char buf[256]; + + fmt·open(1, sizeof(buf), buf, &io); + + va_copy(io.args, args); + n = fmt·do(&io, fmt); + va_end(io.args); + + if(n > 0 && io.flush(&io) < 0) + return -1; + return n; +} diff --git a/src/base/fmt/vwrite.c b/src/base/fmt/vwrite.c new file mode 100644 index 0000000..cacdef2 --- /dev/null +++ b/src/base/fmt/vwrite.c @@ -0,0 +1,26 @@ +#include "internal.h" + +int +fmt·vwrite(fmt·State *io, char *fmt, va_list args) +{ + int n; + va_list tmp; + + io->flag = io->width = io->prec = 0; + + 
va_copy(tmp, io->args); + va_end(io->args); + + va_copy(io->args,args); + n = fmt·do(io, fmt); + va_end(io->args); + + va_copy(io->args, tmp); + va_end(tmp); + + io->flag = io->width = io->prec = 0; + + if(n >= 0) + return 0; + return n; +} diff --git a/src/base/fmt/write.c b/src/base/fmt/write.c new file mode 100644 index 0000000..9a77223 --- /dev/null +++ b/src/base/fmt/write.c @@ -0,0 +1,22 @@ +#include "internal.h" + +int +fmt·write(fmt·State *io, char *fmt, ...) +{ + int n; + va_list args; + + io->flag = io->width = io->prec = 0; + + va_copy(args, io->args); + va_end(io->args); + + va_start(io->args, fmt); + n = fmt·do(io, fmt); + va_end(io->args); + + io->flag = io->width = io->prec = 0; + if(n >= 0) + return 0; + return n; +} diff --git a/src/base/io/close.c b/src/base/io/close.c new file mode 100644 index 0000000..5a773cd --- /dev/null +++ b/src/base/io/close.c @@ -0,0 +1,7 @@ +#include "internal.h" + +int +io·close(io·Stream *s) +{ + return fclose(s); +} diff --git a/src/base/io/open.c b/src/base/io/open.c index 71e88d4..fe78255 100644 --- a/src/base/io/open.c +++ b/src/base/io/open.c @@ -5,9 +5,3 @@ io·open(byte *name, byte *mode) { return fopen(name, mode); } - -int -io·close(io·Stream *s) -{ - return fclose(s); -} diff --git a/src/base/io/rules.mk b/src/base/io/rules.mk index 2e03ca5..124cd09 100644 --- a/src/base/io/rules.mk +++ b/src/base/io/rules.mk @@ -3,6 +3,7 @@ SRCS_$(d)+=\ $(d)/io/flush.c\ $(d)/io/interface.c\ $(d)/io/open.c\ + $(d)/io/close.c\ $(d)/io/putbyte.c\ $(d)/io/putstring.c\ $(d)/io/read.c\ @@ -11,4 +12,4 @@ SRCS_$(d)+=\ $(d)/io/stat.c\ $(d)/io/tell.c\ $(d)/io/unget.c\ - $(d)/io/write.c\ + $(d)/io/write.c diff --git a/src/base/rules.mk b/src/base/rules.mk index 9f25d37..0a262c7 100644 --- a/src/base/rules.mk +++ b/src/base/rules.mk @@ -5,10 +5,15 @@ include share/push.mk # local sources SRCS_$(d):=\ $(d)/arg.c + +CHECK_$(d):=\ + $(d)/test.c + include $(d)/bufio/rules.mk include $(d)/coro/rules.mk include $(d)/error/rules.mk include 
$(d)/flate/rules.mk +include $(d)/fmt/rules.mk include $(d)/fs/rules.mk include $(d)/gz/rules.mk include $(d)/io/rules.mk @@ -18,12 +23,10 @@ include $(d)/os/rules.mk include $(d)/rng/rules.mk include $(d)/sort/rules.mk include $(d)/string/rules.mk -CHECK_$(d):=\ - $(d)/test.c +include $(d)/utf/rules.mk # outputs LIBS_$(d) := $(d)/base.a -BINS_$(d) := include share/paths.mk diff --git a/src/base/utf/canfit.c b/src/base/utf/canfit.c new file mode 100644 index 0000000..4579ab3 --- /dev/null +++ b/src/base/utf/canfit.c @@ -0,0 +1,23 @@ +#include "internal.h" + +/* returns 1 if string of length n is long enough to be decoded */ +int +utf8·canfit(byte* s, int n) +{ + int i; + rune c; + + if(n <= 0) + return 0; + + c = *(ubyte*)s; + if(c < TByte1) + return 1; + + if(c < TByte3) + return n >= 2; + if(c < TByte4) + return n >= 3; + + return n >= UTFmax; +} diff --git a/src/base/utf/decode.c b/src/base/utf/decode.c new file mode 100644 index 0000000..01797f1 --- /dev/null +++ b/src/base/utf/decode.c @@ -0,0 +1,98 @@ +#include "internal.h" + +#define ACCEPT 0 +#define REJECT 12 + +static uint8 decode[] = { + /* + * the first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* + * the second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state + */ + 0,12,24,36,60,96,84,12,12,12,48,72, 
12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +int +utf8·decode(char *s, rune *r) +{ + int n; + rune v; + uint8 b, t, x=ACCEPT; + + b = ((uint8 *)s)[0]; + t = decode[b]; + v = (0xFF >> t) & b; + x = decode[256+x+t]; + + for(n=1; x > REJECT && n < UTFmax; n++){ + b = ((uint8 *)s)[n]; + t = decode[b]; + v = (v << 6) | (b & TMask); + x = decode[256+x+t]; + } + + if(x != ACCEPT){ + *r = RuneErr; + return 1; + } + + *r = v; + return n; +} + +#if 0 +int +utf8·decode(byte *s, rune *r) +{ + int c[UTFmax], i; + rune l; + + c[0] = *(ubyte*)(s); + if(c[0] < Tx){ + *r = c[0]; + return 1; + } + + l = c[0]; + for(i = 1; i < UTFmax; i++){ + c[i] = *(ubyte*)(s+i); + c[i] ^= Tx; + if(c[i] & Testx) goto bad; + + l = (l << Bitx) | c[i]; + if(c[0] < Tbyte(i + 2)){ + l &= RuneX(i + 1); + if(i == 1){ + if(c[0] < Tbyte(2) || l <= Rune1) + goto bad; + }else if(l <= RuneX(i) || l > RuneMax) + goto bad; + + if(i == 2 && SurrogateMin <= l && l <= SurrogateMax) + goto bad; + + *r = l; + return i + 1; + } + } +bad: + *r = RuneErr; + return 1; +} +#endif diff --git a/src/base/utf/decodeprev.c b/src/base/utf/decodeprev.c new file mode 100644 index 0000000..27dced6 --- /dev/null +++ b/src/base/utf/decodeprev.c @@ -0,0 +1,60 @@ +#include "internal.h" + +#define ACCEPT 0 +#define REJECT 12 + +static uint8 decode[] = { + /* + * the first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. 
+ */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. + */ + // 0 1 2 3 4 5 6 7 8 9 10 11 + 0,24,12,12,12,12,12,24,12,24,12,12, + 0,24,12,12,12,12,12,24,12,24,12,12, + 12,36, 0,12,12,12,12,48,12,36,12,12, + 12,60,12, 0, 0,12,12,72,12,72,12,12, + 12,60,12, 0,12,12,12,72,12,72, 0,12, + 12,12,12,12,12, 0, 0,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12, 0 +}; + +int +utf8·decodeprev(byte *s, rune *r) +{ + int n; + rune v; + uint8 b, t, d, x=ACCEPT; + + v=0, n=0, d=0; +nextbyte: + b = ((uint8 *)s)[-n++]; + t = decode[b]; + x = decode[256+x+t]; + + if(x > REJECT && n < UTFmax){ + v = v | ((b & TMask) << d); + d += 6; + goto nextbyte; + } + + if(x != ACCEPT) + *r = RuneErr; + else{ + v |= (((0xFFu >> t) & b) << d); + *r = v; + } + + return n; +} diff --git a/src/base/utf/encode.c b/src/base/utf/encode.c new file mode 100644 index 0000000..fa7c93e --- /dev/null +++ b/src/base/utf/encode.c @@ -0,0 +1,69 @@ +#include "internal.h" + +int +utf8·encode(rune *r, byte *s) +{ + rune c; + + c = *r; + if(c < Rune1Byte){ // 7 bits + s[0] = (uint8)c; + return 1; + } + + if(c < Rune2Byte){ // 11 bits + s[0] = TByte1 | (c >> 6); + s[1] = Tx | (c & TMask); + return 2; + } + + if(c < Rune3Byte){ // 16 bits + s[0] = TByte2 | ((c >> 12)); + s[1] = Tx | ((c >> 6) & TMask); + s[2] = Tx | ((c) & TMask); + return 3; + } + + // 22 bits + if(c > RuneMax || 
(RuneSurrogateMin <= c && c <= RuneSurrogateMax)) + c = RuneErr; + + s[0] = TByte3 | ((c >> 18)); + s[1] = Tx | ((c >> 12) & TMask); + s[2] = Tx | ((c >> 6) & TMask); + s[3] = Tx | ((c) & TMask); + + return 4; +} + +#if 0 +int +utf8·encode(rune* r, byte* s) +{ + int i, j; + rune c; + + c = *r; + if(c <= Rune1) { + s[0] = c; + return 1; + } + + for(i = 2; i < UTFmax + 1; i++){ + if(i == 3){ + if(c > RuneMax) + c = RuneErr; + if(SurrogateMin <= c && c <= SurrogateMax) + c = RuneErr; + } + if(c <= RuneX(i) || i == UTFmax) { + s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); + for(j = 1; j < i; j++) + s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); + return i; + } + } + + return UTFmax; +} +#endif diff --git a/src/base/utf/find.c b/src/base/utf/find.c new file mode 100644 index 0000000..d75feb8 --- /dev/null +++ b/src/base/utf/find.c @@ -0,0 +1,31 @@ +#include "internal.h" + +byte* +utf8·find(byte* s, rune c) +{ + long c1; + rune r; + int n; + + if(c < Tx) + return strchr(s, c); + + for(;;){ + c1 = *(ubyte*)s; + if(c1 < Tx){ + if(c1 == 0) return nil; + if(c1 == c) return s; + s++; + continue; + } + + n = utf8·decode(s, &r); + + if(r == c) + return s; + + s += n; + } + + return nil; +} diff --git a/src/base/utf/findlast.c b/src/base/utf/findlast.c new file mode 100644 index 0000000..ab25ab2 --- /dev/null +++ b/src/base/utf/findlast.c @@ -0,0 +1,32 @@ +#include "internal.h" + +byte* +utf8·findlast(byte* s, rune c) +{ + long c1; + rune r; + byte *l; + + if(c < Tx) + return strrchr(s, c); + + l = nil; + for(;;){ + c1 = *(ubyte*)s; + if(c1 < Tx){ + if(c1 == 0) return l; + if(c1 == c) l = s; + s++; + continue; + } + + c1 = utf8·decode(s, &r); + + if(r == c) + l = s; + + s += c1; + } + + return nil; +} diff --git a/src/base/utf/internal.h b/src/base/utf/internal.h new file mode 100644 index 0000000..49945dd --- /dev/null +++ b/src/base/utf/internal.h @@ -0,0 +1,37 @@ +#pragma once + +#include <u.h> +#include <base.h> + +/* + * NOTE: we use the preprocessor to ensure we have unsigned 
/*
 * UTF-8 layout (x = payload bit):
 *	1 byte:   0xxxxxxx                                 7 bits
 *	2 bytes:  110xxxxx 10xxxxxx                       11 bits
 *	3 bytes:  1110xxxx 10xxxxxx 10xxxxxx              16 bits
 *	4 bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx     21 bits
 *
 * The preprocessor is used so that the byte/rune constants are unsigned.
 */

#define Tx     0x80u // 0b10000000 transfer header
#define TMask  0x3Fu // 0b00111111 transfer mask

#define TByte1 0xC0u // 0b11000000 lead byte of a 2-byte sequence
#define TByte2 0xE0u // 0b11100000 lead byte of a 3-byte sequence
#define TByte3 0xF0u // 0b11110000 lead byte of a 4-byte sequence
#define TByte4 0xF8u // 0b11111000 first value that is not a valid lead byte

#define RuneMask 0x1FFFFFu

/* RuneNByte: smallest rune that does NOT fit in N bytes (exclusive bound) */
#define Rune1Byte 0x000080u // 1 << 7  (1 byte)
#define Rune2Byte 0x000800u // 1 << 11 (2 bytes)
#define Rune3Byte 0x010000u // 1 << 16 (3 bytes)
#define Rune4Byte 0x200000u // 1 << 21 (4 bytes)

/* UTF-16 surrogate halves: never valid as scalar values */
#define RuneSurrogateMin 0x00D800
#define RuneSurrogateMax 0x00DFFF
+NEED_OBJS=\ + $(OBJ_DIR)/base/arg.o\ + $(OBJ_DIR)/base/utf/decode.o\ + $(OBJ_DIR)/base/error/panicf.o\ + $(OBJ_DIR)/base/io/readln.o\ + $(OBJ_DIR)/base/io/open.o\ + $(OBJ_DIR)/base/io/close.o + +$(d)/utf/vendor/common.o: $(d)/utf/vendor/common.c + $(COMPILE) + +# rune categories +$(d)/utf/vendor/UnicodeData-$(UNICODE).txt: + @echo "GET UnicodeData.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@ + +$(d)/utf/vendor/mkrunetype: $(d)/utf/vendor/mkrunetype.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +GENS += $(d)/utf/vendor/mkrunetype + +$(d)/utf/runetype-$(UNICODE).c: $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/mkrunetype + @$(dir $@)vendor/mkrunetype $< > $@ + +# rune widths +$(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt: + @echo "GET EastAsianWidth.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@ + +$(d)/utf/vendor/EmojiData-$(UNICODE).txt: + @echo "GET EmojiData.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@ + +$(d)/utf/vendor/mkrunewidth: $(d)/utf/vendor/mkrunewidth.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +GENS += $(d)/utf/vendor/mkrunewidth + +$(d)/utf/runewidth-$(UNICODE).c: $(d)/utf/vendor/mkrunewidth $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt $(d)/utf/vendor/EmojiData-$(UNICODE).txt + @$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@ + +# grapheme boundaries +$(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt: + @echo "GET GraphemeBreakProperty.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@ + +$(d)/utf/vendor/mkgraphemedata: $(d)/utf/vendor/mkgraphemedata.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +$(d)/utf/graphemedata-$(UNICODE).c: $(d)/utf/vendor/mkgraphemedata $(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt + $^ > $@ + +GENS += $(d)/utf/vendor/mkgraphemedata diff --git 
a/src/base/utf/runelen.c b/src/base/utf/runelen.c new file mode 100644 index 0000000..dac7f15 --- /dev/null +++ b/src/base/utf/runelen.c @@ -0,0 +1,8 @@ +#include "internal.h" + +int +utf8·runelen(rune r) +{ + byte s[10]; + return utf8·encode(&r, s); +} diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c new file mode 100644 index 0000000..fcf1177 --- /dev/null +++ b/src/base/utf/vendor/common.c @@ -0,0 +1,220 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// input functions + +int +parse(io·Stream *io, int nfield, char **field, int len, char *line) +{ + int n; + if((n=io·readln(io, len, line)) <= 0) + return ParseEOF; + + if(n == len) + panicf("line too long"); + + if(line[n-1] != '\n') + panicf("invalid line: expected '\n', found '%c'", line[n]); + + line[n-1] = 0; + + if(line[0] == '#' || line[0] == 0) + return ParseSkip; + + /* tokenize line into fields */ + n = 0; + field[n] = line; + while(*line){ + if(*line == ';'){ + *line = 0; + field[++n] = line+1; + } + line++; + } + + if(n != nfield-1) + panicf("expected %d number of fields, got %d: %s", nfield, n, line); + + return ParseOK; +} + +int +codepoint(char *s) +{ + int c, b; + + c = 0; + while((b=*s++)){ + c <<= 4; + if(b >= '0' && b <= '9') + c += b - '0'; + else if(b >= 'A' && b <= 'F') + c += b - 'A' + 10; + else + panicf("bad codepoint char '%c'", b); + } + + return c; +} + +void +codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop) +{ + int e, c; + char *other[NumFields], line[1024]; + + // XXX: the stop variable passes in the previous stopping character + e = *stop; + c = codepoint(field[Fcode]); + + if(c >= NumRunes) + panicf("unexpected large codepoint %x", c); + if(c <= e) + panicf("bad code sequence: %x then %x", e, c); + e = c; + + if(strstr(field[Fname], ", First>") != nil){ + if(!parse(utf8, arrlen(other), other, arrlen(line), line)) + panicf("range start at end of file"); + 
if(strstr(other[Fname], ", Last>") == nil) + panicf("range start not followed by range end"); + + e = codepoint(other[Fcode]); + + if(e <= c) + panicf("bad code sequence: %x then %x", c, e); + if(strcmp(field[Fcategory], other[Fcategory]) != 0) + panicf("range with mismatched category"); + } + + *start = c; + *stop = e; +} + +// ----------------------------------------------------------------------- +// output functions + +void +putsearch(void) +{ + puts( + "#include <u.h>\n" + "#include <base/utf.h>\n" + "\n" + "static\n" + "rune*\n" + "rangesearch(rune c, rune *t, int n, int ne)\n" + "{\n" + " rune *p;\n" + " int m;\n" + " while(n > 1) {\n" + " m = n >> 1;\n" + " p = t + m*ne;\n" + " if(c >= p[0]){\n" + " t = p;\n" + " n = n-m;\n" + " }else\n" + " n = m;\n" + " }\n" + " if(n && c >= t[0])\n" + " return t;\n" + " return 0;\n" + "}\n" + ); + +} + +int +putrange(char *ident, char *prop, int force) +{ + int l, r, start; + + start = 0; + for(l = 0; l < NumRunes;) { + if(!prop[l]){ + l++; + continue; + } + + for(r = l+1; r < NumRunes; r++){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(force || r > l + 1){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r-1); + } + + l = r; + } + + if(start) + printf("};\n\n"); + + return start; +} + +int +putpair(char *ident, char *prop) +{ + int l, r, start; + + start = 0; + for(l=0; l+2 < NumRunes; ){ + if(!prop[l]){ + l++; + continue; + } + + for(r = l + 2; r < NumRunes; r += 2){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(r != l + 2){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r - 2); + } + + l = r; + } + + if(start) + printf("};\n\n"); + return start; +} + +int +putsingle(char *ident, char *prop) +{ + int i, start; + + start = 0; + for(i = 0; i < NumRunes; i++) { + if(!prop[i]) + continue; + + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + 
prop[i] = 0; + printf("\t0x%.4x,\n", i); + } + + if(start) + printf("};\n\n"); + + return start; +} diff --git a/src/base/utf/vendor/common.h b/src/base/utf/vendor/common.h new file mode 100644 index 0000000..51a53bd --- /dev/null +++ b/src/base/utf/vendor/common.h @@ -0,0 +1,45 @@ +#pragma once + +#include <u.h> +#include <base.h> + +enum +{ + // Fields inside UnicodeData.txt + Fcode, + Fname, + Fcategory, + Fcombine, + Fbidir, + Fdecomp, + Fdecimal, + Fdigit, + Fnumeric, + Fmirror, + Foldname, + Fcomment, + Fupper, + Flower, + Ftitle, + + NumFields, + NumRunes = 1 << 21, +}; + +/* input functions */ +enum +{ + ParseEOF, + ParseOK, + ParseSkip, +}; + +int parse(io·Stream *io, int nfield, char **field, int len, char *line); +int codepoint(char *s); +void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop); + +/* output functions */ +void putsearch(void); +int putrange(char *ident, char *prop, int force); +int putpair(char *ident, char *prop); +int putsingle(char *ident, char *prop); diff --git a/src/base/utf/vendor/mkgraphemedata.c b/src/base/utf/vendor/mkgraphemedata.c new file mode 100644 index 0000000..ce5a952 --- /dev/null +++ b/src/base/utf/vendor/mkgraphemedata.c @@ -0,0 +1,24 @@ +#include <u.h> +#include <base.h> +#include <libutf.h> + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkgraphemedata <GraphemeBreakProperty.txt>\n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + io·Stream *utf8; + char line[1024]; + + ARGBEGIN{ + }ARGEND; +} diff --git a/src/base/utf/vendor/mkrunetype.c b/src/base/utf/vendor/mkrunetype.c new file mode 100644 index 0000000..b33df32 --- /dev/null +++ b/src/base/utf/vendor/mkrunetype.c @@ -0,0 +1,390 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// globals + +#define OFFSET (1 << 20) +#define DELTA(mapx, x) ((1 << 20) + (mapx) - 
/*
 * isrange / ispair / issingle: build the identifier "is<label>_range",
 * "is<label>_pair" or "is<label>_single" and hand it to the matching
 * put* table writer.  Each returns what the writer returns.
 *
 * snprintf returns the length the full string would have had, so
 * truncation is any result >= the buffer size; a negative result is an
 * encoding error.  Both panic.
 */
static
int
isrange(char *label, char *prop, int force)
{
	char ident[128];
	int n;

	n = snprintf(ident, arrlen(ident), "is%s_range", label);
	if(n < 0 || n >= (int)arrlen(ident))
		panicf("out of identifier space\n");

	return putrange(ident, prop, force);
}

static
int
ispair(char *label, char *prop)
{
	char ident[128];
	int n;

	n = snprintf(ident, arrlen(ident), "is%s_pair", label);
	if(n < 0 || n >= (int)arrlen(ident))
		panicf("out of identifier space\n");

	return putpair(ident, prop);
}

static
int
issingle(char *label, char *prop)
{
	char ident[128];
	int n;

	n = snprintf(ident, arrlen(ident), "is%s_single", label);
	if(n < 0 || n >= (int)arrlen(ident))
		panicf("out of identifier space\n");

	return putsingle(ident, prop);
}
label); + } + + if(hass) + printf( + " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + label, label); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + +/* + * print to<label>_range: one (first, last, delta) triple per run of consecutive + * runes that share the same DELTA. single-rune runs are printed only when force is set. + * every rune covered by a printed triple is reset to the identity in index. + * returns 1 if a table was printed, 0 otherwise. + */ +static +int +torange(char *label, int *index, int force) +{ + int l, r, d, start = 0; + + for(l = 0; l < NumRunes; ){ + if(index[l] == l){ + l++; + continue; + } + + d = DELTA(index[l], l); + if(d != (rune)d) + panicf("bad map delta %d", d); + + /* extend the run while the delta stays the same */ + for(r = l+1; r < NumRunes; r++){ + if(DELTA(index[r], r) != d) + break; + index[r] = r; + } + + if(force || r != l + 1){ + if(!start){ + printf("static rune to%s_range[] = {\n", label); + start = 1; + } + index[l] = l; + printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d); + } + l = r; + } + if(start) + printf("};\n\n"); + + return start; +} + +/* + * print to<label>_pair: (first, last, delta) triples for runs of every second rune + * that share the same DELTA; a run needs at least two members to be printed. + * returns 1 if a table was printed, 0 otherwise. + */ +static +int +topair(char *label, int *index) +{ + int l, r, d, start = 0; + + for(l = 0; l + 2 < NumRunes; ){ + if(index[l] == l){ + l++; + continue; + } + + d = DELTA(index[l], l); + if(d != (rune)d) + panicf("bad delta %d", d); + + /* step by two: only every other rune belongs to the run */ + for(r = l+2; r < NumRunes; r += 2){ + if(DELTA(index[r], r) != d) + break; + index[r] = r; + } + + if(r > l+2){ + if(!start){ + printf("static rune to%s_pair[] = {\n", label); + start = 1; + } + index[l] = l; + printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d); + } + + l = r; + } + if(start) + printf("};\n\n"); + + return start; +} + +/* + * print to<label>_single: one (rune, delta) pair for every rune that still + * maps away from itself. returns 1 if a table was printed, 0 otherwise. + */ +static +int +tosingle(char *label, int *index) +{ + int i, d, start = 0; + + for(i=0; i < NumRunes; i++) { + if(index[i] == i) + continue; + + d = DELTA(index[i], i); + if(d != (rune)d) + panicf("bad map delta %d", d); + + if(!start){ + printf("static rune to%s_single[] = {\n", label); + start = 1; + } + index[i] = i; + printf("\t0x%.4x, %d,\n", i, d); + } + if(start) + printf("};\n\n"); + + return start; +} + +/* emit the mapping tables for one case conversion, then print the utf8·to<label>() function that searches them */ +static +void +mkto(char *label, int *index, int pairs, int onlyrange) +{ + int hasr, hasp=0, hass=0; + + // NOTE(review): force is passed as !onlyrange, whereas makeis hands onlyranges to isrange unchanged -- confirm the inversion is intended + hasr = torange(label, index, !onlyrange); + if(!onlyrange && pairs) + hasp = topair(label, index); + if(!onlyrange)
+ hass = tosingle(label, index); + + printf( + "rune\n" + "utf8·to%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + /* the tables store DELTA values, i.e. offset + OFFSET, so the generated code subtracts OFFSET again */ + if(hasr) + printf( + " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return c + p[2] - %d;\n", + label, label, OFFSET); + + if(hasp) + printf( + " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return c + p[2] - %d;\n", + label, label, OFFSET); + + if(hass) + printf( + " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n" + " if(p && c == p[0])\n" + " return c + p[1] - %d;\n", + label, label, OFFSET); + + + printf( + " return c;\n" + "}\n" + "\n" + ); +} + +// ----------------------------------------------------------------------- +// main point of entry + +/* print the command line synopsis to stderr and exit with status 1 */ +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunetype <UnicodeData.txt>\n"); + exit(1); +} + +/* + * opens argv[0] as the input, fills the property and case-mapping tables from it + * and prints the generated lookup code with printf. + */ +int +main(int argc, char *argv[]) +{ + int i, sc, c, ec; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + ARGBEGIN{ + }ARGEND; + + if(argc != 1) + usage(); + + if(!(utf8 = io·open(argv[0], "r"))) + panicf("can't open %s\n", argv[0]); + + /* by default each character maps to itself */ + for(i = 0; i < NumRunes; i++) { + table.toupper[i] = i; + table.tolower[i] = i; + table.totitle[i] = i; + } + + /* ensure all C locale white space characters pass */ + table.isspace['\t'] = 1; + table.isspace['\n'] = 1; + table.isspace['\r'] = 1; + table.isspace['\f'] = 1; + table.isspace['\v'] = 1; + table.isspace[0x85] = 1; + + ec = -1; + // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any + // the loop stops on a zero result, which is ParseEOF; ParseSkip is not told apart from ParseOK + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + /* parse unicode range */ + codepointrange(utf8, field, &sc, &ec); + prop = field[Fcategory]; + + for(c = sc; c <= ec; c++){ + /* grab properties */ + switch(prop[0]){ + case 'L': + table.isalpha[c] = 1; + switch(prop[1]){ + case 'u': table.isupper[c] = 1; break;
+ case 'l': table.islower[c] = 1; break; + case 't': table.istitle[c] = 1; break; + case 'm': break; // modifier letters + case 'o': break; // other letters (e.g. ideographs) + default: + goto badproperty; + } + break; + + case 'Z': + table.isspace[c] = 1; + break; + + case 'M': + table.ismark[c] = 1; + break; + + case 'N': + table.isdigit[c] = 1; + break; + + case 'P': + table.ispunct[c] = 1; + break; + + case 'S': + table.issymbl[c] = 1; + break; + + case 'C': + table.iscntrl[c] = 1; + break; + + default: badproperty: + panicf("unrecognized category '%s'", prop); + } + /* grab transformations */ + if(*field[Fupper]) + table.toupper[c] = codepoint(field[Fupper]); + if(*field[Flower]) + table.tolower[c] = codepoint(field[Flower]); + if(*field[Ftitle]) + table.totitle[c] = codepoint(field[Ftitle]); + } + } + io·close(utf8); + + putsearch(); + + // NOTE: ismark, issymbl and iscntrl are filled above but no table is emitted for them here + makeis("space", table.isspace, 0, 1); + makeis("digit", table.isdigit, 0, 1); + makeis("alpha", table.isalpha, 0, 0); + makeis("upper", table.isupper, 1, 0); + makeis("lower", table.islower, 1, 0); + makeis("title", table.istitle, 1, 0); + makeis("punct", table.ispunct, 1, 0); + + mkto("upper", table.toupper, 1, 0); + mkto("lower", table.tolower, 1, 0); + mkto("title", table.totitle, 1, 0); + + return 0; +} diff --git a/src/base/utf/vendor/mkrunewidth.c b/src/base/utf/vendor/mkrunewidth.c new file mode 100644 index 0000000..14e6973 --- /dev/null +++ b/src/base/utf/vendor/mkrunewidth.c @@ -0,0 +1,325 @@ +#include "common.h" + +/* + * inspired by design choices in utf8proc/charwidths.jl + * all widths default to 1 unless they fall within the categories: + * 1. Mn 2. Mc 3. Me 4. Zl + * 5. Zp 6. Cc 7. Cf 8. Cs + * these default to zero width + */ +enum +{ + /* width ? */ + WidthNeutral, /* (N) practically treated like narrow but unclear ... */ + WidthAmbiguous, /* (A) sometimes wide and sometimes not...
*/ + /* width 1 */ + WidthHalf, /* (H) = to narrow (compatibility equivalent) */ + WidthNarrow, /* (Na) ASCII width */ + /* width 2 */ + WidthWide, /* (W) 2x width */ + WidthFull, /* (F) = to wide (compatibility equivalent) */ +}; + +/* width[w][c] is 1 when rune c has display width w (0, 1 or 2) */ +struct Table +{ + char width[3][NumRunes]; +}; + +static struct Table table; + +// ----------------------------------------------------------------------- +// internal functions + +/* + * first pass: derive a default width from the general category of every entry. + * Mn/Mc/Me, Zl/Zp and Cc/Cf/Cs get width 0, everything else width 1. + */ +static +void +parse_category(char *path) +{ + int sc, c, ec, w; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + + // NOTE: we don't check for comments here + ec = -1; + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + codepointrange(utf8, field, &sc, &ec); + + prop = field[Fcategory]; + + switch(prop[0]){ + case 'M': + switch(prop[1]){ + case 'n': case 'c': case 'e': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'Z': + switch(prop[1]){ + case 'l': case 'p': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'C': + switch(prop[1]){ + case 'c': case 'f': case 's': + w = 0; + break; + default: + w = 1; + break; + } + /* must not fall into the outer default, which would force w = 1 for Cc/Cf/Cs */ + break; + default: + w = 1; + break; + } + + for(c = sc; c <= ec; c++) + table.width[w][c] = 1; + } + + io·close(utf8); +} + +/* + * parse "XXXX" or "XXXX..YYYY" into [*l, *r]. + * the ".." separator is overwritten with NULs, so field is modified in place. + */ +static +void +coderange(char *field, int *l, int *r) +{ + char *s; + + if(!(s = strstr(field, ".."))) + *l=*r=codepoint(field); + else{ + *s++ = 0, *s++ = 0; + *l=codepoint(field); + *r=codepoint(s); + } +} + +/* + * second pass: override widths from the east asian width file. + * A and N entries are skipped, Na and H give width 1, W and F give width 2. + */ +static +void +parse_eawidths(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *field[2], line[1024]; + + /* fail loudly like parse_category instead of handing a failed open to parse() */ + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + switch(field[1][0]){ + case 'A': continue; + case 'N': + if(field[1][1] != 'a') + continue; + /* fallthrough */ + case 'H': w = 1; break; + + case 'W': /* fallthrough */ + case 'F': w = 2; break; + + default: + panicf("malformed
east asian width class: %s\n", field[1]); + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + /* ensure it only exists in one table */ + table.width[w][c] = 1; + table.width[(w+1)%3][c] = 0; + table.width[(w+2)%3][c] = 0; + } + } + io·close(utf8); +} + +/* + * third pass: every range whose second field contains "Emoji_Presentation" + * is forced to width 2. + */ +static +void +parse_emoji(char *path) +{ + int at; + int l, c, r; + io·Stream *utf8; + char *s, *field[2], line[1024]; + + /* fail loudly like parse_category instead of handing a failed open to parse() */ + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + /* only override emoji presentation */ + if(!strstr(field[1], "Emoji_Presentation")) + continue; + + /* trim trailing space */ + for(s=field[0]; *s; s++){ + if(*s == ' ') + *s = 0; + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + table.width[0][c] = 0; + table.width[1][c] = 0; + table.width[2][c] = 1; + } + } + + io·close(utf8); +} + +/* output functions */ +/* emit the <label>_range/_pair/_single tables for one width class, then print the static is<label>() predicate that searches them */ +static +void +maketable(char *label, char *table, int pairs, int onlyranges) +{ + int r, p=0, s=0; + char ident[3][128]; + + enum + { + Irange, + Ipair, + Isingle, + }; + + /* ranges */ + /* snprintf returns the untruncated length: any value >= the buffer size means the name did not fit */ + if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) >= arrlen(ident[Irange])) + panicf("out of identifier space\n"); + r = putrange(ident[Irange], table, onlyranges); + + if(!onlyranges && pairs){ + if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) >= arrlen(ident[Ipair])) + panicf("out of identifier space\n"); + p = putpair(ident[Ipair], table); + } + if(!onlyranges){ + if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) >= arrlen(ident[Isingle])) + panicf("out of identifier space\n"); + + s = putsingle(ident[Isingle], table); + } + + printf( + "static int\n" + "is%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(r){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return 1;\n", + ident[Irange], ident[Irange]); + } + + if(p){ + printf( + " p = rangesearch(c, %s,
arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return 1;\n", + ident[Ipair], ident[Ipair]); + } + + if(s) + printf( + " p = rangesearch(c, %s, arrlen(%s), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + ident[Isingle], ident[Isingle]); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + +// ----------------------------------------------------------------------- +// main point of entry + +/* print the command line synopsis to stderr and exit with status 1 */ +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n"); + exit(1); +} + +/* + * force rune c into exactly one width table. + * each macro is a single parenthesised expression with no trailing semicolon, + * so it is safe in an unbraced if/else; c is evaluated three times. + */ +#define SETW0(c) \ + (table.width[0][(c)] = 1, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 0) + +#define SETW1(c) \ + (table.width[0][(c)] = 0, \ + table.width[1][(c)] = 1, \ + table.width[2][(c)] = 0) + +#define SETW2(c) \ + (table.width[0][(c)] = 0, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 1) + + +/* + * runs the three parsing passes over the three file arguments, applies the + * hard-coded overrides, checks that no rune sits in more than one width table + * and prints the generated lookup code. + */ +int +main(int argc, char *argv[]) +{ + int c; + + ARGBEGIN{ + }ARGEND; + + if(argc != 3) + usage(); + + parse_category(*argv++); + parse_eawidths(*argv++); + parse_emoji(*argv); + + /* overrides */ + SETW0(0x2028); + SETW0(0x2029); + + SETW1(0x00AD); + + /* simple checking */ + for(c=0; c<NumRunes; c++){ + if(table.width[0][c] + table.width[1][c] + table.width[2][c] > 1) + panicf("improper table state"); + } + + putsearch(); + + maketable("width0", table.width[0], 1, 0); + maketable("width1", table.width[1], 1, 0); + maketable("width2", table.width[2], 1, 0); + + puts( + "\n" + "int\n" + "utf8·runewidth(rune c)\n" + "{\n" + " if(iswidth1(c))\n" + " return 1;\n" + " if(iswidth2(c))\n" + " return 2;\n" + " return 0;\n" + "}" + ); + + return 0; +} |