40 files changed, 3719 insertions, 80 deletions
diff --git a/src/base/arg.c b/src/base/arg.c
index 269043e..64e4dd6 100644
--- a/src/base/arg.c
+++ b/src/base/arg.c
@@ -1,71 +1 @@
-#include <u.h>
-#include <base.h>
-
-// NOTE: this utf8 bit is copied from libunicode to remove the hard dependency just for ARG_BEGIN.
-
-#define UTFmax   4
-#define RuneSync 0x80u
-#define RuneSelf 0x80u
-#define RuneErr  0xFFFDu
-#define RuneMax  0x10FFFFu
-#define RuneMask 0x1FFFFFu
-
-#define Bit(i) (7-(i))
-/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */
-#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
-/* 0000 0000 0000 0111 1111 1111 */
-#define	RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
-enum
-{
-    Bitx  = Bit(1),
-    Tx    = Tbyte(1),
-    Rune1 = (1 << (Bit(0)+0*Bitx)) - 1,
-
-    Maskx = (1 << Bitx) - 1, /* 0011 1111 */
-    Testx = Maskx ^ 0xff,    /* 1100 0000 */
-
-    SurrogateMin = 0xD800,
-    SurrogateMax = 0xDFFF,
-    Bad = RuneErr,
-};
-
-
-int
-arg·bytetorune(uint32* r, byte* s)
-{
-    int c[4], i;
-    uint32 l;
-
-    c[0] = *(ubyte*)(s);
-    if(c[0] < Tx) {
-        *r = c[0];
-        return 1;
-    }
-
-    l = c[0];
-    for(i = 1; i < UTFmax; i++) {
-        c[i]  = *(ubyte*)(s+i);
-        c[i] ^= Tx;
-        if (c[i] & Testx) goto bad;
-
-        l = (l << Bitx) | c[i];
-        if(c[0] < Tbyte(i + 2)) {
-            l &= RuneX(i + 1);
-            if (i == 1) {
-                if (c[0] < Tbyte(2) || l <= Rune1)
-                    goto bad;
-            } else if (l <= RuneX(i) || l > RuneMax)
-                goto bad;
-            if (i == 2 && SurrogateMin <= l && l <= SurrogateMax)
-                goto bad;
-
-            *r = l;
-            return i + 1;
-        }
-    }
-bad:
-    *r = RuneErr;
-    return 1;
-}
-
 char *argv0;
diff --git a/src/base/fmt/buffer.c b/src/base/fmt/buffer.c
new file mode 100644
index 0000000..0099e72
--- /dev/null
+++ b/src/base/fmt/buffer.c
@@ -0,0 +1,60 @@
+#include "internal.h"
+/* flush: double the heap buffer behind io; returns 1 on success, 0 when there is no buffer or the reallocation fails. */
+static int
+flush(fmt·State *io)
+{
+    int  n;
+    char *s;
+
+    void *heap = io->heap;
+    mem·Reallocator mem = io->mem;
+
+    if(!io->buffer.beg)
+        return 0;
+
+    n = 2*(uintptr)io->file; /* io->file carries the current capacity as an integer */
+    s = io->buffer.beg; /* kept so cur can be rebased and the old block freed on failure */
+
+    io->buffer.beg = mem.realloc(heap, io->buffer.beg, n, 1);
+    if(!io->buffer.beg){
+        io->file = io->buffer.cur = io->buffer.end = nil;
+        mem.free(heap, s);
+        return 0;
+    }
+    io->file = (void*)(uintptr)n;
+    io->buffer.cur = io->buffer.beg + (io->buffer.cur - s);
+    io->buffer.end = io->buffer.beg + n - 1; /* end points at the last byte of the allocation */
+
+    return 1;
+}
+/* fmt·make: zero io and give it a 32-byte growable buffer from (mem, heap); returns 0 on success, -1 if the allocation fails. */
+int
+fmt·make(mem·Reallocator mem, void *heap, fmt·State *io)
+{
+    int n = 32; /* initial capacity in bytes */
+
+    memset(io, 0, sizeof(*io));
+    io->mem  = mem; /* flush() and fmt·free() fetch the allocator back from the state */
+    io->heap = heap;
+    io->buffer.beg = io->buffer.cur = mem.alloc(heap, n, 1);
+    if(!io->buffer.beg)
+        return -1;
+    io->buffer.end = io->buffer.beg + n - 1;
+
+    io->flush = flush;
+    io->file  = (void*)(uintptr)n; /* capacity is stashed in file; flush() doubles it */
+    io->n     = 0;
+
+    fmt·setlocale(io, nil, nil, nil);
+    return 0;
+}
+/* fmt·free: release the buffer through the allocator stored in io and clear the three buffer pointers. */
+void
+fmt·free(fmt·State *io)
+{
+    void *heap = io->heap;
+    mem·Reallocator mem = io->mem;
+
+    mem.free(heap, io->buffer.beg);
+    io->buffer.beg = io->buffer.cur = io->buffer.end = nil;
+}
diff --git a/src/base/fmt/do.c b/src/base/fmt/do.c
new file mode 100644
index 0000000..bd2e65c
--- /dev/null
+++ b/src/base/fmt/do.c
@@ -0,0 +1,728 @@
+#include "internal.h"
+#include <arch/atomic.h>
+
+#define MaxFmt 128 /* number of slots in the formatter verb table */
+#define atomic·load(p) (*(p)) /* plain dereference; defined after <arch/atomic.h> has been included */
+
+// -----------------------------------------------------------------------
+// globals
+
+/* built in verbs */
+static int fmtflag(fmt·State *);
+static int fmtpercent(fmt·State *);
+static int fmtrune(fmt·State *);
+static int fmtfloat(fmt·State *);
+static int fmtutf8(fmt·State *);
+static int fmtint(fmt·State *);
+static int fmtchar(fmt·State *);
+static int fmtcount(fmt·State *);
+static int fmtstring(fmt·State *);
+static int fmterror(fmt·State *);
+
+static int badfmt(fmt·State *);
+/* formatter: verb table; entries [0, len) are live, built-ins first, fmt·install appends at index len. */
+static struct
+{
+    volatile int len;
+    Verb verb[MaxFmt];
+} formatter =
+{
+    31, /* must equal the number of built-in entries listed below */
+    {
+        {' ', fmtflag},
+        {'#', fmtflag},
+        {'%', fmtpercent},
+        {'\'',fmtflag},
+        {'+', fmtflag},
+        {',', fmtflag},
+        {'-', fmtflag},
+        {'C', fmtrune},
+        {'E', fmtfloat},
+        {'F', fmtfloat},
+        {'G', fmtfloat},
+        {'L', fmtflag},
+        {'S', fmtutf8},
+        {'X', fmtint},
+        {'b', fmtint},
+        {'c', fmtchar},
+        {'d', fmtint},
+        {'e', fmtfloat},
+        {'f', fmtfloat},
+        {'g', fmtfloat},
+        {'h', fmtflag},
+        {'i', fmtint},
+        {'l', fmtflag},
+        {'n', fmtcount},
+        {'o', fmtint},
+        {'p', fmtint},
+        {'r', fmterror},
+        {'s', fmtstring},
+        {'U', fmtflag},
+        {'u', fmtint},
+        {'x', fmtint},
+    }
+};
+
+// -----------------------------------------------------------------------
+// internal functions
+/* format: look up the handler for verb c, newest entry first so installed verbs shadow built-ins; returns badfmt when c is unknown. */
+static Formatter
+format(int c)
+{
+    Verb *v, *e;
+    e = &formatter.verb[atomic·load(&formatter.len)];
+    for(v=e; v > formatter.verb; ){ /* scans indices len-1 down to 0 inclusive */
+        if((--v)->c == c)
+            return v->fmt;
+    }
+
+    return badfmt;
+}
+/* dispatch: parse one verb specification (the text after '%') and run its handler; returns the position after the verb, or nil at end of string or when a handler returns a negative value. */
+static char *
+dispatch(fmt·State *io, char *fmt)
+{
+    rune r;
+    int i, n;
+
+    io->flag  = 0;
+    io->width = io->prec = 0;
+
+    /*
+     * the form of each print verb:
+     * % [flags] verb
+     *   + the verb is a single character
+     *   + each flag is either
+     *      - a single character
+     *      - a decimal numeric string
+     *      - up to 2 decimal strings can be used
+     *      - [width|*].[prec|*]
+     *      - if missing, set to 0
+     *      - if *, grab from varargs
+     */
+    for(;;){
+        fmt += utf8·decode(fmt, &r);
+        io->verb = r;
+        switch(r){
+        case 0:
+            return nil;
+        case '.':
+            io->flag |= fmt·Width|fmt·Prec; /* any number that follows is stored as the precision */
+            continue;
+        case '0':
+            if(!(io->flag & fmt·Width)){
+                io->flag |= fmt·Zero; /* a leading 0 before any width is the zero-pad flag */
+                continue;
+            }
+            /* fallthrough */
+        case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+            i = 0;
+            while('0' <= r && r <= '9'){
+                i = 10*i + (r-'0');
+                r = *fmt++;
+            }
+            fmt--; /* un-read the first non-digit */
+        number:
+            if(io->flag & fmt·Width){
+                io->flag |= fmt·Prec;
+                io->prec = i;
+            }else{
+                io->flag |= fmt·Width;
+                io->width = i;
+            }
+            continue;
+        case '*':
+            i = va_arg(io->args, int);
+            if(i < 0){
+                if(io->flag&fmt·Prec){ /* negative precision: drop the precision entirely */
+                    io->flag &= ~fmt·Prec;
+                    io->prec = 0;
+                    continue;
+                }
+                i = -i; /* negative width: left-justify with the absolute value */
+                io->flag |= fmt·Left;
+            }
+            goto number;
+        }
+        n = format(r)(io); /* handlers return >0 for a flag (keep parsing), 0 when the verb is done, <0 on error */
+        if(n < 0)
+            return nil;
+        if(!n)
+            return fmt;
+    }
+}
+/* flush: commit output up to b, invoke io->flush, and return the new write position; returns nil (and pins end to cur) if there is no flush hook, it fails, or len bytes still do not fit. */
+static char *
+flush(fmt·State *io, char *b, int len)
+{
+    io->n += b - io->buffer.cur;
+    io->buffer.cur = b;
+    if(!io->flush || !(*io->flush)(io) || io->buffer.cur + len >= io->buffer.end) {
+        io->buffer.end = io->buffer.cur;
+        return nil;
+    }
+    return io->buffer.cur;
+}
+/* pad: write n spaces (none when n <= 0), flushing as needed; returns 0, or -1 if a flush fails. */
+static int
+pad(fmt·State *io, int n)
+{
+    int i;
+    char *b=io->buffer.cur, *e=io->buffer.end;
+
+    for(i=0; i<n; i++){
+        if(b>=e){
+            if(!(b=flush(io, b, 1)))
+                return -1;
+            e = io->buffer.end; /* the flush hook may have moved the buffer */
+        }
+        *b++ = ' ';
+    }
+
+    io->n += b - io->buffer.cur;
+    io->buffer.cur = b;
+    return 0;
+}
+/* copy: emit up to n runes from the sz-byte UTF-8 text m, applying width, precision and left-justify flags; returns 0, or -1 if the buffer cannot be flushed. */
+static int
+copy(fmt·State *io, char *m, int sz, int n)
+{
+    ulong f;
+    rune r;
+    int nc, w, nb;
+    char *b, *e, *me;
+
+    w  = 0;
+    f  = io->flag;
+    me = m + sz;
+
+    if(f&fmt·Width)
+        w = io->width;
+    if(f&fmt·Prec && n > io->prec)
+        n = io->prec;
+    if(!(f&fmt·Left) && pad(io, w-n)<0)
+        return -1;
+
+    b = io->buffer.cur;
+    e = io->buffer.end;
+
+    for(nc=n; nc>0; nc--){
+        r = *(uchar *)m;
+        if(utf8·onebyte(r)){
+            nb=1;
+            m++;
+        }else if((me-m) >= UTFmax || utf8·canfit(m, me-m)){
+            nb=utf8·decode(m, &r);
+            m+=nb; /* advance by the bytes just decoded, not by the rune count n */
+        }else
+            break; /* incomplete multi-byte sequence at the end of m */
+
+        if(b+nb>e){ /* room check uses this rune's byte length */
+            if(!(b=flush(io, b, nb)))
+                return -1;
+            e = io->buffer.end;
+        }
+        b += utf8·encode(&r, b);
+    }
+
+    io->n += b - io->buffer.cur;
+    io->buffer.cur = b;
+    if(f&fmt·Left && pad(io, w-n)<0)
+        return -1;
+
+    return 0;
+}
+/* copyrune: emit n runes from m encoded as UTF-8, applying width, precision and left-justify flags; returns 0, or -1 if the buffer cannot be flushed. */
+static int
+copyrune(fmt·State *io, rune *m, int n)
+{
+    ulong f;
+    rune r, *me;
+    int w, nb;
+    char *b, *e;
+
+    w  = 0;
+    f  = io->flag;
+
+    if(f&fmt·Width)
+        w = io->width;
+    if(f&fmt·Prec && n > io->prec)
+        n = io->prec;
+
+    if(!(f&fmt·Left) && pad(io, w-n)<0)
+        return -1;
+
+    b = io->buffer.cur;
+    e = io->buffer.end;
+
+    for(me=m+n; m < me; m++){
+        r  = *m;
+        nb = utf8·runelen(r);
+        if(b + nb > e){
+            if(!(b=flush(io, b, nb)))
+                return -1;
+            e = io->buffer.end;
+        }
+        b += utf8·encode(&r, b);
+    }
+
+    io->n += b - io->buffer.cur;
+    io->buffer.cur = b;
+    if(f&fmt·Left && pad(io, w-n)<0)
+        return -1;
+
+    return 0;
+}
+/* copystring: format the NUL-terminated UTF-8 string s via copy(); a nil s prints "<nil>". */
+static int
+copystring(fmt·State *io, char *s)
+{
+    rune r;
+    int i,j;
+
+    if(!s)
+        return copy(io, "<nil>", 5, 5);
+
+    if(io->flag&fmt·Prec){
+        i = 0; /* i counts bytes, j counts runes, both capped by the precision */
+        for(j=0; j < io->prec && s[i]; j++)
+            i += utf8·decode(s+i, &r);
+
+        return copy(io, s, i, j);
+    }
+    return copy(io, s, strlen(s), utf8·len(s));
+}
+/* copyutf8: format the zero-terminated rune string s via copyrune(); a nil s prints "<nil>". */
+static int
+copyutf8(fmt·State *io, rune *s)
+{
+    rune *e;
+    int n,p;
+
+    if(!s)
+        return copy(io, "<nil>", 5, 5);
+
+    if(io->flag & fmt·Prec){
+        p = io->prec; /* with a precision, look at no more than p runes of s */
+        for(n=0; n<p; n++)
+            if(!s[n])
+                break;
+    }else{
+        for(e=s; *e; e++)
+            ;
+        n = e - s;
+    }
+
+    return copyrune(io, s, n);
+}
+
+// -----------------------------------------------------------------------
+// format helpers
+/* needseperate: count one more digit in *digits and return 1 when the current group (first byte of *groups) is exceeded, i.e. a separator is due; otherwise 0. */
+static int
+needseperate(int *digits, char **groups)
+{
+    int group;
+
+    (*digits)++;
+    group = *(uchar *)*groups;
+    /* group bytes 0xFF, 0x7f and 0 never request a separator */
+    if(group == 0xFF || group == 0x7f || group == 0x00)
+        return 0;
+    if(*digits > group){
+        if((*groups)[1] != 0) /* move to the next group size; the last one repeats */
+            (*groups)++;
+        *digits = 1;
+        return 1;
+    }
+    return 0;
+}
+
+// -----------------------------------------------------------------------
+// formatters
+/* fmtchar: %c handler; fetches an int argument and emits it as a single byte through copy(). */
+static int
+fmtchar(fmt·State *io)
+{
+    char x[1];
+    x[0] = va_arg(io->args, int);
+    io->prec = 1;
+
+    return copy(io, x, 1, 1);
+}
+/* fmtstring: %s handler; fetches a char* argument and formats it with copystring(). */
+static int
+fmtstring(fmt·State *io)
+{
+    char *s;
+    s = va_arg(io->args, char *);
+    return copystring(io, s);
+}
+/* fmterror: %r handler; consumes no argument and formats strerror(errno). */
+static int
+fmterror(fmt·State *io)
+{
+    char *s;
+    s = strerror(errno);
+    return copystring(io, s);
+}
+/* fmtrune: %C handler; fetches an int argument and emits it as one rune through copyrune(). */
+static int
+fmtrune(fmt·State *io)
+{
+    rune x[1];
+
+    x[0] = va_arg(io->args, int);
+    return copyrune(io, x, 1);
+}
+/* fmtutf8: %S handler; fetches a rune* argument and formats it with copyutf8(). */
+static int
+fmtutf8(fmt·State *io)
+{
+    rune *s;
+
+    s = va_arg(io->args, rune *);
+    return copyutf8(io, s);
+}
+/* fmtpercent: %% handler; emits the verb character itself (io->verb) as one rune. */
+static int
+fmtpercent(fmt·State *io)
+{
+    rune x[1];
+
+    x[0] = io->verb;
+    io->prec = 1;
+    return copyrune(io, x, 1);
+}
+/* fmtint: handler for the integer verbs (d i u o x X b p); fetches the argument per the size flags, builds the digits right-to-left in buf, and hands the text to copy(). Returns copy()'s result, or -1 for an unexpected verb. */
+static int
+fmtint(fmt·State *io)
+{
+    union{
+        ulong  u;
+        uvlong v;
+    } val;
+    int neg, base, i, n, f, w, isv;
+    int digits, bytes, runes, excess;
+    char *groups, *thousands;
+    char *p, *conv, buf[140];
+
+    f = io->flag;
+    neg   = 0;
+    isv   = 0;
+    val.u = 0;
+    /* these verbs are always unsigned and never take a sign or space prefix */
+    switch(io->verb){
+    case 'o': case 'p': case 'u': case 'x': case 'X':
+        f |= fmt·Unsigned;
+        f &= ~(fmt·Sign|fmt·Space);
+    }
+
+    /* set flags */
+    if(io->verb=='p'){
+        val.u = (ulong)va_arg(io->args, void*);
+        io->verb = 'x'; /* pointers are printed as hexadecimal */
+        f |= fmt·Unsigned;
+    }else if(f&fmt·Vlong){
+        isv=1;
+        if(f&fmt·Unsigned)
+            val.v = va_arg(io->args, uvlong);
+        else
+            val.v = va_arg(io->args, vlong);
+    }else if(f&fmt·Long){
+        if(f&fmt·Unsigned)
+            val.u = va_arg(io->args, ulong);
+        else
+            val.u = va_arg(io->args, long);
+    }else if(f&fmt·Byte){
+        if(f&fmt·Unsigned)
+            val.u = (uchar)va_arg(io->args, int);
+        else
+            val.u = (char)va_arg(io->args, int);
+    }else if(f&fmt·Short){
+        if(f&fmt·Unsigned)
+            val.u = (ushort)va_arg(io->args, int);
+        else
+            val.u = (short)va_arg(io->args, int);
+    }else{
+        if(f&fmt·Unsigned)
+            val.u = va_arg(io->args, uint);
+        else
+            val.u = va_arg(io->args, int);
+    }
+
+    conv = "0123456789abcdef";
+    groups = "\4"; /* default grouping of 4 digits; decimal verbs use io->groups instead */
+    thousands = io->thousands;
+    /* get base */
+    switch(io->verb){
+    case 'd': case 'i': case 'u':
+        base = 10;
+        groups = io->groups;
+        break;
+    case 'X':
+        conv = "0123456789ABCDEF";
+        /*fallthrough*/
+    case 'x':
+        base = 16;
+        thousands = ":";
+        break;
+    case 'b':
+        base = 2;
+        thousands = ":";
+        break;
+    case 'o':
+        base = 8;
+        break;
+    default:
+        return -1;
+    }
+
+    /* check for negativity */
+    if(!(f&fmt·Unsigned)){
+        if(isv && (vlong)val.v < 0){
+            val.v = -(vlong)val.v;
+            neg = 1;
+        }else if(!isv && (long)val.u < 0){
+            val.u = -(long)val.u;
+            neg = 1;
+        }
+    }
+
+    p = buf + sizeof(buf) - 1; /* p walks backwards from the last byte of buf */
+    n = 0;
+    digits = 0;
+    excess = 0;
+    runes = utf8·len(thousands);
+    bytes = strlen(thousands);
+
+#define PARSE(VALUE)                                          \
+    while((VALUE)){                                           \
+        i = (VALUE) % base;                                   \
+        (VALUE) /= base;                                      \
+        if((f&fmt·Comma) && n%4 == 3){                        \
+            *p-- = ',';                                       \
+            n++;                                              \
+        }                                                     \
+        if((f&fmt·Apost) && needseperate(&digits, &groups)){  \
+            n += runes;                                       \
+            excess += bytes - runes;                          \
+            p -= bytes;                                       \
+            memmove(p+1, thousands, bytes);                   \
+        }                                                     \
+        *p-- = conv[i];                                       \
+        n++;                                                  \
+    }
+    if(isv)
+        PARSE(val.v)
+    else
+        PARSE(val.u)
+#undef PARSE
+    /* n counts runes produced so far; excess counts the extra bytes of multi-byte separators */
+    if(!n){
+        if(!(f&fmt·Prec) || io->prec != 0 || (io->verb == 'o' && (f&fmt·Sharp))){
+            *p-- = '0';
+            n = 1;
+            if(f&fmt·Apost)
+                needseperate(&digits,&groups);
+        }
+
+        if(io->verb == 'x' || io->verb == 'X')
+            f &= ~fmt·Sharp;
+    }
+    /* precision: left-fill with zeros up to io->prec digits, leaving headroom at the front of buf */
+    for(w = io->prec; n < w && p > buf+3; n++){
+        if((f&fmt·Apost) && needseperate(&digits, &groups)){
+            n += runes;
+            excess += bytes - runes;
+            p -= bytes;
+            memmove(p+1, thousands, bytes);
+        }
+        *p-- = '0';
+    }
+
+    if(neg || (f&(fmt·Sign|fmt·Space)))
+        n++;
+
+    if(f&fmt·Sharp){
+        if(base==16)
+            n += 2;
+        else if(base == 8){
+            if(p[1] == '0')
+                f &= ~fmt·Sharp;
+            else
+                n++;
+        }
+    }
+    /* zero flag: pad to the field width here and clear fmt·Width so copy() adds no spaces */
+    if(f&fmt·Zero && !(f & (fmt·Left|fmt·Prec))){
+        w = 0;
+        if(f & fmt·Width)
+            w = io->width;
+        for(; n < w && p > buf+3; n++){
+            if((f & fmt·Apost) && needseperate(&digits, &groups)){
+                n += runes;
+                excess += bytes - runes;
+                p -= bytes;
+                memmove(p+1, thousands, bytes);
+            }
+            *p-- = '0';
+        }
+        io->flag &= ~fmt·Width;
+    }
+
+    if(f&fmt·Sharp){
+        if(base==16)
+            *p-- = io->verb;
+        if(base==16 || base == 8)
+            *p-- = '0';
+    }
+
+    if(neg)
+        *p-- = '-';
+    else if(f & fmt·Sign)
+        *p-- = '+';
+    else if (f & fmt·Space)
+        *p-- = ' ';
+
+    io->flag &= ~fmt·Prec; /* precision was applied above; keep copy() from truncating */
+    return copy(io, p+1, n+excess, n);
+}
+/* fmtcount: %n handler; stores io->n through the pointer argument, using the integer width selected by the size flags. Always returns 0. */
+static int
+fmtcount(fmt·State *io)
+{
+    void *p;
+    ulong f;
+
+    f = io->flag;
+    p = va_arg(io->args, void*);
+
+    if(f&fmt·Vlong)
+        *(vlong*)p = io->n;
+    else if(f&fmt·Long)
+        *(long*)p = io->n;
+    else if(f&fmt·Byte)
+        *(char*)p = io->n;
+    else if(f&fmt·Short)
+        *(short*)p = io->n;
+    else
+        *(int*)p = io->n;
+
+    return 0;
+}
+/* fmtflag: handler for flag characters; records the flag named by io->verb in io->flag and returns 1 so dispatch keeps parsing. */
+static int
+fmtflag(fmt·State *io)
+{
+    switch(io->verb){
+    case ',':  io->flag |= fmt·Comma;    break;
+    case '-':  io->flag |= fmt·Left;     break;
+    case '+':  io->flag |= fmt·Sign;     break;
+    case '#':  io->flag |= fmt·Sharp;    break;
+    case '\'': io->flag |= fmt·Apost;    break;
+    case ' ':  io->flag |= fmt·Space;    break;
+    case 'U':  io->flag |= fmt·Unsigned; break; /* the table routes 'U' here; 'u' is an integer verb */
+    case 'L':  io->flag |= fmt·Ldouble;  break;
+    case 'h':
+        if(io->flag&fmt·Short) /* second 'h' ("hh") selects byte width */
+            io->flag |= fmt·Byte;
+        io->flag |= fmt·Short;
+        break;
+    case 'l':
+        if(io->flag&fmt·Long) /* second 'l' ("ll") selects vlong width */
+            io->flag |= fmt·Vlong;
+        io->flag |= fmt·Long;
+        break;
+    }
+    return 1;
+}
+/* badfmt: fallback handler for unknown verbs; emits the verb wrapped in percent signs ("%<verb>%") and returns 0. */
+static int
+badfmt(fmt·State *io)
+{
+    int n;
+    char x[UTFmax+2];
+
+    x[0] = '%';
+    n = 1 + utf8·encode(&io->verb, x+1);
+    x[n++] = '%';
+    io->prec = n;
+    copy(io, x, n, n); /* the result of copy() is not checked */
+
+    return 0;
+}
+
+#include "float.c"
+
+// -----------------------------------------------------------------------
+// exports
+/* fmt·do: run the format string fmt against io; literal text is copied through, each '%' goes to dispatch(). Returns the bytes added to io->n by this call, or -1 on flush or verb failure. */
+int
+fmt·do(fmt·State *io, char *fmt)
+{
+    rune r;
+    int  c, n, nfmt = io->n; /* nfmt: count at entry, so the result covers this call only */
+    char *b, *e;
+
+    for(;;){
+        b = io->buffer.cur;
+        e = io->buffer.end;
+        while((c = *(uchar *)fmt) && c != '%'){
+            if(utf8·onebyte(c)){
+                if(b >= e){
+                    if(!(b=flush(io, b, 1)))
+                        return -1;
+                    e = io->buffer.end;
+                }
+                *b++ = *fmt++;
+            }else{
+                n = utf8·decode(fmt, &r); /* n is only the byte length of this rune */
+                if(b + n > e){
+                    if(!(b=flush(io, b, n)))
+                        return -1;
+                    e = io->buffer.end;
+                }
+                while(n--)
+                    *b++ = *fmt++;
+            }
+        }
+        fmt++;
+        io->n += b - io->buffer.cur;
+        io->buffer.cur = b;
+        if(!c) /* we hit our nul terminator */
+            return io->n - nfmt;
+        io->buffer.end = e;
+
+        if(!(fmt=dispatch(io, fmt)))
+            return -1;
+    }
+}
+/* fmt·install: append a handler for verb to the formatter table; a nil func installs badfmt. Returns 0 on success, -1 for a verb outside 1..65535 or a full table. */
+int
+fmt·install(int verb, Formatter func)
+{
+    Verb *v;
+    int i, ret;
+
+lock:
+    if(verb <= 0 || verb >= 65536){
+        ret = -1;
+        goto unlock;
+    }
+    if(!func)
+        func = badfmt;
+
+    if((i = atomic·load(&formatter.len))==MaxFmt)
+        return -1; /* NOTE(review): skips the unlock label; harmless only while lock/unlock are bare labels */
+
+    v = &formatter.verb[i];
+    v->c   = verb;
+    v->fmt = func;
+    /* the entry is filled in before len is bumped */
+    atomic·store(&formatter.len, i+1);
+    ret = 0;
+unlock:
+    return ret;
+}
diff --git a/src/base/fmt/esprint.c b/src/base/fmt/esprint.c
new file mode 100644
index 0000000..6d97340
--- /dev/null
+++ b/src/base/fmt/esprint.c
@@ -0,0 +1,14 @@
+#include "internal.h"
+/* fmt·esprint: variadic front end that forwards buf, end, fmt and the argument list to fmt·vesprint and returns its result. */
+char *
+fmt·esprint(char *buf, char *end, char *fmt, ...)
+{
+    char *p;
+    va_list args;
+
+    va_start(args, fmt);
+    p = fmt·vesprint(buf, end, fmt, args);
+    va_end(args);
+
+    return p;
+}
diff --git a/src/base/fmt/float.c b/src/base/fmt/float.c
new file mode 100644
index 0000000..63ea80f
--- /dev/null
+++ b/src/base/fmt/float.c
@@ -0,0 +1,1077 @@
+#define FDIGIT  30
+#define FDEFLT  6
+#define NSIGNIF 17 /* add1() and sub1() refuse digit counts above this */
+/* bit patterns compared against doubles by isNaN/NaN/isInf; assumes an IEEE 754 64-bit double */
+static uvlong uvnan    = ((uvlong)0x7FF00000<<32)|0x00000001;
+static uvlong uvinf    = ((uvlong)0x7FF00000<<32)|0x00000000;
+static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000;
+
+static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" };
+/* isNaN: true when all bits of uvinf are set in val and some bit outside the uvneginf mask is non-zero. */
+static int
+isNaN(double val)
+{
+    union{
+        uvlong i;
+        double f;
+    }x;
+
+    x.f = val;
+    return (x.i&uvinf) == uvinf && (x.i&~uvneginf) != 0;
+}
+/* NaN: return the double whose bit pattern is uvnan. */
+static double
+NaN(void)
+{
+    union{
+        uvlong i;
+        double f;
+    }x;
+    x.i = uvnan;
+    return x.f;
+}
+/* isInf: sign 0 matches either infinity, sign 1 only the uvinf pattern, any other sign only the uvneginf pattern. */
+static int
+isInf(double val, int sign)
+{
+    union{
+        uvlong i;
+        double f;
+    }x;
+
+    x.f = val;
+    if(sign == 0)
+        return x.i == uvinf || x.i == uvneginf;
+    else if(sign == 1)
+        return x.i == uvinf;
+    else
+        return x.i == uvneginf;
+}
+/* pows10: powers of ten 1e0 through 1e159, indexed by exponent; used by fpow10(). */
+static double pows10[] =
+{
+      1e0,   1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,   1e8,   1e9,
+     1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
+     1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,
+     1e30,  1e31,  1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,
+     1e40,  1e41,  1e42,  1e43,  1e44,  1e45,  1e46,  1e47,  1e48,  1e49,
+     1e50,  1e51,  1e52,  1e53,  1e54,  1e55,  1e56,  1e57,  1e58,  1e59,
+     1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,  1e68,  1e69,
+     1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76,  1e77,  1e78,  1e79,
+     1e80,  1e81,  1e82,  1e83,  1e84,  1e85,  1e86,  1e87,  1e88,  1e89,
+     1e90,  1e91,  1e92,  1e93,  1e94,  1e95,  1e96,  1e97,  1e98,  1e99,
+    1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
+    1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
+    1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
+    1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
+    1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
+    1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
+};
+/* fpow10: return 10 raised to n; table lookup when |n| fits in pows10, otherwise a product of table entries, and the reciprocal for negative n. */
+static double
+fpow10(int n)
+{
+    double d;
+    int neg;
+
+    neg = 0;
+    if(n < 0){
+        neg = 1;
+        n = -n;
+    }
+
+    if(n<arrlen(pows10))
+        d = pows10[n];
+    else{
+        d = pows10[arrlen(pows10)-1]; /* start from the largest entry and peel it off n repeatedly */
+        for(;;){
+            n -= arrlen(pows10)- 1;
+            if(n < arrlen(pows10)){
+                d *= pows10[n];
+                break;
+            }
+            d *= pows10[arrlen(pows10)- 1];
+        }
+    }
+    if(neg)
+        return 1./d;
+    return d;
+}
+/* add1: add one to the n-digit decimal string a in place; returns 1 when every digit was '9' (a becomes '1' followed by zeros), else 0. Does nothing and returns 0 when n is outside 0..NSIGNIF. */
+static int
+add1(char *a, int n)
+{
+    int c;
+    char *b;
+
+    if(n < 0 || n > NSIGNIF)
+        return 0;
+
+    for(b = a+n-1; b >= a; b--){
+        c = *b + 1;
+        if(c <= '9'){
+            *b = c;
+            return 0;
+        }
+        *b = '0';
+    }
+    /*
+     * need to overflow adding digit.
+     * shift number down and insert 1 at beginning.
+     * decimal is known to be 0s or we wouldn't
+     * have gotten this far.  (e.g., 99999+1 => 00000)
+     */
+    a[0] = '1';
+    return 1;
+}
+/* sub1: subtract one from the n-digit decimal string a in place; returns 1 when the leading digit would have become '0' (it is set to '9' instead), else 0. Returns 0 untouched when n is outside 0..NSIGNIF. */
+static int
+sub1(char *a, int n)
+{
+    int c;
+    char *b;
+
+    if(n < 0 || n > NSIGNIF)
+        return 0;
+    for(b = a+n-1; b >= a; b--){
+        c = *b - 1;
+        if(c >= '0'){
+            if(c == '0' && b == a){
+                /*
+                 * just zeroed the top digit; shift everyone up.
+                 * decimal is known to be 9s or we wouldn't
+                 * have gotten this far.  (e.g., 10000-1 => 09999)
+                 */
+                *b = '9';
+                return 1;
+            }
+            *b = c;
+            return 0;
+        }
+        *b = '9'; /* borrow from the next digit to the left */
+    }
+    /*
+     * can't get here.  the number a is always normalized
+     * so that it has a nonzero first digit.
+     */
+    abort();
+}
+
+// -----------------------------------------------------------------------
+// strtod
+
+#define Nbits 28 /* bits kept in each ulong word of the multi-word fraction (see frnorm) */
+#define Nmant 53
+#define Prec ((Nmant+Nbits+1)/Nbits) /* number of words in a fraction array */
+
+#define Sigbit (1<<(Prec*Nbits-Nmant)) /* first significant bit of Prec-th word */
+#define Ndig 1500 /* size of the decimal digit buffer in fmtstrtod */
+#define One  (ulong)(1<<Nbits)
+#define Half (ulong)(One>>1)
+#define Maxe 310 /* decimal exponents beyond +/-Maxe are treated as overflow/underflow */
+
+#define Fsign   (1<<0)  /* found -  */
+#define Fesign  (1<<1)  /* found e- */
+#define Fdpoint (1<<2)  /* found .  */
+
+#define S0  0 /* _  _S0 +S1 #S2 .S3 */
+#define S1  1 /* _+  #S2 .S3 */
+#define S2  2 /* _+#  #S2 .S4 eS5 */
+#define S3  3 /* _+.  #S4 */
+#define S4  4 /* _+#.# #S4 eS5 */
+#define S5  5 /* _+#.#e +S6 #S7 */
+#define S6  6 /* _+#.#e+ #S7 */
+#define S7  7 /* _+#.#e+# #S7 */
+/* Tab: one row of the tab1/tab2 scaling tables used by divascii and mulascii. */
+typedef struct Tab Tab;
+struct Tab
+{
+    int bp; /* power-of-two shift handed to divby/mulby */
+    int siz; /* number of bytes of cmp compared with memcmp */
+    char *cmp; /* digit string the input is compared against */
+};
+/* umuldiv: compute a*b/c in double arithmetic, clamp the result to 4294967295 and return it as a ulong. */
+static ulong
+umuldiv(ulong a, ulong b, ulong c)
+{
+    double d;
+
+    d = ((double)a * (double)b) / (double)c;
+    if(d >= 4294967295.)
+        d = 4294967295.;
+    return (ulong)d;
+}
+/* frnorm: propagate carries through the Prec-word fraction f from the last word toward f[0], leaving Nbits bits in every word except f[0]. */
+static void
+frnorm(ulong *f)
+{
+    int i, c;
+
+    c = 0;
+    for(i=Prec-1; i>0; i--) {
+        f[i] += c;
+        c = f[i] >> Nbits;
+        f[i] &= One-1;
+    }
+    f[0] += c; /* f[0] absorbs the final carry unmasked */
+}
+/* fpcmp: compare the decimal digit string a with the fraction f by generating f's decimal digits one at a time; returns +1 if a is larger, -1 if smaller, 0 if equal. */
+static int
+fpcmp(char *a, ulong* f)
+{
+    ulong tf[Prec];
+    int i, d, c;
+
+    for(i=0; i<Prec; i++)
+        tf[i] = f[i]; /* work on a copy; f is left untouched */
+
+    for(;;) {
+        /* tf *= 10 */
+        for(i=0; i<Prec; i++)
+            tf[i] = tf[i]*10;
+        frnorm(tf);
+        d = (tf[0] >> Nbits) + '0'; /* the bits above Nbits in tf[0] are the next decimal digit */
+        tf[0] &= One-1;
+
+        /* compare next digit */
+        c = *a;
+        if(c == 0) {
+            if('0' < d)
+                return -1;
+            if(tf[0] != 0)
+                goto cont;
+            for(i=1; i<Prec; i++)
+                if(tf[i] != 0)
+                    goto cont;
+            return 0;
+        }
+        if(c > d)
+            return +1;
+        if(c < d)
+            return -1;
+        a++;
+    cont:;
+}
+}
+/* divby: divide the NUL-terminated decimal digit string a by 2^b in place (long division using shifts), adjusting the digit count *na as digits are consumed and produced. */
+static void
+divby(char *a, int *na, int b)
+{
+    int n, c;
+    char *p;
+
+    p = a;
+    n = 0;
+    while(n>>b == 0){ /* gather leading digits until the running value reaches 2^b */
+        c = *a++;
+        if(c == 0) {
+            while(n) {
+                c = n*10;
+                if(c>>b)
+                    break;
+                n = c;
+            }
+            goto xx;
+        }
+        n = n*10 + c-'0';
+        (*na)--;
+    }
+    for(;;){
+        c = n>>b; /* quotient digit */
+        n -= c<<b; /* remainder carried into the next step */
+        *p++ = c + '0';
+        c = *a++;
+        if(c == 0)
+            break;
+        n = n*10 + c-'0';
+    }
+    (*na)++;
+    xx:
+    while(n){ /* input exhausted: keep emitting digits until the remainder is zero */
+        n = n*10;
+        c = n>>b;
+        n -= c<<b;
+        *p++ = c + '0';
+        (*na)++;
+    }
+    *p = 0;
+}
+/* tab1: rows of {bp, siz, cmp} selected by divascii from the decimal point position. */
+static Tab tab1[] =
+{
+     1,  0, "",
+     3,  1, "7",
+     6,  2, "63",
+     9,  3, "511",
+    13,  4, "8191",
+    16,  5, "65535",
+    19,  6, "524287",
+    23,  7, "8388607",
+    26,  8, "67108863",
+    27,  9, "134217727",
+};
+/* divascii: pick a tab1 row from *dp (clamped to the last row), lower *dp, raise *bp by the row's shift, and divide the digit string a by 2^shift via divby. */
+static void
+divascii(char *a, int *na, int *dp, int *bp)
+{
+    int b, d;
+    Tab *t;
+
+    d = *dp;
+    if(d >= (int)(arrlen(tab1)))
+        d = (int)(arrlen(tab1))-1;
+    t = tab1 + d;
+    b = t->bp;
+    if(memcmp(a, t->cmp, t->siz) > 0) /* digits exceed the row's threshold: remove one fewer decimal place */
+        d--;
+    *dp -= d;
+    *bp += b;
+    divby(a, na, b);
+}
+/* mulby: multiply the decimal digits in [a, q) by 2^b, writing the product right-to-left so that it ends just before p, where a NUL terminator is stored. */
+static void
+mulby(char *a, char *p, char *q, int b)
+{
+    int n, c;
+
+    n = 0; /* running carry */
+    *p = 0;
+    for(;;) {
+        q--;
+        if(q < a)
+            break;
+        c = *q - '0';
+        c = (c<<b) + n;
+        n = c/10;
+        c -= n*10;
+        p--;
+        *p = c + '0';
+    }
+    while(n) { /* flush the remaining carry as leading digits */
+        c = n;
+        n = c/10;
+        c -= n*10;
+        p--;
+        *p = c + '0';
+    }
+}
+/* tab2: rows of {bp, siz, cmp} selected by mulascii from the negated decimal point position. */
+static Tab tab2[] =
+{
+     1,  1, "",				/* dp = 0-0 */
+     3,  3, "125",
+     6,  5, "15625",
+     9,  7, "1953125",
+    13, 10, "1220703125",
+    16, 12, "152587890625",
+    19, 14, "19073486328125",
+    23, 17, "11920928955078125",
+    26, 19, "1490116119384765625",
+    27, 19, "7450580596923828125",		/* dp 8-9 */
+};
+/* mulascii: pick a tab2 row from -*dp (clamped to the last row), lower *bp by the row's shift, raise *dp and *na by d, and multiply the digit string a by 2^shift via mulby. */
+static void
+mulascii(char *a, int *na, int *dp, int *bp)
+{
+    char *p;
+    int d, b;
+    Tab *t;
+
+    d = -*dp;
+    if(d >= (int)(arrlen(tab2)))
+        d = (int)(arrlen(tab2))-1;
+    t = tab2 + d;
+    b = t->bp;
+    if(memcmp(a, t->cmp, t->siz) < 0) /* digits below the row's threshold: the product gains one fewer digit */
+        d--;
+    p = a + *na;
+    *bp -= b;
+    *dp += d;
+    *na += d;
+    mulby(a, p+d, p, b); /* the product is written d places to the right of the old end */
+}
+/* cmp: case-insensitive prefix test; returns 0 when a begins with the lowercase string b (upper-case letters in a are folded), 1 otherwise. */
+static int
+cmp(char *a, char *b)
+{
+    int c1, c2;
+
+    while((c1 = *b++) != '\0') {
+        c2 = *a++;
+        if(isupper(c2)) /* NOTE(review): c2 comes from a plain char and may be negative; <ctype.h> expects an unsigned char value */
+            c2 = tolower(c2);
+        if(c1 != c2)
+            return 1;
+    }
+    return 0;
+}
+
+double
+fmtstrtod(char *as, char **aas)
+{
+    int na, ex, dp, bp, c, i, flag, state;
+    ulong low[Prec], hig[Prec], mid[Prec];
+    double d;
+    char *s, a[Ndig];
+
+    flag = 0;      /* Fsign, Fesign, Fdpoint */
+    na = 0;        /* number of digits of a[] */
+    dp = 0;        /* na of decimal point */
+    ex = 0;        /* exonent */
+
+    state = S0;
+    for(s=as;;s++){
+        c = *s;
+        if('0' <= c && c <= '9'){
+            switch(state){
+            case S0: case S1: case S2:
+                state = S2;
+                break;
+            case S3: case S4:
+                state = S4;
+                break;
+            case S5: case S6: case S7:
+                state = S7;
+                ex = ex*10 + (c-'0');
+                continue;
+            }
+
+            if(na == 0 && c == '0'){
+                dp--;
+                continue;
+            }
+            if(na < Ndig-50)
+                a[na++] = c;
+            continue;
+        }
+        switch(c){
+        case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+            if(state == S0)
+                continue;
+            break;
+        case '-':
+            if(state == S0)
+                flag |= Fsign;
+            else
+                flag |= Fesign;
+        case '+':
+            if(state == S0)
+                state = S1;
+            else
+            if(state == S5)
+                state = S6;
+            else
+                break;    /* syntax */
+            continue;
+        case '.':
+            flag |= Fdpoint;
+            dp = na;
+            if(state == S0 || state == S1){
+                state = S3;
+                continue;
+            }
+            if(state == S2){
+                state = S4;
+                continue;
+            }
+            break;
+        case 'e': case 'E':
+            if(state == S2 || state == S4){
+                state = S5;
+                continue;
+            }
+            break;
+        }
+        break;
+    }
+
+    /* clean up return char-pointer */
+    switch(state) {
+    case S0:
+        if(cmp(s, "nan") == 0){
+            if(aas != nil)
+                *aas = s+3;
+            goto retnan;
+        }
+    case S1:
+        if(cmp(s, "infinity") == 0){
+            if(aas != nil)
+                *aas = s+8;
+            goto retinf;
+        }
+        if(cmp(s, "inf") == 0){
+            if(aas != nil)
+                *aas = s+3;
+            goto retinf;
+        }
+    case S3:
+        if(aas != nil)
+            *aas = as;
+        goto ret0;   /* no digits found */
+    case S6:
+        s--;        /* back over +- */
+    case S5:
+        s--;        /* back over e */
+        break;
+    }
+    if(aas != nil)
+        *aas = s;
+
+    if(flag & Fdpoint)
+    while(na > 0 && a[na-1] == '0')
+        na--;
+    if(na == 0)
+        goto ret0;    /* zero */
+    a[na] = 0;
+    if(!(flag & Fdpoint))
+        dp = na;
+    if(flag & Fesign)
+        ex = -ex;
+    dp += ex;
+    if(dp < -Maxe){
+        errno = ERANGE;
+        goto ret0;      /* underflow by exp */
+    } else
+    if(dp > +Maxe)
+        goto retinf;    /* overflow by exp */
+
+    /*
+     * normalize the decimal ascii number
+     * to range .[5-9][0-9]* e0
+     */
+    bp = 0;        /* binary exponent */
+    while(dp > 0)
+        divascii(a, &na, &dp, &bp);
+    while(dp < 0 || a[0] < '5')
+        mulascii(a, &na, &dp, &bp);
+
+    /* close approx by naive conversion */
+    mid[0] = 0;
+    mid[1] = 1;
+    for(i=0; (c=a[i]) != '\0'; i++) {
+        mid[0] = mid[0]*10 + (c-'0');
+        mid[1] = mid[1]*10;
+        if(i >= 8)
+            break;
+    }
+    low[0] = umuldiv(mid[0], One, mid[1]);
+    hig[0] = umuldiv(mid[0]+1, One, mid[1]);
+    for(i=1; i<Prec; i++) {
+        low[i] = 0;
+        hig[i] = One-1;
+    }
+
+    /* binary search for closest mantissa */
+    for(;;) {
+        /* mid = (hig + low) / 2 */
+        c = 0;
+        for(i=0; i<Prec; i++) {
+            mid[i] = hig[i] + low[i];
+            if(c)
+                mid[i] += One;
+            c = mid[i] & 1;
+            mid[i] >>= 1;
+        }
+        frnorm(mid);
+
+        /* compare */
+        c = fpcmp(a, mid);
+        if(c > 0) {
+            c = 1;
+            for(i=0; i<Prec; i++)
+                if(low[i] != mid[i]) {
+                    c = 0;
+                    low[i] = mid[i];
+                }
+            if(c)
+                break;    /* between mid and hig */
+            continue;
+        }
+        if(c < 0) {
+            for(i=0; i<Prec; i++)
+                hig[i] = mid[i];
+            continue;
+        }
+
+        /* only hard part is if even/odd roundings wants to go up */
+        c = mid[Prec-1] & (Sigbit-1);
+        if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
+            mid[Prec-1] -= c;
+        break;    /* exactly mid */
+    }
+
+    /* normal rounding applies */
+    c = mid[Prec-1] & (Sigbit-1);
+    mid[Prec-1] -= c;
+    if(c >= Sigbit/2) {
+        mid[Prec-1] += Sigbit;
+        frnorm(mid);
+    }
+    goto out;
+
+ret0:
+    return 0;
+
+retnan:
+    return NaN();
+
+retinf:
+    /* Unix strtod requires these.  Plan 9 would return Inf(0) or Inf(-1). */
+    errno = ERANGE;
+    if(flag & Fsign)
+        return -HUGE_VAL;
+    return HUGE_VAL;
+
+out:
+    d = 0;
+    for(i=0; i<Prec; i++)
+        d = d*One + mid[i];
+    if(flag & Fsign)
+        d = -d;
+    d = ldexp(d, bp - Prec*Nbits);
+    if(d == 0)  /* underflow */
+        errno = ERANGE;
+
+    return d;
+}
+
+#undef Nbits
+#undef Nmant
+#undef Prec
+
+#undef Sigbit
+#undef Ndig
+#undef One
+#undef Half
+#undef Maxe
+
+#undef Fsign
+#undef Fesign
+#undef Fdpoint
+
+#undef S0
+#undef S1
+#undef S2
+#undef S3
+#undef S4
+#undef S5
+#undef S6
+#undef S7
+
+/*
+ * write the exponent suffix for e at p: the marker ('E' when ucase is
+ * set, otherwise 'e'), an explicit sign, at least two decimal digits
+ * and a terminating NUL.  p must have room for all of it.
+ */
+static void
+fmtexp(char *p, int e, int ucase)
+{
+    char rev[9];    /* digits of |e|, least significant first */
+    int  nrev;
+    *p++ = ucase ? 'E' : 'e';
+    *p++ = (e < 0) ? '-' : '+';
+    if(e < 0)
+        e = -e;
+
+    /* peel off digits from the low end; e == 0 yields none */
+    for(nrev = 0; e != 0; e /= 10)
+        rev[nrev++] = e % 10 + '0';
+    /* pad to the two digit minimum */
+    for(; nrev < 2; nrev++)
+        rev[nrev] = '0';
+    /* replay most significant digit first */
+    while(nrev-- > 0)
+        *p++ = rev[nrev];
+    *p = '\0';
+}
+
+/*
+ * compute decimal integer m, exp such that:
+ * f = m*10^exp
+ * m is as short as possible without losing exactness
+ * assumes special cases (NaN, +Inf, -Inf) have been handled.
+ * s gets the digits of m, *len their count, *neg whether f < 0; s is also scratch for a suffix at s+NSIGNIF.
+ */
+static void
+dtoa(double f, char *s, int *exp, int *neg, int *len)
+{
+    int c, d, e2, e, i, ndigit, oerrno;
+    double g;
+
+    oerrno = errno;    /* the fmtstrtod probes below may set errno; restored on exit */
+
+    *neg = 0;
+    if(f < 0){
+        f = -f;
+        *neg = 1;
+    }
+
+    if(f == 0){
+        *exp = 0;
+        s[0] = '0';
+        s[1] = 0;
+        *len = 1;
+        return;
+    }
+
+    frexp(f, &e2);    /* only the binary exponent is wanted */
+    e = (int)(e2 * .301029995664);    /* times log10(2): first guess at the decimal exponent */
+    g = f * fpow10(-e);
+    while(g < 1) {
+        e--;
+        g = f * fpow10(-e);
+    }
+    while(g >= 10){
+        e++;
+        g = f * fpow10(-e);
+    }
+
+    /* convert nsignif digits as a first approximation */
+    for(i=0; i<NSIGNIF; i++){
+        d = (int)g;
+        s[i] = d+'0';
+        g = (g-d)*10;
+    }
+    s[i] = 0;
+
+    e -= NSIGNIF-1;
+    fmtexp(s+NSIGNIF, e, 0);    /* s now reads <digits>e<exp> so fmtstrtod can parse it back */
+
+    /* nudge the last digit until the text converts back to f; give up after 10 tries */
+    for(i=0; i<10; i++) {
+        g=fmtstrtod(s, nil);
+        if(f > g) {
+            if(add1(s, NSIGNIF)){
+                /* gained a digit */
+                e--;
+                fmtexp(s+NSIGNIF, e, 0);
+            }
+            continue;
+        }
+        if(f < g){
+            if(sub1(s, NSIGNIF)){
+                /* lost a digit */
+                e++;
+                fmtexp(s+NSIGNIF, e, 0);
+            }
+            continue;
+        }
+        break;
+    }
+    /*
+     * bump last few digits down to 0 as we can.
+     */
+    for(i=NSIGNIF-1; i>=NSIGNIF-3; i--){
+        c = s[i];
+        if(c != '0'){
+            s[i] = '0';
+            g=fmtstrtod(s, nil);
+            if(g != f){
+                s[i] = c;    /* zeroing changed the value: undo and stop */
+                break;
+            }
+        }
+    }
+
+    /*
+     * remove trailing zeros.
+     */
+    ndigit = NSIGNIF;
+    while(ndigit > 1 && s[ndigit-1] == '0'){
+        e++;
+        --ndigit;
+    }
+    s[ndigit] = 0;    /* this also cuts off the exponent suffix */
+    *exp = e;
+    *len = ndigit;
+
+    errno = oerrno;
+}
+
+
+/*
+ * format the floating point argument of an e, f or g verb (upper case
+ * forms included; any other verb is laid out like e).  the value is read
+ * from io->args as long double when fmt·Long is set, else as double.
+ * returns 0 on success and -1 when flush, copy or pad reports failure.
+ */
+static int
+fmtfloat(fmt·State *io)
+{
+    char buf[NSIGNIF+10], *dot, *digits, *p, *end, suf[10], *cur;
+    double val;
+    int c, verb, ndot, e, exp, f, ndigits, neg, newndigits;
+    int npad, pt, prec, realverb, sign, nsuf, ucase, n, z1, z2;
+
+    if(io->flag&fmt·Long)
+        val = va_arg(io->args, long double);
+    else
+        val = va_arg(io->args, double);
+    /* extract formatting flags */
+    f = io->flag;
+    io->flag = 0;
+    prec = FDEFLT;
+    if(f & fmt·Prec)
+        prec = io->prec;
+    verb = io->verb;
+    ucase = 0;
+    switch(verb) {
+    case 'A':
+    case 'E':
+    case 'F':
+    case 'G':
+        verb += 'a'-'A';
+        ucase = 1;
+        break;
+    }
+
+    /* pick off special numbers. */
+    if(isNaN(val)) {
+        end = special[0+ucase];
+    special:
+        io->flag = f & (fmt·Width|fmt·Left);
+        return copy(io, end, strlen(end), strlen(end));
+    }
+    if(isInf(val, 1)) {
+        end = special[2+ucase];
+        goto special;
+    }
+    if(isInf(val, -1)) {
+        end = special[4+ucase];
+        goto special;
+    }
+    /* get exact representation. */
+    digits = buf;
+    dtoa(val, digits, &exp, &neg, &ndigits);
+    /* get locale's decimal point. */
+    dot = io->decimal;
+    if(dot == nil)
+        dot = ".";
+    ndot = utf8·len(dot);
+
+    /*
+     * now the formatting fun begins.
+     * compute parameters for actual fmt:
+     *
+     *    npad: number of spaces to insert before/after field.
+     *    z1: number of zeros to insert before digits
+     *    z2: number of zeros to insert after digits
+     *    pt: number of digits to print before decimal point
+     *    ndigits: number of digits to use from digits[]
+     *    suf: trailing suffix, like "e-5"
+     */
+    realverb = verb;
+    switch(verb){
+    case 'g':
+        /* convert to at most prec significant digits. (prec=0 means 1) */
+        if(prec == 0)
+            prec = 1;
+        if(ndigits > prec) {
+            if(digits[prec] >= '5' && add1(digits, prec))
+                exp++;
+            exp += ndigits-prec;
+            ndigits = prec;
+        }
+
+        /*
+         * extra rules for %g (implemented below):
+         *    trailing zeros removed after decimal unless fmt·Sharp.
+         *    decimal point only if digit follows.
+         */
+
+        /* fall through to %e */
+    default:
+    case 'e':
+        /* one significant digit before decimal, no leading zeros. */
+        pt = 1;
+        z1 = 0;
+
+        /*
+         * decimal point is after ndigits digits right now.
+         * slide to be after first.
+         */
+        e  = exp + (ndigits-1);
+
+        /* if this is %g, check exponent and convert prec */
+        if(realverb == 'g') {
+            if(-4 <= e && e < prec)
+                goto casef;
+            prec--;    /* one digit before decimal; rest after */
+        }
+
+        /* compute trailing zero padding or truncate digits. */
+        if(1+prec >= ndigits)
+            z2 = 1+prec - ndigits;
+        else {
+            /* truncate digits */
+            assert(realverb != 'g');
+            newndigits = 1+prec;
+            if(digits[newndigits] >= '5' && add1(digits, newndigits)) {
+                /* had 999e4, now have 100e5 */
+                e++;
+            }
+            ndigits = newndigits;
+            z2 = 0;
+        }
+        fmtexp(suf, e, ucase);
+        nsuf = strlen(suf);
+        break;
+
+    casef:
+    case 'f':
+        /* determine where digits go with respect to decimal point */
+        if(ndigits+exp > 0) {
+            pt = ndigits+exp;
+            z1 = 0;
+        } else {
+            pt = 1;
+            z1 = 1 + -(ndigits+exp);
+        }
+
+        /*
+         * %g specifies prec = number of significant digits
+         * convert to number of digits after decimal point
+         */
+        if(realverb == 'g')
+            prec += z1 - pt;
+
+        /* compute trailing zero padding or truncate digits. */
+        if(pt+prec >= z1+ndigits)
+            z2 = pt+prec - (z1+ndigits);
+        else{
+            /* truncate digits */
+            assert(realverb != 'g');
+            newndigits = pt+prec - z1;
+            if(newndigits < 0){
+                z1 += newndigits;
+                newndigits = 0;
+            }else if(newndigits == 0){
+                /* perhaps round up */
+                if(digits[0] >= '5'){
+                    digits[0] = '1';
+                    newndigits = 1;
+                    goto newdigit;
+                }
+            }else if(digits[newndigits] >= '5' && add1(digits, newndigits)){
+                /* digits was 999, is now 100; make it 1000 */
+                digits[newndigits++] = '0';
+            newdigit:
+                /* account for new digit */
+                if(z1)   /* 0.099 => 0.100 or 0.99 => 1.00*/
+                    z1--;
+                else    /* 9.99 => 10.00 */
+                    pt++;
+            }
+            z2 = 0;
+            ndigits = newndigits;
+        }
+        nsuf = 0;
+        break;
+    }
+
+    /*
+     * if %g is given without fmt·Sharp, remove trailing zeros.
+     * must do after truncation, so that e.g. print %.3g 1.001
+     * produces 1, not 1.00.  sorry, but them's the rules.
+     */
+    if(realverb == 'g' && !(f & fmt·Sharp)) {
+        if(z1+ndigits+z2 >= pt) {
+            if(z1+ndigits < pt)
+                z2 = pt - (z1+ndigits);
+            else{
+                z2 = 0;
+                while(z1+ndigits > pt && digits[ndigits-1] == '0')
+                    ndigits--;
+            }
+        }
+    }
+
+    /*
+     * compute width of all digits and decimal point and suffix if any
+     */
+    n = z1+ndigits+z2;
+    if(n > pt)
+        n += ndot;
+    else if(n == pt){
+        if(f & fmt·Sharp)
+            n += ndot;
+        else
+            pt++;    /* do not print any decimal point */
+    }
+    n += nsuf;
+
+    /*
+     * determine sign
+     */
+    sign = 0;
+    if(neg)
+        sign = '-';
+    else if(f & fmt·Sign)
+        sign = '+';
+    else if(f & fmt·Space)
+        sign = ' ';
+    if(sign)
+        n++;
+
+    /* compute padding */
+    npad = 0;
+    if((f & fmt·Width) && io->width > n)
+        npad = io->width - n;
+    if(npad && !(f & fmt·Left) && (f & fmt·Zero)){
+        z1 += npad;
+        pt += npad;
+        npad = 0;
+    }
+
+    /* format the actual field.  too bad about doing this twice.  pad takes the count; its result is what gets tested. */
+    if(npad && !(f & fmt·Left) && pad(io, npad) < 0)
+        return -1;
+    cur = io->buffer.cur;
+    end = io->buffer.end;
+
+    if(sign){
+        if(cur+1 > end){
+            if(!(cur=flush(io,cur,1)))
+                return -1;
+            end = io->buffer.end;
+        }
+        *cur++ = sign;
+    }
+
+    while(z1>0 || ndigits>0 || z2>0){
+        if(z1 > 0){
+            z1--;
+            c = '0';
+        }else if(ndigits > 0){
+            ndigits--;
+            c = *digits++;
+        }else{
+            z2--;
+            c = '0';
+        }
+
+        if(cur+1 > end){
+            if(!(cur=flush(io,cur,1)))
+                return -1;
+            end = io->buffer.end;
+        }
+        *cur++ = c;
+
+        if(--pt == 0)    /* pt digits are out: the decimal point goes here */
+            for(p=dot; *p; p++){
+                if(cur+1 > end){
+                    if(!(cur=flush(io,cur,1)))
+                        return -1;
+                    end = io->buffer.end;
+                }
+                *cur++ = *p;
+            }
+    }
+    io->n += cur - (char*)io->buffer.cur;
+    io->buffer.cur = cur;
+    if(nsuf && copy(io, suf, nsuf, nsuf) < 0)
+        return -1;
+    if(npad && (f & fmt·Left) && pad(io, npad) < 0)    /* trailing pad for left justified fields */
+        return -1;
+    return 0;
+}
diff --git a/src/base/fmt/fprint.c b/src/base/fmt/fprint.c
new file mode 100644
index 0000000..5077359
--- /dev/null
+++ b/src/base/fmt/fprint.c
@@ -0,0 +1,14 @@
+#include "internal.h"
+
+/* formatted print to descriptor fd; returns whatever fmt·vfprint returns */
+int
+fmt·fprint(int fd, char *fmt, ...)
+{
+    va_list ap;
+    int     rc;
+
+    va_start(ap, fmt);
+    rc = fmt·vfprint(fd, fmt, ap);
+    va_end(ap);
+    return rc;
+}
diff --git a/src/base/fmt/internal.h b/src/base/fmt/internal.h
new file mode 100644
index 0000000..7bf47af
--- /dev/null
+++ b/src/base/fmt/internal.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <u.h>
+#include <base.h>
+
+typedef int (*Formatter)(fmt·State *io);    /* callback that is handed the active format state */
+typedef struct Verb Verb;
+/* pairs a verb character with its Formatter (presumably one entry per installed verb -- confirm in do.c) */
+struct Verb
+{
+    int c;            /* the verb character, e.g. the 'X' passed to fmt·install in test.c */
+    Formatter fmt;    /* handler bound to c */
+};
+
+void fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups);
diff --git a/src/base/fmt/locale.c b/src/base/fmt/locale.c
new file mode 100644
index 0000000..437c61e
--- /dev/null
+++ b/src/base/fmt/locale.c
@@ -0,0 +1,16 @@
+#include "internal.h"
+
+/*
+ * install the separators used for numeric output into io.
+ * nil selects the default: "." for decimal, "," for thousands and
+ * "\3" for groups; an empty decimal string also falls back to ".".
+ */
+void
+fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups)
+{
+    int nodecimal = (decimal == nil || decimal[0] == '\0');
+
+    io->decimal   = nodecimal ? "." : decimal;
+    io->thousands = (thousands != nil) ? thousands : ",";
+    io->groups    = (groups != nil) ? groups : "\3";
+}
diff --git a/src/base/fmt/nsprint.c b/src/base/fmt/nsprint.c
new file mode 100644
index 0000000..90489e0
--- /dev/null
+++ b/src/base/fmt/nsprint.c
@@ -0,0 +1,14 @@
+#include "internal.h"
+
+/* format into at most len bytes at buf; returns whatever fmt·vnsprint returns */
+int
+fmt·nsprint(int len, char *buf, char *fmt, ...)
+{
+    va_list ap;
+    int     rc;
+
+    va_start(ap, fmt);
+    rc = fmt·vnsprint(len, buf, fmt, ap);
+    va_end(ap);
+    return rc;
+}
diff --git a/src/base/fmt/open.c b/src/base/fmt/open.c
new file mode 100644
index 0000000..8aadef5
--- /dev/null
+++ b/src/base/fmt/open.c
@@ -0,0 +1,34 @@
+#include "internal.h"
+
+/* write out the buffered bytes, retrying short writes; returns io->n, or -1 on failure */
+static int
+flush(fmt·State *io)
+{
+    int n, w, off, fd;
+    fd = (uintptr)io->file;    /* fmt·open stored the descriptor here */
+    n  = io->buffer.cur - io->buffer.beg;
+    for(off = 0; off < n; off += w)    /* write may accept fewer bytes than asked for */
+        if((w = write(fd, io->buffer.beg+off, n-off)) <= 0)
+            return -1;
+    io->buffer.cur  = io->buffer.beg;
+    return io->n;
+}
+
+/* bind io to descriptor fd, buffering output in the len bytes at buf; always returns 0 */
+int
+fmt·open(int fd, int len, char *buf, fmt·State *io)
+{
+    /* start with an empty buffer spanning buf .. buf+len */
+    io->buffer.beg = io->buffer.cur = buf;
+    io->buffer.end = buf+len;
+    /* flush writes the buffer to the descriptor carried in io->file */
+    io->file  = (void*)(uintptr)fd;
+    io->flush = flush;
+    io->n     = 0;
+    io->flag  = 0;
+    /* no heap needed */
+    io->mem  = (mem·Reallocator){ 0 };
+    io->heap = nil;
+    fmt·setlocale(io, nil, nil, nil);
+    return 0;
+}
diff --git a/src/base/fmt/panic.c b/src/base/fmt/panic.c
new file mode 100644
index 0000000..25ee277
--- /dev/null
+++ b/src/base/fmt/panic.c
@@ -0,0 +1,15 @@
+#include "internal.h"
+
+/* format a message, print it to descriptor 2 after the program name, then call exits("fatal") */
+void
+fmt·panic(char *fmt, ...)
+{
+    va_list ap;
+    char    msg[256], *prog;
+    va_start(ap, fmt);
+    fmt·vesprint(msg, arrend(msg), fmt, ap);
+    va_end(ap);
+    prog = argv0 ? argv0 : "<prog>";    /* argv0 may not have been set */
+    fmt·fprint(2, "%s: %s\n", prog, msg);
+    exits("fatal");
+}
diff --git a/src/base/fmt/print.c b/src/base/fmt/print.c
new file mode 100644
index 0000000..20b8e00
--- /dev/null
+++ b/src/base/fmt/print.c
@@ -0,0 +1,13 @@
+#include "internal.h"
+
+/* formatted print to descriptor 1; returns whatever fmt·vfprint returns */
+int
+fmt·print(char *fmt, ...)
+{
+    va_list ap;
+    int     rc;
+    va_start(ap, fmt);
+    rc = fmt·vfprint(1, fmt, ap);
+    va_end(ap);
+    return rc;
+}
diff --git a/src/base/fmt/rules.mk b/src/base/fmt/rules.mk
new file mode 100644
index 0000000..fdfdac0
--- /dev/null
+++ b/src/base/fmt/rules.mk
@@ -0,0 +1,21 @@
+# Local sources
+SRCS_$(d)+=\
+	$(d)/fmt/buffer.c\
+	$(d)/fmt/do.c\
+	$(d)/fmt/esprint.c\
+	$(d)/fmt/fprint.c\
+	$(d)/fmt/locale.c\
+	$(d)/fmt/nsprint.c\
+	$(d)/fmt/open.c\
+	$(d)/fmt/print.c\
+	$(d)/fmt/sprint.c\
+	$(d)/fmt/vesprint.c\
+	$(d)/fmt/vfprint.c\
+	$(d)/fmt/vnsprint.c\
+	$(d)/fmt/vprint.c\
+	$(d)/fmt/vwrite.c\
+	$(d)/fmt/panic.c\
+	$(d)/fmt/write.c
+
+CHECK_$(d)+=\
+	$(d)/fmt/test.c
diff --git a/src/base/fmt/sprint.c b/src/base/fmt/sprint.c
new file mode 100644
index 0000000..f1be6dd
--- /dev/null
+++ b/src/base/fmt/sprint.c
@@ -0,0 +1,19 @@
+#include "internal.h"
+
+/* unbounded format into buf: hands fmt·vnsprint a 1<<30 byte limit, trimmed if buf+len would wrap; returns its result */
+int
+fmt·sprint(char *buf, char *fmt, ...)
+{
+    int n;
+    uint len;
+    va_list args;
+
+    len = 1 << 30;
+    /* compare as integers: pointer arithmetic that overflows is undefined, so a pointer comparison may be optimized out */
+    if((uintptr)buf+len < (uintptr)buf)
+        len = -(uintptr)buf-1;
+    va_start(args, fmt);
+    n = fmt·vnsprint(len, buf, fmt, args);
+    va_end(args);
+    return n;
+}
diff --git a/src/base/fmt/test.c b/src/base/fmt/test.c
new file mode 100644
index 0000000..d81a62e
--- /dev/null
+++ b/src/base/fmt/test.c
@@ -0,0 +1,72 @@
+#include <u.h>
+#include <base.h>
+#include <libutf.h>
+#include <libfmt.h>
+
+typedef struct Complex
+{
+    double r, i;
+} Complex;
+
+/* custom verb handler: consumes one Complex from io->args and prints its parts via fmt·write */
+int
+Xfmt(fmt·State *io)
+{
+    Complex z = va_arg(io->args, Complex);
+    double  re = z.r, im = z.i;
+    return fmt·write(io, "(real=%g,imag=%g)", re, im);
+}
+
+int
+main(int argc, char *argv[])    /* prints sample output for inspection; nothing is asserted and 0 is always returned */
+{
+    fmt·print("basic tests\n");
+    fmt·print("\tx: %x\n", 0x87654321);
+    fmt·print("\tu: %u\n", 0x87654321);
+    fmt·print("\td: %d\n", 0x87654321);
+    fmt·print("\ts: %s\n", "hi there");
+    fmt·print("\tc: %c\n", '!');
+    fmt·print("\tg: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10);
+    fmt·print("\te: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10);
+    fmt·print("\tf: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10);
+    fmt·print("\tsmiley: %C\n", (rune)0x263a);
+    fmt·print("\t%g %.18g\n", 2e25, 2e25);
+    fmt·print("\t%2.18g\n", 1.0);
+    fmt·print("\t%2.18f\n", 1.0);
+    fmt·print("\t%f\n", 3.1415927/4);
+    fmt·print("\t%d\n", 23);
+    fmt·print("\t%i\n", 23);
+    fmt·print("\t%0.10d\n", 12345);
+    /* positional (%n$) arguments, including a positional * width taken from argument 5 */
+    fmt·print("%%4%%d tests\n");
+    fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222);
+    fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222);
+    fmt·print("\t%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20);
+    fmt·print("\t%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20);
+    fmt·print("\t%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20);
+
+    /* test %'d formats (the ' flag, alone and combined with zero padding and widths) */
+    fmt·print("%%'%%d tests\n");
+    fmt·print("\t%'d %'d %'d\n", 1, 2222, 33333333);
+    fmt·print("\t%'019d\n", 0);
+    fmt·print("\t%08d %08d %08d\n", 1, 2222, 33333333);
+    fmt·print("\t%'08d %'08d %'08d\n", 1, 2222, 33333333);
+    fmt·print("\t%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345);
+    fmt·print("\t%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL);
+    fmt·print("\t%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL);
+    fmt·print("\t%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL);
+    fmt·print("\t%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL);
+    fmt·print("\t%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL);
+
+    /* test precision (combined with a zero padded width) */
+    fmt·print("precision tests\n");
+    fmt·print("%020.10d\n", 100);
+
+    /* test install: 'X' is handed to Xfmt from here on, so this must stay after the %'X test above */
+    fmt·install('X', Xfmt);
+    Complex c = { 1.5, -2.3 };
+    fmt·print("x = %X\n", c);
+
+    return 0;
+
+}
diff --git a/src/base/fmt/vesprint.c b/src/base/fmt/vesprint.c
new file mode 100644
index 0000000..18f4dd2
--- /dev/null
+++ b/src/base/fmt/vesprint.c
@@ -0,0 +1,26 @@
+#include "internal.h"
+
+/*
+ * format into the memory [buf, end) and NUL terminate the result.
+ * returns a pointer to that NUL, or nil when the range is empty.
+ */
+char*
+fmt·vesprint(char *buf, char *end, char *fmt, va_list args)
+{
+    fmt·State st;
+    if(buf >= end)
+        return nil;
+    /* in-memory target: nothing to flush to, and one byte held back for the NUL */
+    st.file  = nil;
+    st.flush = nil;
+    st.buffer.end = end-1;
+    st.buffer.cur = st.buffer.beg = buf;
+    st.n = 0;
+    fmt·setlocale(&st, nil, nil, nil);
+    /* format from a private copy of the argument list */
+    va_copy(st.args, args);
+    fmt·do(&st, fmt);
+    va_end(st.args);
+    st.buffer.cur[0] = 0;
+    return st.buffer.cur;
+}
diff --git a/src/base/fmt/vfprint.c b/src/base/fmt/vfprint.c
new file mode 100644
index 0000000..4306ea7
--- /dev/null
+++ b/src/base/fmt/vfprint.c
@@ -0,0 +1,19 @@
+#include "internal.h"
+
+/* format to descriptor fd through a 256 byte stack buffer; returns fmt·do's result, or -1 if the final flush fails */
+int
+fmt·vfprint(int fd, char *fmt, va_list args)
+{
+    char scratch[256];
+    fmt·State st;
+    int  rc;
+
+    fmt·open(fd, sizeof scratch, scratch, &st);
+    va_copy(st.args, args);
+    rc = fmt·do(&st, fmt);
+    va_end(st.args);
+    /* fmt·do reported nothing positive: skip the flush and pass its result through */
+    if(rc <= 0)
+        return rc;
+    return (st.flush(&st) < 0) ? -1 : rc;
+}
diff --git a/src/base/fmt/vnsprint.c b/src/base/fmt/vnsprint.c
new file mode 100644
index 0000000..7ded908
--- /dev/null
+++ b/src/base/fmt/vnsprint.c
@@ -0,0 +1,26 @@
+#include "internal.h"
+
+/*
+ * format into at most len bytes at buf, NUL terminator included.
+ * returns the number of bytes stored before the NUL, or -1 if len <= 0.
+ */
+int
+fmt·vnsprint(int len, char *buf, char *fmt, va_list args)
+{
+    fmt·State st;
+    if(len < 1)
+        return -1;
+    /* in-memory target: nothing to flush to, and the last byte is held back for the NUL */
+    st.file  = nil;
+    st.flush = nil;
+    st.buffer.end = buf+len-1;
+    st.buffer.cur = st.buffer.beg = buf;
+    st.n = 0;
+    fmt·setlocale(&st, nil, nil, nil);
+    /* format from a private copy of the argument list */
+    va_copy(st.args, args);
+    fmt·do(&st, fmt);
+    va_end(st.args);
+    st.buffer.cur[0] = 0;
+    return st.buffer.cur - st.buffer.beg;
+}
diff --git a/src/base/fmt/vprint.c b/src/base/fmt/vprint.c
new file mode 100644
index 0000000..bb3076b
--- /dev/null
+++ b/src/base/fmt/vprint.c
@@ -0,0 +1,19 @@
+#include "internal.h"
+
+/*
+ * fmt·vprint formats fmt with the arguments in args and writes the
+ * result to file descriptor 1.
+ *
+ * it is fmt·vfprint with the descriptor fixed, so it delegates instead
+ * of keeping a second copy of the buffer setup and flush logic; the
+ * two can no longer drift apart.
+ *
+ * returns what fmt·vfprint returns: the result of fmt·do, or -1 when
+ * the final flush fails.
+ */
+int
+fmt·vprint(char *fmt, va_list args)
+{
+    /* 1 is the descriptor fmt·print also targets */
+    return fmt·vfprint(1, fmt, args);
+}
diff --git a/src/base/fmt/vwrite.c b/src/base/fmt/vwrite.c
new file mode 100644
index 0000000..cacdef2
--- /dev/null
+++ b/src/base/fmt/vwrite.c
@@ -0,0 +1,26 @@
+#include "internal.h"
+
+int
+fmt·vwrite(fmt·State *io, char *fmt, va_list args)
+{    /* format fmt/args through an existing io, leaving io->args as the caller had it */
+    int n;
+    va_list tmp;
+    /* the nested format starts with no flags, width or precision */
+    io->flag = io->width = io->prec = 0;
+    /* park the argument list io currently holds so it can be put back afterwards */
+    va_copy(tmp, io->args);
+    va_end(io->args);
+    /* run fmt against a copy of args through the same state */
+    va_copy(io->args,args);
+    n = fmt·do(io, fmt);
+    va_end(io->args);
+    /* reinstate the parked argument list */
+    va_copy(io->args, tmp);
+    va_end(tmp);
+
+    io->flag = io->width = io->prec = 0;
+    /* success is reported as 0; a negative result from fmt·do is passed through */
+    if(n >= 0)
+        return 0;
+    return n;
+}
diff --git a/src/base/fmt/write.c b/src/base/fmt/write.c
new file mode 100644
index 0000000..9a77223
--- /dev/null
+++ b/src/base/fmt/write.c
@@ -0,0 +1,22 @@
+#include "internal.h"
+
+/* format fmt and the variadic arguments through an already active io; returns 0, or fmt·do's negative result */
+int
+fmt·write(fmt·State *io, char *fmt, ...)
+{
+    int n;
+    va_list saved;
+    io->flag = io->width = io->prec = 0;
+    va_copy(saved, io->args);    /* park the argument list io is in the middle of consuming */
+    va_end(io->args);
+    va_start(io->args, fmt);
+    n = fmt·do(io, fmt);
+    va_end(io->args);
+    /* put it back (as fmt·vwrite does): whoever invoked us keeps reading io->args */
+    va_copy(io->args, saved);
+    va_end(saved);
+    io->flag = io->width = io->prec = 0;
+    if(n >= 0)
+        return 0;
+    return n;
+}
diff --git a/src/base/io/close.c b/src/base/io/close.c
new file mode 100644
index 0000000..5a773cd
--- /dev/null
+++ b/src/base/io/close.c
@@ -0,0 +1,7 @@
+#include "internal.h"
+
+int
+io·close(io·Stream *s)
+{
+    return fclose(s);    /* the stream goes straight to stdio; the result is fclose's */
+}
diff --git a/src/base/io/open.c b/src/base/io/open.c
index 71e88d4..fe78255 100644
--- a/src/base/io/open.c
+++ b/src/base/io/open.c
@@ -5,9 +5,3 @@ io·open(byte *name, byte *mode)
 {
     return fopen(name, mode);
 }
-
-int
-io·close(io·Stream *s)
-{
-    return fclose(s);
-}
diff --git a/src/base/io/rules.mk b/src/base/io/rules.mk
index 2e03ca5..124cd09 100644
--- a/src/base/io/rules.mk
+++ b/src/base/io/rules.mk
@@ -3,6 +3,7 @@ SRCS_$(d)+=\
 	$(d)/io/flush.c\
 	$(d)/io/interface.c\
 	$(d)/io/open.c\
+	$(d)/io/close.c\
 	$(d)/io/putbyte.c\
 	$(d)/io/putstring.c\
 	$(d)/io/read.c\
@@ -11,4 +12,4 @@ SRCS_$(d)+=\
 	$(d)/io/stat.c\
 	$(d)/io/tell.c\
 	$(d)/io/unget.c\
-	$(d)/io/write.c\
+	$(d)/io/write.c
diff --git a/src/base/rules.mk b/src/base/rules.mk
index 9f25d37..0a262c7 100644
--- a/src/base/rules.mk
+++ b/src/base/rules.mk
@@ -5,10 +5,15 @@ include share/push.mk
 # local sources
 SRCS_$(d):=\
 	$(d)/arg.c
+
+CHECK_$(d):=\
+	$(d)/test.c
+
 include $(d)/bufio/rules.mk
 include $(d)/coro/rules.mk
 include $(d)/error/rules.mk
 include $(d)/flate/rules.mk
+include $(d)/fmt/rules.mk
 include $(d)/fs/rules.mk
 include $(d)/gz/rules.mk
 include $(d)/io/rules.mk
@@ -18,12 +23,10 @@ include $(d)/os/rules.mk
 include $(d)/rng/rules.mk
 include $(d)/sort/rules.mk
 include $(d)/string/rules.mk
-CHECK_$(d):=\
-	$(d)/test.c
+include $(d)/utf/rules.mk
 
 # outputs
 LIBS_$(d) := $(d)/base.a
-BINS_$(d) :=
 
 include share/paths.mk
 
diff --git a/src/base/utf/canfit.c b/src/base/utf/canfit.c
new file mode 100644
index 0000000..4579ab3
--- /dev/null
+++ b/src/base/utf/canfit.c
@@ -0,0 +1,23 @@
+#include "internal.h"
+
+/*
+ * returns 1 if the n bytes at s are enough to hold the whole sequence
+ * announced by the first byte, 0 otherwise (including n <= 0).
+ */
+int
+utf8·canfit(byte* s, int n)
+{
+    rune c;
+
+    if(n <= 0)
+        return 0;
+    c = *(ubyte*)s;
+    /* ascii or a continuation byte: neither announces a longer sequence */
+    if(c < TByte1)
+        return 1;
+    if(c < TByte2)     /* 110xxxxx leads a 2 byte sequence */
+        return n >= 2;
+    if(c < TByte3)     /* 1110xxxx leads a 3 byte sequence */
+        return n >= 3;
+    return n >= UTFmax; /* 11110xxx and above */
+}
diff --git a/src/base/utf/decode.c b/src/base/utf/decode.c
new file mode 100644
index 0000000..01797f1
--- /dev/null
+++ b/src/base/utf/decode.c
@@ -0,0 +1,98 @@
+#include "internal.h"
+
+#define ACCEPT 0
+#define REJECT 12
+
+static uint8 decode[] = {
+    /*
+     * the first part of the table maps each byte to a character class,
+     * which keeps the transition table small and sizes the lead byte bitmask
+     */
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+     8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+    /*
+     * the second part is the transition table, indexed by state plus
+     * character class; states are multiples of 12 (0 accepts, 12 rejects)
+     */
+     0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+    12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+/*
+ * decode the utf8 sequence starting at s into *r with a table driven
+ * automaton.  returns the number of bytes used; a malformed sequence
+ * stores RuneErr and reports 1.
+ */
+int
+utf8·decode(char *s, rune *r)
+{
+    uint8 *p = (uint8 *)s;
+    uint8  cls, state;
+    rune   acc;
+    int    used = 1;
+    /* lead byte: its class says how many high bits are not payload */
+    cls   = decode[p[0]];
+    acc   = (0xFF >> cls) & p[0];
+    state = decode[256+ACCEPT+cls];
+    /* each further byte adds six bits while the automaton is mid-sequence */
+    while(state > REJECT && used < UTFmax){
+        cls   = decode[p[used]];
+        acc   = (acc << 6) | (p[used] & TMask);
+        state = decode[256+state+cls];
+        used++;
+    }
+    if(state == ACCEPT){ *r = acc; return used; }
+    *r = RuneErr;
+    return 1;
+}
+
+#if 0
+int
+utf8·decode(byte *s, rune *r)
+{
+    int c[UTFmax], i;
+    rune l;
+
+    c[0] = *(ubyte*)(s);
+    if(c[0] < Tx){
+        *r = c[0];
+        return 1;
+    }
+
+    l = c[0];
+    for(i = 1; i < UTFmax; i++){
+        c[i]  = *(ubyte*)(s+i);
+        c[i] ^= Tx;
+        if(c[i] & Testx) goto bad;
+
+        l = (l << Bitx) | c[i];
+        if(c[0] < Tbyte(i + 2)){
+            l &= RuneX(i + 1);
+            if(i == 1){
+                if(c[0] < Tbyte(2) || l <= Rune1)
+                    goto bad;
+            }else if(l <= RuneX(i) || l > RuneMax)
+                goto bad;
+
+            if(i == 2 && SurrogateMin <= l && l <= SurrogateMax)
+                goto bad;
+
+            *r = l;
+            return i + 1;
+        }
+    }
+bad:
+    *r = RuneErr;
+    return 1;
+}
+#endif
diff --git a/src/base/utf/decodeprev.c b/src/base/utf/decodeprev.c
new file mode 100644
index 0000000..27dced6
--- /dev/null
+++ b/src/base/utf/decodeprev.c
@@ -0,0 +1,60 @@
+#include "internal.h"
+
+#define ACCEPT 0
+#define REJECT 12
+
+static uint8 decode[] = {
+    /*
+     * the first part of the table maps each byte to a character class,
+     * which keeps the transition table small and sizes the lead byte bitmask.
+     */
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+         8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+    /*
+     * The second part is the transition table for reading a sequence back to
+     * front: one row per state (a multiple of 12), one column per character class.
+     */
+    //   0  1  2  3  4  5  6  7  8  9 10 11
+         0,24,12,12,12,12,12,24,12,24,12,12,
+         0,24,12,12,12,12,12,24,12,24,12,12,
+        12,36, 0,12,12,12,12,48,12,36,12,12,
+        12,60,12, 0, 0,12,12,72,12,72,12,12,
+        12,60,12, 0,12,12,12,72,12,72, 0,12,
+        12,12,12,12,12, 0, 0,12,12,12,12,12,
+        12,12,12,12,12,12,12,12,12,12,12, 0
+};
+
+/*
+ * decode the utf8 sequence that ends at s by walking backwards from
+ * s[0].  stores the rune (RuneErr on a malformed sequence) in *r and
+ * returns how many bytes were examined.
+ */
+int
+utf8·decodeprev(byte *s, rune *r)
+{
+    uint8 *p = (uint8 *)s;
+    uint8  b, cls, shift = 0, state = ACCEPT;
+    rune   acc = 0;
+    int    used = 0;
+
+    for(;;){
+        b     = p[-used];
+        used++;
+        cls   = decode[b];
+        state = decode[256+state+cls];
+        if(state <= REJECT || used >= UTFmax)
+            break;
+        /* still mid-sequence: bank this byte's six payload bits and step back */
+        acc   |= (b & TMask) << shift;
+        shift += 6;
+    }
+    /* on acceptance the last byte read is the lead byte; its class sizes the payload mask */
+    *r = (state == ACCEPT) ? (acc | (((0xFFu >> cls) & b) << shift)) : RuneErr;
+    return used;
+}
diff --git a/src/base/utf/encode.c b/src/base/utf/encode.c
new file mode 100644
index 0000000..fa7c93e
--- /dev/null
+++ b/src/base/utf/encode.c
@@ -0,0 +1,69 @@
+#include "internal.h"
+
+/* encode *r as utf8 at s (up to 4 bytes) and return the byte count; values above RuneMax and surrogates are written as RuneErr */
+int
+utf8·encode(rune *r, byte *s)
+{
+    rune c;
+
+    c = *r;
+    if(c < Rune1Byte){ // 7 bits
+        s[0] = (uint8)c;
+        return 1;
+    }
+    if(c < Rune2Byte){ // 11 bits
+        s[0] = TByte1 | (c >> 6);
+        s[1] = Tx     | (c & TMask);
+        return 2;
+    }
+    /*
+     * screen out invalid values before choosing between 3 and 4 bytes:
+     * surrogates lie inside the 3 byte range, and the substituted RuneErr
+     * has to be sized by the same tests as any other rune
+     */
+    if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax))
+        c = RuneErr;
+    if(c < Rune3Byte){ // 16 bits
+        s[0] = TByte2 | ((c >> 12));
+        s[1] = Tx     | ((c >> 6) & TMask);
+        s[2] = Tx     | ((c)      & TMask);
+        return 3;
+    }
+    // 21 bits
+    s[0] = TByte3 | ((c >> 18));
+    s[1] = Tx     | ((c >> 12) & TMask);
+    s[2] = Tx     | ((c >> 6)  & TMask);
+    s[3] = Tx     | ((c)       & TMask);
+    return 4;
+}
+
+#if 0
+int
+utf8·encode(rune* r, byte* s)
+{
+    int i, j;
+    rune c;
+
+    c = *r;
+    if(c <= Rune1) {
+        s[0] = c;
+        return 1;
+    }
+
+    for(i = 2; i < UTFmax + 1; i++){
+        if(i == 3){
+            if(c > RuneMax)
+                c = RuneErr;
+            if(SurrogateMin <= c && c <= SurrogateMax)
+                c = RuneErr;
+        }
+        if(c <= RuneX(i) || i == UTFmax) {
+            s[0] = Tbyte(i) |  (c >> (i - 1)*Bitx);
+            for(j = 1; j < i; j++)
+                s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
+            return i;
+        }
+    }
+
+    return UTFmax;
+}
+#endif
diff --git a/src/base/utf/find.c b/src/base/utf/find.c
new file mode 100644
index 0000000..d75feb8
--- /dev/null
+++ b/src/base/utf/find.c
@@ -0,0 +1,31 @@
+#include "internal.h"
+
+/* Locate the first occurrence of rune c in the NUL-terminated UTF-8 string s.
+ * Yields a pointer to its first byte, or nil when c does not occur. */
+byte*
+utf8·find(byte* s, rune c)
+{
+    rune got;
+    long lead;
+    int  width;
+
+    /* a rune below Tx occupies one byte, so a byte search suffices */
+    if(c < Tx)
+        return strchr(s, c);
+
+    for(;; s += width){
+        lead = *(ubyte*)s;
+        if(lead >= Tx){
+            /* multi-byte sequence: compare the decoded rune */
+            width = utf8·decode(s, &got);
+            if(got == c)
+                return s;
+            continue;
+        }
+        if(lead == 0)
+            return nil;
+        if(lead == c)
+            return s;
+        width = 1;
+    }
+}
diff --git a/src/base/utf/findlast.c b/src/base/utf/findlast.c
new file mode 100644
index 0000000..ab25ab2
--- /dev/null
+++ b/src/base/utf/findlast.c
@@ -0,0 +1,32 @@
+#include "internal.h"
+
+/* Locate the last occurrence of rune c in the NUL-terminated UTF-8 string s.
+ * Yields a pointer to its first byte, or nil when c does not occur. */
+byte*
+utf8·findlast(byte* s, rune c)
+{
+    rune got;
+    long lead, width;
+    byte *hit;
+
+    /* a rune below Tx occupies one byte, so a byte search suffices */
+    if(c < Tx)
+        return strrchr(s, c);
+
+    for(hit = nil;; s += width){
+        lead = *(ubyte*)s;
+        if(lead >= Tx){
+            /* multi-byte sequence: compare the decoded rune */
+            width = utf8·decode(s, &got);
+            if(got == c)
+                hit = s;
+            continue;
+        }
+
+        if(lead == 0)
+            return hit;
+        if(lead == c)
+            hit = s;
+        width = 1;
+    }
+}
diff --git a/src/base/utf/internal.h b/src/base/utf/internal.h
new file mode 100644
index 0000000..49945dd
--- /dev/null
+++ b/src/base/utf/internal.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <u.h>
+#include <base.h>
+
+/*
+ * NOTE: we use the preprocessor to ensure we have unsigned constants.
+ * UTF-8 code:
+ * 1 byte:
+ * 0xxxxxxx
+ * 2 byte:
+ * 110xxxxx 10xxxxxx
+ * 3 byte:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 4 byte:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ */
+
+#define Tx     0x80u // 0b10000000 transfer header
+#define TMask  0x3Fu // 0b00111111 transfer mask
+
+#define TByte1 0xC0u // 0b11000000
+#define TByte2 0xE0u // 0b11100000
+#define TByte3 0xF0u // 0b11110000
+#define TByte4 0xF8u // 0b11111000
+
+#define RuneMask  0x1FFFFFu
+
+/* exclusive upper bound of the code points that fit in an N-byte sequence */
+#define Rune1Byte 0x000080u  // 1 << 7   (1 byte)
+#define Rune2Byte 0x000800u  // 1 << 11  (2 bytes)
+#define Rune3Byte 0x010000u  // 1 << 16  (3 bytes)
+#define Rune4Byte 0x200000u  // 1 << 21  (4 bytes)
+
+/* UTF-16 surrogate range: never valid as a code point of its own */
+#define RuneSurrogateMin 0x00D800u
+#define RuneSurrogateMax 0x00DFFFu
diff --git a/src/base/utf/len.c b/src/base/utf/len.c
new file mode 100644
index 0000000..8fbd679
--- /dev/null
+++ b/src/base/utf/len.c
@@ -0,0 +1,21 @@
+#include "internal.h"
+
+/* Count the runes in the NUL-terminated UTF-8 string s.
+ * Every utf8·decode call counts as one rune, whatever rune it reports. */
+int
+utf8·len(char *s)
+{
+    int  lead;
+    long count;
+    rune scratch;
+
+    for(count = 0; (lead = *(uchar*)s) != 0; count++){
+        if(lead < Tx){
+            s++; /* single byte */
+            continue;
+        }
+        s += utf8·decode(s, &scratch);
+    }
+
+    return count;
+}
diff --git a/src/base/utf/rules.mk b/src/base/utf/rules.mk
new file mode 100644
index 0000000..446c113
--- /dev/null
+++ b/src/base/utf/rules.mk
@@ -0,0 +1,71 @@
+UNICODE=14.0.0
+
+SRCS_$(d)+=\
+	$(d)/utf/encode.c\
+	$(d)/utf/decode.c\
+	$(d)/utf/decodeprev.c\
+	$(d)/utf/find.c\
+	$(d)/utf/findlast.c\
+	$(d)/utf/canfit.c\
+	$(d)/utf/runelen.c\
+	$(d)/utf/len.c\
+	$(d)/utf/runetype-$(UNICODE).c\
+	$(d)/utf/runewidth-$(UNICODE).c
+
+# ========================================================================
+# table generation
+
+# NOTE: this is pretty hacky...
+NEED_OBJS=\
+	$(OBJ_DIR)/base/arg.o\
+	$(OBJ_DIR)/base/utf/decode.o\
+	$(OBJ_DIR)/base/error/panicf.o\
+	$(OBJ_DIR)/base/io/readln.o\
+	$(OBJ_DIR)/base/io/open.o\
+	$(OBJ_DIR)/base/io/close.o
+
+$(d)/utf/vendor/common.o: $(d)/utf/vendor/common.c
+	$(COMPILE)
+
+# rune categories
+$(d)/utf/vendor/UnicodeData-$(UNICODE).txt: # -f: fail on HTTP errors; a failed download leaves no file behind
+	@echo "GET	UnicodeData.txt";\
+	curl -f https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@ || { rm -f $@; exit 1; }
+
+$(d)/utf/vendor/mkrunetype: $(d)/utf/vendor/mkrunetype.c $(d)/utf/vendor/common.o $(NEED_OBJS)
+	$(COMPLINK)
+
+GENS += $(d)/utf/vendor/mkrunetype
+
+$(d)/utf/runetype-$(UNICODE).c: $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/mkrunetype
+	@$(dir $@)vendor/mkrunetype $< > $@
+
+# rune widths
+$(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt: # -f: fail on HTTP errors; a failed download leaves no file behind
+	@echo "GET	EastAsianWidth.txt";\
+	curl -f https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@ || { rm -f $@; exit 1; }
+
+$(d)/utf/vendor/EmojiData-$(UNICODE).txt: # -f: fail on HTTP errors; a failed download leaves no file behind
+	@echo "GET	EmojiData.txt";\
+	curl -f https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@ || { rm -f $@; exit 1; }
+
+$(d)/utf/vendor/mkrunewidth: $(d)/utf/vendor/mkrunewidth.c $(d)/utf/vendor/common.o $(NEED_OBJS)
+	$(COMPLINK)
+
+GENS += $(d)/utf/vendor/mkrunewidth
+
+$(d)/utf/runewidth-$(UNICODE).c: $(d)/utf/vendor/mkrunewidth $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt $(d)/utf/vendor/EmojiData-$(UNICODE).txt
+	@$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@
+
+# grapheme boundaries
+$(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt: # -f: fail on HTTP errors; a failed download leaves no file behind
+	@echo "GET	GraphemeBreakProperty.txt";\
+	curl -f https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@ || { rm -f $@; exit 1; }
+
+$(d)/utf/vendor/mkgraphemedata: $(d)/utf/vendor/mkgraphemedata.c $(d)/utf/vendor/common.o $(NEED_OBJS)
+	$(COMPLINK)
+
+$(d)/utf/graphemedata-$(UNICODE).c: $(d)/utf/vendor/mkgraphemedata $(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt
+	$^ > $@
+
+GENS += $(d)/utf/vendor/mkgraphemedata
diff --git a/src/base/utf/runelen.c b/src/base/utf/runelen.c
new file mode 100644
index 0000000..dac7f15
--- /dev/null
+++ b/src/base/utf/runelen.c
@@ -0,0 +1,8 @@
+#include "internal.h"
+
+int
+utf8·runelen(rune r)
+{
+    byte scratch[10]; /* the encoded bytes are discarded; only the count is returned */
+    return utf8·encode(&r, scratch);
+}
diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c
new file mode 100644
index 0000000..fcf1177
--- /dev/null
+++ b/src/base/utf/vendor/common.c
@@ -0,0 +1,220 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// input functions
+
+/* Read one line from io into line[len] and split it in place at ';' into field[0..nfield-1].
+ * Returns ParseEOF when io·readln yields nothing, ParseSkip for empty/'#' lines, else ParseOK. */
+int
+parse(io·Stream *io, int nfield, char **field, int len, char *line)
+{
+    int n;
+    if((n=io·readln(io, len, line)) <= 0)
+        return ParseEOF;
+    if(n == len)
+        panicf("line too long");
+    if(line[n-1] != '\n')
+        panicf("invalid line: expected '\\n', found '%c'", line[n-1]);
+
+    line[n-1] = 0;
+    if(line[0] == '#' || line[0] == 0)
+        return ParseSkip;
+
+    /* tokenize line into fields */
+    n = 0;
+    field[n] = line;
+    while(*line){
+        if(*line == ';'){
+            *line = 0;
+            if(++n < nfield) /* never store past the caller's array */
+                field[n] = line+1;
+        }
+        line++;
+    }
+
+    /* line now points at the terminator, so report the first field instead */
+    if(n != nfield-1)
+        panicf("expected %d number of fields, got %d: %s", nfield, n+1, field[0]);
+    return ParseOK;
+}
+
+/* parse a string of digits 0-9 / A-F as a hexadecimal value; panics on any other character */
+int
+codepoint(char *s)
+{
+    int value, ch;
+
+    for(value = 0; (ch = *s) != 0; s++){
+        value <<= 4;
+        if('0' <= ch && ch <= '9')
+            value += ch - '0';
+        else if('A' <= ch && ch <= 'F')
+            value += ch - 'A' + 10;
+        else
+            panicf("bad codepoint char '%c'", ch);
+    }
+
+    return value;
+}
+
+/* Resolve one UnicodeData record to the inclusive range [*start, *stop]; a "<..., First>"
+ * record consumes the following "<..., Last>" line from utf8. Panics on malformed input. */
+void
+codepointrange(io·Stream *utf8, char *field[NumFields], int *start,  int *stop)
+{
+    int e, c;
+    char *other[NumFields], line[1024];
+
+    // XXX: the stop variable passes in the previous stopping character
+    e = *stop;
+    c = codepoint(field[Fcode]);
+    if(c >= NumRunes)
+        panicf("unexpected large codepoint %x", c);
+    if(c <= e)
+        panicf("bad code sequence: %x then %x", e, c);
+    e = c;
+
+    if(strstr(field[Fname], ", First>") != nil){
+        if(!parse(utf8, arrlen(other), other, arrlen(line), line))
+            panicf("range start at end of file");
+        if(strstr(other[Fname], ", Last>") == nil)
+            panicf("range start not followed by range end");
+        e = codepoint(other[Fcode]);
+        if(e <= c)
+            panicf("bad code sequence: %x then %x", c, e);
+        if(e >= NumRunes) /* callers index NumRunes-sized tables up to *stop */
+            panicf("unexpected large codepoint %x", e);
+        if(strcmp(field[Fcategory], other[Fcategory]) != 0)
+            panicf("range with mismatched category");
+    }
+    *start = c;
+    *stop  = e;
+}
+
+// -----------------------------------------------------------------------
+// output functions
+
+void
+putsearch(void)
+{
+    puts( /* prelude written to stdout: two includes plus the static rangesearch() helper */
+        "#include <u.h>\n"
+        "#include <base/utf.h>\n"
+        "\n"
+        "static\n"
+        "rune*\n"
+        "rangesearch(rune c, rune *t, int n, int ne)\n"
+        "{\n"
+        "   rune *p;\n"
+        "   int m;\n"
+        "   while(n > 1) {\n"
+        "   m = n >> 1;\n"
+        "   p = t + m*ne;\n"
+        "   if(c >= p[0]){\n"
+        "       t = p;\n"
+        "       n = n-m;\n"
+        "   }else\n"
+        "       n = m;\n"
+        "   }\n"
+        "   if(n && c >= t[0])\n"
+        "       return t;\n"
+        "   return 0;\n"
+        "}\n"
+    );
+
+}
+
+/* Emit the maximal runs of set entries in prop as a table named ident ([first, last] rows).
+ * A run of one is emitted only when force is set, otherwise it stays set; returns 1 if a table was written. */
+int
+putrange(char *ident, char *prop, int force)
+{
+    int lo, hi, opened = 0;
+
+    lo = 0;
+    while(lo < NumRunes){
+        if(!prop[lo]){
+            lo++;
+            continue;
+        }
+
+        /* extend the run, consuming everything after its first entry */
+        hi = lo + 1;
+        while(hi < NumRunes && prop[hi])
+            prop[hi++] = 0;
+
+        if(force || hi - lo > 1){
+            if(!opened){
+                printf("static rune %s[] = {\n", ident);
+                opened = 1;
+            }
+            prop[lo] = 0;
+            printf("\t0x%.4x, 0x%.4x,\n", lo, hi-1);
+        }
+        lo = hi;
+    }
+
+    if(opened)
+        printf("};\n\n");
+
+    return opened;
+}
+
+/* Emit runs of set entries spaced two apart as a table named ident ([first, last] rows).
+ * A lone entry is left set in prop; returns 1 when a table was written, else 0. */
+int
+putpair(char *ident, char *prop)
+{
+    int lo, hi, opened = 0;
+
+    lo = 0;
+    while(lo + 2 < NumRunes){
+        if(!prop[lo]){
+            lo++;
+            continue;
+        }
+        /* walk every other entry, consuming the ones that are set */
+        hi = lo + 2;
+        while(hi < NumRunes && prop[hi]){
+            prop[hi] = 0;
+            hi += 2;
+        }
+        if(hi != lo + 2){
+            if(!opened){
+                printf("static rune %s[] = {\n", ident);
+                opened = 1;
+            }
+            prop[lo] = 0;
+            printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 2);
+        }
+        lo = hi;
+    }
+
+    if(opened)
+        printf("};\n\n");
+    return opened;
+}
+
+/* Emit every entry still set in prop as a one-column table named ident, clearing it.
+ * Returns 1 when a table was written, 0 when prop had nothing set. */
+int
+putsingle(char *ident, char *prop)
+{
+    int c, opened = 0;
+
+    for(c = 0; c < NumRunes; c++){
+        if(prop[c]){
+            if(!opened){
+                printf("static rune %s[] = {\n", ident);
+                opened = 1;
+            }
+            prop[c] = 0;
+            printf("\t0x%.4x,\n", c);
+        }
+    }
+
+    if(opened)
+        printf("};\n\n");
+
+    return opened;
+}
diff --git a/src/base/utf/vendor/common.h b/src/base/utf/vendor/common.h
new file mode 100644
index 0000000..51a53bd
--- /dev/null
+++ b/src/base/utf/vendor/common.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <u.h>
+#include <base.h>
+
+enum
+{
+    // Fields inside UnicodeData.txt
+    Fcode,
+    Fname,
+    Fcategory,
+    Fcombine,
+    Fbidir,
+    Fdecomp,
+    Fdecimal,
+    Fdigit,
+    Fnumeric,
+    Fmirror,
+    Foldname,
+    Fcomment,
+    Fupper,
+    Flower,
+    Ftitle,
+
+    NumFields,
+    NumRunes = 1 << 21,
+};
+
+/* input functions */
+enum
+{
+    ParseEOF,  /* 0, so callers can loop with while(parse(...)) */
+    ParseOK,
+    ParseSkip, /* empty or '#' line: returned before field[] is filled in */
+};
+
+int  parse(io·Stream *io, int nfield, char **field, int len, char *line);
+int  codepoint(char *s);
+void codepointrange(io·Stream *utf8, char *field[NumFields], int *start,  int *stop);
+
+/* output functions */
+void putsearch(void);
+int  putrange(char *ident, char *prop, int force);
+int  putpair(char *ident, char *prop);
+int  putsingle(char *ident, char *prop);
diff --git a/src/base/utf/vendor/mkgraphemedata.c b/src/base/utf/vendor/mkgraphemedata.c
new file mode 100644
index 0000000..ce5a952
--- /dev/null
+++ b/src/base/utf/vendor/mkgraphemedata.c
@@ -0,0 +1,24 @@
+#include <u.h>
+#include <base.h>
+#include <libutf.h>
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+static
+void
+usage(void) /* print the synopsis on stderr and exit with status 1 */
+{
+    fputs("usage: mkgraphemedata <GraphemeBreakProperty.txt>\n", stderr);
+    exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+    io·Stream *utf8; /* not used yet */
+    char line[1024]; /* not used yet */
+    /* NOTE(review): stub — no flag cases are handled and nothing is read or written */
+    ARGBEGIN{
+    }ARGEND;
+}
diff --git a/src/base/utf/vendor/mkrunetype.c b/src/base/utf/vendor/mkrunetype.c
new file mode 100644
index 0000000..b33df32
--- /dev/null
+++ b/src/base/utf/vendor/mkrunetype.c
@@ -0,0 +1,390 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// globals
+
+#define OFFSET (1 << 20) /* bias added to every mapping delta; the generated code subtracts it again */
+#define DELTA(mapx, x)  (OFFSET + (mapx) - (x))
+
+// TODO: use bitarrays. will reduce executable size 8x
+struct Table
+{
+    /* properties */
+    char isspace[NumRunes];
+    char isalpha[NumRunes];
+    char ismark[NumRunes];  /* filled by main, but no predicate is generated for it */
+    char isdigit[NumRunes];
+    char isupper[NumRunes];
+    char islower[NumRunes];
+    char istitle[NumRunes];
+    char ispunct[NumRunes];
+    char issymbl[NumRunes]; /* filled by main, but no predicate is generated for it */
+    char iscntrl[NumRunes]; /* filled by main, but no predicate is generated for it */
+
+    char combine[NumRunes]; /* neither written nor read in this file */
+
+    /* transformations */
+    int  toupper[NumRunes]; /* toupper[c] is the rune c maps to; main seeds it with c itself */
+    int  tolower[NumRunes];
+    int  totitle[NumRunes];
+};
+
+static struct Table table;
+
+// -----------------------------------------------------------------------
+// internal functions
+
+static
+int
+isrange(char *label, char *prop, int force)
+{
+    char ident[128]; /* holds "is<label>_range" */
+    /* snprintf returns the untruncated length, so any value >= the size means truncation */
+    if(snprintf(ident, arrlen(ident), "is%s_range", label) >= (int)arrlen(ident))
+        panicf("out of identifier space\n");
+    return putrange(ident, prop, force);
+}
+
+static
+int
+ispair(char *label, char *prop)
+{
+    char ident[128]; /* holds "is<label>_pair" */
+    /* snprintf returns the untruncated length, so any value >= the size means truncation */
+    if(snprintf(ident, arrlen(ident), "is%s_pair", label) >= (int)arrlen(ident))
+        panicf("out of identifier space\n");
+    return putpair(ident, prop);
+}
+
+static
+int
+issingle(char *label, char *prop)
+{
+    char ident[128]; /* holds "is<label>_single" */
+    /* snprintf returns the untruncated length, so any value >= the size means truncation */
+    if(snprintf(ident, arrlen(ident), "is%s_single", label) >= (int)arrlen(ident))
+        panicf("out of identifier space\n");
+    return putsingle(ident, prop);
+}
+
+static
+void
+makeis(char *label, char *table, int pairs, int onlyranges)
+{
+    int hasr, hasp=0, hass=0;
+    /* write the tables for one property, then a utf8·is<label>() that searches the ones written */
+    hasr = isrange(label, table, onlyranges); /* onlyranges forces runs of one into the range table */
+    if(!onlyranges && pairs)
+        hasp = ispair(label, table);
+    if(!onlyranges)
+        hass = issingle(label, table);
+
+    printf(
+        "int\n"
+        "utf8·is%s(rune c)\n"
+        "{\n"
+        "    rune *p;\n"
+        "\n",
+        label);
+
+    if(hasr){
+        printf(
+            "   p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
+            "   if(p && c >= p[0] && c <= p[1])\n"
+            "       return 1;\n",
+            label, label);
+    }
+
+    if(hasp){
+        printf(
+            "   p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
+            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+            "       return 1;\n",
+            label, label);
+    }
+
+    if(hass)
+        printf(
+            "   p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
+            "   if(p && c == p[0])\n"
+            "       return 1;\n",
+            label, label);
+
+    printf(
+        "   return 0;\n"
+        "}\n"
+        "\n");
+}
+
+static
+int
+torange(char *label, int *index, int force)
+{
+    int l, r, d, start = 0;
+    /* write runs of consecutive runes sharing one mapping delta as (first, last, biased delta) rows */
+    for(l = 0; l < NumRunes; ){
+        if(index[l] == l){
+            l++;
+            continue;
+        }
+        /* index[l] maps l elsewhere: a run starts here */
+        d = DELTA(index[l], l);
+        if(d != (rune)d)
+            panicf("bad map delta %d", d);
+        /* consume the followers that share the delta by resetting them to identity */
+        for(r = l+1; r < NumRunes; r++){
+            if(DELTA(index[r], r) != d)
+                break;
+            index[r] = r;
+        }
+        /* a run of one is written only when forced; otherwise index[l] is left in place */
+        if(force || r != l + 1){
+            if(!start){
+                printf("static rune to%s_range[] = {\n", label);
+                start = 1;
+            }
+            index[l] = l;
+            printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d);
+        }
+        l = r;
+    }
+    if(start)
+        printf("};\n\n");
+
+    return start; /* 1 when a table was written */
+}
+
+static
+int
+topair(char *label, int *index)
+{
+    int l, r, d, start = 0;
+    /* write runs of every-other rune sharing one mapping delta as (first, last, biased delta) rows */
+    for(l = 0; l + 2 < NumRunes; ){
+        if(index[l] == l){
+            l++;
+            continue;
+        }
+
+        d = DELTA(index[l], l);
+        if(d != (rune)d)
+            panicf("bad delta %d", d);
+        /* step by two, resetting each follower with the same delta to identity */
+        for(r = l+2; r < NumRunes; r += 2){
+            if(DELTA(index[r], r) != d)
+                break;
+            index[r] = r;
+        }
+        /* nothing consumed means a lone mapping: index[l] is left in place */
+        if(r > l+2){
+            if(!start){
+                printf("static rune to%s_pair[] = {\n", label);
+                start = 1;
+            }
+            index[l] = l;
+            printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d);
+        }
+
+        l = r;
+    }
+    if(start)
+        printf("};\n\n");
+
+    return start; /* 1 when a table was written */
+}
+
+static
+int
+tosingle(char *label, int *index)
+{
+    /* write every remaining non-identity mapping as a (rune, biased delta) row and reset it;
+     * returns 1 when a to<label>_single table was written */
+    int c, delta, opened = 0;
+
+    for(c = 0; c < NumRunes; c++){
+        if(index[c] != c){
+            delta = DELTA(index[c], c);
+            if(delta != (rune)delta)
+                panicf("bad map delta %d", delta);
+            if(!opened){
+                printf("static rune to%s_single[] = {\n", label);
+                opened = 1;
+            }
+            index[c] = c;
+            printf("\t0x%.4x, %d,\n", c, delta);
+        }
+    }
+
+    if(opened)
+        printf("};\n\n");
+    return opened;
+}
+
+static
+void
+mkto(char *label, int *index, int pairs, int onlyrange)
+{
+    int hasr, hasp=0, hass=0;
+    /* write the tables for one case mapping, then a utf8·to<label>() that searches the ones written */
+    hasr = torange(label, index, onlyrange); /* force runs of one only when no other table follows */
+    if(!onlyrange && pairs)
+        hasp = topair(label, index);
+    if(!onlyrange)
+        hass = tosingle(label, index);
+
+    printf(
+        "rune\n"
+        "utf8·to%s(rune c)\n"
+        "{\n"
+        "	rune *p;\n"
+        "\n",
+        label);
+
+    if(hasr)
+        printf(
+            "   p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
+            "   if(p && c >= p[0] && c <= p[1])\n"
+            "       return c + p[2] - %d;\n",
+            label, label, OFFSET);
+
+    if(hasp)
+        printf(
+            "   p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
+            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+            "       return c + p[2] - %d;\n",
+            label, label, OFFSET);
+
+    if(hass)
+        printf(
+            "   p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
+            "   if(p && c == p[0])\n"
+            "       return c + p[1] - %d;\n",
+            label, label, OFFSET);
+
+    /* a rune found in no table maps to itself */
+    printf(
+        "   return c;\n"
+        "}\n"
+        "\n"
+    );
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+static
+void
+usage(void) /* print the synopsis on stderr and exit with status 1 */
+{
+    fputs("usage: mkrunetype <UnicodeData.txt>\n", stderr);
+    exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+    int i, sc, c, ec;
+    io·Stream *utf8;
+    char *prop, *field[NumFields], line[1024];
+
+    ARGBEGIN{
+    }ARGEND;
+
+    if(argc != 1)
+        usage();
+
+    if(!(utf8 = io·open(argv[0], "r")))
+        panicf("can't open %s\n", argv[0]);
+
+    /* by default each character maps to itself */
+    for(i = 0; i < NumRunes; i++) {
+        table.toupper[i] = i;
+        table.tolower[i] = i;
+        table.totitle[i] = i;
+    }
+
+    /* ensure all C local white space characters pass */
+    table.isspace['\t'] = 1;
+    table.isspace['\n'] = 1;
+    table.isspace['\r'] = 1;
+    table.isspace['\f'] = 1;
+    table.isspace['\v'] = 1;
+    table.isspace[0x85] = 1;
+
+    ec = -1; /* codepointrange() insists that each code point exceeds the previous stop */
+    // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
+    while(parse(utf8, arrlen(field), field, arrlen(line), line)){
+        /* parse unicode range */
+        codepointrange(utf8, field, &sc, &ec);
+        prop = field[Fcategory];
+
+        for(c = sc; c <= ec; c++){
+            /* grab properties */
+            switch(prop[0]){
+            case 'L':
+                table.isalpha[c] = 1;
+                switch(prop[1]){
+                case 'u': table.isupper[c] = 1; break;
+                case 'l': table.islower[c] = 1; break;
+                case 't': table.istitle[c] = 1; break;
+                case 'm': break; // modifier letters
+                case 'o': break; // ideograph letters
+                default:
+                    goto badproperty;
+                }
+                break;
+
+            case 'Z':
+                table.isspace[c] = 1;
+                break;
+
+            case 'M':
+                table.ismark[c] = 1;
+                break;
+
+            case 'N':
+                table.isdigit[c] = 1;
+                break;
+
+            case 'P':
+                table.ispunct[c] = 1;
+                break;
+
+            case 'S':
+                table.issymbl[c] = 1;
+                break;
+
+            case 'C':
+                table.iscntrl[c] = 1;
+                break;
+
+            default: badproperty:
+                panicf("unrecognized category '%s'", prop);
+            }
+            /* grab transformations */
+            if(*field[Fupper])
+                table.toupper[c] = codepoint(field[Fupper]);
+            if(*field[Flower])
+                table.tolower[c] = codepoint(field[Flower]);
+            if(*field[Ftitle])
+                table.totitle[c] = codepoint(field[Ftitle]);
+        }
+    }
+    io·close(utf8);
+
+    putsearch();
+    /* arguments: label, table, pairs, onlyranges; ismark/issymbl/iscntrl get no predicate */
+    makeis("space", table.isspace, 0, 1);
+    makeis("digit", table.isdigit, 0, 1);
+    makeis("alpha", table.isalpha, 0, 0);
+    makeis("upper", table.isupper, 1, 0);
+    makeis("lower", table.islower, 1, 0);
+    makeis("title", table.istitle, 1, 0);
+    makeis("punct", table.ispunct, 1, 0);
+
+    mkto("upper", table.toupper, 1, 0);
+    mkto("lower", table.tolower, 1, 0);
+    mkto("title", table.totitle, 1, 0);
+
+	return 0;
+}
diff --git a/src/base/utf/vendor/mkrunewidth.c b/src/base/utf/vendor/mkrunewidth.c
new file mode 100644
index 0000000..14e6973
--- /dev/null
+++ b/src/base/utf/vendor/mkrunewidth.c
@@ -0,0 +1,325 @@
+#include "common.h"
+
+/*
+ * inspired by design choices in utf8proc/charwidths.jl
+ * all widths default to 1 unless they fall within the categories:
+ *      1. Mn 2. Mc 3. Me 4. Zl
+ *      5. Zp 6. Cc 7. Cf 8. Cs
+ * these default to zero width
+ */
+enum
+{
+    /* width ? */
+    WidthNeutral,   /* (N) practically treated like narrow but unclear ... */
+    WidthAmbiguous, /* (A) sometimes wide and sometimes not... */
+    /* width 1 */
+    WidthHalf,      /* (H) = to narrow (compatibility equivalent)  */
+    WidthNarrow,    /* (Na) ASCII width */
+    /* width 2 */
+    WidthWide,      /* (W) 2x width */
+    WidthFull,      /* (F) = to wide (compatibility equivalent) */
+};
+
+struct Table
+{
+    char width[3][NumRunes]; /* width[w][c] is 1 when rune c is assigned display width w */
+};
+
+static struct Table table;
+
+// -----------------------------------------------------------------------
+// internal functions
+
+static
+void
+parse_category(char *path)
+{
+    /* Seed table.width from the general category of every record in the file at path:
+     * Mn/Mc/Me, Zl/Zp and Cc/Cf/Cs get width 0, every other category gets width 1. */
+    int sc, c, ec, w;
+    io·Stream *utf8;
+    char *prop, *field[NumFields], line[1024];
+
+    if(!(utf8 = io·open(path, "r")))
+        panicf("can't open %s\n", path);
+
+    // NOTE: we don't check for comments here
+    ec = -1;
+    while(parse(utf8, arrlen(field), field, arrlen(line), line)){
+        codepointrange(utf8, field, &sc, &ec);
+        prop = field[Fcategory];
+        switch(prop[0]){
+        case 'M':
+            switch(prop[1]){
+            case 'n': case 'c': case 'e':
+                w = 0;
+                break;
+            default:
+                w = 1;
+                break;
+            }
+            break;
+        case 'Z':
+            switch(prop[1]){
+                case 'l': case 'p':
+                    w = 0;
+                    break;
+                default:
+                    w = 1;
+                    break;
+            }
+            break;
+        case 'C':
+            switch(prop[1]){
+                case 'c': case 'f': case 's':
+                    w = 0;
+                    break;
+                default:
+                    w = 1;
+                    break;
+            }
+            break; /* without this the default below reset w to 1 for Cc/Cf/Cs */
+        default:
+            w = 1;
+        }
+        for(c = sc; c <= ec; c++)
+            table.width[w][c] = 1;
+    }
+
+    io·close(utf8);
+}
+
+static
+void
+coderange(char *field, int *l, int *r)
+{
+    /* parse "XXXX" or "XXXX..YYYY" (split in place at the dots) into the inclusive range [*l, *r] */
+    char *dots = strstr(field, "..");
+    if(!dots){
+        *l = *r = codepoint(field);
+        return;
+    }
+    dots[0] = dots[1] = 0;
+    *l = codepoint(field);
+    *r = codepoint(dots + 2);
+}
+
+static
+void
+parse_eawidths(char *path)
+{
+    /* Override table.width from the width classes in the file at path: Na and H become
+     * width 1, W and F width 2; A and other N records leave the existing entry untouched. */
+    int at, w;
+    int l, c, r;
+    io·Stream *utf8;
+    char *field[2], line[1024];
+
+    if(!(utf8 = io·open(path, "r")))
+        panicf("can't open %s\n", path);
+    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+        if(at == ParseSkip)
+            continue;
+
+        switch(field[1][0]){
+        case 'A': continue;
+        case 'N':
+            if(field[1][1] != 'a')
+                continue;
+        /* fallthrough */
+        case 'H': w = 1; break;
+        case 'W': /* fallthrough */
+        case 'F': w = 2; break;
+        default:
+            panicf("malformed east asian width class: %s\n", field[1]);
+        }
+
+        coderange(field[0], &l, &r);
+        for(c=l; c <= r; c++){
+            /* ensure it only exists in one table */
+            table.width[w][c]       = 1;
+            table.width[(w+1)%3][c] = 0;
+            table.width[(w+2)%3][c] = 0;
+        }
+    }
+    io·close(utf8);
+}
+
+static
+void
+parse_emoji(char *path)
+{
+    /* Force width 2 for every range whose property field mentions Emoji_Presentation in the
+     * file at path, clearing those runes from the width 0 and width 1 tables. */
+    int at, l, c, r;
+    io·Stream *utf8;
+    char *s, *field[2], line[1024];
+
+    if(!(utf8 = io·open(path, "r")))
+        panicf("can't open %s\n", path);
+    while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+        if(at == ParseSkip)
+            continue;
+
+        /* only override emoji presentation */
+        if(!strstr(field[1], "Emoji_Presentation"))
+            continue;
+
+        /* trim trailing space */
+        for(s=field[0]; *s; s++){
+            if(*s == ' ')
+                *s = 0;
+        }
+        coderange(field[0], &l, &r);
+        for(c=l; c <= r; c++){
+            table.width[0][c] = 0;
+            table.width[1][c] = 0;
+            table.width[2][c] = 1;
+        }
+    }
+
+    io·close(utf8);
+}
+
+/* output functions */
+static
+void
+maketable(char *label, char *table, int pairs, int onlyranges)
+{
+    int r, p=0, s=0;
+    char ident[3][128];
+    /* write the tables for one width class, then a static is<label>() that searches the ones written */
+    enum
+    {
+        Irange,
+        Ipair,
+        Isingle,
+    };
+
+    /* ranges; snprintf returns the untruncated length, so >= the size means truncation */
+    if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) >= (int)arrlen(ident[Irange]))
+        panicf("out of identifier space\n");
+    r = putrange(ident[Irange], table, onlyranges);
+
+    if(!onlyranges && pairs){
+        if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) >= (int)arrlen(ident[Ipair]))
+            panicf("out of identifier space\n");
+        p = putpair(ident[Ipair], table);
+    }
+    if(!onlyranges){
+        if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) >= (int)arrlen(ident[Isingle]))
+            panicf("out of identifier space\n");
+
+        s = putsingle(ident[Isingle], table);
+    }
+
+    printf(
+        "static int\n"
+        "is%s(rune c)\n"
+        "{\n"
+        "    rune *p;\n"
+        "\n",
+        label);
+
+    if(r){
+        printf(
+            "   p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
+            "   if(p && c >= p[0] && c <= p[1])\n"
+            "       return 1;\n",
+            ident[Irange], ident[Irange]);
+    }
+
+    if(p){
+        printf(
+            "   p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
+            "   if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
+            "       return 1;\n",
+            ident[Ipair], ident[Ipair]);
+    }
+
+    if(s)
+        printf(
+            "   p = rangesearch(c, %s, arrlen(%s), 1);\n"
+            "   if(p && c == p[0])\n"
+            "       return 1;\n",
+            ident[Isingle], ident[Isingle]);
+
+    printf(
+        "   return 0;\n"
+        "}\n"
+        "\n");
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+static
+void
+usage(void) /* print the synopsis on stderr and exit with status 1 */
+{
+    fputs("usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n", stderr);
+    exit(1);
+}
+
+#define SETW0(c) /* rune c: width 0 only; one parenthesized expression */ \
+    (table.width[0][(c)] = 1, \
+     table.width[1][(c)] = 0, \
+     table.width[2][(c)] = 0)
+
+#define SETW1(c) /* rune c: width 1 only; one parenthesized expression */ \
+    (table.width[0][(c)] = 0, \
+     table.width[1][(c)] = 1, \
+     table.width[2][(c)] = 0)
+
+#define SETW2(c) /* rune c: width 2 only; one parenthesized expression */ \
+    (table.width[0][(c)] = 0, \
+     table.width[1][(c)] = 0, \
+     table.width[2][(c)] = 1)
+
+
+int
+main(int argc, char *argv[])
+{
+    int c;
+
+    ARGBEGIN{
+    }ARGEND;
+
+    if(argc != 3)
+        usage();
+    /* each pass may overwrite the previous one: categories, then east asian widths, then emoji */
+    parse_category(*argv++);
+    parse_eawidths(*argv++);
+    parse_emoji(*argv);
+
+    /* overrides */
+    SETW0(0x2028);
+    SETW0(0x2029);
+
+    SETW1(0x00AD);
+
+    /* simple checking: no rune may sit in more than one width table */
+    for(c=0; c<NumRunes; c++){
+        if(table.width[0][c] + table.width[1][c] + table.width[2][c] > 1)
+            panicf("improper table state");
+    }
+
+    putsearch();
+
+    maketable("width0", table.width[0], 1, 0); /* iswidth0() is generated, but the function printed below never calls it */
+    maketable("width1", table.width[1], 1, 0);
+    maketable("width2", table.width[2], 1, 0);
+
+    puts(
+        "\n"
+        "int\n"
+        "utf8·runewidth(rune c)\n"
+        "{\n"
+        "   if(iswidth1(c))\n"
+        "       return 1;\n"
+        "   if(iswidth2(c))\n"
+        "       return 2;\n"
+        "   return 0;\n"
+        "}"
+    );
+}