From a9bfe650038afea8b751175cac16f6027345e45f Mon Sep 17 00:00:00 2001 From: Nicholas Date: Sat, 20 Nov 2021 10:53:19 -0800 Subject: Chore: reorganize libutf and libfmt into base I found the split to be arbitrary. Better to include the functionality in the standard library. I also split the headers to allow for more granular inclusion (but the library is still monolithic). The only ugliness is the circular dependency introduced with libutf's generated functions. We put explicit prereqs with the necessary object files instead. --- src/base/arg.c | 70 --- src/base/fmt/buffer.c | 60 ++ src/base/fmt/do.c | 728 +++++++++++++++++++++++ src/base/fmt/esprint.c | 14 + src/base/fmt/float.c | 1077 ++++++++++++++++++++++++++++++++++ src/base/fmt/fprint.c | 14 + src/base/fmt/internal.h | 15 + src/base/fmt/locale.c | 16 + src/base/fmt/nsprint.c | 14 + src/base/fmt/open.c | 34 ++ src/base/fmt/panic.c | 15 + src/base/fmt/print.c | 13 + src/base/fmt/rules.mk | 21 + src/base/fmt/sprint.c | 19 + src/base/fmt/test.c | 72 +++ src/base/fmt/vesprint.c | 26 + src/base/fmt/vfprint.c | 19 + src/base/fmt/vnsprint.c | 26 + src/base/fmt/vprint.c | 19 + src/base/fmt/vwrite.c | 26 + src/base/fmt/write.c | 22 + src/base/io/close.c | 7 + src/base/io/open.c | 6 - src/base/io/rules.mk | 3 +- src/base/rules.mk | 9 +- src/base/utf/canfit.c | 23 + src/base/utf/decode.c | 98 ++++ src/base/utf/decodeprev.c | 60 ++ src/base/utf/encode.c | 69 +++ src/base/utf/find.c | 31 + src/base/utf/findlast.c | 32 + src/base/utf/internal.h | 37 ++ src/base/utf/len.c | 21 + src/base/utf/rules.mk | 71 +++ src/base/utf/runelen.c | 8 + src/base/utf/vendor/common.c | 220 +++++++ src/base/utf/vendor/common.h | 45 ++ src/base/utf/vendor/mkgraphemedata.c | 24 + src/base/utf/vendor/mkrunetype.c | 390 ++++++++++++ src/base/utf/vendor/mkrunewidth.c | 325 ++++++++++ src/cmd/core/basename.c | 1 - src/cmd/core/cat.c | 1 - src/cmd/dwm/dwm.h | 1 - src/cmd/menu/menu.h | 1 - src/cmd/rc/rc.h | 2 +- src/cmd/term/term.h | 1 - src/libbio/rules.mk | 5 +- src/libc/rules.mk | 20 - src/libc/stdio.c | 59 -- src/libc/string.c | 80 --- src/libfmt/buffer.c | 60 -- src/libfmt/do.c | 728 ----------------------- src/libfmt/esprint.c | 14 - src/libfmt/float.c | 1077 ---------------------------------- src/libfmt/fprint.c | 14 - src/libfmt/internal.h | 17 - src/libfmt/locale.c | 16 - src/libfmt/nsprint.c | 14 - src/libfmt/open.c | 34 -- src/libfmt/panic.c | 15 - src/libfmt/print.c | 13 - src/libfmt/rules.mk | 36 -- src/libfmt/sprint.c | 19 - src/libfmt/test.c | 72 --- src/libfmt/vesprint.c | 26 - src/libfmt/vfprint.c | 19 - src/libfmt/vnsprint.c | 26 - src/libfmt/vprint.c | 19 - src/libfmt/vwrite.c | 26 - src/libfmt/write.c | 22 - src/libutf/canfit.c | 23 - src/libutf/decode.c | 98 ---- src/libutf/decodeprev.c | 60 -- src/libutf/encode.c | 69 --- src/libutf/find.c | 31 - src/libutf/findlast.c | 32 - src/libutf/internal.h | 38 -- src/libutf/len.c | 21 - src/libutf/rules.mk | 76 --- src/libutf/runelen.c | 8 - src/libutf/vendor/common.c | 220 ------- src/libutf/vendor/common.h | 46 -- src/libutf/vendor/mkgraphemedata.c | 24 - src/libutf/vendor/mkrunetype.c | 390 ------------ src/libutf/vendor/mkrunewidth.c | 325 ---------- src/rules.mk | 6 - 86 files changed, 3722 insertions(+), 3982 deletions(-) create mode 100644 src/base/fmt/buffer.c create mode 100644 src/base/fmt/do.c create mode 100644 src/base/fmt/esprint.c create mode 100644 src/base/fmt/float.c create mode 100644 src/base/fmt/fprint.c create mode 100644 src/base/fmt/internal.h create mode 100644 src/base/fmt/locale.c create mode 100644 src/base/fmt/nsprint.c create mode 100644 src/base/fmt/open.c create mode 100644 src/base/fmt/panic.c create mode 100644 src/base/fmt/print.c create mode 100644 src/base/fmt/rules.mk create mode 100644 src/base/fmt/sprint.c create mode 100644 src/base/fmt/test.c create mode 100644 src/base/fmt/vesprint.c create mode 100644 src/base/fmt/vfprint.c create mode 100644 src/base/fmt/vnsprint.c create mode 100644 src/base/fmt/vprint.c create mode 100644 src/base/fmt/vwrite.c create mode 100644 src/base/fmt/write.c create mode 100644 src/base/io/close.c create mode 100644 src/base/utf/canfit.c create mode 100644 src/base/utf/decode.c create mode 100644 src/base/utf/decodeprev.c create mode 100644 src/base/utf/encode.c create mode 100644 src/base/utf/find.c create mode 100644 src/base/utf/findlast.c create mode 100644 src/base/utf/internal.h create mode 100644 src/base/utf/len.c create mode 100644 src/base/utf/rules.mk create mode 100644 src/base/utf/runelen.c create mode 100644 src/base/utf/vendor/common.c create mode 100644 src/base/utf/vendor/common.h create mode 100644 src/base/utf/vendor/mkgraphemedata.c create mode 100644 src/base/utf/vendor/mkrunetype.c create mode 100644 src/base/utf/vendor/mkrunewidth.c delete mode 100644 src/libc/rules.mk delete mode 100644 src/libc/stdio.c delete mode 100644 src/libc/string.c delete mode 100644 src/libfmt/buffer.c delete mode 100644 src/libfmt/do.c delete mode 100644 src/libfmt/esprint.c delete mode 100644 src/libfmt/float.c delete mode 100644 src/libfmt/fprint.c delete mode 100644 src/libfmt/internal.h delete mode 100644 src/libfmt/locale.c delete mode 100644 src/libfmt/nsprint.c delete mode 100644 src/libfmt/open.c delete mode 100644 src/libfmt/panic.c delete mode 100644 src/libfmt/print.c delete mode 100644 src/libfmt/rules.mk delete mode 100644 src/libfmt/sprint.c delete mode 100644 src/libfmt/test.c delete mode 100644 src/libfmt/vesprint.c delete mode 100644 src/libfmt/vfprint.c delete mode 100644 src/libfmt/vnsprint.c delete mode 100644 src/libfmt/vprint.c delete mode 100644 src/libfmt/vwrite.c delete mode 100644 src/libfmt/write.c delete mode 100644 src/libutf/canfit.c delete mode 100644 src/libutf/decode.c delete mode 100644 src/libutf/decodeprev.c delete mode 100644 src/libutf/encode.c delete mode 100644 src/libutf/find.c delete mode 100644 src/libutf/findlast.c delete mode 100644 src/libutf/internal.h delete mode 100644 src/libutf/len.c delete mode 100644 src/libutf/rules.mk delete mode 100644 src/libutf/runelen.c delete mode 100644 src/libutf/vendor/common.c delete mode 100644 src/libutf/vendor/common.h delete mode 100644 src/libutf/vendor/mkgraphemedata.c delete mode 100644 src/libutf/vendor/mkrunetype.c delete mode 100644 src/libutf/vendor/mkrunewidth.c (limited to 'src') diff --git a/src/base/arg.c b/src/base/arg.c index 269043e..64e4dd6 100644 --- a/src/base/arg.c +++ b/src/base/arg.c @@ -1,71 +1 @@ -#include -#include - -// NOTE: this utf8 bit is copied from libunicode to remove the hard dependency just for ARG_BEGIN. - -#define UTFmax 4 -#define RuneSync 0x80u -#define RuneSelf 0x80u -#define RuneErr 0xFFFDu -#define RuneMax 0x10FFFFu -#define RuneMask 0x1FFFFFu - -#define Bit(i) (7-(i)) -/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */ -#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) -/* 0000 0000 0000 0111 1111 1111 */ -#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) -enum -{ - Bitx = Bit(1), - Tx = Tbyte(1), - Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, - - Maskx = (1 << Bitx) - 1, /* 0011 1111 */ - Testx = Maskx ^ 0xff, /* 1100 0000 */ - - SurrogateMin = 0xD800, - SurrogateMax = 0xDFFF, - Bad = RuneErr, -}; - - -int -arg·bytetorune(uint32* r, byte* s) -{ - int c[4], i; - uint32 l; - - c[0] = *(ubyte*)(s); - if(c[0] < Tx) { - *r = c[0]; - return 1; - } - - l = c[0]; - for(i = 1; i < UTFmax; i++) { - c[i] = *(ubyte*)(s+i); - c[i] ^= Tx; - if (c[i] & Testx) goto bad; - - l = (l << Bitx) | c[i]; - if(c[0] < Tbyte(i + 2)) { - l &= RuneX(i + 1); - if (i == 1) { - if (c[0] < Tbyte(2) || l <= Rune1) - goto bad; - } else if (l <= RuneX(i) || l > RuneMax) - goto bad; - if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) - goto bad; - - *r = l; - return i + 1; - } - } -bad: - *r = RuneErr; - return 1; -} - char *argv0; diff --git a/src/base/fmt/buffer.c b/src/base/fmt/buffer.c new file mode 100644 index 0000000..0099e72 --- /dev/null +++ b/src/base/fmt/buffer.c @@ -0,0 +1,60 @@ +#include "internal.h" + +static int +flush(fmt·State *io) +{ + int n; + char *s; + + void *heap = io->heap; + mem·Reallocator mem = io->mem; + + if(!io->buffer.beg) + return 0; + + n = 2*(uintptr)io->file; + s = io->buffer.beg; + + io->buffer.beg = mem.realloc(heap, io->buffer.beg, n, 1); + if(!io->buffer.beg){ + io->file = io->buffer.cur = io->buffer.end = nil; + mem.free(heap, s); + return 0; + } + io->file = (void*)(uintptr)n; + io->buffer.cur = io->buffer.beg + (io->buffer.cur - s); + io->buffer.end = io->buffer.beg + n - 1; + + return 1; +} + +int +fmt·make(mem·Reallocator mem, void *heap, fmt·State *io) +{ + int n; + + memset(io, 0, sizeof(*io)); + + n = 32; + io->buffer.beg = io->buffer.cur = mem.alloc(heap, n, 1); + if(!io->buffer.beg) + return -1; + io->buffer.end = io->buffer.beg + n - 1; + + io->flush = flush; + io->file = (void*)(uintptr)n; + io->n = 0; + + fmt·setlocale(io, nil, nil, nil); + return 0; +} + +void +fmt·free(fmt·State *io) +{ + void *heap = io->heap; + mem·Reallocator mem = io->mem; + + mem.free(heap, io->buffer.beg); + io->buffer.beg = io->buffer.cur = io->buffer.end = nil; +} diff --git a/src/base/fmt/do.c b/src/base/fmt/do.c new file mode 100644 index 0000000..bd2e65c --- /dev/null +++ b/src/base/fmt/do.c @@ -0,0 +1,728 @@ +#include "internal.h" +#include + +#define MaxFmt 128 +#define atomic·load(p) (*(p)) + +// ----------------------------------------------------------------------- +// globals + +/* built in verbs */ +static int fmtflag(fmt·State *); +static int fmtpercent(fmt·State *); +static int fmtrune(fmt·State *); +static int fmtfloat(fmt·State *); +static int fmtutf8(fmt·State *); +static int fmtint(fmt·State *); +static int fmtchar(fmt·State *); +static int fmtcount(fmt·State *); +static int fmtstring(fmt·State *); +static int fmterror(fmt·State *); + +static int badfmt(fmt·State *); + +static struct +{ + volatile int len; + Verb verb[MaxFmt]; +} formatter = +{ + 30, + { + {' ', fmtflag}, + {'#', fmtflag}, + {'%', fmtpercent}, + {'\'',fmtflag}, + {'+', fmtflag}, + {',', fmtflag}, + {'-', fmtflag}, + {'C', fmtrune}, + {'E', fmtfloat}, + {'F', fmtfloat}, + {'G', fmtfloat}, + {'L', fmtflag}, + {'S', fmtutf8}, + {'X', fmtint}, + {'b', fmtint}, + {'c', fmtchar}, + {'d', fmtint}, + {'e', fmtfloat}, + {'f', fmtfloat}, + {'g', fmtfloat}, + {'h', fmtflag}, + {'i', fmtint}, + {'l', fmtflag}, + {'n', fmtcount}, + {'o', fmtint}, + {'p', fmtint}, + {'r', fmterror}, + {'s', fmtstring}, + {'U', fmtflag}, + {'u', fmtint}, + {'x', fmtint}, + } +}; + +// ----------------------------------------------------------------------- +// internal functions + +static Formatter +format(int c) +{ + Verb *v, *e; + e = &formatter.verb[atomic·load(&formatter.len)]; + for(v=e; v > formatter.verb; --v){ + if(v->c == c) + return v->fmt; + } + + return badfmt; +} + +static char * +dispatch(fmt·State *io, char *fmt) +{ + rune r; + int i, n; + + io->flag = 0; + io->width = io->prec = 0; + + /* + * the form of each print verb: + * % [flags] verb + * + the verb is a single character + * + each flag is either + * - a single character + * - a decimal numeric string + * - up to 2 decimal strings can be used + * - [width|*].[prec|*] + * - if missing, set to 0 + * - if *, grab from varargs + */ + for(;;){ + fmt += utf8·decode(fmt, &r); + io->verb = r; + switch(r){ + case 0: + return nil; + case '.': + io->flag |= fmt·Width|fmt·Prec; + continue; + case '0': + if(!(io->flag & fmt·Width)){ + io->flag |= fmt·Zero; + continue; + } + /* fallthrough */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + i = 0; + while('0' <= r && r <= '9'){ + i = 10*i + (r-'0'); + r = *fmt++; + } + fmt--; + number: + if(io->flag & fmt·Width){ + io->flag |= fmt·Prec; + io->prec = i; + }else{ + io->flag |= fmt·Width; + io->width = i; + } + continue; + case '*': + i = va_arg(io->args, int); + if(i < 0){ + if(io->flag&fmt·Prec){ + io->flag &= ~fmt·Prec; + io->prec = 0; + continue; + } + i = -i; + io->flag |= fmt·Left; + } + goto number; + } + n = format(r)(io); + if(n < 0) + return nil; + if(!n) + return fmt; + } +} + +static char * +flush(fmt·State *io, char *b, int len) +{ + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(!io->flush || !(*io->flush)(io) || io->buffer.cur + len >= io->buffer.end) { + io->buffer.end = io->buffer.cur; + return nil; + } + return io->buffer.cur; +} + +static int +pad(fmt·State *io, int n) +{ + int i; + char *b=io->buffer.cur, *e=io->buffer.end; + + for(i=0; i=e){ + if(!(b=flush(io, b, 1))) + return -1; + e = io->buffer.end; + } + *b++ = ' '; + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + return 0; +} + +static int +copy(fmt·State *io, char *m, int sz, int n) +{ + ulong f; + rune r; + int nc, w, nb; + char *b, *e, *me; + + w = 0; + f = io->flag; + me = m + sz; + + if(f&fmt·Width) + w = io->width; + if(f&fmt·Prec && n > io->prec) + n = io->prec; + if(!(f&fmt·Left) && pad(io, w-n)<0) + return -1; + + b = io->buffer.cur; + e = io->buffer.end; + + for(nc=n; nc>0; nc--){ + r = *(uchar *)m; + if(utf8·onebyte(r)){ + nb=1; + m++; + }else if((me-m) >= UTFmax || utf8·canfit(m, me-m)){ + nb=utf8·decode(m, &r); + m+=n; + }else + break; + + if(b+n>e){ + if(!(b=flush(io, b, nb))) + return -1; + e = io->buffer.end; + } + b += utf8·encode(&r, b); + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(f&fmt·Left && pad(io, w-n)<0) + return -1; + + return 0; +} + +static int +copyrune(fmt·State *io, rune *m, int n) +{ + ulong f; + rune r, *me; + int w, nb; + char *b, *e; + + w = 0; + f = io->flag; + + if(f&fmt·Width) + w = io->width; + if(f&fmt·Prec && n > io->prec) + n = io->prec; + + if(!(f&fmt·Left) && pad(io, w-n)<0) + return -1; + + b = io->buffer.cur; + e = io->buffer.end; + + for(me=m+n; m < me; m++){ + r = *m; + nb = utf8·runelen(r); + if(b + nb > e){ + if(!(b=flush(io, b, nb))) + return -1; + e = io->buffer.end; + } + b += utf8·encode(&r, b); + } + + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(f&fmt·Left && pad(io, w-n)<0) + return -1; + + return 0; +} + +static int +copystring(fmt·State *io, char *s) +{ + rune r; + int i,j; + + if(!s) + return copy(io, "", 5, 5); + + if(io->flag&fmt·Prec){ + i = 0; + for(j=0; j < io->prec && s[i]; j++) + i += utf8·decode(s+i, &r); + + return copy(io, s, i, j); + } + return copy(io, s, strlen(s), utf8·len(s)); +} + +static int +copyutf8(fmt·State *io, rune *s) +{ + rune *e; + int n,p; + + if(!s) + return copy(io, "", 5, 5); + + if(io->flag & fmt·Prec){ + p = io->prec; + for(n=0; n group){ + if((*groups)[1] != 0) + (*groups)++; + *digits = 1; + return 1; + } + return 0; +} + +// ----------------------------------------------------------------------- +// formatters + +static int +fmtchar(fmt·State *io) +{ + char x[1]; + x[0] = va_arg(io->args, int); + io->prec = 1; + + return copy(io, x, 1, 1); +} + +static int +fmtstring(fmt·State *io) +{ + char *s; + s = va_arg(io->args, char *); + return copystring(io, s); +} + +static int +fmterror(fmt·State *io) +{ + char *s; + s = strerror(errno); + return copystring(io, s); +} + +static int +fmtrune(fmt·State *io) +{ + rune x[1]; + + x[0] = va_arg(io->args, int); + return copyrune(io, x, 1); +} + +static int +fmtutf8(fmt·State *io) +{ + rune *s; + + s = va_arg(io->args, rune *); + return copyutf8(io, s); +} + +static int +fmtpercent(fmt·State *io) +{ + rune x[1]; + + x[0] = io->verb; + io->prec = 1; + return copyrune(io, x, 1); +} + +static int +fmtint(fmt·State *io) +{ + union{ + ulong u; + uvlong v; + } val; + int neg, base, i, n, f, w, isv; + int digits, bytes, runes, excess; + char *groups, *thousands; + char *p, *conv, buf[140]; + + f = io->flag; + neg = 0; + isv = 0; + val.u = 0; + + switch(io->verb){ + case 'o': case 'p': case 'u': case 'x': case 'X': + f |= fmt·Unsigned; + f &= ~(fmt·Sign|fmt·Space); + } + + /* set flags */ + if(io->verb=='p'){ + val.u = (ulong)va_arg(io->args, void*); + io->verb = 'x'; + f |= fmt·Unsigned; + }else if(f&fmt·Vlong){ + isv=1; + if(f&fmt·Unsigned) + val.v = va_arg(io->args, uvlong); + else + val.v = va_arg(io->args, vlong); + }else if(f&fmt·Long){ + if(f&fmt·Unsigned) + val.u = va_arg(io->args, ulong); + else + val.u = va_arg(io->args, long); + }else if(f&fmt·Byte){ + if(f&fmt·Unsigned) + val.u = (uchar)va_arg(io->args, int); + else + val.u = (char)va_arg(io->args, int); + }else if(f&fmt·Short){ + if(f&fmt·Unsigned) + val.u = (ushort)va_arg(io->args, int); + else + val.u = (short)va_arg(io->args, int); + }else{ + if(f&fmt·Unsigned) + val.u = va_arg(io->args, uint); + else + val.u = va_arg(io->args, int); + } + + conv = "0123456789abcdef"; + groups = "\4"; + thousands = io->thousands; + /* get base */ + switch(io->verb){ + case 'd': case 'i': case 'u': + base = 10; + groups = io->groups; + break; + case 'X': + conv = "0123456789ABCDEF"; + /*fallthrough*/ + case 'x': + base = 16; + thousands = ":"; + break; + case 'b': + base = 2; + thousands = ":"; + break; + case 'o': + base = 8; + break; + default: + return -1; + } + + /* check for negativity */ + if(!(f&fmt·Unsigned)){ + if(isv && (vlong)val.v < 0){ + val.v = -(vlong)val.v; + neg = 1; + }else if(!isv && (long)val.u < 0){ + val.u = -(long)val.u; + neg = 1; + } + } + + p = buf + sizeof(buf) - 1; + n = 0; + digits = 0; + excess = 0; + runes = utf8·len(thousands); + bytes = strlen(thousands); + +#define PARSE(VALUE) \ + while((VALUE)){ \ + i = (VALUE) % base; \ + (VALUE) /= base; \ + if((f&fmt·Comma) && n%4 == 3){ \ + *p-- = ','; \ + n++; \ + } \ + if((f&fmt·Apost) && needseperate(&digits, &groups)){ \ + n += runes; \ + excess += bytes - runes; \ + p -= bytes; \ + memmove(p+1, thousands, bytes); \ + } \ + *p-- = conv[i]; \ + n++; \ + } + if(isv) + PARSE(val.v) + else + PARSE(val.u) +#undef PARSE + + if(!n){ + if(!(f&fmt·Prec) || io->prec != 0 || (io->verb == 'o' && (f&fmt·Sharp))){ + *p-- = '0'; + n = 1; + if(f&fmt·Apost) + needseperate(&digits,&groups); + } + + if(io->verb == 'x' || io->verb == 'X') + f &= ~fmt·Sharp; + } + + for(w = io->prec; n < w && p > buf+3; n++){ + if((f&fmt·Apost) && needseperate(&digits, &groups)){ + n += runes; + excess += bytes - runes; + p -= bytes; + memmove(p+1, thousands, bytes); + } + *p-- = '0'; + } + + if(neg || (f&(fmt·Sign|fmt·Space))) + n++; + + if(f&fmt·Sharp){ + if(base==16) + n += 2; + else if(base == 8){ + if(p[1] == '0') + f &= ~fmt·Sharp; + else + n++; + } + } + + if(f&fmt·Zero && !(f & (fmt·Left|fmt·Prec))){ + w = 0; + if(f & fmt·Width) + w = io->width; + for(; n < w && p > buf+3; n++){ + if((f & fmt·Apost) && needseperate(&digits, &groups)){ + n += runes; + excess += bytes - runes; + p -= bytes; + memmove(p+1, thousands, bytes); + } + *p-- = '0'; + } + io->flag &= ~fmt·Width; + } + + if(f&fmt·Sharp){ + if(base==16) + *p-- = io->verb; + if(base==16 || base == 8) + *p-- = '0'; + } + + if(neg) + *p-- = '-'; + else if(f & fmt·Sign) + *p-- = '+'; + else if (f & fmt·Space) + *p-- = ' '; + + io->flag &= ~fmt·Prec; + return copy(io, p+1, n+excess, n); +} + +static int +fmtcount(fmt·State *io) +{ + void *p; + ulong f; + + f = io->flag; + p = va_arg(io->args, void*); + + if(f&fmt·Vlong) + *(vlong*)p = io->n; + else if(f&fmt·Long) + *(long*)p = io->n; + else if(f&fmt·Byte) + *(char*)p = io->n; + else if(f&fmt·Short) + *(short*)p = io->n; + else + *(int*)p = io->n; + + return 0; +} + +static int +fmtflag(fmt·State *io) +{ + switch(io->verb){ + case ',': io->flag |= fmt·Comma; break; + case '-': io->flag |= fmt·Left; break; + case '+': io->flag |= fmt·Sign; break; + case '#': io->flag |= fmt·Sharp; break; + case '\'': io->flag |= fmt·Apost; break; + case ' ': io->flag |= fmt·Space; break; + case 'u': io->flag |= fmt·Unsigned; break; + case 'L': io->flag |= fmt·Ldouble; break; + case 'h': + if(io->flag&fmt·Short) + io->flag |= fmt·Byte; + io->flag |= fmt·Short; + break; + case 'l': + if(io->flag&fmt·Long) + io->flag |= fmt·Vlong; + io->flag |= fmt·Long; + break; + } + return 1; +} + +static int +badfmt(fmt·State *io) +{ + int n; + char x[UTFmax+2]; + + x[0] = '%'; + n = 1 + utf8·encode(&io->verb, x+1); + x[n++] = '%'; + io->prec = n; + copy(io, x, n, n); + + return 0; +} + +#include "float.c" + +// ----------------------------------------------------------------------- +// exports + +int +fmt·do(fmt·State *io, char *fmt) +{ + rune r; + int c, n; + char *b, *e; + + for(;;){ + b = io->buffer.cur; + e = io->buffer.end; + while((c = *(uchar *)fmt) && c != '%'){ + if(utf8·onebyte(c)){ + if(b >= e){ + if(!(b=flush(io, b, 1))) + return -1; + e = io->buffer.end; + } + *b++ = *fmt++; + }else{ + n = utf8·decode(fmt, &r); + if(b + n > e){ + if(!(b=flush(io, b, n))) + return -1; + e = io->buffer.end; + } + while(n--) + *b++ = *fmt++; + } + } + fmt++; + io->n += b - io->buffer.cur; + io->buffer.cur = b; + if(!c) /* we hit our nul terminator */ + return io->n - n; + io->buffer.end = e; + + if(!(fmt=dispatch(io, fmt))) + return -1; + } +} + +int +fmt·install(int verb, Formatter func) +{ + Verb *v; + int i, ret; + +lock: + if(verb <= 0 || verb >= 65536){ + ret = -1; + goto unlock; + } + if(!func) + func = badfmt; + + if((i = atomic·load(&formatter.len))==MaxFmt) + return -1; + + v = &formatter.verb[i]; + v->c = verb; + v->fmt = func; + + atomic·store(&formatter.len, i+1); + ret = 0; +unlock: + return ret; +} diff --git a/src/base/fmt/esprint.c b/src/base/fmt/esprint.c new file mode 100644 index 0000000..6d97340 --- /dev/null +++ b/src/base/fmt/esprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +char * +fmt·esprint(char *buf, char *end, char *fmt, ...) +{ + char *p; + va_list args; + + va_start(args, fmt); + p = fmt·vesprint(buf, end, fmt, args); + va_end(args); + + return p; +} diff --git a/src/base/fmt/float.c b/src/base/fmt/float.c new file mode 100644 index 0000000..63ea80f --- /dev/null +++ b/src/base/fmt/float.c @@ -0,0 +1,1077 @@ +#define FDIGIT 30 +#define FDEFLT 6 +#define NSIGNIF 17 + +static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; +static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; +static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; + +static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; + +static int +isNaN(double val) +{ + union{ + uvlong i; + double f; + }x; + + x.f = val; + return (x.i&uvinf) == uvinf && (x.i&~uvneginf) != 0; +} + +static double +NaN(void) +{ + union{ + uvlong i; + double f; + }x; + x.i = uvnan; + return x.f; +} + +static int +isInf(double val, int sign) +{ + union{ + uvlong i; + double f; + }x; + + x.f = val; + if(sign == 0) + return x.i == uvinf || x.i == uvneginf; + else if(sign == 1) + return x.i == uvinf; + else + return x.i == uvneginf; +} + +static double pows10[] = +{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, +}; + +static double +fpow10(int n) +{ + double d; + int neg; + + neg = 0; + if(n < 0){ + neg = 1; + n = -n; + } + + if(n NSIGNIF) + return 0; + + for(b = a+n-1; b >= a; b--){ + c = *b + 1; + if(c <= '9'){ + *b = c; + return 0; + } + *b = '0'; + } + /* + * need to overflow adding digit. + * shift number down and insert 1 at beginning. + * decimal is known to be 0s or we wouldn't + * have gotten this far. (e.g., 99999+1 => 00000) + */ + a[0] = '1'; + return 1; +} + +static int +sub1(char *a, int n) +{ + int c; + char *b; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--){ + c = *b - 1; + if(c >= '0'){ + if(c == '0' && b == a){ + /* + * just zeroed the top digit; shift everyone up. + * decimal is known to be 9s or we wouldn't + * have gotten this far. (e.g., 10000-1 => 09999) + */ + *b = '9'; + return 1; + } + *b = c; + return 0; + } + *b = '9'; + } + /* + * can't get here. the number a is always normalized + * so that it has a nonzero first digit. + */ + abort(); +} + +// ----------------------------------------------------------------------- +// strtod + +#define Nbits 28 +#define Nmant 53 +#define Prec ((Nmant+Nbits+1)/Nbits) + +#define Sigbit (1<<(Prec*Nbits-Nmant)) /* first significant bit of Prec-th word */ +#define Ndig 1500 +#define One (ulong)(1<>1) +#define Maxe 310 + +#define Fsign (1<<0) /* found - */ +#define Fesign (1<<1) /* found e- */ +#define Fdpoint (1<<2) /* found . */ + +#define S0 0 /* _ _S0 +S1 #S2 .S3 */ +#define S1 1 /* _+ #S2 .S3 */ +#define S2 2 /* _+# #S2 .S4 eS5 */ +#define S3 3 /* _+. #S4 */ +#define S4 4 /* _+#.# #S4 eS5 */ +#define S5 5 /* _+#.#e +S6 #S7 */ +#define S6 6 /* _+#.#e+ #S7 */ +#define S7 7 /* _+#.#e+# #S7 */ + +typedef struct Tab Tab; +struct Tab +{ + int bp; + int siz; + char *cmp; +}; + +static ulong +umuldiv(ulong a, ulong b, ulong c) +{ + double d; + + d = ((double)a * (double)b) / (double)c; + if(d >= 4294967295.) + d = 4294967295.; + return (ulong)d; +} + +static void +frnorm(ulong *f) +{ + int i, c; + + c = 0; + for(i=Prec-1; i>0; i--) { + f[i] += c; + c = f[i] >> Nbits; + f[i] &= One-1; + } + f[0] += c; +} + +static int +fpcmp(char *a, ulong* f) +{ + ulong tf[Prec]; + int i, d, c; + + for(i=0; i> Nbits) + '0'; + tf[0] &= One-1; + + /* compare next digit */ + c = *a; + if(c == 0) { + if('0' < d) + return -1; + if(tf[0] != 0) + goto cont; + for(i=1; i d) + return +1; + if(c < d) + return -1; + a++; + cont:; +} +} + +static void +divby(char *a, int *na, int b) +{ + int n, c; + char *p; + + p = a; + n = 0; + while(n>>b == 0){ + c = *a++; + if(c == 0) { + while(n) { + c = n*10; + if(c>>b) + break; + n = c; + } + goto xx; + } + n = n*10 + c-'0'; + (*na)--; + } + for(;;){ + c = n>>b; + n -= c<>b; + n -= c<= (int)(arrlen(tab1))) + d = (int)(arrlen(tab1))-1; + t = tab1 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) > 0) + d--; + *dp -= d; + *bp += b; + divby(a, na, b); +} + +static void +mulby(char *a, char *p, char *q, int b) +{ + int n, c; + + n = 0; + *p = 0; + for(;;) { + q--; + if(q < a) + break; + c = *q - '0'; + c = (c<= (int)(arrlen(tab2))) + d = (int)(arrlen(tab2))-1; + t = tab2 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) < 0) + d--; + p = a + *na; + *bp -= b; + *dp += d; + *na += d; + mulby(a, p+d, p, b); +} + +static int +cmp(char *a, char *b) +{ + int c1, c2; + + while((c1 = *b++) != '\0') { + c2 = *a++; + if(isupper(c2)) + c2 = tolower(c2); + if(c1 != c2) + return 1; + } + return 0; +} + +double +fmtstrtod(char *as, char **aas) +{ + int na, ex, dp, bp, c, i, flag, state; + ulong low[Prec], hig[Prec], mid[Prec]; + double d; + char *s, a[Ndig]; + + flag = 0; /* Fsign, Fesign, Fdpoint */ + na = 0; /* number of digits of a[] */ + dp = 0; /* na of decimal point */ + ex = 0; /* exonent */ + + state = S0; + for(s=as;;s++){ + c = *s; + if('0' <= c && c <= '9'){ + switch(state){ + case S0: case S1: case S2: + state = S2; + break; + case S3: case S4: + state = S4; + break; + case S5: case S6: case S7: + state = S7; + ex = ex*10 + (c-'0'); + continue; + } + + if(na == 0 && c == '0'){ + dp--; + continue; + } + if(na < Ndig-50) + a[na++] = c; + continue; + } + switch(c){ + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + if(state == S0) + continue; + break; + case '-': + if(state == S0) + flag |= Fsign; + else + flag |= Fesign; + case '+': + if(state == S0) + state = S1; + else + if(state == S5) + state = S6; + else + break; /* syntax */ + continue; + case '.': + flag |= Fdpoint; + dp = na; + if(state == S0 || state == S1){ + state = S3; + continue; + } + if(state == S2){ + state = S4; + continue; + } + break; + case 'e': case 'E': + if(state == S2 || state == S4){ + state = S5; + continue; + } + break; + } + break; + } + + /* clean up return char-pointer */ + switch(state) { + case S0: + if(cmp(s, "nan") == 0){ + if(aas != nil) + *aas = s+3; + goto retnan; + } + case S1: + if(cmp(s, "infinity") == 0){ + if(aas != nil) + *aas = s+8; + goto retinf; + } + if(cmp(s, "inf") == 0){ + if(aas != nil) + *aas = s+3; + goto retinf; + } + case S3: + if(aas != nil) + *aas = as; + goto ret0; /* no digits found */ + case S6: + s--; /* back over +- */ + case S5: + s--; /* back over e */ + break; + } + if(aas != nil) + *aas = s; + + if(flag & Fdpoint) + while(na > 0 && a[na-1] == '0') + na--; + if(na == 0) + goto ret0; /* zero */ + a[na] = 0; + if(!(flag & Fdpoint)) + dp = na; + if(flag & Fesign) + ex = -ex; + dp += ex; + if(dp < -Maxe){ + errno = ERANGE; + goto ret0; /* underflow by exp */ + } else + if(dp > +Maxe) + goto retinf; /* overflow by exp */ + + /* + * normalize the decimal ascii number + * to range .[5-9][0-9]* e0 + */ + bp = 0; /* binary exponent */ + while(dp > 0) + divascii(a, &na, &dp, &bp); + while(dp < 0 || a[0] < '5') + mulascii(a, &na, &dp, &bp); + + /* close approx by naive conversion */ + mid[0] = 0; + mid[1] = 1; + for(i=0; (c=a[i]) != '\0'; i++) { + mid[0] = mid[0]*10 + (c-'0'); + mid[1] = mid[1]*10; + if(i >= 8) + break; + } + low[0] = umuldiv(mid[0], One, mid[1]); + hig[0] = umuldiv(mid[0]+1, One, mid[1]); + for(i=1; i>= 1; + } + frnorm(mid); + + /* compare */ + c = fpcmp(a, mid); + if(c > 0) { + c = 1; + for(i=0; i= Sigbit/2) { + mid[Prec-1] += Sigbit; + frnorm(mid); + } + goto out; + +ret0: + return 0; + +retnan: + return NaN(); + +retinf: + /* Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */ + errno = ERANGE; + if(flag & Fsign) + return -HUGE_VAL; + return HUGE_VAL; + +out: + d = 0; + for(i=0; i 0) + *p++ = se[--i]; + + *p++ = '\0'; +} + +/* + * compute decimal integer m, exp such that: + * f = m*10^exp + * m is as short as possible with losing exactness + * assumes special cases (NaN, +Inf, -Inf) have been handled. + */ +static void +dtoa(double f, char *s, int *exp, int *neg, int *len) +{ + int c, d, e2, e, ee, i, ndigit, oerrno; + char buf[NSIGNIF+10]; + double g; + + oerrno = errno; + + *neg = 0; + if(f < 0){ + f = -f; + *neg = 1; + } + + if(f == 0){ + *exp = 0; + s[0] = '0'; + s[1] = 0; + *len = 1; + return; + } + + frexp(f, &e2); + e = (int)(e2 * .301029995664); + g = f * fpow10(-e); + while(g < 1) { + e--; + g = f * fpow10(-e); + } + while(g >= 10){ + e++; + g = f * fpow10(-e); + } + + /* convert nsignif digits as a first approximation */ + for(i=0; i g) { + if(add1(s, NSIGNIF)){ + /* gained a digit */ + e--; + fmtexp(s+NSIGNIF, e, 0); + } + continue; + } + if(f < g){ + if(sub1(s, NSIGNIF)){ + /* lost a digit */ + e++; + fmtexp(s+NSIGNIF, e, 0); + } + continue; + } + break; + } + + /* + * bump last few digits down to 0 as we can. + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--){ + c = s[i]; + if(c != '0'){ + s[i] = '0'; + g=fmtstrtod(s, nil); + if(g != f){ + s[i] = c; + break; + } + } + } + + /* + * remove trailing zeros. + */ + ndigit = NSIGNIF; + while(ndigit > 1 && s[ndigit-1] == '0'){ + e++; + --ndigit; + } + s[ndigit] = 0; + *exp = e; + *len = ndigit; + + errno = oerrno; +} + + +static int +fmtfloat(fmt·State *io) +{ + char buf[NSIGNIF+10], *dot, *digits, *p, *end, suf[10], *cur; + double val; + int c, verb, ndot, e, exp, f, ndigits, neg, newndigits; + int npad, pt, prec, realverb, sign, nsuf, ucase, n, z1, z2; + + if(io->flag&fmt·Long) + val = va_arg(io->args, long double); + else + val = va_arg(io->args, double); + + /* extract formatting flags */ + f = io->flag; + io->flag = 0; + prec = FDEFLT; + if(f & fmt·Prec) + prec = io->prec; + + verb = io->verb; + ucase = 0; + switch(verb) { + case 'A': + case 'E': + case 'F': + case 'G': + verb += 'a'-'A'; + ucase = 1; + break; + } + + /* pick off special numbers. */ + if(isNaN(val)) { + end = special[0+ucase]; + special: + io->flag = f & (fmt·Width|fmt·Left); + return copy(io, end, strlen(end), strlen(end)); + } + if(isInf(val, 1)) { + end = special[2+ucase]; + goto special; + } + if(isInf(val, -1)) { + end = special[4+ucase]; + goto special; + } + + /* get exact representation. */ + digits = buf; + dtoa(val, digits, &exp, &neg, &ndigits); + + /* get locale's decimal point. */ + dot = io->decimal; + if(dot == nil) + dot = "."; + ndot = utf8·len(dot); + + /* + * now the formatting fun begins. + * compute parameters for actual fmt: + * + * pad: number of spaces to insert before/after field. + * z1: number of zeros to insert before digits + * z2: number of zeros to insert after digits + * point: number of digits to print before decimal point + * ndigits: number of digits to use from digits[] + * suf: trailing suffix, like "e-5" + */ + realverb = verb; + switch(verb){ + case 'g': + /* convert to at most prec significant digits. (prec=0 means 1) */ + if(prec == 0) + prec = 1; + if(ndigits > prec) { + if(digits[prec] >= '5' && add1(digits, prec)) + exp++; + exp += ndigits-prec; + ndigits = prec; + } + + /* + * extra rules for %g (implemented below): + * trailing zeros removed after decimal unless FmtSharp. + * decimal point only if digit follows. + */ + + /* fall through to %e */ + default: + case 'e': + /* one significant digit before decimal, no leading zeros. */ + pt = 1; + z1 = 0; + + /* + * decimal point is after ndigits digits right now. + * slide to be after first. + */ + e = exp + (ndigits-1); + + /* if this is %g, check exponent and convert prec */ + if(realverb == 'g') { + if(-4 <= e && e < prec) + goto casef; + prec--; /* one digit before decimal; rest after */ + } + + /* compute trailing zero padding or truncate digits. */ + if(1+prec >= ndigits) + z2 = 1+prec - ndigits; + else { + /* truncate digits */ + assert(realverb != 'g'); + newndigits = 1+prec; + if(digits[newndigits] >= '5' && add1(digits, newndigits)) { + /* had 999e4, now have 100e5 */ + e++; + } + ndigits = newndigits; + z2 = 0; + } + fmtexp(suf, e, ucase); + nsuf = strlen(suf); + break; + + casef: + case 'f': + /* determine where digits go with respect to decimal point */ + if(ndigits+exp > 0) { + pt = ndigits+exp; + z1 = 0; + } else { + pt = 1; + z1 = 1 + -(ndigits+exp); + } + + /* + * %g specifies prec = number of significant digits + * convert to number of digits after decimal point + */ + if(realverb == 'g') + prec += z1 - pt; + + /* compute trailing zero padding or truncate digits. */ + if(pt+prec >= z1+ndigits) + z2 = pt+prec - (z1+ndigits); + else{ + /* truncate digits */ + assert(realverb != 'g'); + newndigits = pt+prec - z1; + if(newndigits < 0){ + z1 += newndigits; + newndigits = 0; + }else if(newndigits == 0){ + /* perhaps round up */ + if(digits[0] >= '5'){ + digits[0] = '1'; + newndigits = 1; + goto newdigit; + } + }else if(digits[newndigits] >= '5' && add1(digits, newndigits)){ + /* digits was 999, is now 100; make it 1000 */ + digits[newndigits++] = '0'; + newdigit: + /* account for new digit */ + if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ + z1--; + else /* 9.99 => 10.00 */ + pt++; + } + z2 = 0; + ndigits = newndigits; + } + nsuf = 0; + break; + } + + /* + * if %g is given without FmtSharp, remove trailing zeros. + * must do after truncation, so that e.g. print %.3g 1.001 + * produces 1, not 1.00. sorry, but them's the rules. + */ + if(realverb == 'g' && !(f & fmt·Sharp)) { + if(z1+ndigits+z2 >= pt) { + if(z1+ndigits < pt) + z2 = pt - (z1+ndigits); + else{ + z2 = 0; + while(z1+ndigits > pt && digits[ndigits-1] == '0') + ndigits--; + } + } + } + + /* + * compute width of all digits and decimal point and suffix if any + */ + n = z1+ndigits+z2; + if(n > pt) + n += ndot; + else if(n == pt){ + if(f & fmt·Sharp) + n += ndot; + else + pt++; /* do not print any decimal point */ + } + n += nsuf; + + /* + * determine sign + */ + sign = 0; + if(neg) + sign = '-'; + else if(f & fmt·Sign) + sign = '+'; + else if(f & fmt·Space) + sign = ' '; + if(sign) + n++; + + /* compute padding */ + npad = 0; + if((f & fmt·Width) && io->width > n) + npad = io->width - n; + if(npad && !(f & fmt·Left) && (f & fmt·Zero)){ + z1 += npad; + pt += npad; + npad = 0; + } + + /* format the actual field. too bad about doing this twice. */ + if(npad && !(f & fmt·Left) && pad(io, npad < 0)) + return -1; + + cur = io->buffer.cur; + end = io->buffer.end; + + if(sign){ + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = sign; + } + + while(z1>0 || ndigits>0 || z2>0){ + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = c; + + if(--pt == 0) + for(p=dot; *p; p++){ + if(cur+1 > end){ + if(!(cur=flush(io,cur,1))) + return -1; + end = io->buffer.end; + } + *cur++ = *p; + } + } + io->n += cur - (char*)io->buffer.cur; + io->buffer.cur = cur; + if(nsuf && copy(io, suf, nsuf, nsuf) < 0) + return -1; + if(npad && (f & fmt·Left) && pad(io, npad < 0)) + return -1; + + return 0; +} diff --git a/src/base/fmt/fprint.c b/src/base/fmt/fprint.c new file mode 100644 index 0000000..5077359 --- /dev/null +++ b/src/base/fmt/fprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +int +fmt·fprint(int fd, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vfprint(fd, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/internal.h b/src/base/fmt/internal.h new file mode 100644 index 0000000..7bf47af --- /dev/null +++ b/src/base/fmt/internal.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +typedef int (*Formatter)(fmt·State *io); +typedef struct Verb Verb; + +struct Verb +{ + int c; + Formatter fmt; +}; + +void fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups); diff --git a/src/base/fmt/locale.c b/src/base/fmt/locale.c new file mode 100644 index 0000000..437c61e --- /dev/null +++ b/src/base/fmt/locale.c @@ -0,0 +1,16 @@ +#include "internal.h" + +void +fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups) +{ + if(decimal == nil || decimal[0] == '\0') + decimal = "."; + if(thousands == nil) + thousands = ","; + if(groups == nil) + groups = "\3"; + + io->groups = groups; + io->decimal = decimal; + io->thousands = thousands; +} diff --git a/src/base/fmt/nsprint.c b/src/base/fmt/nsprint.c new file mode 100644 index 0000000..90489e0 --- /dev/null +++ b/src/base/fmt/nsprint.c @@ -0,0 +1,14 @@ +#include "internal.h" + +int +fmt·nsprint(int len, char *buf, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vnsprint(len, buf, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/open.c b/src/base/fmt/open.c new file mode 100644 index 0000000..8aadef5 --- /dev/null +++ b/src/base/fmt/open.c @@ -0,0 +1,34 @@ +#include "internal.h" + +static int +flush(fmt·State *io) +{ + int n, fd; + + fd = (uintptr)io->file; + n = io->buffer.cur - io->buffer.beg; + if(n && write(fd, io->buffer.beg, n) != n) + return -1; + + io->buffer.cur = io->buffer.beg; + return io->n; +} + +int +fmt·open(int fd, int len, char *buf, fmt·State *io) +{ + io->buffer.beg = buf; + io->buffer.cur = buf; + io->buffer.end = buf+len; + io->flush = flush; + io->file = (void*)(uintptr)fd; + io->flag = 0; + io->n = 0; + /* no heap needed */ + io->heap = nil; + io->mem = (mem·Reallocator){ 0 }; + + fmt·setlocale(io, nil, nil, nil); + + return 0; +} diff --git a/src/base/fmt/panic.c b/src/base/fmt/panic.c new file mode 100644 index 0000000..25ee277 --- /dev/null +++ b/src/base/fmt/panic.c @@ -0,0 +1,15 @@ +#include "internal.h" + +void +fmt·panic(char *fmt, ...) +{ + char buf[256]; + va_list arg; + + va_start(arg, fmt); + fmt·vesprint(buf, arrend(buf), fmt, arg); + va_end(arg); + + fmt·fprint(2, "%s: %s\n", argv0 ? argv0 : "", buf); + exits("fatal"); +} diff --git a/src/base/fmt/print.c b/src/base/fmt/print.c new file mode 100644 index 0000000..20b8e00 --- /dev/null +++ b/src/base/fmt/print.c @@ -0,0 +1,13 @@ +#include "internal.h" + +int +fmt·print(char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = fmt·vfprint(1, fmt, args); + va_end(args); + return n; +} diff --git a/src/base/fmt/rules.mk b/src/base/fmt/rules.mk new file mode 100644 index 0000000..fdfdac0 --- /dev/null +++ b/src/base/fmt/rules.mk @@ -0,0 +1,21 @@ +# Local sources +SRCS_$(d)+=\ + $(d)/fmt/buffer.c\ + $(d)/fmt/do.c\ + $(d)/fmt/esprint.c\ + $(d)/fmt/fprint.c\ + $(d)/fmt/locale.c\ + $(d)/fmt/nsprint.c\ + $(d)/fmt/open.c\ + $(d)/fmt/print.c\ + $(d)/fmt/sprint.c\ + $(d)/fmt/vesprint.c\ + $(d)/fmt/vfprint.c\ + $(d)/fmt/vnsprint.c\ + $(d)/fmt/vprint.c\ + $(d)/fmt/vwrite.c\ + $(d)/fmt/panic.c\ + $(d)/fmt/write.c + +CHECK_$(d)+=\ + $(d)/fmt/test.c diff --git a/src/base/fmt/sprint.c b/src/base/fmt/sprint.c new file mode 100644 index 0000000..f1be6dd --- /dev/null +++ b/src/base/fmt/sprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·sprint(char *buf, char *fmt, ...) +{ + int n; + uint len; + va_list args; + + len = 1 << 30; + if(buf+len < buf) + len = -(uintptr)buf-1; + + va_start(args, fmt); + n = fmt·vnsprint(len, buf, fmt, args); + va_end(args); + + return n; +} diff --git a/src/base/fmt/test.c b/src/base/fmt/test.c new file mode 100644 index 0000000..d81a62e --- /dev/null +++ b/src/base/fmt/test.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include + +typedef struct Complex +{ + double r, i; +} Complex; + +int +Xfmt(fmt·State *io) +{ + Complex c; + c = va_arg(io->args, Complex); + + return fmt·write(io, "(real=%g,imag=%g)", c.r, c.i); +} + +int +main(int argc, char *argv[]) +{ + fmt·print("basic tests\n"); + fmt·print("\tx: %x\n", 0x87654321); + fmt·print("\tu: %u\n", 0x87654321); + fmt·print("\td: %d\n", 0x87654321); + fmt·print("\ts: %s\n", "hi there"); + fmt·print("\tc: %c\n", '!'); + fmt·print("\tg: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10); + fmt·print("\te: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10); + fmt·print("\tf: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10); + fmt·print("\tsmiley: %C\n", (rune)0x263a); + fmt·print("\t%g %.18g\n", 2e25, 2e25); + fmt·print("\t%2.18g\n", 1.0); + fmt·print("\t%2.18f\n", 1.0); + fmt·print("\t%f\n", 3.1415927/4); + fmt·print("\t%d\n", 23); + fmt·print("\t%i\n", 23); + fmt·print("\t%0.10d\n", 12345); + + fmt·print("%%4%%d tests\n"); + fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + fmt·print("\t%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); + fmt·print("\t%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); + fmt·print("\t%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); + + /* test %'d formats */ + fmt·print("%%'%%d tests\n"); + fmt·print("\t%'d %'d %'d\n", 1, 2222, 33333333); + fmt·print("\t%'019d\n", 0); + fmt·print("\t%08d %08d %08d\n", 1, 2222, 33333333); + fmt·print("\t%'08d %'08d %'08d\n", 1, 2222, 33333333); + fmt·print("\t%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); + fmt·print("\t%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); + fmt·print("\t%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); + + /* test precision */ + fmt·print("precision tests\n"); + fmt·print("%020.10d\n", 100); + + /* test install */ + fmt·install('X', Xfmt); + Complex c = { 1.5, -2.3 }; + fmt·print("x = %X\n", c); + + return 0; + +} diff --git a/src/base/fmt/vesprint.c b/src/base/fmt/vesprint.c new file mode 100644 index 0000000..18f4dd2 --- /dev/null +++ b/src/base/fmt/vesprint.c @@ -0,0 +1,26 @@ +#include "internal.h" + +char* +fmt·vesprint(char *buf, char *end, char *fmt, va_list args) +{ + fmt·State io; + + if(end <= buf) + return nil; + + io.n = 0; + io.buffer.beg = io.buffer.cur = buf; + io.buffer.end = end-1; + io.flush = nil; + io.file = nil; + + va_copy(io.args, args); + + fmt·setlocale(&io, nil, nil, nil); + fmt·do(&io, fmt); + + va_end(io.args); + + *(io.buffer.cur) = 0; + return io.buffer.cur; +} diff --git a/src/base/fmt/vfprint.c b/src/base/fmt/vfprint.c new file mode 100644 index 0000000..4306ea7 --- /dev/null +++ b/src/base/fmt/vfprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·vfprint(int fd, char *fmt, va_list args) +{ + int n; + fmt·State io; + char buf[256]; + + fmt·open(fd, sizeof(buf), buf, &io); + + va_copy(io.args, args); + n = fmt·do(&io, fmt); + va_end(io.args); + + if(n > 0 && io.flush(&io) < 0) + return -1; + return n; +} diff --git a/src/base/fmt/vnsprint.c b/src/base/fmt/vnsprint.c new file mode 100644 index 0000000..7ded908 --- /dev/null +++ b/src/base/fmt/vnsprint.c @@ -0,0 +1,26 @@ +#include "internal.h" + +int +fmt·vnsprint(int len, char *buf, char *fmt, va_list args) +{ + fmt·State io; + + if(len <= 0) + return -1; + + io.n = 0; + io.buffer.beg = io.buffer.cur = buf; + io.buffer.end = buf+len-1; + io.flush = nil; + io.file = nil; + + va_copy(io.args, args); + + fmt·setlocale(&io, nil, nil, nil); + fmt·do(&io, fmt); + + va_end(io.args); + + *(io.buffer.cur) = 0; + return io.buffer.cur - io.buffer.beg; +} diff --git a/src/base/fmt/vprint.c b/src/base/fmt/vprint.c new file mode 100644 index 0000000..bb3076b --- /dev/null +++ b/src/base/fmt/vprint.c @@ -0,0 +1,19 @@ +#include "internal.h" + +int +fmt·vprint(char *fmt, va_list args) +{ + fmt·State io; + int n; + char buf[256]; + + fmt·open(1, sizeof(buf), buf, &io); + + va_copy(io.args, args); + n = fmt·do(&io, fmt); + va_end(io.args); + + if(n > 0 && io.flush(&io) < 0) + return -1; + return n; +} diff --git a/src/base/fmt/vwrite.c b/src/base/fmt/vwrite.c new file mode 100644 index 0000000..cacdef2 --- /dev/null +++ b/src/base/fmt/vwrite.c @@ -0,0 +1,26 @@ +#include "internal.h" + +int +fmt·vwrite(fmt·State *io, char *fmt, va_list args) +{ + int n; + va_list tmp; + + io->flag = io->width = io->prec = 0; + + va_copy(tmp, io->args); + va_end(io->args); + + va_copy(io->args,args); + n = fmt·do(io, fmt); + va_end(io->args); + + va_copy(io->args, tmp); + va_end(tmp); + + io->flag = io->width = io->prec = 0; + + if(n >= 0) + return 0; + return n; +} diff --git a/src/base/fmt/write.c b/src/base/fmt/write.c new file mode 100644 index 0000000..9a77223 --- /dev/null +++ b/src/base/fmt/write.c @@ -0,0 +1,22 @@ +#include "internal.h" + +int +fmt·write(fmt·State *io, char *fmt, ...) +{ + int n; + va_list args; + + io->flag = io->width = io->prec = 0; + + va_copy(args, io->args); + va_end(io->args); + + va_start(io->args, fmt); + n = fmt·do(io, fmt); + va_end(io->args); + + io->flag = io->width = io->prec = 0; + if(n >= 0) + return 0; + return n; +} diff --git a/src/base/io/close.c b/src/base/io/close.c new file mode 100644 index 0000000..5a773cd --- /dev/null +++ b/src/base/io/close.c @@ -0,0 +1,7 @@ +#include "internal.h" + +int +io·close(io·Stream *s) +{ + return fclose(s); +} diff --git a/src/base/io/open.c b/src/base/io/open.c index 71e88d4..fe78255 100644 --- a/src/base/io/open.c +++ b/src/base/io/open.c @@ -5,9 +5,3 @@ io·open(byte *name, byte *mode) { return fopen(name, mode); } - -int -io·close(io·Stream *s) -{ - return fclose(s); -} diff --git a/src/base/io/rules.mk b/src/base/io/rules.mk index 2e03ca5..124cd09 100644 --- a/src/base/io/rules.mk +++ b/src/base/io/rules.mk @@ -3,6 +3,7 @@ SRCS_$(d)+=\ $(d)/io/flush.c\ $(d)/io/interface.c\ $(d)/io/open.c\ + $(d)/io/close.c\ $(d)/io/putbyte.c\ $(d)/io/putstring.c\ $(d)/io/read.c\ @@ -11,4 +12,4 @@ SRCS_$(d)+=\ $(d)/io/stat.c\ $(d)/io/tell.c\ $(d)/io/unget.c\ - $(d)/io/write.c\ + $(d)/io/write.c diff --git a/src/base/rules.mk b/src/base/rules.mk index 9f25d37..0a262c7 100644 --- a/src/base/rules.mk +++ b/src/base/rules.mk @@ -5,10 +5,15 @@ include share/push.mk # local sources SRCS_$(d):=\ $(d)/arg.c + +CHECK_$(d):=\ + $(d)/test.c + include $(d)/bufio/rules.mk include $(d)/coro/rules.mk include $(d)/error/rules.mk include $(d)/flate/rules.mk +include $(d)/fmt/rules.mk include $(d)/fs/rules.mk include $(d)/gz/rules.mk include $(d)/io/rules.mk @@ -18,12 +23,10 @@ include $(d)/os/rules.mk include $(d)/rng/rules.mk include $(d)/sort/rules.mk include $(d)/string/rules.mk -CHECK_$(d):=\ - $(d)/test.c +include $(d)/utf/rules.mk # outputs LIBS_$(d) := $(d)/base.a -BINS_$(d) := include share/paths.mk diff --git a/src/base/utf/canfit.c b/src/base/utf/canfit.c new file mode 100644 index 0000000..4579ab3 --- /dev/null +++ b/src/base/utf/canfit.c @@ -0,0 +1,23 @@ +#include "internal.h" + +/* returns 1 if string of length n is long enough to be decoded */ +int +utf8·canfit(byte* s, int n) +{ + int i; + rune c; + + if(n <= 0) + return 0; + + c = *(ubyte*)s; + if(c < TByte1) + return 1; + + if(c < TByte3) + return n >= 2; + if(c < TByte4) + return n >= 3; + + return n >= UTFmax; +} diff --git a/src/base/utf/decode.c b/src/base/utf/decode.c new file mode 100644 index 0000000..01797f1 --- /dev/null +++ b/src/base/utf/decode.c @@ -0,0 +1,98 @@ +#include "internal.h" + +#define ACCEPT 0 +#define REJECT 12 + +static uint8 decode[] = { + /* + * the first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* + * the second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state + */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +int +utf8·decode(char *s, rune *r) +{ + int n; + rune v; + uint8 b, t, x=ACCEPT; + + b = ((uint8 *)s)[0]; + t = decode[b]; + v = (0xFF >> t) & b; + x = decode[256+x+t]; + + for(n=1; x > REJECT && n < UTFmax; n++){ + b = ((uint8 *)s)[n]; + t = decode[b]; + v = (v << 6) | (b & TMask); + x = decode[256+x+t]; + } + + if(x != ACCEPT){ + *r = RuneErr; + return 1; + } + + *r = v; + return n; +} + +#if 0 +int +utf8·decode(byte *s, rune *r) +{ + int c[UTFmax], i; + rune l; + + c[0] = *(ubyte*)(s); + if(c[0] < Tx){ + *r = c[0]; + return 1; + } + + l = c[0]; + for(i = 1; i < UTFmax; i++){ + c[i] = *(ubyte*)(s+i); + c[i] ^= Tx; + if(c[i] & Testx) goto bad; + + l = (l << Bitx) | c[i]; + if(c[0] < Tbyte(i + 2)){ + l &= RuneX(i + 1); + if(i == 1){ + if(c[0] < Tbyte(2) || l <= Rune1) + goto bad; + }else if(l <= RuneX(i) || l > RuneMax) + goto bad; + + if(i == 2 && SurrogateMin <= l && l <= SurrogateMax) + goto bad; + + *r = l; + return i + 1; + } + } +bad: + *r = RuneErr; + return 1; +} +#endif diff --git a/src/base/utf/decodeprev.c b/src/base/utf/decodeprev.c new file mode 100644 index 0000000..27dced6 --- /dev/null +++ b/src/base/utf/decodeprev.c @@ -0,0 +1,60 @@ +#include "internal.h" + +#define ACCEPT 0 +#define REJECT 12 + +static uint8 decode[] = { + /* + * the first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. + */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + /* + * The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. + */ + // 0 1 2 3 4 5 6 7 8 9 10 11 + 0,24,12,12,12,12,12,24,12,24,12,12, + 0,24,12,12,12,12,12,24,12,24,12,12, + 12,36, 0,12,12,12,12,48,12,36,12,12, + 12,60,12, 0, 0,12,12,72,12,72,12,12, + 12,60,12, 0,12,12,12,72,12,72, 0,12, + 12,12,12,12,12, 0, 0,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12, 0 +}; + +int +utf8·decodeprev(byte *s, rune *r) +{ + int n; + rune v; + uint8 b, t, d, x=ACCEPT; + + v=0, n=0, d=0; +nextbyte: + b = ((uint8 *)s)[-n++]; + t = decode[b]; + x = decode[256+x+t]; + + if(x > REJECT && n < UTFmax){ + v = v | ((b & TMask) << d); + d += 6; + goto nextbyte; + } + + if(x != ACCEPT) + *r = RuneErr; + else{ + v |= (((0xFFu >> t) & b) << d); + *r = v; + } + + return n; +} diff --git a/src/base/utf/encode.c b/src/base/utf/encode.c new file mode 100644 index 0000000..fa7c93e --- /dev/null +++ b/src/base/utf/encode.c @@ -0,0 +1,69 @@ +#include "internal.h" + +int +utf8·encode(rune *r, byte *s) +{ + rune c; + + c = *r; + if(c < Rune1Byte){ // 7 bits + s[0] = (uint8)c; + return 1; + } + + if(c < Rune2Byte){ // 11 bits + s[0] = TByte1 | (c >> 6); + s[1] = Tx | (c & TMask); + return 2; + } + + if(c < Rune3Byte){ // 16 bits + s[0] = TByte2 | ((c >> 12)); + s[1] = Tx | ((c >> 6) & TMask); + s[2] = Tx | ((c) & TMask); + return 3; + } + + // 22 bits + if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax)) + c = RuneErr; + + s[0] = TByte3 | ((c >> 18)); + s[1] = Tx | ((c >> 12) & TMask); + s[2] = Tx | ((c >> 6) & TMask); + s[3] = Tx | ((c) & TMask); + + return 4; +} + +#if 0 +int +utf8·encode(rune* r, byte* s) +{ + int i, j; + rune c; + + c = *r; + if(c <= Rune1) { + s[0] = c; + return 1; + } + + for(i = 2; i < UTFmax + 1; i++){ + if(i == 3){ + if(c > RuneMax) + c = RuneErr; + if(SurrogateMin <= c && c <= SurrogateMax) + c = RuneErr; + } + if(c <= RuneX(i) || i == UTFmax) { + s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); + for(j = 1; j < i; j++) + s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); + return i; + } + } + + return UTFmax; +} +#endif diff --git a/src/base/utf/find.c b/src/base/utf/find.c new file mode 100644 index 0000000..d75feb8 --- /dev/null +++ b/src/base/utf/find.c @@ -0,0 +1,31 @@ +#include "internal.h" + +byte* +utf8·find(byte* s, rune c) +{ + long c1; + rune r; + int n; + + if(c < Tx) + return strchr(s, c); + + for(;;){ + c1 = *(ubyte*)s; + if(c1 < Tx){ + if(c1 == 0) return nil; + if(c1 == c) return s; + s++; + continue; + } + + n = utf8·decode(s, &r); + + if(r == c) + return s; + + s += n; + } + + return nil; +} diff --git a/src/base/utf/findlast.c b/src/base/utf/findlast.c new file mode 100644 index 0000000..ab25ab2 --- /dev/null +++ b/src/base/utf/findlast.c @@ -0,0 +1,32 @@ +#include "internal.h" + +byte* +utf8·findlast(byte* s, rune c) +{ + long c1; + rune r; + byte *l; + + if(c < Tx) + return strrchr(s, c); + + l = nil; + for(;;){ + c1 = *(ubyte*)s; + if(c1 < Tx){ + if(c1 == 0) return l; + if(c1 == c) l = s; + s++; + continue; + } + + c1 = utf8·decode(s, &r); + + if(r == c) + l = s; + + s += c1; + } + + return nil; +} diff --git a/src/base/utf/internal.h b/src/base/utf/internal.h new file mode 100644 index 0000000..49945dd --- /dev/null +++ b/src/base/utf/internal.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +/* + * NOTE: we use the preprocessor to ensure we have unsigned constants. + * UTF-8 code: + * 1 byte: + * 0xxxxxxx + * 2 byte: + * 110xxxxx 10xxxxxx + * 3 byte: + * 1110xxxx 10xxxxxx 10xxxxxx + * 4 byte: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + +#define Tx 0x80u // 0b10000000 transfer header +#define TMask 0x3Fu // 0b00111111 transfer mask + +#define TByte1 0xC0u // 0b11000000 +#define TByte2 0xE0u // 0b11100000 +#define TByte3 0xF0u // 0b11110000 +#define TByte4 0xF8u // 0b11111000 + +#define RuneMask 0x1FFFFFu + +#define Rune1Byte 0x000080u // 1 << 8 (1 byte) +#define Rune2Byte 0x001000u // 1 << 12 (2 bytes) +#define Rune3Byte 0x020000u // 1 << 17 (3 bytes) +#define Rune4Byte 0x400000u // 1 << 22 (4 bytes) + + +/* UTF-16 nonsense */ +#define RuneSurrogateMin 0x0D8000 +#define RuneSurrogateMax 0x0D8FFF diff --git a/src/base/utf/len.c b/src/base/utf/len.c new file mode 100644 index 0000000..8fbd679 --- /dev/null +++ b/src/base/utf/len.c @@ -0,0 +1,21 @@ +#include "internal.h" + +int +utf8·len(char *s) +{ + int c; + long n; + rune r; + + n = 0; + for(;;){ + c = *(uchar*)s; + if(c < Tx){ + if(c == 0) + return n; + s++; + }else + s += utf8·decode(s, &r); + n++; + } +} diff --git a/src/base/utf/rules.mk b/src/base/utf/rules.mk new file mode 100644 index 0000000..446c113 --- /dev/null +++ b/src/base/utf/rules.mk @@ -0,0 +1,71 @@ +UNICODE=14.0.0 + +SRCS_$(d)+=\ + $(d)/utf/encode.c\ + $(d)/utf/decode.c\ + $(d)/utf/decodeprev.c\ + $(d)/utf/find.c\ + $(d)/utf/findlast.c\ + $(d)/utf/canfit.c\ + $(d)/utf/runelen.c\ + $(d)/utf/len.c\ + $(d)/utf/runetype-$(UNICODE).c\ + $(d)/utf/runewidth-$(UNICODE).c + +# ======================================================================== +# table generation + +# NOTE: this is pretty hacky... +NEED_OBJS=\ + $(OBJ_DIR)/base/arg.o\ + $(OBJ_DIR)/base/utf/decode.o\ + $(OBJ_DIR)/base/error/panicf.o\ + $(OBJ_DIR)/base/io/readln.o\ + $(OBJ_DIR)/base/io/open.o\ + $(OBJ_DIR)/base/io/close.o + +$(d)/utf/vendor/common.o: $(d)/utf/vendor/common.c + $(COMPILE) + +# rune categories +$(d)/utf/vendor/UnicodeData-$(UNICODE).txt: + @echo "GET UnicodeData.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@ + +$(d)/utf/vendor/mkrunetype: $(d)/utf/vendor/mkrunetype.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +GENS += $(d)/utf/vendor/mkrunetype + +$(d)/utf/runetype-$(UNICODE).c: $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/mkrunetype + @$(dir $@)vendor/mkrunetype $< > $@ + +# rune widths +$(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt: + @echo "GET EastAsianWidth.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@ + +$(d)/utf/vendor/EmojiData-$(UNICODE).txt: + @echo "GET EmojiData.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@ + +$(d)/utf/vendor/mkrunewidth: $(d)/utf/vendor/mkrunewidth.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +GENS += $(d)/utf/vendor/mkrunewidth + +$(d)/utf/runewidth-$(UNICODE).c: $(d)/utf/vendor/mkrunewidth $(d)/utf/vendor/UnicodeData-$(UNICODE).txt $(d)/utf/vendor/EastAsianWidth-$(UNICODE).txt $(d)/utf/vendor/EmojiData-$(UNICODE).txt + @$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@ + +# grapheme boundaries +$(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt: + @echo "GET GraphemeBreakProperty.txt";\ + curl https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@ + +$(d)/utf/vendor/mkgraphemedata: $(d)/utf/vendor/mkgraphemedata.c $(d)/utf/vendor/common.o $(NEED_OBJS) + $(COMPLINK) + +$(d)/utf/graphemedata-$(UNICODE).c: $(d)/utf/vendor/mkgraphemedata $(d)/utf/vendor/GraphemeBreakProperty-$(UNICODE).txt + $^ > $@ + +GENS += $(d)/utf/vendor/mkgraphemedata diff --git a/src/base/utf/runelen.c b/src/base/utf/runelen.c new file mode 100644 index 0000000..dac7f15 --- /dev/null +++ b/src/base/utf/runelen.c @@ -0,0 +1,8 @@ +#include "internal.h" + +int +utf8·runelen(rune r) +{ + byte s[10]; + return utf8·encode(&r, s); +} diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c new file mode 100644 index 0000000..fcf1177 --- /dev/null +++ b/src/base/utf/vendor/common.c @@ -0,0 +1,220 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// input functions + +int +parse(io·Stream *io, int nfield, char **field, int len, char *line) +{ + int n; + if((n=io·readln(io, len, line)) <= 0) + return ParseEOF; + + if(n == len) + panicf("line too long"); + + if(line[n-1] != '\n') + panicf("invalid line: expected '\n', found '%c'", line[n]); + + line[n-1] = 0; + + if(line[0] == '#' || line[0] == 0) + return ParseSkip; + + /* tokenize line into fields */ + n = 0; + field[n] = line; + while(*line){ + if(*line == ';'){ + *line = 0; + field[++n] = line+1; + } + line++; + } + + if(n != nfield-1) + panicf("expected %d number of fields, got %d: %s", nfield, n, line); + + return ParseOK; +} + +int +codepoint(char *s) +{ + int c, b; + + c = 0; + while((b=*s++)){ + c <<= 4; + if(b >= '0' && b <= '9') + c += b - '0'; + else if(b >= 'A' && b <= 'F') + c += b - 'A' + 10; + else + panicf("bad codepoint char '%c'", b); + } + + return c; +} + +void +codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop) +{ + int e, c; + char *other[NumFields], line[1024]; + + // XXX: the stop variable passes in the previous stopping character + e = *stop; + c = codepoint(field[Fcode]); + + if(c >= NumRunes) + panicf("unexpected large codepoint %x", c); + if(c <= e) + panicf("bad code sequence: %x then %x", e, c); + e = c; + + if(strstr(field[Fname], ", First>") != nil){ + if(!parse(utf8, arrlen(other), other, arrlen(line), line)) + panicf("range start at end of file"); + if(strstr(other[Fname], ", Last>") == nil) + panicf("range start not followed by range end"); + + e = codepoint(other[Fcode]); + + if(e <= c) + panicf("bad code sequence: %x then %x", c, e); + if(strcmp(field[Fcategory], other[Fcategory]) != 0) + panicf("range with mismatched category"); + } + + *start = c; + *stop = e; +} + +// ----------------------------------------------------------------------- +// output functions + +void +putsearch(void) +{ + puts( + "#include \n" + "#include \n" + "\n" + "static\n" + "rune*\n" + "rangesearch(rune c, rune *t, int n, int ne)\n" + "{\n" + " rune *p;\n" + " int m;\n" + " while(n > 1) {\n" + " m = n >> 1;\n" + " p = t + m*ne;\n" + " if(c >= p[0]){\n" + " t = p;\n" + " n = n-m;\n" + " }else\n" + " n = m;\n" + " }\n" + " if(n && c >= t[0])\n" + " return t;\n" + " return 0;\n" + "}\n" + ); + +} + +int +putrange(char *ident, char *prop, int force) +{ + int l, r, start; + + start = 0; + for(l = 0; l < NumRunes;) { + if(!prop[l]){ + l++; + continue; + } + + for(r = l+1; r < NumRunes; r++){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(force || r > l + 1){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r-1); + } + + l = r; + } + + if(start) + printf("};\n\n"); + + return start; +} + +int +putpair(char *ident, char *prop) +{ + int l, r, start; + + start = 0; + for(l=0; l+2 < NumRunes; ){ + if(!prop[l]){ + l++; + continue; + } + + for(r = l + 2; r < NumRunes; r += 2){ + if(!prop[r]) + break; + prop[r] = 0; + } + + if(r != l + 2){ + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[l] = 0; + printf("\t0x%.4x, 0x%.4x,\n", l, r - 2); + } + + l = r; + } + + if(start) + printf("};\n\n"); + return start; +} + +int +putsingle(char *ident, char *prop) +{ + int i, start; + + start = 0; + for(i = 0; i < NumRunes; i++) { + if(!prop[i]) + continue; + + if(!start){ + printf("static rune %s[] = {\n", ident); + start = 1; + } + prop[i] = 0; + printf("\t0x%.4x,\n", i); + } + + if(start) + printf("};\n\n"); + + return start; +} diff --git a/src/base/utf/vendor/common.h b/src/base/utf/vendor/common.h new file mode 100644 index 0000000..51a53bd --- /dev/null +++ b/src/base/utf/vendor/common.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +enum +{ + // Fields inside UnicodeData.txt + Fcode, + Fname, + Fcategory, + Fcombine, + Fbidir, + Fdecomp, + Fdecimal, + Fdigit, + Fnumeric, + Fmirror, + Foldname, + Fcomment, + Fupper, + Flower, + Ftitle, + + NumFields, + NumRunes = 1 << 21, +}; + +/* input functions */ +enum +{ + ParseEOF, + ParseOK, + ParseSkip, +}; + +int parse(io·Stream *io, int nfield, char **field, int len, char *line); +int codepoint(char *s); +void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop); + +/* output functions */ +void putsearch(void); +int putrange(char *ident, char *prop, int force); +int putpair(char *ident, char *prop); +int putsingle(char *ident, char *prop); diff --git a/src/base/utf/vendor/mkgraphemedata.c b/src/base/utf/vendor/mkgraphemedata.c new file mode 100644 index 0000000..ce5a952 --- /dev/null +++ b/src/base/utf/vendor/mkgraphemedata.c @@ -0,0 +1,24 @@ +#include +#include +#include + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkgraphemedata \n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + io·Stream *utf8; + char line[1024]; + + ARGBEGIN{ + }ARGEND; +} diff --git a/src/base/utf/vendor/mkrunetype.c b/src/base/utf/vendor/mkrunetype.c new file mode 100644 index 0000000..b33df32 --- /dev/null +++ b/src/base/utf/vendor/mkrunetype.c @@ -0,0 +1,390 @@ +#include "common.h" + +// ----------------------------------------------------------------------- +// globals + +#define OFFSET (1 << 20) +#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x)) + +// TODO: use bitarrays. will reduce executable size 8x +struct Table +{ + /* properties */ + char isspace[NumRunes]; + char isalpha[NumRunes]; + char ismark[NumRunes]; + char isdigit[NumRunes]; + char isupper[NumRunes]; + char islower[NumRunes]; + char istitle[NumRunes]; + char ispunct[NumRunes]; + char issymbl[NumRunes]; + char iscntrl[NumRunes]; + + char combine[NumRunes]; + + /* transformations */ + int toupper[NumRunes]; + int tolower[NumRunes]; + int totitle[NumRunes]; +}; + +static struct Table table; + +// ----------------------------------------------------------------------- +// internal functions + +static +int +isrange(char *label, char *prop, int force) +{ + char ident[128]; + if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident)) + panicf("out of identifier space\n"); + + return putrange(ident, prop, force); +} + +static +int +ispair(char *label, char *prop) +{ + char ident[128]; + if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident)) + panicf("out of identifier space\n"); + + return putpair(ident, prop); +} + +static +int +issingle(char *label, char *prop) +{ + char ident[128]; + if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident)) + panicf("out of identifier space\n"); + + return putsingle(ident, prop); +} + +static +void +makeis(char *label, char *table, int pairs, int onlyranges) +{ + int hasr, hasp=0, hass=0; + + hasr = isrange(label, table, onlyranges); + if(!onlyranges && pairs) + hasp = ispair(label, table); + if(!onlyranges) + hass = issingle(label, table); + + printf( + "int\n" + "utf8·is%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(hasr){ + printf( + " p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return 1;\n", + label, label); + } + + if(hasp){ + printf( + " p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return 1;\n", + label, label); + } + + if(hass) + printf( + " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + label, label); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + +static +int +torange(char *label, int *index, int force) +{ + int l, r, d, start = 0; + + for(l = 0; l < NumRunes; ){ + if(index[l] == l){ + l++; + continue; + } + + d = DELTA(index[l], l); + if(d != (rune)d) + panicf("bad map delta %d", d); + + for(r = l+1; r < NumRunes; r++){ + if(DELTA(index[r], r) != d) + break; + index[r] = r; + } + + if(force || r != l + 1){ + if(!start){ + printf("static rune to%s_range[] = {\n", label); + start = 1; + } + index[l] = l; + printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d); + } + l = r; + } + if(start) + printf("};\n\n"); + + return start; +} + +static +int +topair(char *label, int *index) +{ + int l, r, d, start = 0; + + for(l = 0; l + 2 < NumRunes; ){ + if(index[l] == l){ + l++; + continue; + } + + d = DELTA(index[l], l); + if(d != (rune)d) + panicf("bad delta %d", d); + + for(r = l+2; r < NumRunes; r += 2){ + if(DELTA(index[r], r) != d) + break; + index[r] = r; + } + + if(r > l+2){ + if(!start){ + printf("static rune to%s_pair[] = {\n", label); + start = 1; + } + index[l] = l; + printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d); + } + + l = r; + } + if(start) + printf("};\n\n"); + + return start; +} + +static +int +tosingle(char *label, int *index) +{ + int i, d, start = 0; + + for(i=0; i < NumRunes; i++) { + if(index[i] == i) + continue; + + d = DELTA(index[i], i); + if(d != (rune)d) + panicf("bad map delta %d", d); + + if(!start){ + printf("static rune to%s_single[] = {\n", label); + start = 1; + } + index[i] = i; + printf("\t0x%.4x, %d,\n", i, d); + } + if(start) + printf("};\n\n"); + + return start; +} + +static +void +mkto(char *label, int *index, int pairs, int onlyrange) +{ + int hasr, hasp=0, hass=0; + + hasr = torange(label, index, !onlyrange); + if(!onlyrange && pairs) + hasp = topair(label, index); + if(!onlyrange) + hass = tosingle(label, index); + + printf( + "rune\n" + "utf8·to%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(hasr) + printf( + " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return c + p[2] - %d;\n", + label, label, OFFSET); + + if(hasp) + printf( + " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return c + p[2] - %d;\n", + label, label, OFFSET); + + if(hass) + printf( + " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n" + " if(p && c == p[0])\n" + " return c + p[1] - %d;\n", + label, label, OFFSET); + + + printf( + " return c;\n" + "}\n" + "\n" + ); +} + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunetype \n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + int i, sc, c, ec; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + ARGBEGIN{ + }ARGEND; + + if(argc != 1) + usage(); + + if(!(utf8 = io·open(argv[0], "r"))) + panicf("can't open %s\n", argv[0]); + + /* by default each character maps to itself */ + for(i = 0; i < NumRunes; i++) { + table.toupper[i] = i; + table.tolower[i] = i; + table.totitle[i] = i; + } + + /* ensure all C local white space characters pass */ + table.isspace['\t'] = 1; + table.isspace['\n'] = 1; + table.isspace['\r'] = 1; + table.isspace['\f'] = 1; + table.isspace['\v'] = 1; + table.isspace[0x85] = 1; + + ec = -1; + // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + /* parse unicode range */ + codepointrange(utf8, field, &sc, &ec); + prop = field[Fcategory]; + + for(c = sc; c <= ec; c++){ + /* grab properties */ + switch(prop[0]){ + case 'L': + table.isalpha[c] = 1; + switch(prop[1]){ + case 'u': table.isupper[c] = 1; break; + case 'l': table.islower[c] = 1; break; + case 't': table.istitle[c] = 1; break; + case 'm': break; // modifier letters + case 'o': break; // ideograph letters + default: + goto badproperty; + } + break; + + case 'Z': + table.isspace[c] = 1; + break; + + case 'M': + table.ismark[c] = 1; + break; + + case 'N': + table.isdigit[c] = 1; + break; + + case 'P': + table.ispunct[c] = 1; + break; + + case 'S': + table.issymbl[c] = 1; + break; + + case 'C': + table.iscntrl[c] = 1; + break; + + default: badproperty: + panicf("unrecognized category '%s'", prop); + } + /* grab transformations */ + if(*field[Fupper]) + table.toupper[c] = codepoint(field[Fupper]); + if(*field[Flower]) + table.tolower[c] = codepoint(field[Flower]); + if(*field[Ftitle]) + table.totitle[c] = codepoint(field[Ftitle]); + } + } + io·close(utf8); + + putsearch(); + + makeis("space", table.isspace, 0, 1); + makeis("digit", table.isdigit, 0, 1); + makeis("alpha", table.isalpha, 0, 0); + makeis("upper", table.isupper, 1, 0); + makeis("lower", table.islower, 1, 0); + makeis("title", table.istitle, 1, 0); + makeis("punct", table.ispunct, 1, 0); + + mkto("upper", table.toupper, 1, 0); + mkto("lower", table.tolower, 1, 0); + mkto("title", table.totitle, 1, 0); + + return 0; +} diff --git a/src/base/utf/vendor/mkrunewidth.c b/src/base/utf/vendor/mkrunewidth.c new file mode 100644 index 0000000..14e6973 --- /dev/null +++ b/src/base/utf/vendor/mkrunewidth.c @@ -0,0 +1,325 @@ +#include "common.h" + +/* + * inspired by design choices in utf8proc/charwidths.jl + * all widths default to 1 unless they fall within the categories: + * 1. Mn 2. Mc 3. Me 4. Zl + * 5. Zp 6. Cc 7. Cf 8. Cs + * these default to zero width + */ +enum +{ + /* width ? */ + WidthNeutral, /* (N) practially treated like narrow but unclear ... */ + WidthAmbiguous, /* (A) sometimes wide and sometimes not... */ + /* width 1 */ + WidthHalf, /* (H) = to narrow (compatability equivalent) */ + WidthNarrow, /* (Na) ASCII width */ + /* width 2 */ + WidthWide, /* (W) 2x width */ + WidthFull, /* (F) = to wide (compatability equivalent) */ +}; + +struct Table +{ + char width[3][NumRunes]; +}; + +static struct Table table; + +// ----------------------------------------------------------------------- +// internal functions + +static +void +parse_category(char *path) +{ + int sc, c, ec, w; + io·Stream *utf8; + char *prop, *field[NumFields], line[1024]; + + if(!(utf8 = io·open(path, "r"))) + panicf("can't open %s\n", path); + + // NOTE: we don't check for comments here + ec = -1; + while(parse(utf8, arrlen(field), field, arrlen(line), line)){ + codepointrange(utf8, field, &sc, &ec); + + prop = field[Fcategory]; + + switch(prop[0]){ + case 'M': + switch(prop[1]){ + case 'n': case 'c': case 'e': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'Z': + switch(prop[1]){ + case 'l': case 'p': + w = 0; + break; + default: + w = 1; + break; + } + break; + case 'C': + switch(prop[1]){ + case 'c': case 'f': case 's': + w = 0; + break; + default: + w = 1; + break; + } + default: + w = 1; + } + + for(c = sc; c <= ec; c++) + table.width[w][c] = 1; + } + + io·close(utf8); +} + +static +void +coderange(char *field, int *l, int *r) +{ + char *s; + + if(!(s = strstr(field, ".."))) + *l=*r=codepoint(field); + else{ + *s++ = 0, *s++ = 0; + *l=codepoint(field); + *r=codepoint(s); + } +} + +static +void +parse_eawidths(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + switch(field[1][0]){ + case 'A': continue; + case 'N': + if(field[1][1] != 'a') + continue; + /* fallthrough */ + case 'H': w = 1; break; + + case 'W': /* fallthrough */ + case 'F': w = 2; break; + + default: + panicf("malformed east asian width class: %s\n", field[1]); + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + /* ensure it only exists in one table */ + table.width[w][c] = 1; + table.width[(w+1)%3][c] = 0; + table.width[(w+2)%3][c] = 0; + } + } + io·close(utf8); +} + +static +void +parse_emoji(char *path) +{ + int at, w; + int l, c, r; + io·Stream *utf8; + char *s, *field[2], line[1024]; + + utf8 = io·open(path, "r"); + while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ + if(at == ParseSkip) + continue; + + /* only override emoji presentation */ + if(!strstr(field[1], "Emoji_Presentation")) + continue; + + /* trim trailing space */ + for(s=field[0]; *s; s++){ + if(*s == ' ') + *s = 0; + } + + coderange(field[0], &l, &r); + + for(c=l; c <= r; c++){ + table.width[0][c] = 0; + table.width[1][c] = 0; + table.width[2][c] = 1; + } + } + + io·close(utf8); +} + +/* output functions */ +static +void +maketable(char *label, char *table, int pairs, int onlyranges) +{ + int r, p=0, s=0; + char ident[3][128]; + + enum + { + Irange, + Ipair, + Isingle, + }; + + /* ranges */ + if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange])) + panicf("out of identifier space\n"); + r = putrange(ident[Irange], table, onlyranges); + + if(!onlyranges && pairs){ + if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair])) + panicf("out of identifier space\n"); + p = putpair(ident[Ipair], table); + } + if(!onlyranges){ + if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle])) + panicf("out of identifier space\n"); + + s = putsingle(ident[Isingle], table); + } + + printf( + "static int\n" + "is%s(rune c)\n" + "{\n" + " rune *p;\n" + "\n", + label); + + if(r){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1])\n" + " return 1;\n", + ident[Irange], ident[Irange]); + } + + if(p){ + printf( + " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" + " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" + " return 1;\n", + ident[Ipair], ident[Ipair]); + } + + if(s) + printf( + " p = rangesearch(c, %s, arrlen(%s), 1);\n" + " if(p && c == p[0])\n" + " return 1;\n", + ident[Isingle], ident[Isingle]); + + printf( + " return 0;\n" + "}\n" + "\n"); +} + +// ----------------------------------------------------------------------- +// main point of entry + +static +void +usage(void) +{ + fprintf(stderr, "usage: mkrunewidth \n"); + exit(1); +} + +#define SETW0(c) \ + table.width[0][(c)] = 1, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 0; + +#define SETW1(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 1, \ + table.width[2][(c)] = 0; + +#define SETW2(c) \ + table.width[0][(c)] = 0, \ + table.width[1][(c)] = 0, \ + table.width[2][(c)] = 1; + + +int +main(int argc, char *argv[]) +{ + int c; + + ARGBEGIN{ + }ARGEND; + + if(argc != 3) + usage(); + + parse_category(*argv++); + parse_eawidths(*argv++); + parse_emoji(*argv); + + /* overrides */ + SETW0(0x2028); + SETW0(0x2029); + + SETW1(0x00AD); + + /* simple checking */ + for(c=0; c 1) + panicf("improper table state"); + } + + putsearch(); + + maketable("width0", table.width[0], 1, 0); + maketable("width1", table.width[1], 1, 0); + maketable("width2", table.width[2], 1, 0); + + puts( + "\n" + "int\n" + "utf8·runewidth(rune c)\n" + "{\n" + " if(iswidth1(c))\n" + " return 1;\n" + " if(iswidth2(c))\n" + " return 2;\n" + " return 0;\n" + "}" + ); +} diff --git a/src/cmd/core/basename.c b/src/cmd/core/basename.c index ba9d4c9..263592f 100644 --- a/src/cmd/core/basename.c +++ b/src/cmd/core/basename.c @@ -1,6 +1,5 @@ #include #include -#include static void usage(void) diff --git a/src/cmd/core/cat.c b/src/cmd/core/cat.c index 4f0929c..e9b770b 100644 --- a/src/cmd/core/cat.c +++ b/src/cmd/core/cat.c @@ -1,6 +1,5 @@ #include #include -#include static void usage(void) diff --git a/src/cmd/dwm/dwm.h b/src/cmd/dwm/dwm.h index afec1f2..269f373 100644 --- a/src/cmd/dwm/dwm.h +++ b/src/cmd/dwm/dwm.h @@ -2,7 +2,6 @@ #pragma once #include #include -#include #include #include diff --git a/src/cmd/menu/menu.h b/src/cmd/menu/menu.h index f4345bb..de01607 100644 --- a/src/cmd/menu/menu.h +++ b/src/cmd/menu/menu.h @@ -1,7 +1,6 @@ /* See LICENSE file for copyright and license details. */ #include #include -#include #include #include diff --git a/src/cmd/rc/rc.h b/src/cmd/rc/rc.h index f52e4e2..76a1b3d 100644 --- a/src/cmd/rc/rc.h +++ b/src/cmd/rc/rc.h @@ -2,7 +2,7 @@ #include #include -#include +#include // ----------------------------------------------------------------------- // types diff --git a/src/cmd/term/term.h b/src/cmd/term/term.h index 6784974..c370239 100644 --- a/src/cmd/term/term.h +++ b/src/cmd/term/term.h @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/src/libbio/rules.mk b/src/libbio/rules.mk index f4e75f4..032670c 100644 --- a/src/libbio/rules.mk +++ b/src/libbio/rules.mk @@ -6,7 +6,6 @@ SRCS_$(d) := \ $(d)/newick.c \ $(d)/phylo.c LIBS_$(d) := $(d)/libbio.a -BINS_$(d) := # CHECK_$(d) := \ # $(d)/test.c \ # $(d)/simulate.c @@ -17,8 +16,8 @@ include share/paths.mk $(LIBS_$(d)): $(OBJS_$(d)) $(OBJS_$(d)/io) $(ARCHIVE) -$(TEST_$(d)): TLIBS = $(LIBS_$(d)) $(OBJ_DIR)/libn/libn.a -$(TEST_$(d)): $(UNIT_$(d)) $(LIBS_$(d)) $(OBJ_DIR)/libn/libn.a +$(TEST_$(d)): TLIBS = $(LIBS_$(d)) $(OBJ_DIR)/base/base.a +$(TEST_$(d)): $(UNIT_$(d)) $(LIBS_$(d)) $(OBJ_DIR)/base/base.a $(LINK) include share/pop.mk diff --git a/src/libc/rules.mk b/src/libc/rules.mk deleted file mode 100644 index 34e0912..0000000 --- a/src/libc/rules.mk +++ /dev/null @@ -1,20 +0,0 @@ -include share/push.mk - -# Iterate through subdirectory tree - -# Local sources -SRCS_$(d) := $(wildcard $(d)/*.c) -LIBS_$(d) := $(d)/libc_n.a -BINS_$(d) := - -include share/paths.mk - -# Local rules -$(LIBS_$(d)): TCFLAGS = -ffreestanding -fno-builtin -nostdlib -$(LIBS_$(d)): $(OBJS_$(d)) - $(ARCHIVE) - -$(BINS_$(d)): $(OBJ_DIR)/libn/test.o - $(LINK) - -include share/pop.mk diff --git a/src/libc/stdio.c b/src/libc/stdio.c deleted file mode 100644 index 8bbbe9a..0000000 --- a/src/libc/stdio.c +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include - -int -printf(byte* fmt, ...) -{ - va_list args; - va_start(args, fmt); - - int nw, rem, peek, len; - byte *str, c; - - while (*fmt) { - rem = INT_MAX - nw; - - if (fmt[0] != '%' || fmt[1] == '%') { - if (fmt[0] == '%') fmt++; - - for (peek = 1; fmt[peek] && fmt[peek] != '%'; peek++) { - ; - } - if (rem < peek) return -1; - // TODO: Print here. - fmt += peek; - nw += peek; - continue; - } - - str = fmt++; - - switch (*fmt++) { - case 'c': - c = va_arg(args, int); - if (rem < 0) return -1; - // TODO: Print here - nw++; - break; - - case 's': - str = va_arg(args, byte*); - len = strlen(str); - if (rem < len) return -1; - // TODO: Print here - nw += len; - break; - default: - fmt = str; - len = strlen(fmt); - if (rem < len) return -1; - // TODO: Print here - nw += len; - fmt += len; - break; - } - } - - va_end(args); - return nw; -} diff --git a/src/libc/string.c b/src/libc/string.c deleted file mode 100644 index 0e41efa..0000000 --- a/src/libc/string.c +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include - -void* -memcopy(void *dst, void *src, intptr n) -{ - byte *e, *s, *d; - - d = dst; - e = d + n; - for (s = src ; d != e; ++s, ++d) { - *d = *s; - } - - return dst; -} - -void* -memmove(void *dst, void *src, intptr n) -{ - byte *e, *s, *d; - s = src; - d = dst; - - if (d < s) { - e = d + n; - for (; d != e; ++s, ++d) - *d = *s; - - } else { - e = d; - d += n; - s += n; - for (; d != e; --s, --d) - d[-1] = s[-1]; - } - - return dst; -} - -void* -memset(void *buf, int val, intptr n) -{ - byte *b, *e; - b = buf; - e = b + n; - for (; b != e; b++) { - *b = (byte)val; - } - - return buf; -} - -int -memcmp(void *lhs, void *rhs, intptr n) -{ - byte *bl, *br, *e; - - br = rhs; - e = br + n; - for (bl = lhs; br != e; ++bl, ++br) { - if (*bl < *br) - return -1; - else if (*bl > *br) - return 1; - } - - return 0; -} - -int -strlen(byte* s) -{ - byte* b; - for (b = s; *b; b++) { - ; - } - - return b - s; -} diff --git a/src/libfmt/buffer.c b/src/libfmt/buffer.c deleted file mode 100644 index 0099e72..0000000 --- a/src/libfmt/buffer.c +++ /dev/null @@ -1,60 +0,0 @@ -#include "internal.h" - -static int -flush(fmt·State *io) -{ - int n; - char *s; - - void *heap = io->heap; - mem·Reallocator mem = io->mem; - - if(!io->buffer.beg) - return 0; - - n = 2*(uintptr)io->file; - s = io->buffer.beg; - - io->buffer.beg = mem.realloc(heap, io->buffer.beg, n, 1); - if(!io->buffer.beg){ - io->file = io->buffer.cur = io->buffer.end = nil; - mem.free(heap, s); - return 0; - } - io->file = (void*)(uintptr)n; - io->buffer.cur = io->buffer.beg + (io->buffer.cur - s); - io->buffer.end = io->buffer.beg + n - 1; - - return 1; -} - -int -fmt·make(mem·Reallocator mem, void *heap, fmt·State *io) -{ - int n; - - memset(io, 0, sizeof(*io)); - - n = 32; - io->buffer.beg = io->buffer.cur = mem.alloc(heap, n, 1); - if(!io->buffer.beg) - return -1; - io->buffer.end = io->buffer.beg + n - 1; - - io->flush = flush; - io->file = (void*)(uintptr)n; - io->n = 0; - - fmt·setlocale(io, nil, nil, nil); - return 0; -} - -void -fmt·free(fmt·State *io) -{ - void *heap = io->heap; - mem·Reallocator mem = io->mem; - - mem.free(heap, io->buffer.beg); - io->buffer.beg = io->buffer.cur = io->buffer.end = nil; -} diff --git a/src/libfmt/do.c b/src/libfmt/do.c deleted file mode 100644 index bd2e65c..0000000 --- a/src/libfmt/do.c +++ /dev/null @@ -1,728 +0,0 @@ -#include "internal.h" -#include - -#define MaxFmt 128 -#define atomic·load(p) (*(p)) - -// ----------------------------------------------------------------------- -// globals - -/* built in verbs */ -static int fmtflag(fmt·State *); -static int fmtpercent(fmt·State *); -static int fmtrune(fmt·State *); -static int fmtfloat(fmt·State *); -static int fmtutf8(fmt·State *); -static int fmtint(fmt·State *); -static int fmtchar(fmt·State *); -static int fmtcount(fmt·State *); -static int fmtstring(fmt·State *); -static int fmterror(fmt·State *); - -static int badfmt(fmt·State *); - -static struct -{ - volatile int len; - Verb verb[MaxFmt]; -} formatter = -{ - 30, - { - {' ', fmtflag}, - {'#', fmtflag}, - {'%', fmtpercent}, - {'\'',fmtflag}, - {'+', fmtflag}, - {',', fmtflag}, - {'-', fmtflag}, - {'C', fmtrune}, - {'E', fmtfloat}, - {'F', fmtfloat}, - {'G', fmtfloat}, - {'L', fmtflag}, - {'S', fmtutf8}, - {'X', fmtint}, - {'b', fmtint}, - {'c', fmtchar}, - {'d', fmtint}, - {'e', fmtfloat}, - {'f', fmtfloat}, - {'g', fmtfloat}, - {'h', fmtflag}, - {'i', fmtint}, - {'l', fmtflag}, - {'n', fmtcount}, - {'o', fmtint}, - {'p', fmtint}, - {'r', fmterror}, - {'s', fmtstring}, - {'U', fmtflag}, - {'u', fmtint}, - {'x', fmtint}, - } -}; - -// ----------------------------------------------------------------------- -// internal functions - -static Formatter -format(int c) -{ - Verb *v, *e; - e = &formatter.verb[atomic·load(&formatter.len)]; - for(v=e; v > formatter.verb; --v){ - if(v->c == c) - return v->fmt; - } - - return badfmt; -} - -static char * -dispatch(fmt·State *io, char *fmt) -{ - rune r; - int i, n; - - io->flag = 0; - io->width = io->prec = 0; - - /* - * the form of each print verb: - * % [flags] verb - * + the verb is a single character - * + each flag is either - * - a single character - * - a decimal numeric string - * - up to 2 decimal strings can be used - * - [width|*].[prec|*] - * - if missing, set to 0 - * - if *, grab from varargs - */ - for(;;){ - fmt += utf8·decode(fmt, &r); - io->verb = r; - switch(r){ - case 0: - return nil; - case '.': - io->flag |= fmt·Width|fmt·Prec; - continue; - case '0': - if(!(io->flag & fmt·Width)){ - io->flag |= fmt·Zero; - continue; - } - /* fallthrough */ - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - i = 0; - while('0' <= r && r <= '9'){ - i = 10*i + (r-'0'); - r = *fmt++; - } - fmt--; - number: - if(io->flag & fmt·Width){ - io->flag |= fmt·Prec; - io->prec = i; - }else{ - io->flag |= fmt·Width; - io->width = i; - } - continue; - case '*': - i = va_arg(io->args, int); - if(i < 0){ - if(io->flag&fmt·Prec){ - io->flag &= ~fmt·Prec; - io->prec = 0; - continue; - } - i = -i; - io->flag |= fmt·Left; - } - goto number; - } - n = format(r)(io); - if(n < 0) - return nil; - if(!n) - return fmt; - } -} - -static char * -flush(fmt·State *io, char *b, int len) -{ - io->n += b - io->buffer.cur; - io->buffer.cur = b; - if(!io->flush || !(*io->flush)(io) || io->buffer.cur + len >= io->buffer.end) { - io->buffer.end = io->buffer.cur; - return nil; - } - return io->buffer.cur; -} - -static int -pad(fmt·State *io, int n) -{ - int i; - char *b=io->buffer.cur, *e=io->buffer.end; - - for(i=0; i=e){ - if(!(b=flush(io, b, 1))) - return -1; - e = io->buffer.end; - } - *b++ = ' '; - } - - io->n += b - io->buffer.cur; - io->buffer.cur = b; - return 0; -} - -static int -copy(fmt·State *io, char *m, int sz, int n) -{ - ulong f; - rune r; - int nc, w, nb; - char *b, *e, *me; - - w = 0; - f = io->flag; - me = m + sz; - - if(f&fmt·Width) - w = io->width; - if(f&fmt·Prec && n > io->prec) - n = io->prec; - if(!(f&fmt·Left) && pad(io, w-n)<0) - return -1; - - b = io->buffer.cur; - e = io->buffer.end; - - for(nc=n; nc>0; nc--){ - r = *(uchar *)m; - if(utf8·onebyte(r)){ - nb=1; - m++; - }else if((me-m) >= UTFmax || utf8·canfit(m, me-m)){ - nb=utf8·decode(m, &r); - m+=n; - }else - break; - - if(b+n>e){ - if(!(b=flush(io, b, nb))) - return -1; - e = io->buffer.end; - } - b += utf8·encode(&r, b); - } - - io->n += b - io->buffer.cur; - io->buffer.cur = b; - if(f&fmt·Left && pad(io, w-n)<0) - return -1; - - return 0; -} - -static int -copyrune(fmt·State *io, rune *m, int n) -{ - ulong f; - rune r, *me; - int w, nb; - char *b, *e; - - w = 0; - f = io->flag; - - if(f&fmt·Width) - w = io->width; - if(f&fmt·Prec && n > io->prec) - n = io->prec; - - if(!(f&fmt·Left) && pad(io, w-n)<0) - return -1; - - b = io->buffer.cur; - e = io->buffer.end; - - for(me=m+n; m < me; m++){ - r = *m; - nb = utf8·runelen(r); - if(b + nb > e){ - if(!(b=flush(io, b, nb))) - return -1; - e = io->buffer.end; - } - b += utf8·encode(&r, b); - } - - io->n += b - io->buffer.cur; - io->buffer.cur = b; - if(f&fmt·Left && pad(io, w-n)<0) - return -1; - - return 0; -} - -static int -copystring(fmt·State *io, char *s) -{ - rune r; - int i,j; - - if(!s) - return copy(io, "", 5, 5); - - if(io->flag&fmt·Prec){ - i = 0; - for(j=0; j < io->prec && s[i]; j++) - i += utf8·decode(s+i, &r); - - return copy(io, s, i, j); - } - return copy(io, s, strlen(s), utf8·len(s)); -} - -static int -copyutf8(fmt·State *io, rune *s) -{ - rune *e; - int n,p; - - if(!s) - return copy(io, "", 5, 5); - - if(io->flag & fmt·Prec){ - p = io->prec; - for(n=0; n group){ - if((*groups)[1] != 0) - (*groups)++; - *digits = 1; - return 1; - } - return 0; -} - -// ----------------------------------------------------------------------- -// formatters - -static int -fmtchar(fmt·State *io) -{ - char x[1]; - x[0] = va_arg(io->args, int); - io->prec = 1; - - return copy(io, x, 1, 1); -} - -static int -fmtstring(fmt·State *io) -{ - char *s; - s = va_arg(io->args, char *); - return copystring(io, s); -} - -static int -fmterror(fmt·State *io) -{ - char *s; - s = strerror(errno); - return copystring(io, s); -} - -static int -fmtrune(fmt·State *io) -{ - rune x[1]; - - x[0] = va_arg(io->args, int); - return copyrune(io, x, 1); -} - -static int -fmtutf8(fmt·State *io) -{ - rune *s; - - s = va_arg(io->args, rune *); - return copyutf8(io, s); -} - -static int -fmtpercent(fmt·State *io) -{ - rune x[1]; - - x[0] = io->verb; - io->prec = 1; - return copyrune(io, x, 1); -} - -static int -fmtint(fmt·State *io) -{ - union{ - ulong u; - uvlong v; - } val; - int neg, base, i, n, f, w, isv; - int digits, bytes, runes, excess; - char *groups, *thousands; - char *p, *conv, buf[140]; - - f = io->flag; - neg = 0; - isv = 0; - val.u = 0; - - switch(io->verb){ - case 'o': case 'p': case 'u': case 'x': case 'X': - f |= fmt·Unsigned; - f &= ~(fmt·Sign|fmt·Space); - } - - /* set flags */ - if(io->verb=='p'){ - val.u = (ulong)va_arg(io->args, void*); - io->verb = 'x'; - f |= fmt·Unsigned; - }else if(f&fmt·Vlong){ - isv=1; - if(f&fmt·Unsigned) - val.v = va_arg(io->args, uvlong); - else - val.v = va_arg(io->args, vlong); - }else if(f&fmt·Long){ - if(f&fmt·Unsigned) - val.u = va_arg(io->args, ulong); - else - val.u = va_arg(io->args, long); - }else if(f&fmt·Byte){ - if(f&fmt·Unsigned) - val.u = (uchar)va_arg(io->args, int); - else - val.u = (char)va_arg(io->args, int); - }else if(f&fmt·Short){ - if(f&fmt·Unsigned) - val.u = (ushort)va_arg(io->args, int); - else - val.u = (short)va_arg(io->args, int); - }else{ - if(f&fmt·Unsigned) - val.u = va_arg(io->args, uint); - else - val.u = va_arg(io->args, int); - } - - conv = "0123456789abcdef"; - groups = "\4"; - thousands = io->thousands; - /* get base */ - switch(io->verb){ - case 'd': case 'i': case 'u': - base = 10; - groups = io->groups; - break; - case 'X': - conv = "0123456789ABCDEF"; - /*fallthrough*/ - case 'x': - base = 16; - thousands = ":"; - break; - case 'b': - base = 2; - thousands = ":"; - break; - case 'o': - base = 8; - break; - default: - return -1; - } - - /* check for negativity */ - if(!(f&fmt·Unsigned)){ - if(isv && (vlong)val.v < 0){ - val.v = -(vlong)val.v; - neg = 1; - }else if(!isv && (long)val.u < 0){ - val.u = -(long)val.u; - neg = 1; - } - } - - p = buf + sizeof(buf) - 1; - n = 0; - digits = 0; - excess = 0; - runes = utf8·len(thousands); - bytes = strlen(thousands); - -#define PARSE(VALUE) \ - while((VALUE)){ \ - i = (VALUE) % base; \ - (VALUE) /= base; \ - if((f&fmt·Comma) && n%4 == 3){ \ - *p-- = ','; \ - n++; \ - } \ - if((f&fmt·Apost) && needseperate(&digits, &groups)){ \ - n += runes; \ - excess += bytes - runes; \ - p -= bytes; \ - memmove(p+1, thousands, bytes); \ - } \ - *p-- = conv[i]; \ - n++; \ - } - if(isv) - PARSE(val.v) - else - PARSE(val.u) -#undef PARSE - - if(!n){ - if(!(f&fmt·Prec) || io->prec != 0 || (io->verb == 'o' && (f&fmt·Sharp))){ - *p-- = '0'; - n = 1; - if(f&fmt·Apost) - needseperate(&digits,&groups); - } - - if(io->verb == 'x' || io->verb == 'X') - f &= ~fmt·Sharp; - } - - for(w = io->prec; n < w && p > buf+3; n++){ - if((f&fmt·Apost) && needseperate(&digits, &groups)){ - n += runes; - excess += bytes - runes; - p -= bytes; - memmove(p+1, thousands, bytes); - } - *p-- = '0'; - } - - if(neg || (f&(fmt·Sign|fmt·Space))) - n++; - - if(f&fmt·Sharp){ - if(base==16) - n += 2; - else if(base == 8){ - if(p[1] == '0') - f &= ~fmt·Sharp; - else - n++; - } - } - - if(f&fmt·Zero && !(f & (fmt·Left|fmt·Prec))){ - w = 0; - if(f & fmt·Width) - w = io->width; - for(; n < w && p > buf+3; n++){ - if((f & fmt·Apost) && needseperate(&digits, &groups)){ - n += runes; - excess += bytes - runes; - p -= bytes; - memmove(p+1, thousands, bytes); - } - *p-- = '0'; - } - io->flag &= ~fmt·Width; - } - - if(f&fmt·Sharp){ - if(base==16) - *p-- = io->verb; - if(base==16 || base == 8) - *p-- = '0'; - } - - if(neg) - *p-- = '-'; - else if(f & fmt·Sign) - *p-- = '+'; - else if (f & fmt·Space) - *p-- = ' '; - - io->flag &= ~fmt·Prec; - return copy(io, p+1, n+excess, n); -} - -static int -fmtcount(fmt·State *io) -{ - void *p; - ulong f; - - f = io->flag; - p = va_arg(io->args, void*); - - if(f&fmt·Vlong) - *(vlong*)p = io->n; - else if(f&fmt·Long) - *(long*)p = io->n; - else if(f&fmt·Byte) - *(char*)p = io->n; - else if(f&fmt·Short) - *(short*)p = io->n; - else - *(int*)p = io->n; - - return 0; -} - -static int -fmtflag(fmt·State *io) -{ - switch(io->verb){ - case ',': io->flag |= fmt·Comma; break; - case '-': io->flag |= fmt·Left; break; - case '+': io->flag |= fmt·Sign; break; - case '#': io->flag |= fmt·Sharp; break; - case '\'': io->flag |= fmt·Apost; break; - case ' ': io->flag |= fmt·Space; break; - case 'u': io->flag |= fmt·Unsigned; break; - case 'L': io->flag |= fmt·Ldouble; break; - case 'h': - if(io->flag&fmt·Short) - io->flag |= fmt·Byte; - io->flag |= fmt·Short; - break; - case 'l': - if(io->flag&fmt·Long) - io->flag |= fmt·Vlong; - io->flag |= fmt·Long; - break; - } - return 1; -} - -static int -badfmt(fmt·State *io) -{ - int n; - char x[UTFmax+2]; - - x[0] = '%'; - n = 1 + utf8·encode(&io->verb, x+1); - x[n++] = '%'; - io->prec = n; - copy(io, x, n, n); - - return 0; -} - -#include "float.c" - -// ----------------------------------------------------------------------- -// exports - -int -fmt·do(fmt·State *io, char *fmt) -{ - rune r; - int c, n; - char *b, *e; - - for(;;){ - b = io->buffer.cur; - e = io->buffer.end; - while((c = *(uchar *)fmt) && c != '%'){ - if(utf8·onebyte(c)){ - if(b >= e){ - if(!(b=flush(io, b, 1))) - return -1; - e = io->buffer.end; - } - *b++ = *fmt++; - }else{ - n = utf8·decode(fmt, &r); - if(b + n > e){ - if(!(b=flush(io, b, n))) - return -1; - e = io->buffer.end; - } - while(n--) - *b++ = *fmt++; - } - } - fmt++; - io->n += b - io->buffer.cur; - io->buffer.cur = b; - if(!c) /* we hit our nul terminator */ - return io->n - n; - io->buffer.end = e; - - if(!(fmt=dispatch(io, fmt))) - return -1; - } -} - -int -fmt·install(int verb, Formatter func) -{ - Verb *v; - int i, ret; - -lock: - if(verb <= 0 || verb >= 65536){ - ret = -1; - goto unlock; - } - if(!func) - func = badfmt; - - if((i = atomic·load(&formatter.len))==MaxFmt) - return -1; - - v = &formatter.verb[i]; - v->c = verb; - v->fmt = func; - - atomic·store(&formatter.len, i+1); - ret = 0; -unlock: - return ret; -} diff --git a/src/libfmt/esprint.c b/src/libfmt/esprint.c deleted file mode 100644 index 6d97340..0000000 --- a/src/libfmt/esprint.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "internal.h" - -char * -fmt·esprint(char *buf, char *end, char *fmt, ...) -{ - char *p; - va_list args; - - va_start(args, fmt); - p = fmt·vesprint(buf, end, fmt, args); - va_end(args); - - return p; -} diff --git a/src/libfmt/float.c b/src/libfmt/float.c deleted file mode 100644 index 63ea80f..0000000 --- a/src/libfmt/float.c +++ /dev/null @@ -1,1077 +0,0 @@ -#define FDIGIT 30 -#define FDEFLT 6 -#define NSIGNIF 17 - -static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; -static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; -static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; - -static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; - -static int -isNaN(double val) -{ - union{ - uvlong i; - double f; - }x; - - x.f = val; - return (x.i&uvinf) == uvinf && (x.i&~uvneginf) != 0; -} - -static double -NaN(void) -{ - union{ - uvlong i; - double f; - }x; - x.i = uvnan; - return x.f; -} - -static int -isInf(double val, int sign) -{ - union{ - uvlong i; - double f; - }x; - - x.f = val; - if(sign == 0) - return x.i == uvinf || x.i == uvneginf; - else if(sign == 1) - return x.i == uvinf; - else - return x.i == uvneginf; -} - -static double pows10[] = -{ - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, - 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, - 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, - 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, - 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, - 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, - 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, - 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, - 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, - 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, - 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, - 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, - 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, - 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, - 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, - 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, -}; - -static double -fpow10(int n) -{ - double d; - int neg; - - neg = 0; - if(n < 0){ - neg = 1; - n = -n; - } - - if(n NSIGNIF) - return 0; - - for(b = a+n-1; b >= a; b--){ - c = *b + 1; - if(c <= '9'){ - *b = c; - return 0; - } - *b = '0'; - } - /* - * need to overflow adding digit. - * shift number down and insert 1 at beginning. - * decimal is known to be 0s or we wouldn't - * have gotten this far. (e.g., 99999+1 => 00000) - */ - a[0] = '1'; - return 1; -} - -static int -sub1(char *a, int n) -{ - int c; - char *b; - - if(n < 0 || n > NSIGNIF) - return 0; - for(b = a+n-1; b >= a; b--){ - c = *b - 1; - if(c >= '0'){ - if(c == '0' && b == a){ - /* - * just zeroed the top digit; shift everyone up. - * decimal is known to be 9s or we wouldn't - * have gotten this far. (e.g., 10000-1 => 09999) - */ - *b = '9'; - return 1; - } - *b = c; - return 0; - } - *b = '9'; - } - /* - * can't get here. the number a is always normalized - * so that it has a nonzero first digit. - */ - abort(); -} - -// ----------------------------------------------------------------------- -// strtod - -#define Nbits 28 -#define Nmant 53 -#define Prec ((Nmant+Nbits+1)/Nbits) - -#define Sigbit (1<<(Prec*Nbits-Nmant)) /* first significant bit of Prec-th word */ -#define Ndig 1500 -#define One (ulong)(1<>1) -#define Maxe 310 - -#define Fsign (1<<0) /* found - */ -#define Fesign (1<<1) /* found e- */ -#define Fdpoint (1<<2) /* found . */ - -#define S0 0 /* _ _S0 +S1 #S2 .S3 */ -#define S1 1 /* _+ #S2 .S3 */ -#define S2 2 /* _+# #S2 .S4 eS5 */ -#define S3 3 /* _+. #S4 */ -#define S4 4 /* _+#.# #S4 eS5 */ -#define S5 5 /* _+#.#e +S6 #S7 */ -#define S6 6 /* _+#.#e+ #S7 */ -#define S7 7 /* _+#.#e+# #S7 */ - -typedef struct Tab Tab; -struct Tab -{ - int bp; - int siz; - char *cmp; -}; - -static ulong -umuldiv(ulong a, ulong b, ulong c) -{ - double d; - - d = ((double)a * (double)b) / (double)c; - if(d >= 4294967295.) - d = 4294967295.; - return (ulong)d; -} - -static void -frnorm(ulong *f) -{ - int i, c; - - c = 0; - for(i=Prec-1; i>0; i--) { - f[i] += c; - c = f[i] >> Nbits; - f[i] &= One-1; - } - f[0] += c; -} - -static int -fpcmp(char *a, ulong* f) -{ - ulong tf[Prec]; - int i, d, c; - - for(i=0; i> Nbits) + '0'; - tf[0] &= One-1; - - /* compare next digit */ - c = *a; - if(c == 0) { - if('0' < d) - return -1; - if(tf[0] != 0) - goto cont; - for(i=1; i d) - return +1; - if(c < d) - return -1; - a++; - cont:; -} -} - -static void -divby(char *a, int *na, int b) -{ - int n, c; - char *p; - - p = a; - n = 0; - while(n>>b == 0){ - c = *a++; - if(c == 0) { - while(n) { - c = n*10; - if(c>>b) - break; - n = c; - } - goto xx; - } - n = n*10 + c-'0'; - (*na)--; - } - for(;;){ - c = n>>b; - n -= c<>b; - n -= c<= (int)(arrlen(tab1))) - d = (int)(arrlen(tab1))-1; - t = tab1 + d; - b = t->bp; - if(memcmp(a, t->cmp, t->siz) > 0) - d--; - *dp -= d; - *bp += b; - divby(a, na, b); -} - -static void -mulby(char *a, char *p, char *q, int b) -{ - int n, c; - - n = 0; - *p = 0; - for(;;) { - q--; - if(q < a) - break; - c = *q - '0'; - c = (c<= (int)(arrlen(tab2))) - d = (int)(arrlen(tab2))-1; - t = tab2 + d; - b = t->bp; - if(memcmp(a, t->cmp, t->siz) < 0) - d--; - p = a + *na; - *bp -= b; - *dp += d; - *na += d; - mulby(a, p+d, p, b); -} - -static int -cmp(char *a, char *b) -{ - int c1, c2; - - while((c1 = *b++) != '\0') { - c2 = *a++; - if(isupper(c2)) - c2 = tolower(c2); - if(c1 != c2) - return 1; - } - return 0; -} - -double -fmtstrtod(char *as, char **aas) -{ - int na, ex, dp, bp, c, i, flag, state; - ulong low[Prec], hig[Prec], mid[Prec]; - double d; - char *s, a[Ndig]; - - flag = 0; /* Fsign, Fesign, Fdpoint */ - na = 0; /* number of digits of a[] */ - dp = 0; /* na of decimal point */ - ex = 0; /* exonent */ - - state = S0; - for(s=as;;s++){ - c = *s; - if('0' <= c && c <= '9'){ - switch(state){ - case S0: case S1: case S2: - state = S2; - break; - case S3: case S4: - state = S4; - break; - case S5: case S6: case S7: - state = S7; - ex = ex*10 + (c-'0'); - continue; - } - - if(na == 0 && c == '0'){ - dp--; - continue; - } - if(na < Ndig-50) - a[na++] = c; - continue; - } - switch(c){ - case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': - if(state == S0) - continue; - break; - case '-': - if(state == S0) - flag |= Fsign; - else - flag |= Fesign; - case '+': - if(state == S0) - state = S1; - else - if(state == S5) - state = S6; - else - break; /* syntax */ - continue; - case '.': - flag |= Fdpoint; - dp = na; - if(state == S0 || state == S1){ - state = S3; - continue; - } - if(state == S2){ - state = S4; - continue; - } - break; - case 'e': case 'E': - if(state == S2 || state == S4){ - state = S5; - continue; - } - break; - } - break; - } - - /* clean up return char-pointer */ - switch(state) { - case S0: - if(cmp(s, "nan") == 0){ - if(aas != nil) - *aas = s+3; - goto retnan; - } - case S1: - if(cmp(s, "infinity") == 0){ - if(aas != nil) - *aas = s+8; - goto retinf; - } - if(cmp(s, "inf") == 0){ - if(aas != nil) - *aas = s+3; - goto retinf; - } - case S3: - if(aas != nil) - *aas = as; - goto ret0; /* no digits found */ - case S6: - s--; /* back over +- */ - case S5: - s--; /* back over e */ - break; - } - if(aas != nil) - *aas = s; - - if(flag & Fdpoint) - while(na > 0 && a[na-1] == '0') - na--; - if(na == 0) - goto ret0; /* zero */ - a[na] = 0; - if(!(flag & Fdpoint)) - dp = na; - if(flag & Fesign) - ex = -ex; - dp += ex; - if(dp < -Maxe){ - errno = ERANGE; - goto ret0; /* underflow by exp */ - } else - if(dp > +Maxe) - goto retinf; /* overflow by exp */ - - /* - * normalize the decimal ascii number - * to range .[5-9][0-9]* e0 - */ - bp = 0; /* binary exponent */ - while(dp > 0) - divascii(a, &na, &dp, &bp); - while(dp < 0 || a[0] < '5') - mulascii(a, &na, &dp, &bp); - - /* close approx by naive conversion */ - mid[0] = 0; - mid[1] = 1; - for(i=0; (c=a[i]) != '\0'; i++) { - mid[0] = mid[0]*10 + (c-'0'); - mid[1] = mid[1]*10; - if(i >= 8) - break; - } - low[0] = umuldiv(mid[0], One, mid[1]); - hig[0] = umuldiv(mid[0]+1, One, mid[1]); - for(i=1; i>= 1; - } - frnorm(mid); - - /* compare */ - c = fpcmp(a, mid); - if(c > 0) { - c = 1; - for(i=0; i= Sigbit/2) { - mid[Prec-1] += Sigbit; - frnorm(mid); - } - goto out; - -ret0: - return 0; - -retnan: - return NaN(); - -retinf: - /* Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */ - errno = ERANGE; - if(flag & Fsign) - return -HUGE_VAL; - return HUGE_VAL; - -out: - d = 0; - for(i=0; i 0) - *p++ = se[--i]; - - *p++ = '\0'; -} - -/* - * compute decimal integer m, exp such that: - * f = m*10^exp - * m is as short as possible with losing exactness - * assumes special cases (NaN, +Inf, -Inf) have been handled. - */ -static void -dtoa(double f, char *s, int *exp, int *neg, int *len) -{ - int c, d, e2, e, ee, i, ndigit, oerrno; - char buf[NSIGNIF+10]; - double g; - - oerrno = errno; - - *neg = 0; - if(f < 0){ - f = -f; - *neg = 1; - } - - if(f == 0){ - *exp = 0; - s[0] = '0'; - s[1] = 0; - *len = 1; - return; - } - - frexp(f, &e2); - e = (int)(e2 * .301029995664); - g = f * fpow10(-e); - while(g < 1) { - e--; - g = f * fpow10(-e); - } - while(g >= 10){ - e++; - g = f * fpow10(-e); - } - - /* convert nsignif digits as a first approximation */ - for(i=0; i g) { - if(add1(s, NSIGNIF)){ - /* gained a digit */ - e--; - fmtexp(s+NSIGNIF, e, 0); - } - continue; - } - if(f < g){ - if(sub1(s, NSIGNIF)){ - /* lost a digit */ - e++; - fmtexp(s+NSIGNIF, e, 0); - } - continue; - } - break; - } - - /* - * bump last few digits down to 0 as we can. - */ - for(i=NSIGNIF-1; i>=NSIGNIF-3; i--){ - c = s[i]; - if(c != '0'){ - s[i] = '0'; - g=fmtstrtod(s, nil); - if(g != f){ - s[i] = c; - break; - } - } - } - - /* - * remove trailing zeros. - */ - ndigit = NSIGNIF; - while(ndigit > 1 && s[ndigit-1] == '0'){ - e++; - --ndigit; - } - s[ndigit] = 0; - *exp = e; - *len = ndigit; - - errno = oerrno; -} - - -static int -fmtfloat(fmt·State *io) -{ - char buf[NSIGNIF+10], *dot, *digits, *p, *end, suf[10], *cur; - double val; - int c, verb, ndot, e, exp, f, ndigits, neg, newndigits; - int npad, pt, prec, realverb, sign, nsuf, ucase, n, z1, z2; - - if(io->flag&fmt·Long) - val = va_arg(io->args, long double); - else - val = va_arg(io->args, double); - - /* extract formatting flags */ - f = io->flag; - io->flag = 0; - prec = FDEFLT; - if(f & fmt·Prec) - prec = io->prec; - - verb = io->verb; - ucase = 0; - switch(verb) { - case 'A': - case 'E': - case 'F': - case 'G': - verb += 'a'-'A'; - ucase = 1; - break; - } - - /* pick off special numbers. */ - if(isNaN(val)) { - end = special[0+ucase]; - special: - io->flag = f & (fmt·Width|fmt·Left); - return copy(io, end, strlen(end), strlen(end)); - } - if(isInf(val, 1)) { - end = special[2+ucase]; - goto special; - } - if(isInf(val, -1)) { - end = special[4+ucase]; - goto special; - } - - /* get exact representation. */ - digits = buf; - dtoa(val, digits, &exp, &neg, &ndigits); - - /* get locale's decimal point. */ - dot = io->decimal; - if(dot == nil) - dot = "."; - ndot = utf8·len(dot); - - /* - * now the formatting fun begins. - * compute parameters for actual fmt: - * - * pad: number of spaces to insert before/after field. - * z1: number of zeros to insert before digits - * z2: number of zeros to insert after digits - * point: number of digits to print before decimal point - * ndigits: number of digits to use from digits[] - * suf: trailing suffix, like "e-5" - */ - realverb = verb; - switch(verb){ - case 'g': - /* convert to at most prec significant digits. (prec=0 means 1) */ - if(prec == 0) - prec = 1; - if(ndigits > prec) { - if(digits[prec] >= '5' && add1(digits, prec)) - exp++; - exp += ndigits-prec; - ndigits = prec; - } - - /* - * extra rules for %g (implemented below): - * trailing zeros removed after decimal unless FmtSharp. - * decimal point only if digit follows. - */ - - /* fall through to %e */ - default: - case 'e': - /* one significant digit before decimal, no leading zeros. */ - pt = 1; - z1 = 0; - - /* - * decimal point is after ndigits digits right now. - * slide to be after first. - */ - e = exp + (ndigits-1); - - /* if this is %g, check exponent and convert prec */ - if(realverb == 'g') { - if(-4 <= e && e < prec) - goto casef; - prec--; /* one digit before decimal; rest after */ - } - - /* compute trailing zero padding or truncate digits. */ - if(1+prec >= ndigits) - z2 = 1+prec - ndigits; - else { - /* truncate digits */ - assert(realverb != 'g'); - newndigits = 1+prec; - if(digits[newndigits] >= '5' && add1(digits, newndigits)) { - /* had 999e4, now have 100e5 */ - e++; - } - ndigits = newndigits; - z2 = 0; - } - fmtexp(suf, e, ucase); - nsuf = strlen(suf); - break; - - casef: - case 'f': - /* determine where digits go with respect to decimal point */ - if(ndigits+exp > 0) { - pt = ndigits+exp; - z1 = 0; - } else { - pt = 1; - z1 = 1 + -(ndigits+exp); - } - - /* - * %g specifies prec = number of significant digits - * convert to number of digits after decimal point - */ - if(realverb == 'g') - prec += z1 - pt; - - /* compute trailing zero padding or truncate digits. */ - if(pt+prec >= z1+ndigits) - z2 = pt+prec - (z1+ndigits); - else{ - /* truncate digits */ - assert(realverb != 'g'); - newndigits = pt+prec - z1; - if(newndigits < 0){ - z1 += newndigits; - newndigits = 0; - }else if(newndigits == 0){ - /* perhaps round up */ - if(digits[0] >= '5'){ - digits[0] = '1'; - newndigits = 1; - goto newdigit; - } - }else if(digits[newndigits] >= '5' && add1(digits, newndigits)){ - /* digits was 999, is now 100; make it 1000 */ - digits[newndigits++] = '0'; - newdigit: - /* account for new digit */ - if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ - z1--; - else /* 9.99 => 10.00 */ - pt++; - } - z2 = 0; - ndigits = newndigits; - } - nsuf = 0; - break; - } - - /* - * if %g is given without FmtSharp, remove trailing zeros. - * must do after truncation, so that e.g. print %.3g 1.001 - * produces 1, not 1.00. sorry, but them's the rules. - */ - if(realverb == 'g' && !(f & fmt·Sharp)) { - if(z1+ndigits+z2 >= pt) { - if(z1+ndigits < pt) - z2 = pt - (z1+ndigits); - else{ - z2 = 0; - while(z1+ndigits > pt && digits[ndigits-1] == '0') - ndigits--; - } - } - } - - /* - * compute width of all digits and decimal point and suffix if any - */ - n = z1+ndigits+z2; - if(n > pt) - n += ndot; - else if(n == pt){ - if(f & fmt·Sharp) - n += ndot; - else - pt++; /* do not print any decimal point */ - } - n += nsuf; - - /* - * determine sign - */ - sign = 0; - if(neg) - sign = '-'; - else if(f & fmt·Sign) - sign = '+'; - else if(f & fmt·Space) - sign = ' '; - if(sign) - n++; - - /* compute padding */ - npad = 0; - if((f & fmt·Width) && io->width > n) - npad = io->width - n; - if(npad && !(f & fmt·Left) && (f & fmt·Zero)){ - z1 += npad; - pt += npad; - npad = 0; - } - - /* format the actual field. too bad about doing this twice. */ - if(npad && !(f & fmt·Left) && pad(io, npad < 0)) - return -1; - - cur = io->buffer.cur; - end = io->buffer.end; - - if(sign){ - if(cur+1 > end){ - if(!(cur=flush(io,cur,1))) - return -1; - end = io->buffer.end; - } - *cur++ = sign; - } - - while(z1>0 || ndigits>0 || z2>0){ - if(z1 > 0){ - z1--; - c = '0'; - }else if(ndigits > 0){ - ndigits--; - c = *digits++; - }else{ - z2--; - c = '0'; - } - - if(cur+1 > end){ - if(!(cur=flush(io,cur,1))) - return -1; - end = io->buffer.end; - } - *cur++ = c; - - if(--pt == 0) - for(p=dot; *p; p++){ - if(cur+1 > end){ - if(!(cur=flush(io,cur,1))) - return -1; - end = io->buffer.end; - } - *cur++ = *p; - } - } - io->n += cur - (char*)io->buffer.cur; - io->buffer.cur = cur; - if(nsuf && copy(io, suf, nsuf, nsuf) < 0) - return -1; - if(npad && (f & fmt·Left) && pad(io, npad < 0)) - return -1; - - return 0; -} diff --git a/src/libfmt/fprint.c b/src/libfmt/fprint.c deleted file mode 100644 index 5077359..0000000 --- a/src/libfmt/fprint.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "internal.h" - -int -fmt·fprint(int fd, char *fmt, ...) -{ - int n; - va_list args; - - va_start(args, fmt); - n = fmt·vfprint(fd, fmt, args); - va_end(args); - - return n; -} diff --git a/src/libfmt/internal.h b/src/libfmt/internal.h deleted file mode 100644 index 725cfff..0000000 --- a/src/libfmt/internal.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -typedef int (*Formatter)(fmt·State *io); -typedef struct Verb Verb; - -struct Verb -{ - int c; - Formatter fmt; -}; - -void fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups); diff --git a/src/libfmt/locale.c b/src/libfmt/locale.c deleted file mode 100644 index 437c61e..0000000 --- a/src/libfmt/locale.c +++ /dev/null @@ -1,16 +0,0 @@ -#include "internal.h" - -void -fmt·setlocale(fmt·State *io, char *decimal, char *thousands, char *groups) -{ - if(decimal == nil || decimal[0] == '\0') - decimal = "."; - if(thousands == nil) - thousands = ","; - if(groups == nil) - groups = "\3"; - - io->groups = groups; - io->decimal = decimal; - io->thousands = thousands; -} diff --git a/src/libfmt/nsprint.c b/src/libfmt/nsprint.c deleted file mode 100644 index 90489e0..0000000 --- a/src/libfmt/nsprint.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "internal.h" - -int -fmt·nsprint(int len, char *buf, char *fmt, ...) -{ - int n; - va_list args; - - va_start(args, fmt); - n = fmt·vnsprint(len, buf, fmt, args); - va_end(args); - - return n; -} diff --git a/src/libfmt/open.c b/src/libfmt/open.c deleted file mode 100644 index 8aadef5..0000000 --- a/src/libfmt/open.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "internal.h" - -static int -flush(fmt·State *io) -{ - int n, fd; - - fd = (uintptr)io->file; - n = io->buffer.cur - io->buffer.beg; - if(n && write(fd, io->buffer.beg, n) != n) - return -1; - - io->buffer.cur = io->buffer.beg; - return io->n; -} - -int -fmt·open(int fd, int len, char *buf, fmt·State *io) -{ - io->buffer.beg = buf; - io->buffer.cur = buf; - io->buffer.end = buf+len; - io->flush = flush; - io->file = (void*)(uintptr)fd; - io->flag = 0; - io->n = 0; - /* no heap needed */ - io->heap = nil; - io->mem = (mem·Reallocator){ 0 }; - - fmt·setlocale(io, nil, nil, nil); - - return 0; -} diff --git a/src/libfmt/panic.c b/src/libfmt/panic.c deleted file mode 100644 index 25ee277..0000000 --- a/src/libfmt/panic.c +++ /dev/null @@ -1,15 +0,0 @@ -#include "internal.h" - -void -fmt·panic(char *fmt, ...) -{ - char buf[256]; - va_list arg; - - va_start(arg, fmt); - fmt·vesprint(buf, arrend(buf), fmt, arg); - va_end(arg); - - fmt·fprint(2, "%s: %s\n", argv0 ? argv0 : "", buf); - exits("fatal"); -} diff --git a/src/libfmt/print.c b/src/libfmt/print.c deleted file mode 100644 index 20b8e00..0000000 --- a/src/libfmt/print.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "internal.h" - -int -fmt·print(char *fmt, ...) -{ - int n; - va_list args; - - va_start(args, fmt); - n = fmt·vfprint(1, fmt, args); - va_end(args); - return n; -} diff --git a/src/libfmt/rules.mk b/src/libfmt/rules.mk deleted file mode 100644 index 75a7bc7..0000000 --- a/src/libfmt/rules.mk +++ /dev/null @@ -1,36 +0,0 @@ -include share/push.mk - -# Local sources -SRCS_$(d):=\ - $(d)/buffer.c\ - $(d)/do.c\ - $(d)/esprint.c\ - $(d)/fprint.c\ - $(d)/locale.c\ - $(d)/nsprint.c\ - $(d)/open.c\ - $(d)/print.c\ - $(d)/sprint.c\ - $(d)/vesprint.c\ - $(d)/vfprint.c\ - $(d)/vnsprint.c\ - $(d)/vprint.c\ - $(d)/vwrite.c\ - $(d)/panic.c\ - $(d)/write.c - -LIBS_$(d):=\ - $(d)/libfmt.a - -CHECK_$(d):=\ - $(d)/test.c - -include share/paths.mk - -$(LIBS_$(d)): $(OBJS_$(d)) - $(ARCHIVE) - -$(TEST_$(d)): $(UNIT_$(d)) $(LIBS_$(d)) $(OBJ_DIR)/libutf/libutf.a $(OBJ_DIR)/base/base.a - $(COMPLINK) - -include share/pop.mk diff --git a/src/libfmt/sprint.c b/src/libfmt/sprint.c deleted file mode 100644 index f1be6dd..0000000 --- a/src/libfmt/sprint.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "internal.h" - -int -fmt·sprint(char *buf, char *fmt, ...) -{ - int n; - uint len; - va_list args; - - len = 1 << 30; - if(buf+len < buf) - len = -(uintptr)buf-1; - - va_start(args, fmt); - n = fmt·vnsprint(len, buf, fmt, args); - va_end(args); - - return n; -} diff --git a/src/libfmt/test.c b/src/libfmt/test.c deleted file mode 100644 index d81a62e..0000000 --- a/src/libfmt/test.c +++ /dev/null @@ -1,72 +0,0 @@ -#include -#include -#include -#include - -typedef struct Complex -{ - double r, i; -} Complex; - -int -Xfmt(fmt·State *io) -{ - Complex c; - c = va_arg(io->args, Complex); - - return fmt·write(io, "(real=%g,imag=%g)", c.r, c.i); -} - -int -main(int argc, char *argv[]) -{ - fmt·print("basic tests\n"); - fmt·print("\tx: %x\n", 0x87654321); - fmt·print("\tu: %u\n", 0x87654321); - fmt·print("\td: %d\n", 0x87654321); - fmt·print("\ts: %s\n", "hi there"); - fmt·print("\tc: %c\n", '!'); - fmt·print("\tg: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10); - fmt·print("\te: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10); - fmt·print("\tf: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10); - fmt·print("\tsmiley: %C\n", (rune)0x263a); - fmt·print("\t%g %.18g\n", 2e25, 2e25); - fmt·print("\t%2.18g\n", 1.0); - fmt·print("\t%2.18f\n", 1.0); - fmt·print("\t%f\n", 3.1415927/4); - fmt·print("\t%d\n", 23); - fmt·print("\t%i\n", 23); - fmt·print("\t%0.10d\n", 12345); - - fmt·print("%%4%%d tests\n"); - fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); - fmt·print("\t%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); - fmt·print("\t%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); - fmt·print("\t%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); - fmt·print("\t%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); - - /* test %'d formats */ - fmt·print("%%'%%d tests\n"); - fmt·print("\t%'d %'d %'d\n", 1, 2222, 33333333); - fmt·print("\t%'019d\n", 0); - fmt·print("\t%08d %08d %08d\n", 1, 2222, 33333333); - fmt·print("\t%'08d %'08d %'08d\n", 1, 2222, 33333333); - fmt·print("\t%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); - fmt·print("\t%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); - fmt·print("\t%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); - fmt·print("\t%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); - fmt·print("\t%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); - fmt·print("\t%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); - - /* test precision */ - fmt·print("precision tests\n"); - fmt·print("%020.10d\n", 100); - - /* test install */ - fmt·install('X', Xfmt); - Complex c = { 1.5, -2.3 }; - fmt·print("x = %X\n", c); - - return 0; - -} diff --git a/src/libfmt/vesprint.c b/src/libfmt/vesprint.c deleted file mode 100644 index 18f4dd2..0000000 --- a/src/libfmt/vesprint.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "internal.h" - -char* -fmt·vesprint(char *buf, char *end, char *fmt, va_list args) -{ - fmt·State io; - - if(end <= buf) - return nil; - - io.n = 0; - io.buffer.beg = io.buffer.cur = buf; - io.buffer.end = end-1; - io.flush = nil; - io.file = nil; - - va_copy(io.args, args); - - fmt·setlocale(&io, nil, nil, nil); - fmt·do(&io, fmt); - - va_end(io.args); - - *(io.buffer.cur) = 0; - return io.buffer.cur; -} diff --git a/src/libfmt/vfprint.c b/src/libfmt/vfprint.c deleted file mode 100644 index 4306ea7..0000000 --- a/src/libfmt/vfprint.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "internal.h" - -int -fmt·vfprint(int fd, char *fmt, va_list args) -{ - int n; - fmt·State io; - char buf[256]; - - fmt·open(fd, sizeof(buf), buf, &io); - - va_copy(io.args, args); - n = fmt·do(&io, fmt); - va_end(io.args); - - if(n > 0 && io.flush(&io) < 0) - return -1; - return n; -} diff --git a/src/libfmt/vnsprint.c b/src/libfmt/vnsprint.c deleted file mode 100644 index 7ded908..0000000 --- a/src/libfmt/vnsprint.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "internal.h" - -int -fmt·vnsprint(int len, char *buf, char *fmt, va_list args) -{ - fmt·State io; - - if(len <= 0) - return -1; - - io.n = 0; - io.buffer.beg = io.buffer.cur = buf; - io.buffer.end = buf+len-1; - io.flush = nil; - io.file = nil; - - va_copy(io.args, args); - - fmt·setlocale(&io, nil, nil, nil); - fmt·do(&io, fmt); - - va_end(io.args); - - *(io.buffer.cur) = 0; - return io.buffer.cur - io.buffer.beg; -} diff --git a/src/libfmt/vprint.c b/src/libfmt/vprint.c deleted file mode 100644 index bb3076b..0000000 --- a/src/libfmt/vprint.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "internal.h" - -int -fmt·vprint(char *fmt, va_list args) -{ - fmt·State io; - int n; - char buf[256]; - - fmt·open(1, sizeof(buf), buf, &io); - - va_copy(io.args, args); - n = fmt·do(&io, fmt); - va_end(io.args); - - if(n > 0 && io.flush(&io) < 0) - return -1; - return n; -} diff --git a/src/libfmt/vwrite.c b/src/libfmt/vwrite.c deleted file mode 100644 index cacdef2..0000000 --- a/src/libfmt/vwrite.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "internal.h" - -int -fmt·vwrite(fmt·State *io, char *fmt, va_list args) -{ - int n; - va_list tmp; - - io->flag = io->width = io->prec = 0; - - va_copy(tmp, io->args); - va_end(io->args); - - va_copy(io->args,args); - n = fmt·do(io, fmt); - va_end(io->args); - - va_copy(io->args, tmp); - va_end(tmp); - - io->flag = io->width = io->prec = 0; - - if(n >= 0) - return 0; - return n; -} diff --git a/src/libfmt/write.c b/src/libfmt/write.c deleted file mode 100644 index 9a77223..0000000 --- a/src/libfmt/write.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "internal.h" - -int -fmt·write(fmt·State *io, char *fmt, ...) -{ - int n; - va_list args; - - io->flag = io->width = io->prec = 0; - - va_copy(args, io->args); - va_end(io->args); - - va_start(io->args, fmt); - n = fmt·do(io, fmt); - va_end(io->args); - - io->flag = io->width = io->prec = 0; - if(n >= 0) - return 0; - return n; -} diff --git a/src/libutf/canfit.c b/src/libutf/canfit.c deleted file mode 100644 index 4579ab3..0000000 --- a/src/libutf/canfit.c +++ /dev/null @@ -1,23 +0,0 @@ -#include "internal.h" - -/* returns 1 if string of length n is long enough to be decoded */ -int -utf8·canfit(byte* s, int n) -{ - int i; - rune c; - - if(n <= 0) - return 0; - - c = *(ubyte*)s; - if(c < TByte1) - return 1; - - if(c < TByte3) - return n >= 2; - if(c < TByte4) - return n >= 3; - - return n >= UTFmax; -} diff --git a/src/libutf/decode.c b/src/libutf/decode.c deleted file mode 100644 index 01797f1..0000000 --- a/src/libutf/decode.c +++ /dev/null @@ -1,98 +0,0 @@ -#include "internal.h" - -#define ACCEPT 0 -#define REJECT 12 - -static uint8 decode[] = { - /* - * the first part of the table maps bytes to character classes that - * to reduce the size of the transition table and create bitmasks - */ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - - /* - * the second part is a transition table that maps a combination - * of a state of the automaton and a character class to a state - */ - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12, -}; - -int -utf8·decode(char *s, rune *r) -{ - int n; - rune v; - uint8 b, t, x=ACCEPT; - - b = ((uint8 *)s)[0]; - t = decode[b]; - v = (0xFF >> t) & b; - x = decode[256+x+t]; - - for(n=1; x > REJECT && n < UTFmax; n++){ - b = ((uint8 *)s)[n]; - t = decode[b]; - v = (v << 6) | (b & TMask); - x = decode[256+x+t]; - } - - if(x != ACCEPT){ - *r = RuneErr; - return 1; - } - - *r = v; - return n; -} - -#if 0 -int -utf8·decode(byte *s, rune *r) -{ - int c[UTFmax], i; - rune l; - - c[0] = *(ubyte*)(s); - if(c[0] < Tx){ - *r = c[0]; - return 1; - } - - l = c[0]; - for(i = 1; i < UTFmax; i++){ - c[i] = *(ubyte*)(s+i); - c[i] ^= Tx; - if(c[i] & Testx) goto bad; - - l = (l << Bitx) | c[i]; - if(c[0] < Tbyte(i + 2)){ - l &= RuneX(i + 1); - if(i == 1){ - if(c[0] < Tbyte(2) || l <= Rune1) - goto bad; - }else if(l <= RuneX(i) || l > RuneMax) - goto bad; - - if(i == 2 && SurrogateMin <= l && l <= SurrogateMax) - goto bad; - - *r = l; - return i + 1; - } - } -bad: - *r = RuneErr; - return 1; -} -#endif diff --git a/src/libutf/decodeprev.c b/src/libutf/decodeprev.c deleted file mode 100644 index 27dced6..0000000 --- a/src/libutf/decodeprev.c +++ /dev/null @@ -1,60 +0,0 @@ -#include "internal.h" - -#define ACCEPT 0 -#define REJECT 12 - -static uint8 decode[] = { - /* - * the first part of the table maps bytes to character classes that - * to reduce the size of the transition table and create bitmasks. - */ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - /* - * The second part is a transition table that maps a combination - * of a state of the automaton and a character class to a state. - */ - // 0 1 2 3 4 5 6 7 8 9 10 11 - 0,24,12,12,12,12,12,24,12,24,12,12, - 0,24,12,12,12,12,12,24,12,24,12,12, - 12,36, 0,12,12,12,12,48,12,36,12,12, - 12,60,12, 0, 0,12,12,72,12,72,12,12, - 12,60,12, 0,12,12,12,72,12,72, 0,12, - 12,12,12,12,12, 0, 0,12,12,12,12,12, - 12,12,12,12,12,12,12,12,12,12,12, 0 -}; - -int -utf8·decodeprev(byte *s, rune *r) -{ - int n; - rune v; - uint8 b, t, d, x=ACCEPT; - - v=0, n=0, d=0; -nextbyte: - b = ((uint8 *)s)[-n++]; - t = decode[b]; - x = decode[256+x+t]; - - if(x > REJECT && n < UTFmax){ - v = v | ((b & TMask) << d); - d += 6; - goto nextbyte; - } - - if(x != ACCEPT) - *r = RuneErr; - else{ - v |= (((0xFFu >> t) & b) << d); - *r = v; - } - - return n; -} diff --git a/src/libutf/encode.c b/src/libutf/encode.c deleted file mode 100644 index fa7c93e..0000000 --- a/src/libutf/encode.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "internal.h" - -int -utf8·encode(rune *r, byte *s) -{ - rune c; - - c = *r; - if(c < Rune1Byte){ // 7 bits - s[0] = (uint8)c; - return 1; - } - - if(c < Rune2Byte){ // 11 bits - s[0] = TByte1 | (c >> 6); - s[1] = Tx | (c & TMask); - return 2; - } - - if(c < Rune3Byte){ // 16 bits - s[0] = TByte2 | ((c >> 12)); - s[1] = Tx | ((c >> 6) & TMask); - s[2] = Tx | ((c) & TMask); - return 3; - } - - // 22 bits - if(c > RuneMax || (RuneSurrogateMin <= c && c <= RuneSurrogateMax)) - c = RuneErr; - - s[0] = TByte3 | ((c >> 18)); - s[1] = Tx | ((c >> 12) & TMask); - s[2] = Tx | ((c >> 6) & TMask); - s[3] = Tx | ((c) & TMask); - - return 4; -} - -#if 0 -int -utf8·encode(rune* r, byte* s) -{ - int i, j; - rune c; - - c = *r; - if(c <= Rune1) { - s[0] = c; - return 1; - } - - for(i = 2; i < UTFmax + 1; i++){ - if(i == 3){ - if(c > RuneMax) - c = RuneErr; - if(SurrogateMin <= c && c <= SurrogateMax) - c = RuneErr; - } - if(c <= RuneX(i) || i == UTFmax) { - s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); - for(j = 1; j < i; j++) - s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); - return i; - } - } - - return UTFmax; -} -#endif diff --git a/src/libutf/find.c b/src/libutf/find.c deleted file mode 100644 index d75feb8..0000000 --- a/src/libutf/find.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "internal.h" - -byte* -utf8·find(byte* s, rune c) -{ - long c1; - rune r; - int n; - - if(c < Tx) - return strchr(s, c); - - for(;;){ - c1 = *(ubyte*)s; - if(c1 < Tx){ - if(c1 == 0) return nil; - if(c1 == c) return s; - s++; - continue; - } - - n = utf8·decode(s, &r); - - if(r == c) - return s; - - s += n; - } - - return nil; -} diff --git a/src/libutf/findlast.c b/src/libutf/findlast.c deleted file mode 100644 index ab25ab2..0000000 --- a/src/libutf/findlast.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "internal.h" - -byte* -utf8·findlast(byte* s, rune c) -{ - long c1; - rune r; - byte *l; - - if(c < Tx) - return strrchr(s, c); - - l = nil; - for(;;){ - c1 = *(ubyte*)s; - if(c1 < Tx){ - if(c1 == 0) return l; - if(c1 == c) l = s; - s++; - continue; - } - - c1 = utf8·decode(s, &r); - - if(r == c) - l = s; - - s += c1; - } - - return nil; -} diff --git a/src/libutf/internal.h b/src/libutf/internal.h deleted file mode 100644 index 9719977..0000000 --- a/src/libutf/internal.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include -#include -#include - -/* - * NOTE: we use the preprocessor to ensure we have unsigned constants. - * UTF-8 code: - * 1 byte: - * 0xxxxxxx - * 2 byte: - * 110xxxxx 10xxxxxx - * 3 byte: - * 1110xxxx 10xxxxxx 10xxxxxx - * 4 byte: - * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - */ - -#define Tx 0x80u // 0b10000000 transfer header -#define TMask 0x3Fu // 0b00111111 transfer mask - -#define TByte1 0xC0u // 0b11000000 -#define TByte2 0xE0u // 0b11100000 -#define TByte3 0xF0u // 0b11110000 -#define TByte4 0xF8u // 0b11111000 - -#define RuneMask 0x1FFFFFu - -#define Rune1Byte 0x000080u // 1 << 8 (1 byte) -#define Rune2Byte 0x001000u // 1 << 12 (2 bytes) -#define Rune3Byte 0x020000u // 1 << 17 (3 bytes) -#define Rune4Byte 0x400000u // 1 << 22 (4 bytes) - - -/* UTF-16 nonsense */ -#define RuneSurrogateMin 0x0D8000 -#define RuneSurrogateMax 0x0D8FFF diff --git a/src/libutf/len.c b/src/libutf/len.c deleted file mode 100644 index 8fbd679..0000000 --- a/src/libutf/len.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "internal.h" - -int -utf8·len(char *s) -{ - int c; - long n; - rune r; - - n = 0; - for(;;){ - c = *(uchar*)s; - if(c < Tx){ - if(c == 0) - return n; - s++; - }else - s += utf8·decode(s, &r); - n++; - } -} diff --git a/src/libutf/rules.mk b/src/libutf/rules.mk deleted file mode 100644 index aeb86b2..0000000 --- a/src/libutf/rules.mk +++ /dev/null @@ -1,76 +0,0 @@ -include share/push.mk - -UNICODE=14.0.0 - -SRCS_$(d):=\ - $(d)/encode.c\ - $(d)/decode.c\ - $(d)/decodeprev.c\ - $(d)/find.c\ - $(d)/findlast.c\ - $(d)/canfit.c\ - $(d)/runelen.c\ - $(d)/len.c\ - $(d)/runetype-$(UNICODE).c\ - $(d)/runewidth-$(UNICODE).c - -LIBS_$(d):=$(d)/libutf.a - -include share/paths.mk - -# ======================================================================== -# table generation - -$(d)/vendor/common.o: $(d)/vendor/common.c - $(COMPILE) - -# rune categories -$(d)/vendor/UnicodeData-$(UNICODE).txt: - @echo "GET UnicodeData.txt";\ - curl https://www.unicode.org/Public/$(UNICODE)/ucd/UnicodeData.txt > $@ - -$(d)/vendor/mkrunetype: $(d)/vendor/mkrunetype.c $(d)/vendor/common.o $(OBJ_DIR)/base/base.a - $(COMPLINK) - -GENS += $(d)/vendor/mkrunetype - -$(d)/runetype-$(UNICODE).c: $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/mkrunetype - @$(dir $@)vendor/mkrunetype $< > $@ - -# rune widths -$(d)/vendor/EastAsianWidth-$(UNICODE).txt: - @echo "GET EastAsianWidth.txt";\ - curl https://www.unicode.org/Public/$(UNICODE)/ucd/EastAsianWidth.txt > $@ - -$(d)/vendor/EmojiData-$(UNICODE).txt: - @echo "GET EmojiData.txt";\ - curl https://www.unicode.org/Public/$(UNICODE)/ucd/emoji/emoji-data.txt > $@ - -$(d)/vendor/mkrunewidth: $(d)/vendor/mkrunewidth.c $(d)/vendor/common.o $(OBJ_DIR)/base/base.a - $(COMPLINK) - -GENS += $(d)/vendor/mkrunewidth - -$(d)/runewidth-$(UNICODE).c: $(d)/vendor/mkrunewidth $(d)/vendor/UnicodeData-$(UNICODE).txt $(d)/vendor/EastAsianWidth-$(UNICODE).txt $(d)/vendor/EmojiData-$(UNICODE).txt - @$(dir $@)vendor/mkrunewidth $(filter-out $<, $^) > $@ - -# grapheme boundaries -$(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt: - @echo "GET GraphemeBreakProperty.txt";\ - curl https://www.unicode.org/Public/$(UNICODE)/ucd/auxiliary/GraphemeBreakProperty.txt > $@ - -$(d)/vendor/mkgraphemedata: $(d)/vendor/mkgraphemedata.c $(d)/vendor/common.o $(OBJ_DIR)/base/base.a - $(COMPLINK) - -$(d)/graphemedata-$(UNICODE).c: $(d)/vendor/mkgraphemedata $(d)/vendor/GraphemeBreakProperty-$(UNICODE).txt - $^ > $@ - -GENS += $(d)/vendor/mkgraphemedata - -# ======================================================================== -# normal operations - -$(LIBS_$(d)): $(OBJS_$(d)) - $(ARCHIVE) - -include share/pop.mk diff --git a/src/libutf/runelen.c b/src/libutf/runelen.c deleted file mode 100644 index dac7f15..0000000 --- a/src/libutf/runelen.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "internal.h" - -int -utf8·runelen(rune r) -{ - byte s[10]; - return utf8·encode(&r, s); -} diff --git a/src/libutf/vendor/common.c b/src/libutf/vendor/common.c deleted file mode 100644 index 5a03a50..0000000 --- a/src/libutf/vendor/common.c +++ /dev/null @@ -1,220 +0,0 @@ -#include "common.h" - -// ----------------------------------------------------------------------- -// input functions - -int -parse(io·Stream *io, int nfield, char **field, int len, char *line) -{ - int n; - if((n=io·readln(io, len, line)) <= 0) - return ParseEOF; - - if(n == len) - panicf("line too long"); - - if(line[n-1] != '\n') - panicf("invalid line: expected '\n', found '%c'", line[n]); - - line[n-1] = 0; - - if(line[0] == '#' || line[0] == 0) - return ParseSkip; - - /* tokenize line into fields */ - n = 0; - field[n] = line; - while(*line){ - if(*line == ';'){ - *line = 0; - field[++n] = line+1; - } - line++; - } - - if(n != nfield-1) - panicf("expected %d number of fields, got %d: %s", nfield, n, line); - - return ParseOK; -} - -int -codepoint(char *s) -{ - int c, b; - - c = 0; - while((b=*s++)){ - c <<= 4; - if(b >= '0' && b <= '9') - c += b - '0'; - else if(b >= 'A' && b <= 'F') - c += b - 'A' + 10; - else - panicf("bad codepoint char '%c'", b); - } - - return c; -} - -void -codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop) -{ - int e, c; - char *other[NumFields], line[1024]; - - // XXX: the stop variable passes in the previous stopping character - e = *stop; - c = codepoint(field[Fcode]); - - if(c >= NumRunes) - panicf("unexpected large codepoint %x", c); - if(c <= e) - panicf("bad code sequence: %x then %x", e, c); - e = c; - - if(strstr(field[Fname], ", First>") != nil){ - if(!parse(utf8, arrlen(other), other, arrlen(line), line)) - panicf("range start at end of file"); - if(strstr(other[Fname], ", Last>") == nil) - panicf("range start not followed by range end"); - - e = codepoint(other[Fcode]); - - if(e <= c) - panicf("bad code sequence: %x then %x", c, e); - if(strcmp(field[Fcategory], other[Fcategory]) != 0) - panicf("range with mismatched category"); - } - - *start = c; - *stop = e; -} - -// ----------------------------------------------------------------------- -// output functions - -void -putsearch(void) -{ - puts( - "#include \n" - "#include \n" - "\n" - "static\n" - "rune*\n" - "rangesearch(rune c, rune *t, int n, int ne)\n" - "{\n" - " rune *p;\n" - " int m;\n" - " while(n > 1) {\n" - " m = n >> 1;\n" - " p = t + m*ne;\n" - " if(c >= p[0]){\n" - " t = p;\n" - " n = n-m;\n" - " }else\n" - " n = m;\n" - " }\n" - " if(n && c >= t[0])\n" - " return t;\n" - " return 0;\n" - "}\n" - ); - -} - -int -putrange(char *ident, char *prop, int force) -{ - int l, r, start; - - start = 0; - for(l = 0; l < NumRunes;) { - if(!prop[l]){ - l++; - continue; - } - - for(r = l+1; r < NumRunes; r++){ - if(!prop[r]) - break; - prop[r] = 0; - } - - if(force || r > l + 1){ - if(!start){ - printf("static rune %s[] = {\n", ident); - start = 1; - } - prop[l] = 0; - printf("\t0x%.4x, 0x%.4x,\n", l, r-1); - } - - l = r; - } - - if(start) - printf("};\n\n"); - - return start; -} - -int -putpair(char *ident, char *prop) -{ - int l, r, start; - - start = 0; - for(l=0; l+2 < NumRunes; ){ - if(!prop[l]){ - l++; - continue; - } - - for(r = l + 2; r < NumRunes; r += 2){ - if(!prop[r]) - break; - prop[r] = 0; - } - - if(r != l + 2){ - if(!start){ - printf("static rune %s[] = {\n", ident); - start = 1; - } - prop[l] = 0; - printf("\t0x%.4x, 0x%.4x,\n", l, r - 2); - } - - l = r; - } - - if(start) - printf("};\n\n"); - return start; -} - -int -putsingle(char *ident, char *prop) -{ - int i, start; - - start = 0; - for(i = 0; i < NumRunes; i++) { - if(!prop[i]) - continue; - - if(!start){ - printf("static rune %s[] = {\n", ident); - start = 1; - } - prop[i] = 0; - printf("\t0x%.4x,\n", i); - } - - if(start) - printf("};\n\n"); - - return start; -} diff --git a/src/libutf/vendor/common.h b/src/libutf/vendor/common.h deleted file mode 100644 index 62f6c5b..0000000 --- a/src/libutf/vendor/common.h +++ /dev/null @@ -1,46 +0,0 @@ -#pragma once - -#include -#include -#include - -enum -{ - // Fields inside UnicodeData.txt - Fcode, - Fname, - Fcategory, - Fcombine, - Fbidir, - Fdecomp, - Fdecimal, - Fdigit, - Fnumeric, - Fmirror, - Foldname, - Fcomment, - Fupper, - Flower, - Ftitle, - - NumFields, - NumRunes = 1 << 21, -}; - -/* input functions */ -enum -{ - ParseEOF, - ParseOK, - ParseSkip, -}; - -int parse(io·Stream *io, int nfield, char **field, int len, char *line); -int codepoint(char *s); -void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop); - -/* output functions */ -void putsearch(void); -int putrange(char *ident, char *prop, int force); -int putpair(char *ident, char *prop); -int putsingle(char *ident, char *prop); diff --git a/src/libutf/vendor/mkgraphemedata.c b/src/libutf/vendor/mkgraphemedata.c deleted file mode 100644 index ce5a952..0000000 --- a/src/libutf/vendor/mkgraphemedata.c +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include -#include - -// ----------------------------------------------------------------------- -// main point of entry - -static -void -usage(void) -{ - fprintf(stderr, "usage: mkgraphemedata \n"); - exit(1); -} - -int -main(int argc, char *argv[]) -{ - io·Stream *utf8; - char line[1024]; - - ARGBEGIN{ - }ARGEND; -} diff --git a/src/libutf/vendor/mkrunetype.c b/src/libutf/vendor/mkrunetype.c deleted file mode 100644 index b33df32..0000000 --- a/src/libutf/vendor/mkrunetype.c +++ /dev/null @@ -1,390 +0,0 @@ -#include "common.h" - -// ----------------------------------------------------------------------- -// globals - -#define OFFSET (1 << 20) -#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x)) - -// TODO: use bitarrays. will reduce executable size 8x -struct Table -{ - /* properties */ - char isspace[NumRunes]; - char isalpha[NumRunes]; - char ismark[NumRunes]; - char isdigit[NumRunes]; - char isupper[NumRunes]; - char islower[NumRunes]; - char istitle[NumRunes]; - char ispunct[NumRunes]; - char issymbl[NumRunes]; - char iscntrl[NumRunes]; - - char combine[NumRunes]; - - /* transformations */ - int toupper[NumRunes]; - int tolower[NumRunes]; - int totitle[NumRunes]; -}; - -static struct Table table; - -// ----------------------------------------------------------------------- -// internal functions - -static -int -isrange(char *label, char *prop, int force) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putrange(ident, prop, force); -} - -static -int -ispair(char *label, char *prop) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putpair(ident, prop); -} - -static -int -issingle(char *label, char *prop) -{ - char ident[128]; - if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident)) - panicf("out of identifier space\n"); - - return putsingle(ident, prop); -} - -static -void -makeis(char *label, char *table, int pairs, int onlyranges) -{ - int hasr, hasp=0, hass=0; - - hasr = isrange(label, table, onlyranges); - if(!onlyranges && pairs) - hasp = ispair(label, table); - if(!onlyranges) - hass = issingle(label, table); - - printf( - "int\n" - "utf8·is%s(rune c)\n" - "{\n" - " rune *p;\n" - "\n", - label); - - if(hasr){ - printf( - " p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1])\n" - " return 1;\n", - label, label); - } - - if(hasp){ - printf( - " p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return 1;\n", - label, label); - } - - if(hass) - printf( - " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n" - " if(p && c == p[0])\n" - " return 1;\n", - label, label); - - printf( - " return 0;\n" - "}\n" - "\n"); -} - -static -int -torange(char *label, int *index, int force) -{ - int l, r, d, start = 0; - - for(l = 0; l < NumRunes; ){ - if(index[l] == l){ - l++; - continue; - } - - d = DELTA(index[l], l); - if(d != (rune)d) - panicf("bad map delta %d", d); - - for(r = l+1; r < NumRunes; r++){ - if(DELTA(index[r], r) != d) - break; - index[r] = r; - } - - if(force || r != l + 1){ - if(!start){ - printf("static rune to%s_range[] = {\n", label); - start = 1; - } - index[l] = l; - printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d); - } - l = r; - } - if(start) - printf("};\n\n"); - - return start; -} - -static -int -topair(char *label, int *index) -{ - int l, r, d, start = 0; - - for(l = 0; l + 2 < NumRunes; ){ - if(index[l] == l){ - l++; - continue; - } - - d = DELTA(index[l], l); - if(d != (rune)d) - panicf("bad delta %d", d); - - for(r = l+2; r < NumRunes; r += 2){ - if(DELTA(index[r], r) != d) - break; - index[r] = r; - } - - if(r > l+2){ - if(!start){ - printf("static rune to%s_pair[] = {\n", label); - start = 1; - } - index[l] = l; - printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d); - } - - l = r; - } - if(start) - printf("};\n\n"); - - return start; -} - -static -int -tosingle(char *label, int *index) -{ - int i, d, start = 0; - - for(i=0; i < NumRunes; i++) { - if(index[i] == i) - continue; - - d = DELTA(index[i], i); - if(d != (rune)d) - panicf("bad map delta %d", d); - - if(!start){ - printf("static rune to%s_single[] = {\n", label); - start = 1; - } - index[i] = i; - printf("\t0x%.4x, %d,\n", i, d); - } - if(start) - printf("};\n\n"); - - return start; -} - -static -void -mkto(char *label, int *index, int pairs, int onlyrange) -{ - int hasr, hasp=0, hass=0; - - hasr = torange(label, index, !onlyrange); - if(!onlyrange && pairs) - hasp = topair(label, index); - if(!onlyrange) - hass = tosingle(label, index); - - printf( - "rune\n" - "utf8·to%s(rune c)\n" - "{\n" - " rune *p;\n" - "\n", - label); - - if(hasr) - printf( - " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n" - " if(p && c >= p[0] && c <= p[1])\n" - " return c + p[2] - %d;\n", - label, label, OFFSET); - - if(hasp) - printf( - " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n" - " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return c + p[2] - %d;\n", - label, label, OFFSET); - - if(hass) - printf( - " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n" - " if(p && c == p[0])\n" - " return c + p[1] - %d;\n", - label, label, OFFSET); - - - printf( - " return c;\n" - "}\n" - "\n" - ); -} - -// ----------------------------------------------------------------------- -// main point of entry - -static -void -usage(void) -{ - fprintf(stderr, "usage: mkrunetype \n"); - exit(1); -} - -int -main(int argc, char *argv[]) -{ - int i, sc, c, ec; - io·Stream *utf8; - char *prop, *field[NumFields], line[1024]; - - ARGBEGIN{ - }ARGEND; - - if(argc != 1) - usage(); - - if(!(utf8 = io·open(argv[0], "r"))) - panicf("can't open %s\n", argv[0]); - - /* by default each character maps to itself */ - for(i = 0; i < NumRunes; i++) { - table.toupper[i] = i; - table.tolower[i] = i; - table.totitle[i] = i; - } - - /* ensure all C local white space characters pass */ - table.isspace['\t'] = 1; - table.isspace['\n'] = 1; - table.isspace['\r'] = 1; - table.isspace['\f'] = 1; - table.isspace['\v'] = 1; - table.isspace[0x85] = 1; - - ec = -1; - // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any - while(parse(utf8, arrlen(field), field, arrlen(line), line)){ - /* parse unicode range */ - codepointrange(utf8, field, &sc, &ec); - prop = field[Fcategory]; - - for(c = sc; c <= ec; c++){ - /* grab properties */ - switch(prop[0]){ - case 'L': - table.isalpha[c] = 1; - switch(prop[1]){ - case 'u': table.isupper[c] = 1; break; - case 'l': table.islower[c] = 1; break; - case 't': table.istitle[c] = 1; break; - case 'm': break; // modifier letters - case 'o': break; // ideograph letters - default: - goto badproperty; - } - break; - - case 'Z': - table.isspace[c] = 1; - break; - - case 'M': - table.ismark[c] = 1; - break; - - case 'N': - table.isdigit[c] = 1; - break; - - case 'P': - table.ispunct[c] = 1; - break; - - case 'S': - table.issymbl[c] = 1; - break; - - case 'C': - table.iscntrl[c] = 1; - break; - - default: badproperty: - panicf("unrecognized category '%s'", prop); - } - /* grab transformations */ - if(*field[Fupper]) - table.toupper[c] = codepoint(field[Fupper]); - if(*field[Flower]) - table.tolower[c] = codepoint(field[Flower]); - if(*field[Ftitle]) - table.totitle[c] = codepoint(field[Ftitle]); - } - } - io·close(utf8); - - putsearch(); - - makeis("space", table.isspace, 0, 1); - makeis("digit", table.isdigit, 0, 1); - makeis("alpha", table.isalpha, 0, 0); - makeis("upper", table.isupper, 1, 0); - makeis("lower", table.islower, 1, 0); - makeis("title", table.istitle, 1, 0); - makeis("punct", table.ispunct, 1, 0); - - mkto("upper", table.toupper, 1, 0); - mkto("lower", table.tolower, 1, 0); - mkto("title", table.totitle, 1, 0); - - return 0; -} diff --git a/src/libutf/vendor/mkrunewidth.c b/src/libutf/vendor/mkrunewidth.c deleted file mode 100644 index 14e6973..0000000 --- a/src/libutf/vendor/mkrunewidth.c +++ /dev/null @@ -1,325 +0,0 @@ -#include "common.h" - -/* - * inspired by design choices in utf8proc/charwidths.jl - * all widths default to 1 unless they fall within the categories: - * 1. Mn 2. Mc 3. Me 4. Zl - * 5. Zp 6. Cc 7. Cf 8. Cs - * these default to zero width - */ -enum -{ - /* width ? */ - WidthNeutral, /* (N) practially treated like narrow but unclear ... */ - WidthAmbiguous, /* (A) sometimes wide and sometimes not... */ - /* width 1 */ - WidthHalf, /* (H) = to narrow (compatability equivalent) */ - WidthNarrow, /* (Na) ASCII width */ - /* width 2 */ - WidthWide, /* (W) 2x width */ - WidthFull, /* (F) = to wide (compatability equivalent) */ -}; - -struct Table -{ - char width[3][NumRunes]; -}; - -static struct Table table; - -// ----------------------------------------------------------------------- -// internal functions - -static -void -parse_category(char *path) -{ - int sc, c, ec, w; - io·Stream *utf8; - char *prop, *field[NumFields], line[1024]; - - if(!(utf8 = io·open(path, "r"))) - panicf("can't open %s\n", path); - - // NOTE: we don't check for comments here - ec = -1; - while(parse(utf8, arrlen(field), field, arrlen(line), line)){ - codepointrange(utf8, field, &sc, &ec); - - prop = field[Fcategory]; - - switch(prop[0]){ - case 'M': - switch(prop[1]){ - case 'n': case 'c': case 'e': - w = 0; - break; - default: - w = 1; - break; - } - break; - case 'Z': - switch(prop[1]){ - case 'l': case 'p': - w = 0; - break; - default: - w = 1; - break; - } - break; - case 'C': - switch(prop[1]){ - case 'c': case 'f': case 's': - w = 0; - break; - default: - w = 1; - break; - } - default: - w = 1; - } - - for(c = sc; c <= ec; c++) - table.width[w][c] = 1; - } - - io·close(utf8); -} - -static -void -coderange(char *field, int *l, int *r) -{ - char *s; - - if(!(s = strstr(field, ".."))) - *l=*r=codepoint(field); - else{ - *s++ = 0, *s++ = 0; - *l=codepoint(field); - *r=codepoint(s); - } -} - -static -void -parse_eawidths(char *path) -{ - int at, w; - int l, c, r; - io·Stream *utf8; - char *field[2], line[1024]; - - utf8 = io·open(path, "r"); - while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ - if(at == ParseSkip) - continue; - - switch(field[1][0]){ - case 'A': continue; - case 'N': - if(field[1][1] != 'a') - continue; - /* fallthrough */ - case 'H': w = 1; break; - - case 'W': /* fallthrough */ - case 'F': w = 2; break; - - default: - panicf("malformed east asian width class: %s\n", field[1]); - } - - coderange(field[0], &l, &r); - - for(c=l; c <= r; c++){ - /* ensure it only exists in one table */ - table.width[w][c] = 1; - table.width[(w+1)%3][c] = 0; - table.width[(w+2)%3][c] = 0; - } - } - io·close(utf8); -} - -static -void -parse_emoji(char *path) -{ - int at, w; - int l, c, r; - io·Stream *utf8; - char *s, *field[2], line[1024]; - - utf8 = io·open(path, "r"); - while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){ - if(at == ParseSkip) - continue; - - /* only override emoji presentation */ - if(!strstr(field[1], "Emoji_Presentation")) - continue; - - /* trim trailing space */ - for(s=field[0]; *s; s++){ - if(*s == ' ') - *s = 0; - } - - coderange(field[0], &l, &r); - - for(c=l; c <= r; c++){ - table.width[0][c] = 0; - table.width[1][c] = 0; - table.width[2][c] = 1; - } - } - - io·close(utf8); -} - -/* output functions */ -static -void -maketable(char *label, char *table, int pairs, int onlyranges) -{ - int r, p=0, s=0; - char ident[3][128]; - - enum - { - Irange, - Ipair, - Isingle, - }; - - /* ranges */ - if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange])) - panicf("out of identifier space\n"); - r = putrange(ident[Irange], table, onlyranges); - - if(!onlyranges && pairs){ - if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair])) - panicf("out of identifier space\n"); - p = putpair(ident[Ipair], table); - } - if(!onlyranges){ - if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle])) - panicf("out of identifier space\n"); - - s = putsingle(ident[Isingle], table); - } - - printf( - "static int\n" - "is%s(rune c)\n" - "{\n" - " rune *p;\n" - "\n", - label); - - if(r){ - printf( - " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1])\n" - " return 1;\n", - ident[Irange], ident[Irange]); - } - - if(p){ - printf( - " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n" - " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n" - " return 1;\n", - ident[Ipair], ident[Ipair]); - } - - if(s) - printf( - " p = rangesearch(c, %s, arrlen(%s), 1);\n" - " if(p && c == p[0])\n" - " return 1;\n", - ident[Isingle], ident[Isingle]); - - printf( - " return 0;\n" - "}\n" - "\n"); -} - -// ----------------------------------------------------------------------- -// main point of entry - -static -void -usage(void) -{ - fprintf(stderr, "usage: mkrunewidth \n"); - exit(1); -} - -#define SETW0(c) \ - table.width[0][(c)] = 1, \ - table.width[1][(c)] = 0, \ - table.width[2][(c)] = 0; - -#define SETW1(c) \ - table.width[0][(c)] = 0, \ - table.width[1][(c)] = 1, \ - table.width[2][(c)] = 0; - -#define SETW2(c) \ - table.width[0][(c)] = 0, \ - table.width[1][(c)] = 0, \ - table.width[2][(c)] = 1; - - -int -main(int argc, char *argv[]) -{ - int c; - - ARGBEGIN{ - }ARGEND; - - if(argc != 3) - usage(); - - parse_category(*argv++); - parse_eawidths(*argv++); - parse_emoji(*argv); - - /* overrides */ - SETW0(0x2028); - SETW0(0x2029); - - SETW1(0x00AD); - - /* simple checking */ - for(c=0; c 1) - panicf("improper table state"); - } - - putsearch(); - - maketable("width0", table.width[0], 1, 0); - maketable("width1", table.width[1], 1, 0); - maketable("width2", table.width[2], 1, 0); - - puts( - "\n" - "int\n" - "utf8·runewidth(rune c)\n" - "{\n" - " if(iswidth1(c))\n" - " return 1;\n" - " if(iswidth2(c))\n" - " return 2;\n" - " return 0;\n" - "}" - ); -} diff --git a/src/rules.mk b/src/rules.mk index 9bb61ae..368479c 100644 --- a/src/rules.mk +++ b/src/rules.mk @@ -8,12 +8,6 @@ include $(DIR)/rules.mk DIR := $(d)/base include $(DIR)/rules.mk -DIR := $(d)/libutf -include $(DIR)/rules.mk - -DIR := $(d)/libfmt -include $(DIR)/rules.mk - DIR := $(d)/libmath include $(DIR)/rules.mk -- cgit v1.2.1