From fa25c8f3df6791727b9384c9b405c996ac68b8ab Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 15 May 2020 18:11:58 -0700 Subject: feat: added buffered io to libn --- sys/libbio/test.c | 18 ---- sys/libc/rules.mk | 2 +- sys/libc/stdio.c | 4 +- sys/libn/.generated/utf8.c | 52 ++++++------ sys/libn/bufio.c | 206 ++++++++++++++++++++++++++++++++++++++++----- sys/libn/string.c | 14 +-- 6 files changed, 221 insertions(+), 75 deletions(-) (limited to 'sys') diff --git a/sys/libbio/test.c b/sys/libbio/test.c index da29c84..b7cbae1 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -3,7 +3,6 @@ #include #include -#include "kseq.h" // ----------------------------------------------------------------------- // Global data @@ -83,8 +82,6 @@ my_read(Stream *s, void *buf, int n) return io·read(s, 1, n, buf); } -KSEQ_INIT(Stream*, my_read) - // ----------------------------------------------------------------------- // Point of entry for testing @@ -198,23 +195,8 @@ test·fastq() clock_t t; - int n, slen; - kseq_t *kseq; - fd = io·open("/home/nolln/root/data/test/eg.fq", "r"); - t = clock(); - kseq = kseq_init(fd); - while (kseq_read(kseq) >= 0) { - ++n, slen += kseq->seq.l; - } - t = clock() - t; - printf("heng's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); - - kseq_destroy(kseq); - - io·seek(fd, 0, seek·set); - rdr = bio·openfastq((io·Reader){.read = &io·read}, fd, mem·sys, nil); t = clock(); diff --git a/sys/libc/rules.mk b/sys/libc/rules.mk index f017738..96d4202 100644 --- a/sys/libc/rules.mk +++ b/sys/libc/rules.mk @@ -4,7 +4,7 @@ include share/push.mk # Local sources SRCS_$(d) := $(wildcard $(d)/*.c) -LIBS_$(d) := $(d)/libc.a +LIBS_$(d) := $(d)/libc_n.a BINS_$(d) := include share/paths.mk diff --git a/sys/libc/stdio.c b/sys/libc/stdio.c index f3295b2..8bbbe9a 100644 --- a/sys/libc/stdio.c +++ b/sys/libc/stdio.c @@ -8,7 +8,7 @@ printf(byte* fmt, ...) 
va_start(args, fmt); int nw, rem, peek, len; - byte* str; + byte *str, c; while (*fmt) { rem = INT_MAX - nw; @@ -30,7 +30,7 @@ printf(byte* fmt, ...) switch (*fmt++) { case 'c': - byte c = va_arg(args, int); + c = va_arg(args, int); if (rem < 0) return -1; // TODO: Print here nw++; diff --git a/sys/libn/.generated/utf8.c b/sys/libn/.generated/utf8.c index 862f100..e101e1a 100644 --- a/sys/libn/.generated/utf8.c +++ b/sys/libn/.generated/utf8.c @@ -1,7 +1,7 @@ -Rune* -rbsearch(Rune c, Rune* t, int n, int nelem) +rune* +rbsearch(rune c, rune* t, int n, int nelem) { - Rune* p; + rune* p; int m; while (n > 1) { @@ -19,18 +19,18 @@ rbsearch(Rune c, Rune* t, int n, int nelem) else return 0; } -static Rune isspace_rtab[] = { +static rune isspace_rtab[] = { 0x0009, 0x000c, 0x2000, 0x200a, 0x2028, 0x2029, }; -static Rune isspace_stab[] = { +static rune isspace_stab[] = { 0x0020, 0x0085, 0x00a0, 0x1680, 0x202f, 0x205f, 0x3000, }; int -utf8·IsSpace(Rune c) +utf8·isspace(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isspace_rtab, arrlen(isspace_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -40,7 +40,7 @@ utf8·IsSpace(Rune c) return 0; } -static Rune islower_rtab[] = { +static rune islower_rtab[] = { 0x0061, 0x007a, 0x00df, 0x00f6, 0x00f8, 0x00ff, 0x0137, 0x0138, 0x0148, 0x0149, 0x017e, 0x0180, 0x018c, 0x018d, 0x0199, 0x019b, 0x01aa, 0x01ab, 0x01b9, 0x01ba, 0x01bd, 0x01bf, 0x01dc, 0x01dd, 0x01ef, 0x01f0, 0x0233, 0x0239, 0x023f, 0x0240, @@ -64,7 +64,7 @@ static Rune islower_rtab[] = { 0x1d78a, 0x1d78f, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7c9, }; -static Rune islower_stab[] = { +static rune islower_stab[] = { 0x00b5, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, 0x0131, 0x0133, 0x0135, 0x013a, 0x013c, 0x013e, 0x0140, 0x0142, @@ -117,9 +117,9 @@ static Rune islower_stab[] = { }; int -utf8·IsLower(Rune c) +utf8·islower(rune c) 
{ - Rune* p; + rune* p; p = rbsearch(c, islower_rtab, arrlen(islower_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -129,7 +129,7 @@ utf8·IsLower(Rune c) return 0; } -static Rune isupper_rtab[] = { +static rune isupper_rtab[] = { 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, 0x0178, 0x0179, 0x0181, 0x0182, 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x0191, 0x0193, 0x0194, 0x0196, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0, 0x01a6, 0x01a7, 0x01ae, 0x01af, 0x01b1, 0x01b3, @@ -150,7 +150,7 @@ static Rune isupper_rtab[] = { 0x1d6a8, 0x1d6c0, 0x1d6e2, 0x1d6fa, 0x1d71c, 0x1d734, 0x1d756, 0x1d76e, 0x1d790, 0x1d7a8, }; -static Rune isupper_stab[] = { +static rune isupper_stab[] = { 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, @@ -204,9 +204,9 @@ static Rune isupper_stab[] = { }; int -utf8·IsUpper(Rune c) +utf8·isupper(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isupper_rtab, arrlen(isupper_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -216,18 +216,18 @@ utf8·IsUpper(Rune c) return 0; } -static Rune istitle_rtab[] = { +static rune istitle_rtab[] = { 0x1f88, 0x1f8f, 0x1f98, 0x1f9f, 0x1fa8, 0x1faf, }; -static Rune istitle_stab[] = { +static rune istitle_stab[] = { 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1fbc, 0x1fcc, }; int -utf8·IsTitle(Rune c) +utf8·istitle(rune c) { - Rune* p; + rune* p; p = rbsearch(c, istitle_rtab, arrlen(istitle_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -237,7 +237,7 @@ utf8·IsTitle(Rune c) return 0; } -static Rune isletter_rtab[] = { +static rune isletter_rtab[] = { 0x0041, 0x005a, 0x0061, 0x007a, 0x00c0, 0x00d6, 0x00d8, 0x00f6, 0x00f8, 0x02c1, 0x02c6, 0x02d1, 0x02e0, 0x02e4, 0x0370, 0x0374, 0x0376, 0x0377, 0x037a, 0x037d, 0x0388, 0x038a, 0x038e, 0x03a1, 0x03a3, 0x03f5, 0x03f7, 0x0481, 0x048a, 
0x052f, @@ -333,7 +333,7 @@ static Rune isletter_rtab[] = { 0x1ee80, 0x1ee89, 0x1ee8b, 0x1ee9b, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, }; -static Rune isletter_stab[] = { +static rune isletter_stab[] = { 0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5, 0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2, 0x09bd, 0x09ce, 0x09fc, 0x0a5e, 0x0abd, 0x0ad0, 0x0af9, 0x0b3d, 0x0b71, 0x0b83, @@ -352,9 +352,9 @@ static Rune isletter_stab[] = { }; int -utf8·IsLetter(Rune c) +utf8·isletter(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isletter_rtab, arrlen(isletter_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -364,7 +364,7 @@ utf8·IsLetter(Rune c) return 0; } -static Rune isdigit_rtab[] = { +static rune isdigit_rtab[] = { 0x0030, 0x0039, 0x0660, 0x0669, 0x06f0, 0x06f9, 0x07c0, 0x07c9, 0x0966, 0x096f, 0x09e6, 0x09ef, 0x0a66, 0x0a6f, 0x0ae6, 0x0aef, 0x0b66, 0x0b6f, 0x0be6, 0x0bef, 0x0c66, 0x0c6f, 0x0ce6, 0x0cef, 0x0d66, 0x0d6f, 0x0de6, 0x0def, 0x0e50, 0x0e59, @@ -380,9 +380,9 @@ static Rune isdigit_rtab[] = { }; int -utf8·IsDigit(Rune c) +utf8·isdigit(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isdigit_rtab, arrlen(isdigit_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; diff --git a/sys/libn/bufio.c b/sys/libn/bufio.c index 6b15760..38714a5 100644 --- a/sys/libn/bufio.c +++ b/sys/libn/bufio.c @@ -1,33 +1,197 @@ #include #include -enum +// ----------------------------------------------------------------------- +// reader + +error /* bufio·initreader: put buf into reader mode over rdr, with h stored as the handle later passed to rdr.read. Returns bufio·err (after logging) when buf->state is already nonzero, 0 otherwise. */ +bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h) +{ + if (buf->state) { + errorf("attemped to initialize an active buffer, state is '%d'", buf->state); + return bufio·err; + } + buf->state = bufio·rdr; + buf->runesize = 0; + buf->h = h; + buf->rdr = rdr; + buf->beg = buf->buf + bufio·ungets; /* the first bufio·ungets bytes of buf->buf stay in front of beg; presumably the push back area, confirm against the io·Buffer declaration */ + buf->pos = buf->beg; + buf->end = buf->pos; /* pos == end: nothing buffered yet */ + buf->size = buf->end - buf->beg; /* NOTE(review): end == beg at this point, so size is always 0 here; refill passes buf->size as its read count, so confirm whether the buffer capacity was intended */ + + return 0; +} + +void +bufio·finireader(io·Buffer *buf) +{ +
buf->state = bufio·nil; + buf->runesize = 0; + buf->rdr = (io·Reader){ .read = nil }; +} + +static +int /* refill: ask the underlying reader for more bytes. Returns the count read, 0 when the reader reports no data (the end flag is then set), or bufio·err on a read failure or when the end flag is already set. NOTE(review): the read targets buf->buf and neither buf->pos nor buf->end is updated here, and bufio·initreader stores 0 in buf->size; the intended buffer layout must be confirmed against the io·Buffer declaration before this can be repaired. */ +refill(io·Buffer *buf) +{ + int n, d; + + if (buf->state & bufio·end) { + return bufio·err; + } + + n = buf->rdr.read(buf->h, 1, buf->size, buf->buf); + if (n < 0) + return bufio·err; + if (n == 0) { + buf->state |= bufio·end; + return 0; + } + + if (n < buf->size) { + d = buf->size - n; + + buf->state |= bufio·end; /* a short read is treated as end of input */ + + memmove(buf->pos + d, buf->pos, n); + memmove(buf->pos + d - bufio·ungets, buf->buf, bufio·ungets); + } + + return n; +} + +int /* bufio·getbyte: return the next buffered byte as a value in 0..255, refilling when the buffer is drained; returns bufio·eof when refill yields nothing. */ +bufio·getbyte(io·Buffer *buf) +{ +getbyte: + if (buf->pos < buf->end) { + return *(ubyte*)buf->pos++; /* read as unsigned so a byte >= 0x80 can never look like a negative status, whatever the signedness of byte */ + } + + memmove(buf->buf, buf->end - bufio·ungets, bufio·ungets); /* presumably keeps the newest bufio·ungets bytes available for push back across the refill; confirm */ + + if (refill(buf) <= 0) + return bufio·eof; + + goto getbyte; +} + +error /* bufio·ungetbyte: step the read position back over the byte most recently returned by bufio·getbyte. c must equal that byte. Returns 0 on success, bufio·err (after logging) when buf is not an active reader, when pos is already at the start of storage, or when c does not match. */ +bufio·ungetbyte(io·Buffer *buf, byte c) { - BUF·size = 8 * 2048, - BUF·ungets = 8, + buf->state &= ~bufio·end; /* clear the end flag; an xor here would set it whenever it was not already set */ + if (!(buf->state & bufio·rdr)) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } - BUF·bad = -2, - BUF·eof = -1, + if (buf->pos == buf->buf) { + errorf("attempted to unget past end of buffer"); + return bufio·err; + } - BUF·inactive = 0, - BUF·rdractive, - BUF·wtractive, + if (c != buf->pos[-1]) { /* getbyte post increments pos, so the byte last handed out sits just before it */ + errorf("unget char does not match"); + return bufio·err; + } - BUF·END, -} bmode; + buf->pos--; + return 0; +} -typedef struct Buffer +rune /* bufio·getrune: decode one UTF-8 rune from the buffer and record its encoded length in buf->runesize. Returns the rune; RuneErr for a sequence that does not fit in str or fails to decode; or the negative status from bufio·getbyte at end of input. */ +bufio·getrune(io·Buffer *buf) { - uint8 state; - vlong off; - vlong size; + int b; + int i; + byte str[UTFmax+1]; + rune r; + + // b is an int: an unsigned byte could never hold a negative status from bufio·getbyte 
+ b = bufio·getbyte(buf); + if (b < RuneSelf) { /* single byte rune; a negative status from getbyte also leaves through here */ + buf->runesize = 1; + return b; + } + + i = 0; + str[i++] = b; + +nextbyte: + b = bufio·getbyte(buf); + if (b < 0) return b; + if (i >= arrlen(str)) return RuneErr; + str[i++] = b; + if (!utf8·fullrune(str, i)) + goto nextbyte; - byte *bbuf, *ebuf; - byte b[BUF·size + BUF·ungets]; -} Buffer; + buf->runesize = utf8·chartorune(&r, str); + if (r == RuneErr && buf->runesize == 1) { /* a decode failure consumes exactly one byte */ + errorf("illegal UTF-8 sequence"); + for (i--; i >= 0; i--) /* begin at the last byte stored; str[i] itself was never written */ + errorf("%s%.2x", i > 0 ? " " : "", *(ubyte*)(str+i)); + errorf("\n"); -struct bufio·Stream + buf->runesize = 0; + } else + for (; i > buf->runesize; i--) + bufio·ungetbyte(buf, str[i-1]); /* hand back the surplus bytes, newest first */ + + return r; +} + +// TODO: Check that we are given the correct rune! +error /* bufio·ungetrune: step the read position back by buf->runesize, the encoded length recorded by the last bufio·getrune; r is currently not checked. Returns 0 on success, bufio·err (after logging) when buf is not an active reader or the step would leave the storage. */ +bufio·ungetrune(io·Buffer *buf, rune r) { - Stream *s; - Buffer buf; -}; + buf->state &= ~bufio·end; /* clear the end flag; an xor here would set it whenever it was not already set */ + if (!(buf->state & bufio·rdr)) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } + + if (buf->pos == buf->buf || buf->pos - buf->buf < buf->runesize) { /* never move pos below the start of storage */ + errorf("attempted to unget past end of buffer"); + return bufio·err; + } + + buf->pos -= buf->runesize; + return 0; +} + +int /* bufio·read: copy up to n elements of sz bytes each from the buffer into out, refilling when the buffer runs dry. Returns the number of whole elements copied, or bufio·err when n or sz is not positive or when the end flag is set and nothing is buffered. */ +bufio·read(io·Buffer *buf, int sz, int n, void *out) +{ + byte *wtr; + int nr, rem, diff; + + if (n <= 0 || sz <= 0 || (buf->pos == buf->end && (buf->state & bufio·end))) /* bytes still buffered remain readable after the end flag is set; sz is a divisor below */ + return bufio·err; + + assert(buf->state & bufio·rdr); + + wtr = out; + rem = n*sz; + while (rem > 0) { + diff = buf->end - buf->pos; + nr = MIN(diff, rem); + if (!nr) { + if (buf->state & bufio·end) + break; + if (refill(buf) <= 0) + break; + + continue; + } + memmove(wtr, buf->pos, nr); + wtr += nr; + buf->pos += nr; + rem -= nr; + } + + return (n*sz - rem)/sz; /* whole elements only; a partly copied element is not counted */ +} + +// ----------------------------------------------------------------------- +// writer diff --git a/sys/libn/string.c b/sys/libn/string.c index ca53bdc..100c1fe 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -27,10 +27,10 @@ enum }; int -utf8·chartorune(Rune* r, byte* s) +utf8·chartorune(rune* r, byte* s) { int c[UTFmax], i; - Rune l; + rune l; c[0] = *(ubyte*)(s); if (c[0] 
< Tx) { @@ -65,10 +65,10 @@ bad: } int -utf8·runetochar(byte* s, Rune* r) +utf8·runetochar(byte* s, rune* r) { int i, j; - Rune c; + rune c; c = *r; if (c <= Rune1) { @@ -95,7 +95,7 @@ utf8·runetochar(byte* s, Rune* r) } int -utf8·runelen(Rune r) +utf8·runelen(rune r) { byte s[10]; return utf8·runetochar(s, &r); @@ -105,7 +105,7 @@ int utf8·fullrune(byte* s, int n) { int i; - Rune c; + rune c; if (n <= 0) return 0; c = *(ubyte*) s; @@ -122,7 +122,7 @@ byte* utf8·findrune(byte* s, long c) { long c1; - Rune r; + rune r; int n; if (c < RuneSync) return strchr(s, c); -- cgit v1.2.1