From fa25c8f3df6791727b9384c9b405c996ac68b8ab Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 15 May 2020 18:11:58 -0700 Subject: feat: added buffered io to libn --- Makefile | 11 +-- bin/initmk | 14 ++- compile_commands.json | 235 +++++++++++++++++++++++++++++++-------------- include/libn.h | 54 +++++++++-- include/libn/macro/qsort.h | 2 +- rules.mk | 6 +- share/paths.mk | 2 +- sys/libbio/test.c | 18 ---- sys/libc/rules.mk | 2 +- sys/libc/stdio.c | 4 +- sys/libn/.generated/utf8.c | 52 +++++----- sys/libn/bufio.c | 206 +++++++++++++++++++++++++++++++++++---- sys/libn/string.c | 14 +-- 13 files changed, 454 insertions(+), 166 deletions(-) diff --git a/Makefile b/Makefile index ecd8408..be2ff44 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Compiler, Linker, and Assembler -CC := clang +CC := gcc AR := ar AS := nasm @@ -17,8 +17,7 @@ CFINI := `gcc --print-file-name=crtendS.o` $(LIB_DIR)/crt/x86_64/crtn.o # Flags, Libraries and Includes CFLAGS := -g -march=native \ - -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions \ - -Wno-microsoft-anon-tag -Wno-incompatible-function-pointer-types + -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions STATIC := -static -nodefaultlibs -nostartfiles AFLAGS := -f elf64 INCS := -isystem $(INC_DIR)/vendor/libc -I $(INC_DIR) @@ -26,11 +25,11 @@ ELIBS := -L$(LIB_DIR) -lc # Named generic rules (must be evaluated lazily) COMPILE = @echo "CC "$^;\ - $(CC) -MMD $(CFLAGS) $(TCFLAGS) $(INCS) $(TCINCS) -o $@ -c $< + $(CC) -MD $(CFLAGS) $(TCFLAGS) $(INCS) $(TCINCS) -o $@ -c $< LINK = @echo "LD "$@;\ - $(CC) -MMD $(CFLAGS) $(STATIC) $(TCFLAGS) -o $@ $(CINIT) $^ $(CFINI) $(TCLIBS) $(ELIBS) + $(CC) -MD $(CFLAGS) $(STATIC) $(TCFLAGS) -o $@ $(CINIT) $^ $(CFINI) $(TCLIBS) $(ELIBS) COMPLINK = @echo "LD "$@;\ - $(CC) -MMD $(CFLAGS) $(STATIC) $(TCFLAGS) $u$(INCS) $(TCINCS) -o $@ $(CINIT) $^ $(CFINI) $(TCLIBS) $(ELIBS) + $(CC) -MD $(CFLAGS) $(STATIC) $(TCFLAGS) $(INCS) $(TCINCS) -o $@ $(CINIT) $^ $(CFINI) $(TCLIBS) $(ELIBS) ASSEMBLE = @echo "AS "$^;\ $(AS) $(AFLAGS) $(TCFLAGS) -o $@ $< ARCHIVE = @echo "AR "$@;\ diff --git a/bin/initmk b/bin/initmk index 77fcfd7..141764d 100755 --- a/bin/initmk +++ b/bin/initmk @@ -1,5 +1,6 @@ #!/bin/python +import os import sys NAME = "rules.mk" @@ -10,7 +11,7 @@ TEMPLATE = """include share/push.mk SRCS_$(d) := $(wildcard $(d)/*.c) LIBS_$(d) := BINS_$(d) := -TSTS_$(d) := +TSTS_$(d) := $(wildcard $(d)/*_test.c:.c=.test) include share/pop.mk @@ -28,8 +29,17 @@ $(BINS_$(d)): $(OBJS_$(d)) include share/pop.mk""" if __name__ == "__main__": + if len(sys.argv) == 2: + dir = sys.argv[1] + if not os.path.exists(dir): + raise ValueError(f"path '{dir}' does not exist") + path = f"{dir}/{NAME}" + elif len(sys.argv) > 2: + raise ValueError("only one argument is accepted") + else: + path = NAME try: - with open(NAME, 'x') as makefile: + with open(path, 'x') as makefile: makefile.write(f"{TEMPLATE}\n") except: print("rules.mk already present", file=sys.stderr) diff --git a/compile_commands.json b/compile_commands.json index cbb7712..0b9f71c 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -16,11 +16,11 @@ "-I", "include", "-o", - "build/libn/coro.o", - "sys/libn/coro.c" + "build/libn/random.o", + "sys/libn/random.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/coro.c" + "file": "sys/libn/random.c" }, { "arguments": [ @@ -34,16 +34,17 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libn/memory.o", - "sys/libn/memory.c" + "build/libmath/linalg.o", + "sys/libmath/linalg.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/memory.c" + "file": "sys/libmath/linalg.c" }, { "arguments": [ @@ -57,19 +58,40 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libc/string.o", - "sys/libc/string.c" + "build/libmath/blas1.o", + "sys/libmath/blas1.c" ], "directory": "/home/nolln/root", - "file": "sys/libc/string.c" + "file": "sys/libmath/blas1.c" + }, + { + "arguments": [ + "clang", + "-c", + "-g", + "-march=native", + "-ffast-math", + "-fno-strict-aliasing", + "-fwrapv", + "-fms-extensions", + "-Wno-microsoft-anon-tag", + "-Wno-incompatible-function-pointer-types", + "-isystem", + "include/vendor/libc", + "-I", + "include", + "-o", + "build/libn/coro.o", + "sys/libn/coro.c" + ], + "directory": "/home/nolln/root", + "file": "sys/libn/coro.c" }, { "arguments": [ @@ -89,11 +111,11 @@ "-I", "include", "-o", - "build/libmath/lapack.o", - "sys/libmath/lapack.c" + "build/libmath/basic.o", + "sys/libmath/basic.c" ], "directory": "/home/nolln/root", - "file": "sys/libmath/lapack.c" + "file": "sys/libmath/basic.c" }, { "arguments": [ @@ -112,11 +134,11 @@ "-I", "include", "-o", - "build/libn/random.o", - "sys/libn/random.c" + "build/libn/io.o", + "sys/libn/io.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/random.c" + "file": "sys/libn/io.c" }, { "arguments": [ @@ -135,11 +157,11 @@ "-I", "include", "-o", - "build/libn/test.o", - "sys/libn/test.c" + "build/libn/mmap.o", + "sys/libn/mmap.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/test.c" + "file": "sys/libn/mmap.c" }, { "arguments": [ @@ -153,17 +175,42 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libmath/blas.o", - "sys/libmath/blas.c" + "build/libbio/simulate.o", + "sys/libbio/simulate.c" ], "directory": "/home/nolln/root", - "file": "sys/libmath/blas.c" + "file": "sys/libbio/simulate.c" + }, + { + "arguments": [ + "clang", + "-c", + "-g", + "-march=native", + "-ffast-math", + "-fno-strict-aliasing", + "-fwrapv", + "-fms-extensions", + "-Wno-microsoft-anon-tag", + "-Wno-incompatible-function-pointer-types", + "-ffreestanding", + "-fno-builtin", + "-nostdlib", + "-isystem", + "include/vendor/libc", + "-I", + "include", + "-o", + "build/libc/string.o", + "sys/libc/string.c" + ], + "directory": "/home/nolln/root", + "file": "sys/libc/string.c" }, { "arguments": [ @@ -201,16 +248,17 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libbio/test.o", - "sys/libbio/test.c" + "build/libmath/blas2.o", + "sys/libmath/blas2.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/test.c" + "file": "sys/libmath/blas2.c" }, { "arguments": [ @@ -229,11 +277,11 @@ "-I", "include", "-o", - "build/libn/io.o", - "sys/libn/io.c" + "build/libn/test.o", + "sys/libn/test.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/io.c" + "file": "sys/libn/test.c" }, { "arguments": [ @@ -252,11 +300,11 @@ "-I", "include", "-o", - "build/libbio/phylo.o", - "sys/libbio/phylo.c" + "build/libn/gz.o", + "sys/libn/gz.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/phylo.c" + "file": "sys/libn/gz.c" }, { "arguments": [ @@ -275,11 +323,11 @@ "-I", "include", "-o", - "build/libn/sort.o", - "sys/libn/sort.c" + "build/libn/memory.o", + "sys/libn/memory.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/sort.c" + "file": "sys/libn/memory.c" }, { "arguments": [ @@ -293,19 +341,16 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libc/stdio.o", - "sys/libc/stdio.c" + "build/libbio/io/fasta.o", + "sys/libbio/io/fasta.c" ], "directory": "/home/nolln/root", - "file": "sys/libc/stdio.c" + "file": "sys/libbio/io/fasta.c" }, { "arguments": [ @@ -324,11 +369,11 @@ "-I", "include", "-o", - "build/libn/gz.o", - "sys/libn/gz.c" + "build/libn/bufio.o", + "sys/libn/bufio.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/gz.c" + "file": "sys/libn/bufio.c" }, { "arguments": [ @@ -342,16 +387,17 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libn/error.o", - "sys/libn/error.c" + "build/libmath/blas.o", + "sys/libmath/blas.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/error.c" + "file": "sys/libmath/blas.c" }, { "arguments": [ @@ -370,11 +416,11 @@ "-I", "include", "-o", - "build/libn/bufio.o", - "sys/libn/bufio.c" + "build/libn/error.o", + "sys/libn/error.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/bufio.c" + "file": "sys/libn/error.c" }, { "arguments": [ @@ -393,11 +439,11 @@ "-I", "include", "-o", - "build/libbio/align.o", - "sys/libbio/align.c" + "build/libn/string.o", + "sys/libn/string.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/align.c" + "file": "sys/libn/string.c" }, { "arguments": [ @@ -416,11 +462,11 @@ "-I", "include", "-o", - "build/libbio/io/newick.o", - "sys/libbio/io/newick.c" + "build/libn/sort.o", + "sys/libn/sort.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/io/newick.c" + "file": "sys/libn/sort.c" }, { "arguments": [ @@ -434,16 +480,19 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-ffreestanding", + "-fno-builtin", + "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libn/mmap.o", - "sys/libn/mmap.c" + "build/libc/stdio.o", + "sys/libc/stdio.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/mmap.c" + "file": "sys/libc/stdio.c" }, { "arguments": [ @@ -485,11 +534,11 @@ "-I", "include", "-o", - "build/libbio/io/fasta.o", - "sys/libbio/io/fasta.c" + "build/libbio/io/newick.o", + "sys/libbio/io/newick.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/io/fasta.c" + "file": "sys/libbio/io/newick.c" }, { "arguments": [ @@ -508,11 +557,11 @@ "-I", "include", "-o", - "build/libbio/simulate.o", - "sys/libbio/simulate.c" + "build/libbio/test.o", + "sys/libbio/test.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/simulate.c" + "file": "sys/libbio/test.c" }, { "arguments": [ @@ -531,10 +580,56 @@ "-I", "include", "-o", - "build/libn/string.o", - "sys/libn/string.c" + "build/libbio/phylo.o", + "sys/libbio/phylo.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/string.c" + "file": "sys/libbio/phylo.c" + }, + { + "arguments": [ + "clang", + "-c", + "-g", + "-march=native", + "-ffast-math", + "-fno-strict-aliasing", + "-fwrapv", + "-fms-extensions", + "-Wno-microsoft-anon-tag", + "-Wno-incompatible-function-pointer-types", + "-isystem", + "include/vendor/libc", + "-I", + "include", + "-o", + "build/libbio/align.o", + "sys/libbio/align.c" + ], + "directory": "/home/nolln/root", + "file": "sys/libbio/align.c" + }, + { + "arguments": [ + "clang", + "-c", + "-g", + "-march=native", + "-ffast-math", + "-fno-strict-aliasing", + "-fwrapv", + "-fms-extensions", + "-Wno-microsoft-anon-tag", + "-Wno-incompatible-function-pointer-types", + "-isystem", + "include/vendor/libc", + "-I", + "include", + "-o", + "build/cmd/cat/cat.o", + "sys/cmd/cat/cat.c" + ], + "directory": "/home/nolln/root", + "file": "sys/cmd/cat/cat.c" } ] \ No newline at end of file diff --git a/include/libn.h b/include/libn.h index 4efe3c1..c57868c 100644 --- a/include/libn.h +++ b/include/libn.h @@ -120,7 +120,7 @@ typedef struct str·Hdr // Perhaps break into own unit // TODO: Add to(upper|lower|title) -typedef uint32 Rune; +typedef uint32 rune; enum { @@ -134,14 +134,14 @@ enum /* utf8 helpers */ int utf8·fullrune(byte *s, int n); byte *utf8·findrune(byte *s, long i); -int utf8·chartorune(Rune *r, byte *s); -int utf8·runetochar(byte *s, Rune *r); +int utf8·chartorune(rune *r, byte *s); +int utf8·runetochar(byte *s, rune *r); int utf8·len(byte *s); -int utf8·runelen(Rune r); -int utf8·isletter(Rune r); -int utf8·isdigit(Rune r); -int utf8·isspace(Rune r); -int utf8·istitle(Rune r); +int utf8·runelen(rune r); +int utf8·isletter(rune r); +int utf8·isdigit(rune r); +int utf8·isspace(rune r); +int utf8·istitle(rune r); /* string helpers */ string str·makecap(const byte *s, vlong len, vlong cap); @@ -240,6 +240,44 @@ typedef struct io·ReadWriter io·Writer; } io·ReadWriter; +/* buffered i/o */ +typedef struct io·Buffer io·Buffer; + +enum +{ + bufio·size = 2*4096, + bufio·ungets = 8, + bufio·eof = -1, + bufio·err = -2, + + bufio·nil = 1 << 0, + bufio·rdr = 1 << 1, + bufio·wtr = 1 << 2, + bufio·end = 1 << 3, +}; + +struct io·Buffer +{ + int state; + int runesize; + void *h; + union { + io·Reader rdr; + io·Writer wtr; + }; + vlong size; + byte *beg, *pos, *end; + byte buf[bufio·size + bufio·ungets]; +}; + +error bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h); +void bufio·finireader(io·Buffer *buf); +int bufio·getbyte(io·Buffer *buf); +error bufio·ungetbyte(io·Buffer *buf, byte c); +rune bufio·getrune(io·Buffer *buf); +error bufio·ungetrune(io·Buffer *buf, rune r); +int bufio·read(io·Buffer *buf, int sz, int n, void *out); + // ----------------------------------------------------------------------------- // memory mapped files diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h index 2ff964a..6d0acaa 100644 --- a/include/libn/macro/qsort.h +++ b/include/libn/macro/qsort.h @@ -88,4 +88,4 @@ ENDOUTER: \ for (j = i; j > 0 && QLESS(j, j-1); j--) { \ QSWAP(j, j-1); \ } \ - } \ + } diff --git a/rules.mk b/rules.mk index 5d19d61..7e4a911 100644 --- a/rules.mk +++ b/rules.mk @@ -50,10 +50,10 @@ clean: install: targets @echo installing executables - @if [ -n "$$BINS" ]; then\ - mv $(BINS) $(BIN_DIR); \ + @if [ -n $$BINS ]; then\ + cp $(BINS) $(BIN_DIR); \ fi @echo installing libraries - @if [ -n "$$LIBS" ]; then\ + @if [ -n $$LIBS ]; then\ cp $(LIBS) $(LIB_DIR); \ fi diff --git a/share/paths.mk b/share/paths.mk index fe706a9..e4fabcc 100644 --- a/share/paths.mk +++ b/share/paths.mk @@ -9,7 +9,7 @@ DEPS := $(DEPS) $(DEPS_$(d)) LIBS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(LIBS_$(d))) LIBS := $(LIBS) $(LIBS_$(d)) -BINS_$(d) := $(patsubst $(SRC_DIR)/%, $(BIN_DIR)/%, $(BINS_$(d))) +BINS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(BINS_$(d))) BINS := $(BINS) $(BINS_$(d)) TSTS_$(d) := $(patsubst $(SRC_DIR)/%, $(TST_DIR)/%, $(TSTS_$(d))) diff --git a/sys/libbio/test.c b/sys/libbio/test.c index da29c84..b7cbae1 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -3,7 +3,6 @@ #include #include -#include "kseq.h" // ----------------------------------------------------------------------- // Global data @@ -83,8 +82,6 @@ my_read(Stream *s, void *buf, int n) return io·read(s, 1, n, buf); } -KSEQ_INIT(Stream*, my_read) - // ----------------------------------------------------------------------- // Point of entry for testing @@ -198,23 +195,8 @@ test·fastq() clock_t t; - int n, slen; - kseq_t *kseq; - fd = io·open("/home/nolln/root/data/test/eg.fq", "r"); - t = clock(); - kseq = kseq_init(fd); - while (kseq_read(kseq) >= 0) { - ++n, slen += kseq->seq.l; - } - t = clock() - t; - printf("heng's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); - - kseq_destroy(kseq); - - io·seek(fd, 0, seek·set); - rdr = bio·openfastq((io·Reader){.read = &io·read}, fd, mem·sys, nil); t = clock(); diff --git a/sys/libc/rules.mk b/sys/libc/rules.mk index f017738..96d4202 100644 --- a/sys/libc/rules.mk +++ b/sys/libc/rules.mk @@ -4,7 +4,7 @@ include share/push.mk # Local sources SRCS_$(d) := $(wildcard $(d)/*.c) -LIBS_$(d) := $(d)/libc.a +LIBS_$(d) := $(d)/libc_n.a BINS_$(d) := include share/paths.mk diff --git a/sys/libc/stdio.c b/sys/libc/stdio.c index f3295b2..8bbbe9a 100644 --- a/sys/libc/stdio.c +++ b/sys/libc/stdio.c @@ -8,7 +8,7 @@ printf(byte* fmt, ...) va_start(args, fmt); int nw, rem, peek, len; - byte* str; + byte *str, c; while (*fmt) { rem = INT_MAX - nw; @@ -30,7 +30,7 @@ printf(byte* fmt, ...) switch (*fmt++) { case 'c': - byte c = va_arg(args, int); + c = va_arg(args, int); if (rem < 0) return -1; // TODO: Print here nw++; diff --git a/sys/libn/.generated/utf8.c b/sys/libn/.generated/utf8.c index 862f100..e101e1a 100644 --- a/sys/libn/.generated/utf8.c +++ b/sys/libn/.generated/utf8.c @@ -1,7 +1,7 @@ -Rune* -rbsearch(Rune c, Rune* t, int n, int nelem) +rune* +rbsearch(rune c, rune* t, int n, int nelem) { - Rune* p; + rune* p; int m; while (n > 1) { @@ -19,18 +19,18 @@ rbsearch(Rune c, Rune* t, int n, int nelem) else return 0; } -static Rune isspace_rtab[] = { +static rune isspace_rtab[] = { 0x0009, 0x000c, 0x2000, 0x200a, 0x2028, 0x2029, }; -static Rune isspace_stab[] = { +static rune isspace_stab[] = { 0x0020, 0x0085, 0x00a0, 0x1680, 0x202f, 0x205f, 0x3000, }; int -utf8·IsSpace(Rune c) +utf8·isspace(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isspace_rtab, arrlen(isspace_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -40,7 +40,7 @@ utf8·IsSpace(Rune c) return 0; } -static Rune islower_rtab[] = { +static rune islower_rtab[] = { 0x0061, 0x007a, 0x00df, 0x00f6, 0x00f8, 0x00ff, 0x0137, 0x0138, 0x0148, 0x0149, 0x017e, 0x0180, 0x018c, 0x018d, 0x0199, 0x019b, 0x01aa, 0x01ab, 0x01b9, 0x01ba, 0x01bd, 0x01bf, 0x01dc, 0x01dd, 0x01ef, 0x01f0, 0x0233, 0x0239, 0x023f, 0x0240, @@ -64,7 +64,7 @@ static Rune islower_rtab[] = { 0x1d78a, 0x1d78f, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7c9, }; -static Rune islower_stab[] = { +static rune islower_stab[] = { 0x00b5, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, 0x0131, 0x0133, 0x0135, 0x013a, 0x013c, 0x013e, 0x0140, 0x0142, @@ -117,9 +117,9 @@ static Rune islower_stab[] = { }; int -utf8·IsLower(Rune c) +utf8·islower(rune c) { - Rune* p; + rune* p; p = rbsearch(c, islower_rtab, arrlen(islower_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -129,7 +129,7 @@ utf8·IsLower(Rune c) return 0; } -static Rune isupper_rtab[] = { +static rune isupper_rtab[] = { 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, 0x0178, 0x0179, 0x0181, 0x0182, 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x0191, 0x0193, 0x0194, 0x0196, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0, 0x01a6, 0x01a7, 0x01ae, 0x01af, 0x01b1, 0x01b3, @@ -150,7 +150,7 @@ static Rune isupper_rtab[] = { 0x1d6a8, 0x1d6c0, 0x1d6e2, 0x1d6fa, 0x1d71c, 0x1d734, 0x1d756, 0x1d76e, 0x1d790, 0x1d7a8, }; -static Rune isupper_stab[] = { +static rune isupper_stab[] = { 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, @@ -204,9 +204,9 @@ static Rune isupper_stab[] = { }; int -utf8·IsUpper(Rune c) +utf8·isupper(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isupper_rtab, arrlen(isupper_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -216,18 +216,18 @@ utf8·IsUpper(Rune c) return 0; } -static Rune istitle_rtab[] = { +static rune istitle_rtab[] = { 0x1f88, 0x1f8f, 0x1f98, 0x1f9f, 0x1fa8, 0x1faf, }; -static Rune istitle_stab[] = { +static rune istitle_stab[] = { 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1fbc, 0x1fcc, }; int -utf8·IsTitle(Rune c) +utf8·istitle(rune c) { - Rune* p; + rune* p; p = rbsearch(c, istitle_rtab, arrlen(istitle_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -237,7 +237,7 @@ utf8·IsTitle(Rune c) return 0; } -static Rune isletter_rtab[] = { +static rune isletter_rtab[] = { 0x0041, 0x005a, 0x0061, 0x007a, 0x00c0, 0x00d6, 0x00d8, 0x00f6, 0x00f8, 0x02c1, 0x02c6, 0x02d1, 0x02e0, 0x02e4, 0x0370, 0x0374, 0x0376, 0x0377, 0x037a, 0x037d, 0x0388, 0x038a, 0x038e, 0x03a1, 0x03a3, 0x03f5, 0x03f7, 0x0481, 0x048a, 0x052f, @@ -333,7 +333,7 @@ static Rune isletter_rtab[] = { 0x1ee80, 0x1ee89, 0x1ee8b, 0x1ee9b, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, }; -static Rune isletter_stab[] = { +static rune isletter_stab[] = { 0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5, 0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2, 0x09bd, 0x09ce, 0x09fc, 0x0a5e, 0x0abd, 0x0ad0, 0x0af9, 0x0b3d, 0x0b71, 0x0b83, @@ -352,9 +352,9 @@ static Rune isletter_stab[] = { }; int -utf8·IsLetter(Rune c) +utf8·isletter(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isletter_rtab, arrlen(isletter_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; @@ -364,7 +364,7 @@ utf8·IsLetter(Rune c) return 0; } -static Rune isdigit_rtab[] = { +static rune isdigit_rtab[] = { 0x0030, 0x0039, 0x0660, 0x0669, 0x06f0, 0x06f9, 0x07c0, 0x07c9, 0x0966, 0x096f, 0x09e6, 0x09ef, 0x0a66, 0x0a6f, 0x0ae6, 0x0aef, 0x0b66, 0x0b6f, 0x0be6, 0x0bef, 0x0c66, 0x0c6f, 0x0ce6, 0x0cef, 0x0d66, 0x0d6f, 0x0de6, 0x0def, 0x0e50, 0x0e59, @@ -380,9 +380,9 @@ static Rune isdigit_rtab[] = { }; int -utf8·IsDigit(Rune c) +utf8·isdigit(rune c) { - Rune* p; + rune* p; p = rbsearch(c, isdigit_rtab, arrlen(isdigit_rtab) / 2, 2); if (p && c >= p[0] && c <= p[1]) return 1; diff --git a/sys/libn/bufio.c b/sys/libn/bufio.c index 6b15760..38714a5 100644 --- a/sys/libn/bufio.c +++ b/sys/libn/bufio.c @@ -1,33 +1,197 @@ #include #include -enum +// ----------------------------------------------------------------------- +// reader + +error +bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h) +{ + if (buf->state) { + errorf("attemped to initialize an active buffer, state is '%d'", buf->state); + return bufio·err; + } + buf->state = bufio·rdr; + buf->runesize = 0; + buf->h = h; + buf->rdr = rdr; + buf->beg = buf->buf + bufio·ungets; + buf->pos = buf->beg; + buf->end = buf->pos; + buf->size = buf->end - buf->beg; + + return 0; +} + +void +bufio·finireader(io·Buffer *buf) +{ + buf->state = bufio·nil; + buf->runesize = 0; + buf->rdr = (io·Reader){ .read = nil }; +} + +static +int +refill(io·Buffer *buf) +{ + int n, d; + + if (buf->state & bufio·end) { + return bufio·err; + } + + n = buf->rdr.read(buf->h, 1, buf->size, buf->buf); + if (n < 0) + return bufio·err; + if (n == 0) { + buf->state |= bufio·end; + return 0; + } + + if (n < buf->size) { + d = buf->size - n; + + buf->state |= bufio·end; + + memmove(buf->pos + d, buf->pos, n); + memmove(buf->pos + d - bufio·ungets, buf->buf, bufio·ungets); + } + + return n; +} + +int +bufio·getbyte(io·Buffer *buf) +{ +getbyte: + if (buf->pos < buf->end) { + return *buf->pos++; + } + + memmove(buf->buf, buf->end - bufio·ungets, bufio·ungets); + + if (refill(buf) <= 0) + return bufio·eof; + + goto getbyte; +} + +error +bufio·ungetbyte(io·Buffer *buf, byte c) { - BUF·size = 8 * 2048, - BUF·ungets = 8, + buf->state ^= bufio·end; + if (buf->state & bufio·rdr) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } - BUF·bad = -2, - BUF·eof = -1, + if (buf->pos == buf->buf) { + errorf("attempted to unget past end of buffer"); + return bufio·err; + } - BUF·inactive = 0, - BUF·rdractive, - BUF·wtractive, + if (c != *buf->pos) { + errorf("unget char does not match"); + return bufio·err; + } - BUF·END, -} bmode; + buf->pos--; + return 0; +} -typedef struct Buffer +rune +bufio·getrune(io·Buffer *buf) { - uint8 state; - vlong off; - vlong size; + ubyte b; + int i; + byte str[UTFmax+1]; + rune r; + + // NOTE: I'm worried about the sign here... + b = bufio·getbyte(buf); + if (b < RuneSelf) { + buf->runesize = 1; + return b; + } + + i = 0; + str[i++] = b; + +nextbyte: + b = bufio·getbyte(buf); + if (b < 0) return b; + if (i >= arrlen(str)) return RuneErr; + str[i++] = b; + if (!utf8·fullrune(str, i)) + goto nextbyte; - byte *bbuf, *ebuf; - byte b[BUF·size + BUF·ungets]; -} Buffer; + buf->runesize = utf8·chartorune(&r, str); + if (r == RuneErr && b == 1) { + errorf("illegal UTF-8 sequence"); + for (; i >= 0; i--) + errorf("%s%.2x", i > 0 ? " " : "", *(ubyte*)(str+i)); + errorf("\n"); -struct bufio·Stream + buf->runesize = 0; + } else + for (; i > buf->runesize; i--) + bufio·ungetbyte(buf, str[i]); + + return r; +} + +// TODO: Check that we are given the correct rune! +error +bufio·ungetrune(io·Buffer *buf, rune r) { - Stream *s; - Buffer buf; -}; + buf->state ^= bufio·end; + if (buf->state & bufio·rdr) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } + + if (buf->pos == buf->buf) { + errorf("attempted to unget past end of buffer"); + return bufio·err; + } + + buf->pos -= buf->runesize; + return 0; +} + +int +bufio·read(io·Buffer *buf, int sz, int n, void *out) +{ + byte *wtr; + int nr, rem, diff; + + if (n == 0 || buf->state & bufio·end) + return bufio·err; + + assert(buf->state & bufio·rdr); + + wtr = out; + rem = n*sz; + while (rem > 0) { + diff = buf->end - buf->pos; + nr = MIN(diff, rem); + if (!nr) { + if (buf->state & bufio·end) + break; + if (refill(buf) <= 0) + break; + + continue; + } + memmove(wtr, buf->pos, nr); + wtr += nr; + buf->pos += nr; + rem -= nr; + } + + return n - rem/sz; +} + +// ----------------------------------------------------------------------- +// writer diff --git a/sys/libn/string.c b/sys/libn/string.c index ca53bdc..100c1fe 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -27,10 +27,10 @@ enum }; int -utf8·chartorune(Rune* r, byte* s) +utf8·chartorune(rune* r, byte* s) { int c[UTFmax], i; - Rune l; + rune l; c[0] = *(ubyte*)(s); if (c[0] < Tx) { @@ -65,10 +65,10 @@ bad: } int -utf8·runetochar(byte* s, Rune* r) +utf8·runetochar(byte* s, rune* r) { int i, j; - Rune c; + rune c; c = *r; if (c <= Rune1) { @@ -95,7 +95,7 @@ utf8·runetochar(byte* s, Rune* r) } int -utf8·runelen(Rune r) +utf8·runelen(rune r) { byte s[10]; return utf8·runetochar(s, &r); @@ -105,7 +105,7 @@ int utf8·fullrune(byte* s, int n) { int i; - Rune c; + rune c; if (n <= 0) return 0; c = *(ubyte*) s; @@ -122,7 +122,7 @@ byte* utf8·findrune(byte* s, long c) { long c1; - Rune r; + rune r; int n; if (c < RuneSync) return strchr(s, c); -- cgit v1.2.1