about | summary | refs | log | tree | commit | diff
path: root/src/base/utf
diff options
context:
space:
mode:
Diffstat (limited to 'src/base/utf')
-rw-r--r-- src/base/utf/rules.mk | 9
-rw-r--r-- src/base/utf/vendor/common.c | 45
-rw-r--r-- src/base/utf/vendor/common.h | 5
-rw-r--r-- src/base/utf/vendor/mkgraphemedata.c | 1
-rw-r--r-- src/base/utf/vendor/mkrunetype.c | 43
-rw-r--r-- src/base/utf/vendor/mkrunewidth.c | 50
6 files changed, 73 insertions, 80 deletions
diff --git a/src/base/utf/rules.mk b/src/base/utf/rules.mk
index dfe2da1..554ba6a 100644
--- a/src/base/utf/rules.mk
+++ b/src/base/utf/rules.mk
@@ -20,9 +20,14 @@ NEED_OBJS=\
$(OBJ_DIR)/base/arg.o\
$(OBJ_DIR)/base/utf/decode.o\
$(OBJ_DIR)/base/error/panicf.o\
- $(OBJ_DIR)/base/io/readln.o\
+ $(OBJ_DIR)/base/io/readline.o\
+ $(OBJ_DIR)/base/io/readuntil.o\
$(OBJ_DIR)/base/io/open.o\
- $(OBJ_DIR)/base/io/close.o\
+ $(OBJ_DIR)/base/io/openfd.o\
+ $(OBJ_DIR)/base/io/flush.o\
+ $(OBJ_DIR)/base/io/init.o\
+ $(OBJ_DIR)/base/mem/move.o\
+ $(OBJ_DIR)/base/mem/findc.o\
$(d)/utf/vendor/common.o
$(d)/utf/vendor/common.o: $(d)/utf/vendor/common.c
diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c
index fcf1177..c35d022 100644
--- a/src/base/utf/vendor/common.c
+++ b/src/base/utf/vendor/common.c
@@ -4,36 +4,32 @@
// input functions
int
-parse(io·Stream *io, int nfield, char **field, int len, char *line)
+parse(io·Buffer *io, int nfield, char field[][FieldLen])
{
int n;
- if((n=io·readln(io, len, line)) <= 0)
- return ParseEOF;
-
- if(n == len)
- panicf("line too long");
-
- if(line[n-1] != '\n')
- panicf("invalid line: expected '\n', found '%c'", line[n]);
+ char *b, *e;
- line[n-1] = 0;
+ if(!(b=io·readline((io·Header*)io,1)))
+ return ParseEOF;
- if(line[0] == '#' || line[0] == 0)
+ if(b[0] == '#' || b[0] == 0)
return ParseSkip;
/* tokenize line into fields */
n = 0;
- field[n] = line;
- while(*line){
- if(*line == ';'){
- *line = 0;
- field[++n] = line+1;
- }
- line++;
+ e = b;
+ while(*e){
+ if(*e == ';'){
+ *e = 0;
+ strcpy(field[n++], b);
+ b = ++e;
+ }else
+ ++e;
}
+ strcpy(field[n++], b);
- if(n != nfield-1)
- panicf("expected %d number of fields, got %d: %s", nfield, n, line);
+ if(n != nfield)
+ panicf("expected %d number of fields, got %d: %s", nfield, n, b);
return ParseOK;
}
@@ -58,10 +54,10 @@ codepoint(char *s)
}
void
-codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
+codepointrange(io·Buffer *utf8, char field[NumFields][FieldLen], int *start, int *stop)
{
int e, c;
- char *other[NumFields], line[1024];
+ char other[NumFields][FieldLen];
// XXX: the stop variable passes in the previous stopping character
e = *stop;
@@ -74,7 +70,7 @@ codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
e = c;
if(strstr(field[Fname], ", First>") != nil){
- if(!parse(utf8, arrlen(other), other, arrlen(line), line))
+ if(!parse(utf8, arrlen(other), other))
panicf("range start at end of file");
if(strstr(other[Fname], ", Last>") == nil)
panicf("range start not followed by range end");
@@ -101,8 +97,7 @@ putsearch(void)
"#include <u.h>\n"
"#include <base/utf.h>\n"
"\n"
- "static\n"
- "rune*\n"
+ "static rune*\n"
"rangesearch(rune c, rune *t, int n, int ne)\n"
"{\n"
" rune *p;\n"
diff --git a/src/base/utf/vendor/common.h b/src/base/utf/vendor/common.h
index 51a53bd..95d7eaf 100644
--- a/src/base/utf/vendor/common.h
+++ b/src/base/utf/vendor/common.h
@@ -25,6 +25,7 @@ enum
NumFields,
NumRunes = 1 << 21,
};
+#define FieldLen 128
/* input functions */
enum
@@ -34,9 +35,9 @@ enum
ParseSkip,
};
-int parse(io·Stream *io, int nfield, char **field, int len, char *line);
+int parse(io·Buffer *io, int nfield, char field[][FieldLen]);
int codepoint(char *s);
-void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop);
+void codepointrange(io·Buffer *utf8, char field[NumFields][FieldLen], int *start, int *stop);
/* output functions */
void putsearch(void);
diff --git a/src/base/utf/vendor/mkgraphemedata.c b/src/base/utf/vendor/mkgraphemedata.c
index ce5a952..1d2394f 100644
--- a/src/base/utf/vendor/mkgraphemedata.c
+++ b/src/base/utf/vendor/mkgraphemedata.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <base.h>
-#include <libutf.h>
// -----------------------------------------------------------------------
// main point of entry
diff --git a/src/base/utf/vendor/mkrunetype.c b/src/base/utf/vendor/mkrunetype.c
index b33df32..3d75ce8 100644
--- a/src/base/utf/vendor/mkrunetype.c
+++ b/src/base/utf/vendor/mkrunetype.c
@@ -34,8 +34,7 @@ static struct Table table;
// -----------------------------------------------------------------------
// internal functions
-static
-int
+static int
isrange(char *label, char *prop, int force)
{
char ident[128];
@@ -45,8 +44,7 @@ isrange(char *label, char *prop, int force)
return putrange(ident, prop, force);
}
-static
-int
+static int
ispair(char *label, char *prop)
{
char ident[128];
@@ -56,8 +54,7 @@ ispair(char *label, char *prop)
return putpair(ident, prop);
}
-static
-int
+static int
issingle(char *label, char *prop)
{
char ident[128];
@@ -67,8 +64,7 @@ issingle(char *label, char *prop)
return putsingle(ident, prop);
}
-static
-void
+static void
makeis(char *label, char *table, int pairs, int onlyranges)
{
int hasr, hasp=0, hass=0;
@@ -116,8 +112,7 @@ makeis(char *label, char *table, int pairs, int onlyranges)
"\n");
}
-static
-int
+static int
torange(char *label, int *index, int force)
{
int l, r, d, start = 0;
@@ -154,8 +149,7 @@ torange(char *label, int *index, int force)
return start;
}
-static
-int
+static int
topair(char *label, int *index)
{
int l, r, d, start = 0;
@@ -193,8 +187,7 @@ topair(char *label, int *index)
return start;
}
-static
-int
+static int
tosingle(char *label, int *index)
{
int i, d, start = 0;
@@ -220,8 +213,7 @@ tosingle(char *label, int *index)
return start;
}
-static
-void
+static void
mkto(char *label, int *index, int pairs, int onlyrange)
{
int hasr, hasp=0, hass=0;
@@ -272,8 +264,7 @@ mkto(char *label, int *index, int pairs, int onlyrange)
// -----------------------------------------------------------------------
// main point of entry
-static
-void
+static void
usage(void)
{
fprintf(stderr, "usage: mkrunetype <UnicodeData.txt>\n");
@@ -283,9 +274,9 @@ usage(void)
int
main(int argc, char *argv[])
{
- int i, sc, c, ec;
- io·Stream *utf8;
- char *prop, *field[NumFields], line[1024];
+ int i, sc, c, ec, err;
+ static io·Buffer utf8;
+ char *prop, field[NumFields][FieldLen];
ARGBEGIN{
}ARGEND;
@@ -293,8 +284,8 @@ main(int argc, char *argv[])
if(argc != 1)
usage();
- if(!(utf8 = io·open(argv[0], "r")))
- panicf("can't open %s\n", argv[0]);
+ if((err=io·open(argv[0], sys·ORead, &utf8)))
+ panicf("can't open %s: %d: %s\n", argv[0], err, strerror(err));
/* by default each character maps to itself */
for(i = 0; i < NumRunes; i++) {
@@ -313,9 +304,9 @@ main(int argc, char *argv[])
ec = -1;
// NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
- while(parse(utf8, arrlen(field), field, arrlen(line), line)){
+ while(parse(&utf8, arrlen(field), field)){
/* parse unicode range */
- codepointrange(utf8, field, &sc, &ec);
+ codepointrange(&utf8, field, &sc, &ec);
prop = field[Fcategory];
for(c = sc; c <= ec; c++){
@@ -370,7 +361,7 @@ main(int argc, char *argv[])
table.totitle[c] = codepoint(field[Ftitle]);
}
}
- io·close(utf8);
+ io·close((io·Header *)&utf8);
putsearch();
diff --git a/src/base/utf/vendor/mkrunewidth.c b/src/base/utf/vendor/mkrunewidth.c
index 14e6973..c911b66 100644
--- a/src/base/utf/vendor/mkrunewidth.c
+++ b/src/base/utf/vendor/mkrunewidth.c
@@ -30,21 +30,20 @@ static struct Table table;
// -----------------------------------------------------------------------
// internal functions
-static
-void
+static void
parse_category(char *path)
{
int sc, c, ec, w;
- io·Stream *utf8;
- char *prop, *field[NumFields], line[1024];
+ static io·Buffer utf8;
+ char *prop, field[NumFields][FieldLen];
- if(!(utf8 = io·open(path, "r")))
+ if(io·open(path, sys·ORead, &utf8))
panicf("can't open %s\n", path);
// NOTE: we don't check for comments here
ec = -1;
- while(parse(utf8, arrlen(field), field, arrlen(line), line)){
- codepointrange(utf8, field, &sc, &ec);
+ while(parse(&utf8, arrlen(field), field)){
+ codepointrange(&utf8, field, &sc, &ec);
prop = field[Fcategory];
@@ -86,11 +85,10 @@ parse_category(char *path)
table.width[w][c] = 1;
}
- io·close(utf8);
+ io·close((io·Header *)&utf8);
}
-static
-void
+static void
coderange(char *field, int *l, int *r)
{
char *s;
@@ -104,17 +102,18 @@ coderange(char *field, int *l, int *r)
}
}
-static
-void
+static void
parse_eawidths(char *path)
{
int at, w;
int l, c, r;
- io·Stream *utf8;
- char *field[2], line[1024];
+ static io·Buffer utf8;
+ char field[2][FieldLen];
- utf8 = io·open(path, "r");
- while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+ if(io·open(path, sys·ORead, &utf8))
+ panicf("can't open %s\n", path);
+
+ while((at=parse(&utf8, arrlen(field), field)) != ParseEOF){
if(at == ParseSkip)
continue;
@@ -142,20 +141,21 @@ parse_eawidths(char *path)
table.width[(w+2)%3][c] = 0;
}
}
- io·close(utf8);
+ io·close((io·Header*)&utf8);
}
-static
-void
+static void
parse_emoji(char *path)
{
int at, w;
int l, c, r;
- io·Stream *utf8;
- char *s, *field[2], line[1024];
+ static io·Buffer utf8;
+ char *s, field[2][FieldLen];
- utf8 = io·open(path, "r");
- while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
+ if(io·open(path, sys·ORead, &utf8))
+ panicf("can't open %s\n", path);
+
+ while((at=parse(&utf8, arrlen(field), field)) != ParseEOF){
if(at == ParseSkip)
continue;
@@ -178,7 +178,7 @@ parse_emoji(char *path)
}
}
- io·close(utf8);
+ io·close((io·Header*)&utf8);
}
/* output functions */
@@ -322,4 +322,6 @@ main(int argc, char *argv[])
" return 0;\n"
"}"
);
+
+ return 0;
}