about summary refs log tree commit diff
path: root/sys/libunicode/vendor/mkrunetype.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libunicode/vendor/mkrunetype.c')
-rw-r--r-- sys/libunicode/vendor/mkrunetype.c 230
1 files changed, 23 insertions, 207 deletions
diff --git a/sys/libunicode/vendor/mkrunetype.c b/sys/libunicode/vendor/mkrunetype.c
index fc4301c..9f939f4 100644
--- a/sys/libunicode/vendor/mkrunetype.c
+++ b/sys/libunicode/vendor/mkrunetype.c
@@ -1,6 +1,4 @@
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
+#include "common.h"
// -----------------------------------------------------------------------
// globals
@@ -8,29 +6,6 @@
#define OFFSET (1 << 20)
#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x))
-enum
-{
- // Fields inside UnicodeData.txt
- Fcode,
- Fname,
- Fcategory,
- Fcombine,
- Fbidir,
- Fdecomp,
- Fdecimal,
- Fdigit,
- Fnumeric,
- Fmirror,
- Foldname,
- Fcomment,
- Fupper,
- Flower,
- Ftitle,
-
- NumFields,
- NumRunes = 1 << 21,
-};
-
// TODO: use bitarrays. will reduce executable size 8x
struct Table
{
@@ -59,154 +34,37 @@ static struct Table table;
// -----------------------------------------------------------------------
// internal functions
-int
-parse(io·Stream *io, char **field, int len, char *line)
-{
- int n;
- if((n=io·readln(io, len, line)) <= 0)
- return 0;
-
- if(n == len)
- panicf("line too long");
-
- if(line[n-1] != '\n')
- panicf("invalid line: expected '\n', found '%c'", line[n]);
-
- line[n-1] = 0;
-
- /* tokenize line into fields */
- n = 0;
- field[n] = line;
- while(*line){
- if(*line == ';'){
- *line = 0;
- field[++n] = line+1;
- }
- line++;
- }
-
- if(n != NumFields-1)
- panicf("expected %d number of fields, got %d", NumFields, n);
-
- return 1;
-}
-
-static
-int
-codepoint(char *s)
-{
- int c, b;
-
- c = 0;
- while((b=*s++)){
- c <<= 4;
- if(b >= '0' && b <= '9')
- c += b - '0';
- else if(b >= 'A' && b <= 'F')
- c += b - 'A' + 10;
- else
- panicf("bad codepoint char '%c'", b);
- }
-
- return c;
-}
-
static
int
isrange(char *label, char *prop, int force)
{
- int l, r, start;
-
- start = 0;
- for(l = 0; l < NumRunes;) {
- if(!prop[l]){
- l++;
- continue;
- }
-
- for(r = l+1; r < NumRunes; r++){
- if(!prop[r])
- break;
- prop[r] = 0;
- }
-
- if(force || r > l + 1){
- if(!start){
- printf("static rune is%s_range[] = {\n", label);
- start = 1;
- }
- prop[l] = 0;
- printf("\t0x%.4x, 0x%.4x,\n", l, r-1);
- }
-
- l = r;
- }
-
- if(start)
- printf("};\n\n");
+ char ident[128]; // receives the generated table name "is<label>_range"
+ if((unsigned)snprintf(ident, arrlen(ident), "is%s_range", label) >= arrlen(ident)) // snprintf returns the untruncated length; negative (error) wraps to a huge unsigned
+ panicf("out of identifier space\n");
- return start;
+ return putrange(ident, prop, force);
}
static
int
ispair(char *label, char *prop)
{
- int l, r, start;
-
- start = 0;
- for(l=0; l+2 < NumRunes; ){
- if(!prop[l]){
- l++;
- continue;
- }
-
- for(r = l + 2; r < NumRunes; r += 2){
- if(!prop[r])
- break;
- prop[r] = 0;
- }
-
- if(r != l + 2){
- if(!start){
- printf("static rune is%s_pair[] = {\n", label);
- start = 1;
- }
- prop[l] = 0;
- printf("\t0x%.4x, 0x%.4x,\n", l, r - 2);
- }
-
- l = r;
- }
+ char ident[128]; // receives the generated table name "is<label>_pair"
+ if((unsigned)snprintf(ident, arrlen(ident), "is%s_pair", label) >= arrlen(ident)) // snprintf returns the untruncated length; negative (error) wraps to a huge unsigned
+ panicf("out of identifier space\n");
- if(start)
- printf("};\n\n");
- return start;
+ return putpair(ident, prop);
}
static
int
issingle(char *label, char *prop)
{
- int i, start;
+ char ident[128]; // receives the generated table name "is<label>_single"
+ if((unsigned)snprintf(ident, arrlen(ident), "is%s_single", label) >= arrlen(ident)) // snprintf returns the untruncated length; negative (error) wraps to a huge unsigned
+ panicf("out of identifier space\n");
- start = 0;
- for(i = 0; i < NumRunes; i++) {
- if(!prop[i])
- continue;
-
- if(!start){
- printf("static rune is%s_single[] = {\n", label);
- start = 1;
- }
- prop[i] = 0;
- printf("\t0x%.4x,\n", i);
- }
-
- if(start)
- printf("};\n\n");
-
- return start;
+ return putsingle(ident, prop);
}
static
@@ -425,9 +283,9 @@ usage(void)
int
main(int argc, char *argv[])
{
- int i, c, pc;
+ int i, sc, c, ec;
io·Stream *utf8;
- char *prop, *field[NumFields], *field2[NumFields], line[1024], line2[1024];
+ char *prop, *field[NumFields], line[1024];
ARGBEGIN{
}ARGEND;
@@ -453,33 +311,14 @@ main(int argc, char *argv[])
table.isspace['\v'] = 1;
table.isspace[0x85] = 1;
- pc = -1;
- while(parse(utf8, field, arrlen(line), line)){
- /* parse unicode value */
- c = codepoint(field[Fcode]);
- if(c >= NumRunes)
- panicf("unexpected large codepoint %x", c);
- if(c <= pc)
- panicf("bad code sequence: %x then %x", pc, c);
- pc = c;
-
- /* see if this is a range */
- if(strstr(field[Fname], ", First>") != nil){
- if(!parse(utf8, field2, arrlen(line2), line2))
- panicf("range start at end of file");
- if(strstr(field2[Fname], ", Last>") == nil)
- panicf("range start not followed by range end");
-
- pc = codepoint(field2[Fcode]);
-
- if(pc <= c)
- panicf("bad code sequence: %x then %x", c, pc);
- if(strcmp(field[Fcategory], field2[Fcategory]) != 0)
- panicf("range with mismatched category");
- }
-
+ ec = -1;
+ // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
+ while(parse(utf8, arrlen(field), field, arrlen(line), line)){
+ /* parse unicode range */
+ codepointrange(utf8, field, &sc, &ec);
prop = field[Fcategory];
- for(; c <= pc; c++){
+
+ for(c = sc; c <= ec; c++){
/* grab properties */
switch(prop[0]){
case 'L':
@@ -533,30 +372,7 @@ main(int argc, char *argv[])
}
io·close(utf8);
- puts(
- "#include <u.h>\n"
- "#include <libunicode.h>\n"
- "\n"
- "static\n"
- "rune*\n"
- "rangesearch(rune c, rune *t, int n, int ne)\n"
- "{\n"
- " rune *p;\n"
- " int m;\n"
- " while(n > 1) {\n"
- " m = n >> 1;\n"
- " p = t + m*ne;\n"
- " if(c >= p[0]){\n"
- " t = p;\n"
- " n = n-m;\n"
- " }else\n"
- " n = m;\n"
- " }\n"
- " if(n && c >= t[0])\n"
- " return t;\n"
- " return 0;\n"
- "}\n"
- );
+ putsearch();
makeis("space", table.isspace, 0, 1);
makeis("digit", table.isdigit, 0, 1);