aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas <nbnoll@eml.cc>2021-11-10 20:12:45 -0800
committerNicholas <nbnoll@eml.cc>2021-11-11 08:16:47 -0800
commit7ea1cdb7d31f00024f5a1d124b42cd19a03b959a (patch)
treef65abf9fa32856287de586129d4ed7c1c473864a
parent43688fe7190d0350349d47727c3663421d5618dc (diff)
chore: libunicode -> libutf
-rw-r--r--include/base.h8
-rw-r--r--include/libutf.h (renamed from include/libunicode.h)0
-rw-r--r--sys/base/mmap.c9
-rw-r--r--sys/cmd/dwm/dwm.h2
-rw-r--r--sys/cmd/dwm/rules.mk2
-rw-r--r--sys/cmd/ic/ic.c40
-rw-r--r--sys/cmd/ic/rules.mk2
-rw-r--r--sys/cmd/ic/strlcpy.c14
-rw-r--r--sys/cmd/menu/menu.h2
-rw-r--r--sys/cmd/rc/rc.h2
-rw-r--r--sys/cmd/rc/rules.mk2
-rw-r--r--sys/cmd/rules.mk3
-rw-r--r--sys/cmd/term/rules.mk4
-rw-r--r--sys/cmd/term/term.h2
-rw-r--r--sys/libunicode/vendor/common.c220
-rw-r--r--sys/libunicode/vendor/common.h46
-rw-r--r--sys/libunicode/vendor/mkgraphemedata.c24
-rw-r--r--sys/libunicode/vendor/mkrunetype.c388
-rw-r--r--sys/libunicode/vendor/mkrunewidth.c325
-rw-r--r--sys/libutf/canfit.c (renamed from sys/libunicode/canfit.c)0
-rw-r--r--sys/libutf/decode.c (renamed from sys/libunicode/decode.c)0
-rw-r--r--sys/libutf/decodeprev.c (renamed from sys/libunicode/decodeprev.c)0
-rw-r--r--sys/libutf/encode.c (renamed from sys/libunicode/encode.c)0
-rw-r--r--sys/libutf/find.c (renamed from sys/libunicode/find.c)0
-rw-r--r--sys/libutf/findlast.c (renamed from sys/libunicode/findlast.c)0
-rw-r--r--sys/libutf/internal.h (renamed from sys/libunicode/internal.h)2
-rw-r--r--sys/libutf/len.c (renamed from sys/libunicode/len.c)0
-rw-r--r--sys/libutf/rules.mk (renamed from sys/libunicode/rules.mk)2
-rw-r--r--sys/libutf/runelen.c (renamed from sys/libunicode/runelen.c)0
-rw-r--r--sys/libutf/runetype-14.0.0.c111
-rw-r--r--sys/libutf/runewidth-14.0.0.c71
-rw-r--r--sys/rules.mk2
32 files changed, 232 insertions, 1051 deletions
diff --git a/include/base.h b/include/base.h
index a7813e6..b841040 100644
--- a/include/base.h
+++ b/include/base.h
@@ -261,12 +261,12 @@ int bufio·read(io·Buffer *buf, int sz, int n, void *out);
// -----------------------------------------------------------------------------
// memory mapped files
-typedef struct mmap·Reader
+typedef struct mmap·Reader
{
vlong len;
- union {
- byte *buf;
- ubyte *ubuf;
+ union{
+ char *b;
+ uchar *u;
};
} mmap·Reader;
diff --git a/include/libunicode.h b/include/libutf.h
index 25d6dee..25d6dee 100644
--- a/include/libunicode.h
+++ b/include/libutf.h
diff --git a/sys/base/mmap.c b/sys/base/mmap.c
index 6edfe28..7f3062d 100644
--- a/sys/base/mmap.c
+++ b/sys/base/mmap.c
@@ -15,19 +15,19 @@ mmap·open(byte *filename)
s = io·open(filename, "r");
fd = io·fd(s);
err = io·stat(s, &st);
- if (err) {
+ if(err){
errorf("file stat: error code %d", err);
goto ERROR;
}
buf = mmap(nil, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
- if (!buf) {
+ if(!buf){
errorf("mmap: failed");
goto ERROR;
}
// NOTE: posix systems require that reference kept to mmap file after fd is closed
io·close(s);
- return (mmap·Reader){.len=st.st_size, .buf=buf};
+ return (mmap·Reader){.len=st.st_size, .b=buf};
ERROR:
io·close(s);
@@ -37,7 +37,6 @@ ERROR:
error
mmap·close(mmap·Reader rdr)
{
- munmap(rdr.buf, rdr.len);
-
+ munmap(rdr.b, rdr.len);
return 0;
}
diff --git a/sys/cmd/dwm/dwm.h b/sys/cmd/dwm/dwm.h
index 9b616d1..afec1f2 100644
--- a/sys/cmd/dwm/dwm.h
+++ b/sys/cmd/dwm/dwm.h
@@ -2,7 +2,7 @@
#pragma once
#include <u.h>
#include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
#include <errno.h>
#include <locale.h>
diff --git a/sys/cmd/dwm/rules.mk b/sys/cmd/dwm/rules.mk
index f7254cd..79c4548 100644
--- a/sys/cmd/dwm/rules.mk
+++ b/sys/cmd/dwm/rules.mk
@@ -22,7 +22,7 @@ $(BINS_$(d)): TCLIBS = \
`$(PKG) --libs freetype2` \
-lX11 -lXinerama -lXft -lX11-xcb -lxcb -lxcb-res
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a
$(COMPLINK)
include share/pop.mk
diff --git a/sys/cmd/ic/ic.c b/sys/cmd/ic/ic.c
index 19b868d..7fc37d8 100644
--- a/sys/cmd/ic/ic.c
+++ b/sys/cmd/ic/ic.c
@@ -1,6 +1,6 @@
/* See LICENSE file for license details. */
#include <u.h>
-#include <libn.h>
+#include <base.h>
#include <sys/select.h>
#include <sys/socket.h>
@@ -308,7 +308,7 @@ channel_rm(Channel *c)
free(c);
}
-static
+static
void
channel_leave(Channel *c)
{
@@ -321,7 +321,7 @@ channel_leave(Channel *c)
channel_rm(c);
}
-static
+static
void
loginkey(int ircfd, const char *key)
{
@@ -347,13 +347,13 @@ udsopen(const char *uds)
size_t len;
int fd;
- if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+ if((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
fprintf(stderr, "%s: socket: %s\n", argv0, strerror(errno));
exit(1);
}
sun.sun_family = AF_UNIX;
- if (strlcpy(sun.sun_path, uds, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
+ if(strlcpy(sun.sun_path, uds, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
fprintf(stderr, "%s: UNIX domain socket path truncation\n", argv0);
exit(1);
}
@@ -403,7 +403,7 @@ tcpopen(const char *host, const char *service)
return fd;
}
-static
+static
int
isnumeric(const char *s)
{
@@ -464,7 +464,7 @@ proc_channels_privmsg(int ircfd, Channel *c, char *buf)
ewritestr(ircfd, msg);
}
-static
+static
void
proc_channels_input(int ircfd, Channel *c, char *buf)
{
@@ -700,8 +700,8 @@ handle_channels_input(int ircfd, Channel *c)
{
char buf[IRC_MSG_MAX];
- if (read_line(c->fdin, buf, sizeof(buf)) == -1) {
- if (channel_reopen(c) == -1)
+ if(read_line(c->fdin, buf, sizeof(buf)) == -1) {
+ if(channel_reopen(c) == -1)
channel_rm(c);
return;
}
@@ -755,7 +755,7 @@ run(int ircfd, const char *host)
int r, maxfd;
snprintf(ping_msg, sizeof(ping_msg), "PING %s\r\n", host);
- while (isrunning) {
+ while(isrunning) {
maxfd = ircfd;
FD_ZERO(&rdset);
FD_SET(ircfd, &rdset);
@@ -767,12 +767,12 @@ run(int ircfd, const char *host)
memset(&tv, 0, sizeof(tv));
tv.tv_sec = 120;
r = select(maxfd + 1, &rdset, 0, 0, &tv);
- if (r < 0) {
+ if(r < 0){
if (errno == EINTR)
continue;
fprintf(stderr, "%s: select: %s\n", argv0, strerror(errno));
exit(1);
- } else if (r == 0) {
+ }else if(r == 0){
if (time(nil) - last_response >= PING_TIMEOUT) {
channel_print(channelmaster, "-!- ii shutting down: ping timeout");
exit(2); /* status code 2 for timeout */
@@ -780,11 +780,11 @@ run(int ircfd, const char *host)
ewritestr(ircfd, ping_msg);
continue;
}
- if (FD_ISSET(ircfd, &rdset)) {
+ if(FD_ISSET(ircfd, &rdset)) {
handle_server_output(ircfd);
last_response = time(nil);
}
- for (c = channels; c; c = tmp) {
+ for(c = channels; c; c = tmp) {
tmp = c->next;
if (FD_ISSET(c->fdin, &rdset))
handle_channels_input(ircfd, c);
@@ -803,7 +803,7 @@ main(int argc, char *argv[])
int ircfd, r;
/* use nickname and home dir of user by default */
- if (!(spw = getpwuid(getuid()))) {
+ if(!(spw = getpwuid(getuid()))) {
fprintf(stderr, "%s: getpwuid: %s\n", argv0, strerror(errno));
exit(1);
}
@@ -837,10 +837,10 @@ main(int argc, char *argv[])
break;
} ARGEND
- if (!*host)
+ if(!*host)
usage();
- if (uds)
+ if(uds)
ircfd = udsopen(uds);
else
ircfd = tcpopen(host, service);
@@ -861,15 +861,15 @@ main(int argc, char *argv[])
create_dirtree(ircpath);
channelmaster = channel_add(""); /* master channel */
- if (key)
+ if(key)
loginkey(ircfd, key);
loginuser(ircfd, host, fullname && *fullname ? fullname : nick);
setup();
run(ircfd, host);
- if (channelmaster)
+ if(channelmaster)
channel_leave(channelmaster);
- for (c = channels; c; c = tmp) {
+ for(c = channels; c; c = tmp) {
tmp = c->next;
channel_leave(c);
}
diff --git a/sys/cmd/ic/rules.mk b/sys/cmd/ic/rules.mk
index c373dfb..649c9ac 100644
--- a/sys/cmd/ic/rules.mk
+++ b/sys/cmd/ic/rules.mk
@@ -8,7 +8,7 @@ BINS_$(d) := $(d)/ic
include share/paths.mk
# Local rules
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/libn/libn.a
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/base/base.a
$(COMPLINK)
include share/pop.mk
diff --git a/sys/cmd/ic/strlcpy.c b/sys/cmd/ic/strlcpy.c
index db0e6f0..5af7906 100644
--- a/sys/cmd/ic/strlcpy.c
+++ b/sys/cmd/ic/strlcpy.c
@@ -15,18 +15,18 @@ strlcpy(char *dst, const char *src, size_t siz)
size_t n = siz;
/* Copy as many bytes as will fit */
- if (n != 0) {
- while (--n != 0) {
- if ((*d++ = *s++) == '\0')
+ if(n != 0) {
+ while(--n != 0) {
+ if((*d++ = *s++) == '\0')
break;
}
}
/* Not enough room in dst, add NUL and traverse rest of src */
- if (n == 0) {
- if (siz != 0)
+ if(n == 0) {
+ if(siz != 0)
*d = '\0'; /* NUL-terminate dst */
- while (*s++)
+ while(*s++)
;
}
- return(s - src - 1); /* count does not include NUL */
+ return s - src - 1; /* count does not include NUL */
}
diff --git a/sys/cmd/menu/menu.h b/sys/cmd/menu/menu.h
index 8f1d6d7..f4345bb 100644
--- a/sys/cmd/menu/menu.h
+++ b/sys/cmd/menu/menu.h
@@ -1,7 +1,7 @@
/* See LICENSE file for copyright and license details. */
#include <u.h>
#include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
#include <time.h>
#include <locale.h>
diff --git a/sys/cmd/rc/rc.h b/sys/cmd/rc/rc.h
index 83c39e9..9b415fc 100644
--- a/sys/cmd/rc/rc.h
+++ b/sys/cmd/rc/rc.h
@@ -1,7 +1,7 @@
#include <u.h>
#include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
// -----------------------------------------------------------------------
// types
diff --git a/sys/cmd/rc/rules.mk b/sys/cmd/rc/rules.mk
index ceec50b..a2fd058 100644
--- a/sys/cmd/rc/rules.mk
+++ b/sys/cmd/rc/rules.mk
@@ -25,7 +25,7 @@ $(d)/parse.h $(d)/parse.c: $(d)/syntax.y
yacc --header=$(<D)/parse.h --output=$(<D)/parse.c $(<)
# Local rules
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a $(d)/parse.h
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a $(d)/parse.h
$(COMPLINK)
include share/pop.mk
diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk
index 07adaf1..52a059b 100644
--- a/sys/cmd/rules.mk
+++ b/sys/cmd/rules.mk
@@ -17,6 +17,9 @@ include $(DIR)/rules.mk
# DIR := $(d)/test
# include $(DIR)/rules.mk
+DIR := $(d)/ic
+include $(DIR)/rules.mk
+
DIR := $(d)/dwm
include $(DIR)/rules.mk
diff --git a/sys/cmd/term/rules.mk b/sys/cmd/term/rules.mk
index ef307c1..4011839 100644
--- a/sys/cmd/term/rules.mk
+++ b/sys/cmd/term/rules.mk
@@ -2,7 +2,7 @@ include share/push.mk
# Iterate through subdirectory tree
# Local sources
-SRCS_$(d) := $(d)/term.c $(d)/x.c $(d)/hb.c
+SRCS_$(d) := $(d)/term.c $(d)/x.c #$(d)/hb.c
BINS_$(d) := $(d)/term
include share/paths.mk
@@ -18,7 +18,7 @@ $(BINS_$(d)): TCLIBS = \
`$(PKG) --libs freetype2` \
-lm -lrt -lX11 -lutil -lXft -lXrender -lharfbuzz
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libunicode/libunicode.a $(OBJ_DIR)/sys/base/base.a
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/sys/libutf/libutf.a $(OBJ_DIR)/sys/base/base.a
$(COMPLINK)
include share/pop.mk
diff --git a/sys/cmd/term/term.h b/sys/cmd/term/term.h
index 3ff2852..f779654 100644
--- a/sys/cmd/term/term.h
+++ b/sys/cmd/term/term.h
@@ -3,7 +3,7 @@
#include <u.h>
#include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
#include <signal.h>
#include <sys/ioctl.h>
diff --git a/sys/libunicode/vendor/common.c b/sys/libunicode/vendor/common.c
deleted file mode 100644
index 6b5d1b3..0000000
--- a/sys/libunicode/vendor/common.c
+++ /dev/null
@@ -1,220 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// input functions
-
-int
-parse(io·Stream *io, int nfield, char **field, int len, char *line)
-{
- int n;
- if((n=io·readln(io, len, line)) <= 0)
- return ParseEOF;
-
- if(n == len)
- panicf("line too long");
-
- if(line[n-1] != '\n')
- panicf("invalid line: expected '\n', found '%c'", line[n]);
-
- line[n-1] = 0;
-
- if(line[0] == '#' || line[0] == 0)
- return ParseSkip;
-
- /* tokenize line into fields */
- n = 0;
- field[n] = line;
- while(*line){
- if(*line == ';'){
- *line = 0;
- field[++n] = line+1;
- }
- line++;
- }
-
- if(n != nfield-1)
- panicf("expected %d number of fields, got %d: %s", nfield, n, line);
-
- return ParseOK;
-}
-
-int
-codepoint(char *s)
-{
- int c, b;
-
- c = 0;
- while((b=*s++)){
- c <<= 4;
- if(b >= '0' && b <= '9')
- c += b - '0';
- else if(b >= 'A' && b <= 'F')
- c += b - 'A' + 10;
- else
- panicf("bad codepoint char '%c'", b);
- }
-
- return c;
-}
-
-void
-codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
-{
- int e, c;
- char *other[NumFields], line[1024];
-
- // XXX: the stop variable passes in the previous stopping character
- e = *stop;
- c = codepoint(field[Fcode]);
-
- if(c >= NumRunes)
- panicf("unexpected large codepoint %x", c);
- if(c <= e)
- panicf("bad code sequence: %x then %x", e, c);
- e = c;
-
- if(strstr(field[Fname], ", First>") != nil){
- if(!parse(utf8, arrlen(other), other, arrlen(line), line))
- panicf("range start at end of file");
- if(strstr(other[Fname], ", Last>") == nil)
- panicf("range start not followed by range end");
-
- e = codepoint(other[Fcode]);
-
- if(e <= c)
- panicf("bad code sequence: %x then %x", c, e);
- if(strcmp(field[Fcategory], other[Fcategory]) != 0)
- panicf("range with mismatched category");
- }
-
- *start = c;
- *stop = e;
-}
-
-// -----------------------------------------------------------------------
-// output functions
-
-void
-putsearch(void)
-{
- puts(
- "#include <u.h>\n"
- "#include <libunicode.h>\n"
- "\n"
- "static\n"
- "rune*\n"
- "rangesearch(rune c, rune *t, int n, int ne)\n"
- "{\n"
- " rune *p;\n"
- " int m;\n"
- " while(n > 1) {\n"
- " m = n >> 1;\n"
- " p = t + m*ne;\n"
- " if(c >= p[0]){\n"
- " t = p;\n"
- " n = n-m;\n"
- " }else\n"
- " n = m;\n"
- " }\n"
- " if(n && c >= t[0])\n"
- " return t;\n"
- " return 0;\n"
- "}\n"
- );
-
-}
-
-int
-putrange(char *ident, char *prop, int force)
-{
- int l, r, start;
-
- start = 0;
- for(l = 0; l < NumRunes;) {
- if(!prop[l]){
- l++;
- continue;
- }
-
- for(r = l+1; r < NumRunes; r++){
- if(!prop[r])
- break;
- prop[r] = 0;
- }
-
- if(force || r > l + 1){
- if(!start){
- printf("static rune %s[] = {\n", ident);
- start = 1;
- }
- prop[l] = 0;
- printf("\t0x%.4x, 0x%.4x,\n", l, r-1);
- }
-
- l = r;
- }
-
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-int
-putpair(char *ident, char *prop)
-{
- int l, r, start;
-
- start = 0;
- for(l=0; l+2 < NumRunes; ){
- if(!prop[l]){
- l++;
- continue;
- }
-
- for(r = l + 2; r < NumRunes; r += 2){
- if(!prop[r])
- break;
- prop[r] = 0;
- }
-
- if(r != l + 2){
- if(!start){
- printf("static rune %s[] = {\n", ident);
- start = 1;
- }
- prop[l] = 0;
- printf("\t0x%.4x, 0x%.4x,\n", l, r - 2);
- }
-
- l = r;
- }
-
- if(start)
- printf("};\n\n");
- return start;
-}
-
-int
-putsingle(char *ident, char *prop)
-{
- int i, start;
-
- start = 0;
- for(i = 0; i < NumRunes; i++) {
- if(!prop[i])
- continue;
-
- if(!start){
- printf("static rune %s[] = {\n", ident);
- start = 1;
- }
- prop[i] = 0;
- printf("\t0x%.4x,\n", i);
- }
-
- if(start)
- printf("};\n\n");
-
- return start;
-}
diff --git a/sys/libunicode/vendor/common.h b/sys/libunicode/vendor/common.h
deleted file mode 100644
index 17cb113..0000000
--- a/sys/libunicode/vendor/common.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
-
-enum
-{
- // Fields inside UnicodeData.txt
- Fcode,
- Fname,
- Fcategory,
- Fcombine,
- Fbidir,
- Fdecomp,
- Fdecimal,
- Fdigit,
- Fnumeric,
- Fmirror,
- Foldname,
- Fcomment,
- Fupper,
- Flower,
- Ftitle,
-
- NumFields,
- NumRunes = 1 << 21,
-};
-
-/* input functions */
-enum
-{
- ParseEOF,
- ParseOK,
- ParseSkip,
-};
-
-int parse(io·Stream *io, int nfield, char **field, int len, char *line);
-int codepoint(char *s);
-void codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop);
-
-/* output functions */
-void putsearch(void);
-int putrange(char *ident, char *prop, int force);
-int putpair(char *ident, char *prop);
-int putsingle(char *ident, char *prop);
diff --git a/sys/libunicode/vendor/mkgraphemedata.c b/sys/libunicode/vendor/mkgraphemedata.c
deleted file mode 100644
index f79d851..0000000
--- a/sys/libunicode/vendor/mkgraphemedata.c
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <libunicode.h>
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
- fprintf(stderr, "usage: mkgraphemedata <GraphemeBreakProperty.txt>\n");
- exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
- io·Stream *utf8;
- char line[1024];
-
- ARGBEGIN{
- }ARGEND;
-}
diff --git a/sys/libunicode/vendor/mkrunetype.c b/sys/libunicode/vendor/mkrunetype.c
deleted file mode 100644
index 9f939f4..0000000
--- a/sys/libunicode/vendor/mkrunetype.c
+++ /dev/null
@@ -1,388 +0,0 @@
-#include "common.h"
-
-// -----------------------------------------------------------------------
-// globals
-
-#define OFFSET (1 << 20)
-#define DELTA(mapx, x) ((1 << 20) + (mapx) - (x))
-
-// TODO: use bitarrays. will reduce executable size 8x
-struct Table
-{
- /* properties */
- char isspace[NumRunes];
- char isalpha[NumRunes];
- char ismark[NumRunes];
- char isdigit[NumRunes];
- char isupper[NumRunes];
- char islower[NumRunes];
- char istitle[NumRunes];
- char ispunct[NumRunes];
- char issymbl[NumRunes];
- char iscntrl[NumRunes];
-
- char combine[NumRunes];
-
- /* transformations */
- int toupper[NumRunes];
- int tolower[NumRunes];
- int totitle[NumRunes];
-};
-
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-static
-int
-isrange(char *label, char *prop, int force)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_range", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putrange(ident, prop, force);
-}
-
-static
-int
-ispair(char *label, char *prop)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_pair", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putpair(ident, prop);
-}
-
-static
-int
-issingle(char *label, char *prop)
-{
- char ident[128];
- if(snprintf(ident, arrlen(ident), "is%s_single", label) == arrlen(ident))
- panicf("out of identifier space\n");
-
- return putsingle(ident, prop);
-}
-
-static
-void
-makeis(char *label, char *table, int pairs, int onlyranges)
-{
- int hasr, hasp=0, hass=0;
-
- hasr = isrange(label, table, onlyranges);
- if(!onlyranges && pairs)
- hasp = ispair(label, table);
- if(!onlyranges)
- hass = issingle(label, table);
-
- printf(
- "int\n"
- "utf8·is%s(rune c)\n"
- "{\n"
- " rune *p;\n"
- "\n",
- label);
-
- if(hasr){
- printf(
- " p = rangesearch(c, is%s_range, arrlen(is%s_range)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1])\n"
- " return 1;\n",
- label, label);
- }
-
- if(hasp){
- printf(
- " p = rangesearch(c, is%s_pair, arrlen(is%s_pair)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
- " return 1;\n",
- label, label);
- }
-
- if(hass)
- printf(
- " p = rangesearch(c, is%s_single, arrlen(is%s_single), 1);\n"
- " if(p && c == p[0])\n"
- " return 1;\n",
- label, label);
-
- printf(
- " return 0;\n"
- "}\n"
- "\n");
-}
-
-static
-int
-torange(char *label, int *index, int force)
-{
- int l, r, d, start = 0;
-
- for(l = 0; l < NumRunes; ){
- if(index[l] == l){
- l++;
- continue;
- }
-
- d = DELTA(index[l], l);
- if(d != (rune)d)
- panicf("bad map delta %d", d);
-
- for(r = l+1; r < NumRunes; r++){
- if(DELTA(index[r], r) != d)
- break;
- index[r] = r;
- }
-
- if(force || r != l + 1){
- if(!start){
- printf("static rune to%s_range[] = {\n", label);
- start = 1;
- }
- index[l] = l;
- printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-1, d);
- }
- l = r;
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-int
-topair(char *label, int *index)
-{
- int l, r, d, start = 0;
-
- for(l = 0; l + 2 < NumRunes; ){
- if(index[l] == l){
- l++;
- continue;
- }
-
- d = DELTA(index[l], l);
- if(d != (rune)d)
- panicf("bad delta %d", d);
-
- for(r = l+2; r < NumRunes; r += 2){
- if(DELTA(index[r], r) != d)
- break;
- index[r] = r;
- }
-
- if(r > l+2){
- if(!start){
- printf("static rune to%s_pair[] = {\n", label);
- start = 1;
- }
- index[l] = l;
- printf("\t0x%.4x, 0x%.4x, %d,\n", l, r-2, d);
- }
-
- l = r;
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-int
-tosingle(char *label, int *index)
-{
- int i, d, start = 0;
-
- for(i=0; i < NumRunes; i++) {
- if(index[i] == i)
- continue;
-
- d = DELTA(index[i], i);
- if(d != (rune)d)
- panicf("bad map delta %d", d);
-
- if(!start){
- printf("static rune to%s_single[] = {\n", label);
- start = 1;
- }
- index[i] = i;
- printf("\t0x%.4x, %d,\n", i, d);
- }
- if(start)
- printf("};\n\n");
-
- return start;
-}
-
-static
-void
-mkto(char *label, int *index, int pairs, int onlyrange)
-{
- int hasr, hasp=0, hass=0;
-
- hasr = torange(label, index, !onlyrange);
- if(!onlyrange && pairs)
- hasp = topair(label, index);
- if(!onlyrange)
- hass = tosingle(label, index);
-
- printf(
- "rune\n"
- "utf8·to%s(rune c)\n"
- "{\n"
- " rune *p;\n"
- "\n",
- label);
-
- if(hasr)
- printf(
- " p = rangesearch(c, to%s_range, arrlen(to%s_range)/3, 3);\n"
- " if(p && c >= p[0] && c <= p[1])\n"
- " return c + p[2] - %d;\n",
- label, label, OFFSET);
-
- if(hasp)
- printf(
- " p = rangesearch(c, to%s_pair, arrlen(to%s_pair)/3, 3);\n"
- " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
- " return c + p[2] - %d;\n",
- label, label, OFFSET);
-
- if(hass)
- printf(
- " p = rangesearch(c, to%s_single, arrlen(to%s_single)/2, 2);\n"
- " if(p && c == p[0])\n"
- " return c + p[1] - %d;\n",
- label, label, OFFSET);
-
-
- printf(
- " return c;\n"
- "}\n"
- "\n"
- );
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
- fprintf(stderr, "usage: mkrunetype <UnicodeData.txt>\n");
- exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
- int i, sc, c, ec;
- io·Stream *utf8;
- char *prop, *field[NumFields], line[1024];
-
- ARGBEGIN{
- }ARGEND;
-
- if(argc != 1)
- usage();
-
- if(!(utf8 = io·open(argv[0], "r")))
- panicf("can't open %s\n", argv[0]);
-
- /* by default each character maps to itself */
- for(i = 0; i < NumRunes; i++) {
- table.toupper[i] = i;
- table.tolower[i] = i;
- table.totitle[i] = i;
- }
-
- /* ensure all C local white space characters pass */
- table.isspace['\t'] = 1;
- table.isspace['\n'] = 1;
- table.isspace['\r'] = 1;
- table.isspace['\f'] = 1;
- table.isspace['\v'] = 1;
- table.isspace[0x85] = 1;
-
- ec = -1;
- // NOTE: we don't check for comments here: assume UnicodeData.txt doesn't have any
- while(parse(utf8, arrlen(field), field, arrlen(line), line)){
- /* parse unicode range */
- codepointrange(utf8, field, &sc, &ec);
- prop = field[Fcategory];
-
- for(c = sc; c <= ec; c++){
- /* grab properties */
- switch(prop[0]){
- case 'L':
- table.isalpha[c] = 1;
- switch(prop[1]){
- case 'u': table.isupper[c] = 1; break;
- case 'l': table.islower[c] = 1; break;
- case 't': table.istitle[c] = 1; break;
- case 'm': break; // modifier letters
- case 'o': break; // ideograph letters
- default:
- goto badproperty;
- }
- break;
-
- case 'Z':
- table.isspace[c] = 1;
- break;
-
- case 'M':
- table.ismark[c] = 1;
- break;
-
- case 'N':
- table.isdigit[c] = 1;
- break;
-
- case 'P':
- table.ispunct[c] = 1;
- break;
-
- case 'S':
- table.issymbl[c] = 1;
- break;
-
- case 'C':
- table.iscntrl[c] = 1;
- break;
-
- default: badproperty:
- panicf("unrecognized category '%s'", prop);
- }
- /* grab transformations */
- if(*field[Fupper])
- table.toupper[c] = codepoint(field[Fupper]);
- if(*field[Flower])
- table.tolower[c] = codepoint(field[Flower]);
- if(*field[Ftitle])
- table.totitle[c] = codepoint(field[Ftitle]);
- }
- }
- io·close(utf8);
-
- putsearch();
-
- makeis("space", table.isspace, 0, 1);
- makeis("digit", table.isdigit, 0, 1);
- makeis("alpha", table.isalpha, 0, 0);
- makeis("upper", table.isupper, 1, 0);
- makeis("lower", table.islower, 1, 0);
- makeis("title", table.istitle, 1, 0);
- makeis("punct", table.ispunct, 1, 0);
-
- mkto("upper", table.toupper, 1, 0);
- mkto("lower", table.tolower, 1, 0);
- mkto("title", table.totitle, 1, 0);
-}
diff --git a/sys/libunicode/vendor/mkrunewidth.c b/sys/libunicode/vendor/mkrunewidth.c
deleted file mode 100644
index 14e6973..0000000
--- a/sys/libunicode/vendor/mkrunewidth.c
+++ /dev/null
@@ -1,325 +0,0 @@
-#include "common.h"
-
-/*
- * inspired by design choices in utf8proc/charwidths.jl
- * all widths default to 1 unless they fall within the categories:
- * 1. Mn 2. Mc 3. Me 4. Zl
- * 5. Zp 6. Cc 7. Cf 8. Cs
- * these default to zero width
- */
-enum
-{
- /* width ? */
- WidthNeutral, /* (N) practially treated like narrow but unclear ... */
- WidthAmbiguous, /* (A) sometimes wide and sometimes not... */
- /* width 1 */
- WidthHalf, /* (H) = to narrow (compatability equivalent) */
- WidthNarrow, /* (Na) ASCII width */
- /* width 2 */
- WidthWide, /* (W) 2x width */
- WidthFull, /* (F) = to wide (compatability equivalent) */
-};
-
-struct Table
-{
- char width[3][NumRunes];
-};
-
-static struct Table table;
-
-// -----------------------------------------------------------------------
-// internal functions
-
-static
-void
-parse_category(char *path)
-{
- int sc, c, ec, w;
- io·Stream *utf8;
- char *prop, *field[NumFields], line[1024];
-
- if(!(utf8 = io·open(path, "r")))
- panicf("can't open %s\n", path);
-
- // NOTE: we don't check for comments here
- ec = -1;
- while(parse(utf8, arrlen(field), field, arrlen(line), line)){
- codepointrange(utf8, field, &sc, &ec);
-
- prop = field[Fcategory];
-
- switch(prop[0]){
- case 'M':
- switch(prop[1]){
- case 'n': case 'c': case 'e':
- w = 0;
- break;
- default:
- w = 1;
- break;
- }
- break;
- case 'Z':
- switch(prop[1]){
- case 'l': case 'p':
- w = 0;
- break;
- default:
- w = 1;
- break;
- }
- break;
- case 'C':
- switch(prop[1]){
- case 'c': case 'f': case 's':
- w = 0;
- break;
- default:
- w = 1;
- break;
- }
- default:
- w = 1;
- }
-
- for(c = sc; c <= ec; c++)
- table.width[w][c] = 1;
- }
-
- io·close(utf8);
-}
-
-static
-void
-coderange(char *field, int *l, int *r)
-{
- char *s;
-
- if(!(s = strstr(field, "..")))
- *l=*r=codepoint(field);
- else{
- *s++ = 0, *s++ = 0;
- *l=codepoint(field);
- *r=codepoint(s);
- }
-}
-
-static
-void
-parse_eawidths(char *path)
-{
- int at, w;
- int l, c, r;
- io·Stream *utf8;
- char *field[2], line[1024];
-
- utf8 = io·open(path, "r");
- while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
- if(at == ParseSkip)
- continue;
-
- switch(field[1][0]){
- case 'A': continue;
- case 'N':
- if(field[1][1] != 'a')
- continue;
- /* fallthrough */
- case 'H': w = 1; break;
-
- case 'W': /* fallthrough */
- case 'F': w = 2; break;
-
- default:
- panicf("malformed east asian width class: %s\n", field[1]);
- }
-
- coderange(field[0], &l, &r);
-
- for(c=l; c <= r; c++){
- /* ensure it only exists in one table */
- table.width[w][c] = 1;
- table.width[(w+1)%3][c] = 0;
- table.width[(w+2)%3][c] = 0;
- }
- }
- io·close(utf8);
-}
-
-static
-void
-parse_emoji(char *path)
-{
- int at, w;
- int l, c, r;
- io·Stream *utf8;
- char *s, *field[2], line[1024];
-
- utf8 = io·open(path, "r");
- while((at=parse(utf8, arrlen(field), field, arrlen(line), line)) != ParseEOF){
- if(at == ParseSkip)
- continue;
-
- /* only override emoji presentation */
- if(!strstr(field[1], "Emoji_Presentation"))
- continue;
-
- /* trim trailing space */
- for(s=field[0]; *s; s++){
- if(*s == ' ')
- *s = 0;
- }
-
- coderange(field[0], &l, &r);
-
- for(c=l; c <= r; c++){
- table.width[0][c] = 0;
- table.width[1][c] = 0;
- table.width[2][c] = 1;
- }
- }
-
- io·close(utf8);
-}
-
-/* output functions */
-static
-void
-maketable(char *label, char *table, int pairs, int onlyranges)
-{
- int r, p=0, s=0;
- char ident[3][128];
-
- enum
- {
- Irange,
- Ipair,
- Isingle,
- };
-
- /* ranges */
- if(snprintf(ident[Irange], arrlen(ident[Irange]), "%s_range", label) == arrlen(ident[Irange]))
- panicf("out of identifier space\n");
- r = putrange(ident[Irange], table, onlyranges);
-
- if(!onlyranges && pairs){
- if(snprintf(ident[Ipair], arrlen(ident[Ipair]), "%s_pair", label) == arrlen(ident[Ipair]))
- panicf("out of identifier space\n");
- p = putpair(ident[Ipair], table);
- }
- if(!onlyranges){
- if(snprintf(ident[Isingle], arrlen(ident[Isingle]), "%s_single", label) == arrlen(ident[Isingle]))
- panicf("out of identifier space\n");
-
- s = putsingle(ident[Isingle], table);
- }
-
- printf(
- "static int\n"
- "is%s(rune c)\n"
- "{\n"
- " rune *p;\n"
- "\n",
- label);
-
- if(r){
- printf(
- " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1])\n"
- " return 1;\n",
- ident[Irange], ident[Irange]);
- }
-
- if(p){
- printf(
- " p = rangesearch(c, %s, arrlen(%s)/2, 2);\n"
- " if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
- " return 1;\n",
- ident[Ipair], ident[Ipair]);
- }
-
- if(s)
- printf(
- " p = rangesearch(c, %s, arrlen(%s), 1);\n"
- " if(p && c == p[0])\n"
- " return 1;\n",
- ident[Isingle], ident[Isingle]);
-
- printf(
- " return 0;\n"
- "}\n"
- "\n");
-}
-
-// -----------------------------------------------------------------------
-// main point of entry
-
-static
-void
-usage(void)
-{
- fprintf(stderr, "usage: mkrunewidth <UnicodeData.txt> <EastAsianWidth.txt> <EmojiData.txt>\n");
- exit(1);
-}
-
-#define SETW0(c) \
- table.width[0][(c)] = 1, \
- table.width[1][(c)] = 0, \
- table.width[2][(c)] = 0;
-
-#define SETW1(c) \
- table.width[0][(c)] = 0, \
- table.width[1][(c)] = 1, \
- table.width[2][(c)] = 0;
-
-#define SETW2(c) \
- table.width[0][(c)] = 0, \
- table.width[1][(c)] = 0, \
- table.width[2][(c)] = 1;
-
-
-int
-main(int argc, char *argv[])
-{
- int c;
-
- ARGBEGIN{
- }ARGEND;
-
- if(argc != 3)
- usage();
-
- parse_category(*argv++);
- parse_eawidths(*argv++);
- parse_emoji(*argv);
-
- /* overrides */
- SETW0(0x2028);
- SETW0(0x2029);
-
- SETW1(0x00AD);
-
- /* simple checking */
- for(c=0; c<NumRunes; c++){
- if(table.width[0][c] + table.width[1][c] + table.width[2][c] > 1)
- panicf("improper table state");
- }
-
- putsearch();
-
- maketable("width0", table.width[0], 1, 0);
- maketable("width1", table.width[1], 1, 0);
- maketable("width2", table.width[2], 1, 0);
-
- puts(
- "\n"
- "int\n"
- "utf8·runewidth(rune c)\n"
- "{\n"
- " if(iswidth1(c))\n"
- " return 1;\n"
- " if(iswidth2(c))\n"
- " return 2;\n"
- " return 0;\n"
- "}"
- );
-}
diff --git a/sys/libunicode/canfit.c b/sys/libutf/canfit.c
index 4579ab3..4579ab3 100644
--- a/sys/libunicode/canfit.c
+++ b/sys/libutf/canfit.c
diff --git a/sys/libunicode/decode.c b/sys/libutf/decode.c
index 01797f1..01797f1 100644
--- a/sys/libunicode/decode.c
+++ b/sys/libutf/decode.c
diff --git a/sys/libunicode/decodeprev.c b/sys/libutf/decodeprev.c
index 27dced6..27dced6 100644
--- a/sys/libunicode/decodeprev.c
+++ b/sys/libutf/decodeprev.c
diff --git a/sys/libunicode/encode.c b/sys/libutf/encode.c
index fa7c93e..fa7c93e 100644
--- a/sys/libunicode/encode.c
+++ b/sys/libutf/encode.c
diff --git a/sys/libunicode/find.c b/sys/libutf/find.c
index d75feb8..d75feb8 100644
--- a/sys/libunicode/find.c
+++ b/sys/libutf/find.c
diff --git a/sys/libunicode/findlast.c b/sys/libutf/findlast.c
index ab25ab2..ab25ab2 100644
--- a/sys/libunicode/findlast.c
+++ b/sys/libutf/findlast.c
diff --git a/sys/libunicode/internal.h b/sys/libutf/internal.h
index df69310..9719977 100644
--- a/sys/libunicode/internal.h
+++ b/sys/libutf/internal.h
@@ -2,7 +2,7 @@
#include <u.h>
#include <base.h>
-#include <libunicode.h>
+#include <libutf.h>
/*
* NOTE: we use the preprocessor to ensure we have unsigned constants.
diff --git a/sys/libunicode/len.c b/sys/libutf/len.c
index 8fbd679..8fbd679 100644
--- a/sys/libunicode/len.c
+++ b/sys/libutf/len.c
diff --git a/sys/libunicode/rules.mk b/sys/libutf/rules.mk
index 1d714fe..53ff8cf 100644
--- a/sys/libunicode/rules.mk
+++ b/sys/libutf/rules.mk
@@ -14,7 +14,7 @@ SRCS_$(d) := \
$(d)/runetype-$(UNICODE).c \
$(d)/runewidth-$(UNICODE).c
-LIBS_$(d) := $(d)/libunicode.a
+LIBS_$(d) := $(d)/libutf.a
include share/paths.mk
diff --git a/sys/libunicode/runelen.c b/sys/libutf/runelen.c
index dac7f15..dac7f15 100644
--- a/sys/libunicode/runelen.c
+++ b/sys/libutf/runelen.c
diff --git a/sys/libutf/runetype-14.0.0.c b/sys/libutf/runetype-14.0.0.c
new file mode 100644
index 0000000..6f4469d
--- /dev/null
+++ b/sys/libutf/runetype-14.0.0.c
@@ -0,0 +1,111 @@
+#include <u.h>
+#include <libutf.h>
+
+static
+rune*
+rangesearch(rune c, rune *t, int n, int ne)
+{
+ rune *p;
+ int m;
+ while(n > 1) {
+ m = n >> 1;
+ p = t + m*ne;
+ if(c >= p[0]){
+ t = p;
+ n = n-m;
+ }else
+ n = m;
+ }
+ if(n && c >= t[0])
+ return t;
+ return 0;
+}
+
+static rune isspace_range[] = {
+ 0x0009, 0x000d,
+ 0x0085, 0x0085,
+};
+
+int
+utf8·isspace(rune c)
+{
+ rune *p;
+
+ p = rangesearch(c, isspace_range, arrlen(isspace_range)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+int
+utf8·isdigit(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+int
+utf8·isalpha(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+int
+utf8·isupper(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+int
+utf8·islower(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+int
+utf8·istitle(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+int
+utf8·ispunct(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+rune
+utf8·toupper(rune c)
+{
+ rune *p;
+
+ return c;
+}
+
+rune
+utf8·tolower(rune c)
+{
+ rune *p;
+
+ return c;
+}
+
+rune
+utf8·totitle(rune c)
+{
+ rune *p;
+
+ return c;
+}
+
diff --git a/sys/libutf/runewidth-14.0.0.c b/sys/libutf/runewidth-14.0.0.c
new file mode 100644
index 0000000..113c35e
--- /dev/null
+++ b/sys/libutf/runewidth-14.0.0.c
@@ -0,0 +1,71 @@
+#include <u.h>
+#include <libutf.h>
+
+static
+rune*
+rangesearch(rune c, rune *t, int n, int ne)
+{
+ rune *p;
+ int m;
+ while(n > 1) {
+ m = n >> 1;
+ p = t + m*ne;
+ if(c >= p[0]){
+ t = p;
+ n = n-m;
+ }else
+ n = m;
+ }
+ if(n && c >= t[0])
+ return t;
+ return 0;
+}
+
+static rune width0_range[] = {
+ 0x2028, 0x2029,
+};
+
+static int
+iswidth0(rune c)
+{
+ rune *p;
+
+ p = rangesearch(c, width0_range, arrlen(width0_range)/2, 2);
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ return 0;
+}
+
+static rune width1_single[] = {
+ 0x00ad,
+};
+
+static int
+iswidth1(rune c)
+{
+ rune *p;
+
+ p = rangesearch(c, width1_single, arrlen(width1_single), 1);
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+
+static int
+iswidth2(rune c)
+{
+ rune *p;
+
+ return 0;
+}
+
+
+int
+utf8·runewidth(rune c)
+{
+ if(iswidth1(c))
+ return 1;
+ if(iswidth2(c))
+ return 2;
+ return 0;
+}
diff --git a/sys/rules.mk b/sys/rules.mk
index c8c3dd3..cefa4a9 100644
--- a/sys/rules.mk
+++ b/sys/rules.mk
@@ -8,7 +8,7 @@ include $(DIR)/rules.mk
DIR := $(d)/base
include $(DIR)/rules.mk
-DIR := $(d)/libunicode
+DIR := $(d)/libutf
include $(DIR)/rules.mk
DIR := $(d)/libmath