aboutsummaryrefslogtreecommitdiff
path: root/src/libutf/vendor/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutf/vendor/common.c')
-rw-r--r--src/libutf/vendor/common.c220
1 files changed, 220 insertions, 0 deletions
diff --git a/src/libutf/vendor/common.c b/src/libutf/vendor/common.c
new file mode 100644
index 0000000..5a03a50
--- /dev/null
+++ b/src/libutf/vendor/common.c
@@ -0,0 +1,220 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// input functions
+
+int
+parse(io·Stream *io, int nfield, char **field, int len, char *line)
+{
+ int n;
+ if((n=io·readln(io, len, line)) <= 0)
+ return ParseEOF;
+
+ if(n == len)
+ panicf("line too long");
+
+ if(line[n-1] != '\n')
+ panicf("invalid line: expected '\n', found '%c'", line[n]);
+
+ line[n-1] = 0;
+
+ if(line[0] == '#' || line[0] == 0)
+ return ParseSkip;
+
+ /* tokenize line into fields */
+ n = 0;
+ field[n] = line;
+ while(*line){
+ if(*line == ';'){
+ *line = 0;
+ field[++n] = line+1;
+ }
+ line++;
+ }
+
+ if(n != nfield-1)
+ panicf("expected %d number of fields, got %d: %s", nfield, n, line);
+
+ return ParseOK;
+}
+
+int
+codepoint(char *s)
+{
+ int c, b;
+
+ c = 0;
+ while((b=*s++)){
+ c <<= 4;
+ if(b >= '0' && b <= '9')
+ c += b - '0';
+ else if(b >= 'A' && b <= 'F')
+ c += b - 'A' + 10;
+ else
+ panicf("bad codepoint char '%c'", b);
+ }
+
+ return c;
+}
+
+void
+codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
+{
+ int e, c;
+ char *other[NumFields], line[1024];
+
+ // XXX: the stop variable passes in the previous stopping character
+ e = *stop;
+ c = codepoint(field[Fcode]);
+
+ if(c >= NumRunes)
+ panicf("unexpected large codepoint %x", c);
+ if(c <= e)
+ panicf("bad code sequence: %x then %x", e, c);
+ e = c;
+
+ if(strstr(field[Fname], ", First>") != nil){
+ if(!parse(utf8, arrlen(other), other, arrlen(line), line))
+ panicf("range start at end of file");
+ if(strstr(other[Fname], ", Last>") == nil)
+ panicf("range start not followed by range end");
+
+ e = codepoint(other[Fcode]);
+
+ if(e <= c)
+ panicf("bad code sequence: %x then %x", c, e);
+ if(strcmp(field[Fcategory], other[Fcategory]) != 0)
+ panicf("range with mismatched category");
+ }
+
+ *start = c;
+ *stop = e;
+}
+
+// -----------------------------------------------------------------------
+// output functions
+
+void
+putsearch(void)
+{
+ puts(
+ "#include <u.h>\n"
+ "#include <libutf.h>\n"
+ "\n"
+ "static\n"
+ "rune*\n"
+ "rangesearch(rune c, rune *t, int n, int ne)\n"
+ "{\n"
+ " rune *p;\n"
+ " int m;\n"
+ " while(n > 1) {\n"
+ " m = n >> 1;\n"
+ " p = t + m*ne;\n"
+ " if(c >= p[0]){\n"
+ " t = p;\n"
+ " n = n-m;\n"
+ " }else\n"
+ " n = m;\n"
+ " }\n"
+ " if(n && c >= t[0])\n"
+ " return t;\n"
+ " return 0;\n"
+ "}\n"
+ );
+
+}
+
+int
+putrange(char *ident, char *prop, int force)
+{
+ int l, r, start;
+
+ start = 0;
+ for(l = 0; l < NumRunes;) {
+ if(!prop[l]){
+ l++;
+ continue;
+ }
+
+ for(r = l+1; r < NumRunes; r++){
+ if(!prop[r])
+ break;
+ prop[r] = 0;
+ }
+
+ if(force || r > l + 1){
+ if(!start){
+ printf("static rune %s[] = {\n", ident);
+ start = 1;
+ }
+ prop[l] = 0;
+ printf("\t0x%.4x, 0x%.4x,\n", l, r-1);
+ }
+
+ l = r;
+ }
+
+ if(start)
+ printf("};\n\n");
+
+ return start;
+}
+
+int
+putpair(char *ident, char *prop)
+{
+ int l, r, start;
+
+ start = 0;
+ for(l=0; l+2 < NumRunes; ){
+ if(!prop[l]){
+ l++;
+ continue;
+ }
+
+ for(r = l + 2; r < NumRunes; r += 2){
+ if(!prop[r])
+ break;
+ prop[r] = 0;
+ }
+
+ if(r != l + 2){
+ if(!start){
+ printf("static rune %s[] = {\n", ident);
+ start = 1;
+ }
+ prop[l] = 0;
+ printf("\t0x%.4x, 0x%.4x,\n", l, r - 2);
+ }
+
+ l = r;
+ }
+
+ if(start)
+ printf("};\n\n");
+ return start;
+}
+
+int
+putsingle(char *ident, char *prop)
+{
+ int i, start;
+
+ start = 0;
+ for(i = 0; i < NumRunes; i++) {
+ if(!prop[i])
+ continue;
+
+ if(!start){
+ printf("static rune %s[] = {\n", ident);
+ start = 1;
+ }
+ prop[i] = 0;
+ printf("\t0x%.4x,\n", i);
+ }
+
+ if(start)
+ printf("};\n\n");
+
+ return start;
+}