aboutsummaryrefslogtreecommitdiff
path: root/src/base/utf/vendor/common.c
diff options
context:
space:
mode:
author Nicholas <nbnoll@eml.cc> 2021-11-20 10:53:19 -0800
committer Nicholas <nbnoll@eml.cc> 2021-11-20 10:53:19 -0800
commit a9bfe650038afea8b751175cac16f6027345e45f (patch)
tree 9a7f9feb76a64bb3efe573036d80b7bdbf8a59a5 /src/base/utf/vendor/common.c
parent 1c8d4e69205fd875f6bec3fa3bd929c2e7f52f62 (diff)
Chore: reorganize libutf and libfmt into base
I found the split to be arbitrary. Better to include the functionality in the standard library. I also split the headers to allow for more granular inclusion (but the library is still monolithic). The only ugliness is the circular dependency introduced with libutf's generated functions. We put explicit prereqs with the necessary object files instead.
Diffstat (limited to 'src/base/utf/vendor/common.c')
-rw-r--r-- src/base/utf/vendor/common.c 220
1 files changed, 220 insertions, 0 deletions
diff --git a/src/base/utf/vendor/common.c b/src/base/utf/vendor/common.c
new file mode 100644
index 0000000..fcf1177
--- /dev/null
+++ b/src/base/utf/vendor/common.c
@@ -0,0 +1,220 @@
+#include "common.h"
+
+// -----------------------------------------------------------------------
+// input functions
+
+/*
+ * parse reads one line from io into line (capacity len) and splits it in
+ * place on ';' into exactly nfield fields. each separator is overwritten
+ * with NUL and field[i] is pointed at the start of the i'th field.
+ *
+ * returns:
+ *	ParseEOF  if io·readln returned nothing (<= 0)
+ *	ParseSkip if the line is empty or begins with '#'; field[] is untouched
+ *	ParseOK   once field[0..nfield-1] have been filled in
+ *
+ * a line that fills the whole buffer, a line without a trailing newline, or
+ * a line with the wrong number of fields is reported through panicf.
+ */
+int
+parse(io·Stream *io, int nfield, char **field, int len, char *line)
+{
+	int n, nsep;
+	char *p;
+
+	if((n=io·readln(io, len, line)) <= 0)
+		return ParseEOF;
+
+	if(n == len)
+		panicf("line too long");
+
+	/* n > 0 here; line[n-1] is treated as the last byte that was read */
+	if(line[n-1] != '\n')
+		panicf("invalid line: expected '\\n', found '%c'", line[n-1]);
+
+	line[n-1] = 0;
+
+	if(line[0] == '#' || line[0] == 0)
+		return ParseSkip;
+
+	/*
+	 * tokenize line into fields.
+	 * every separator is counted, but a pointer is only stored while it
+	 * still fits in field[0..nfield-1], so a line with too many fields
+	 * cannot write past the end of the caller's array.
+	 */
+	nsep = 0;
+	field[0] = line;
+	for(p = line; *p; p++){
+		if(*p != ';')
+			continue;
+		*p = 0;
+		nsep++;
+		if(nsep < nfield)
+			field[nsep] = p+1;
+	}
+
+	/* the line has been cut at the separators, so only its first field can be shown */
+	if(nsep != nfield-1)
+		panicf("expected %d number of fields, got %d: %s", nfield, nsep+1, field[0]);
+
+	return ParseOK;
+}
+
+/*
+ * codepoint converts the NUL-terminated string s, made of the hexadecimal
+ * digits 0-9 and A-F (uppercase only), into its integer value.
+ * any other character is reported through panicf.
+ * an empty string yields 0. no overflow check is made on the result.
+ */
+int
+codepoint(char *s)
+{
+	int value, ch, digit;
+
+	value = 0;
+	for(; (ch = *s) != 0; s++){
+		if('0' <= ch && ch <= '9')
+			digit = ch - '0';
+		else if('A' <= ch && ch <= 'F')
+			digit = 10 + (ch - 'A');
+		else{
+			panicf("bad codepoint char '%c'", ch);
+			digit = 0;
+		}
+
+		/* each digit contributes four more low-order bits */
+		value = (value << 4) + digit;
+	}
+
+	return value;
+}
+
+/*
+ * codepointrange works out the inclusive range of codepoints described by
+ * the already-parsed record in field.
+ *
+ * on entry *stop holds the last codepoint of the previous record; the new
+ * record must start strictly after it. on return *start and *stop hold the
+ * first and last codepoint of this record.
+ *
+ * a record whose name contains ", First>" opens a range: the next line is
+ * read from utf8 and must be the matching ", Last>" record with the same
+ * category. every other record describes the single codepoint in Fcode.
+ *
+ * any violation is reported through panicf.
+ */
+void
+codepointrange(io·Stream *utf8, char *field[NumFields], int *start, int *stop)
+{
+	int prev, first, last, rc;
+	char *other[NumFields], line[1024];
+
+	// XXX: the stop variable passes in the previous stopping character
+	prev  = *stop;
+	first = codepoint(field[Fcode]);
+
+	if(first >= NumRunes)
+		panicf("unexpected large codepoint %x", first);
+	if(first <= prev)
+		panicf("bad code sequence: %x then %x", prev, first);
+	last = first;
+
+	if(strstr(field[Fname], ", First>") != nil){
+		/*
+		 * other[] is only filled in when parse returns ParseOK, so a
+		 * skipped (blank or comment) line must not be read as a record.
+		 */
+		rc = parse(utf8, arrlen(other), other, arrlen(line), line);
+		if(rc == ParseEOF)
+			panicf("range start at end of file");
+		if(rc != ParseOK)
+			panicf("range start not followed by range end");
+		if(strstr(other[Fname], ", Last>") == nil)
+			panicf("range start not followed by range end");
+
+		last = codepoint(other[Fcode]);
+
+		if(last <= first)
+			panicf("bad code sequence: %x then %x", first, last);
+		/* the end of the range is held to the same limit as its start */
+		if(last >= NumRunes)
+			panicf("unexpected large codepoint %x", last);
+		if(strcmp(field[Fcategory], other[Fcategory]) != 0)
+			panicf("range with mismatched category");
+	}
+
+	*start = first;
+	*stop  = last;
+}
+
+// -----------------------------------------------------------------------
+// output functions
+
+/*
+ * putsearch writes the prologue of the generated source to stdout: the two
+ * #include lines followed by the static helper rangesearch().
+ *
+ * the emitted rangesearch(c, t, n, ne) bisects a table t of n entries, each
+ * ne runes wide, comparing c against the first rune of an entry. it returns
+ * the entry it converges on if that entry's first rune is <= c, otherwise 0.
+ * (presumably the table is sorted in ascending order; nothing here checks it.)
+ *
+ * puts appends one more newline after the text.
+ */
+void
+putsearch(void)
+{
+	static const char prologue[] =
+		"#include <u.h>\n"
+		"#include <base/utf.h>\n"
+		"\n"
+		"static\n"
+		"rune*\n"
+		"rangesearch(rune c, rune *t, int n, int ne)\n"
+		"{\n"
+		" rune *p;\n"
+		" int m;\n"
+		" while(n > 1) {\n"
+		" m = n >> 1;\n"
+		" p = t + m*ne;\n"
+		" if(c >= p[0]){\n"
+		" t = p;\n"
+		" n = n-m;\n"
+		" }else\n"
+		" n = m;\n"
+		" }\n"
+		" if(n && c >= t[0])\n"
+		" return t;\n"
+		" return 0;\n"
+		"}\n";
+
+	puts(prologue);
+}
+
+/*
+ * putrange prints, as the initializer of a static rune array named ident,
+ * one "first, last" pair for every run of consecutive set entries in
+ * prop[0..NumRunes-1].
+ *
+ * runs of length one are printed only when force is nonzero; otherwise they
+ * are left set in prop. every entry that is printed is cleared from prop.
+ *
+ * returns 1 if the array was emitted, 0 if nothing was printed.
+ */
+int
+putrange(char *ident, char *prop, int force)
+{
+	int lo, hi, emitted;
+
+	emitted = 0;
+	lo = 0;
+	while(lo < NumRunes){
+		if(!prop[lo]){
+			lo++;
+			continue;
+		}
+
+		/* grow the run to the right, clearing everything after its first entry */
+		hi = lo + 1;
+		while(hi < NumRunes && prop[hi])
+			prop[hi++] = 0;
+
+		if(force || hi - lo > 1){
+			if(!emitted){
+				printf("static rune %s[] = {\n", ident);
+				emitted = 1;
+			}
+			prop[lo] = 0;
+			printf("\t0x%.4x, 0x%.4x,\n", lo, hi-1);
+		}
+
+		/* hi is either unset or past the end: resume the scan there */
+		lo = hi;
+	}
+
+	if(emitted)
+		printf("};\n\n");
+
+	return emitted;
+}
+
+/*
+ * putpair prints, as the initializer of a static rune array named ident,
+ * one "first, last" pair for every sequence of set entries in prop that are
+ * spaced two apart (l, l+2, l+4, ...) and at least two entries long.
+ *
+ * entries belonging to a printed sequence are cleared from prop; a set entry
+ * with no set neighbour at +2 is left alone.
+ *
+ * NOTE(review): after a sequence the scan resumes at the position where the
+ * stride-2 walk stopped, so odd offsets inside the sequence are never
+ * examined. presumably callers clear adjacent runs first -- confirm.
+ *
+ * returns 1 if the array was emitted, 0 if nothing was printed.
+ */
+int
+putpair(char *ident, char *prop)
+{
+	int lo, hi, emitted;
+
+	emitted = 0;
+	lo = 0;
+	while(lo + 2 < NumRunes){
+		if(prop[lo]){
+			/* walk every second entry, clearing all but the first */
+			hi = lo + 2;
+			while(hi < NumRunes && prop[hi]){
+				prop[hi] = 0;
+				hi += 2;
+			}
+
+			if(hi != lo + 2){
+				if(!emitted){
+					printf("static rune %s[] = {\n", ident);
+					emitted = 1;
+				}
+				prop[lo] = 0;
+				printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 2);
+			}
+
+			lo = hi;
+		}else
+			lo++;
+	}
+
+	if(emitted)
+		printf("};\n\n");
+	return emitted;
+}
+
+/*
+ * putsingle prints, as the initializer of a static rune array named ident,
+ * the index of every entry still set in prop[0..NumRunes-1], one per line,
+ * and clears each entry as it is printed.
+ *
+ * returns 1 if the array was emitted, 0 if prop had no set entries.
+ */
+int
+putsingle(char *ident, char *prop)
+{
+	int r, emitted;
+
+	emitted = 0;
+	for(r = 0; r < NumRunes; r++){
+		if(prop[r]){
+			/* the array header is written lazily, on the first hit */
+			if(!emitted){
+				printf("static rune %s[] = {\n", ident);
+				emitted = 1;
+			}
+			prop[r] = 0;
+			printf("\t0x%.4x,\n", r);
+		}
+	}
+
+	if(emitted)
+		printf("};\n\n");
+
+	return emitted;
+}