aboutsummaryrefslogtreecommitdiff
path: root/sys/base/genutf8.py
diff options
context:
space:
mode:
Diffstat (limited to 'sys/base/genutf8.py')
-rwxr-xr-xsys/base/genutf8.py140
1 files changed, 0 insertions, 140 deletions
diff --git a/sys/base/genutf8.py b/sys/base/genutf8.py
deleted file mode 100755
index 9ce9975..0000000
--- a/sys/base/genutf8.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#!/bin/python
-
# C source written verbatim at the top of the generated file, before any
# table. It defines rbsearch(), the binary-search helper that every generated
# predicate calls: given a table t of n entries (nelem Runes per entry) sorted
# by each entry's first Rune, it returns a pointer to the last entry whose
# first Rune is <= c, or 0 when there is no such entry.
preamble = """
Rune*
rbsearch(Rune c, Rune *t, int n, int nelem)
{
 Rune *p;
 int m;

 while (n > 1) {
 m = n >> 1;
 p = t + m*nelem;
 if (c >= p[0]) {
 t = p;
 n -= m;
 } else n = m;
 }

 if (n && c >= t[0]) return t;
 else return 0;
}
"""
-
def findrange(s):
    """Split a collection of code points into contiguous ranges and singletons.

    Args:
        s: iterable of integer code points; duplicates are ignored.

    Returns:
        A pair ``(rs, ss)``. ``rs`` is a list of inclusive ``(first, last)``
        tuples, one per run of two or more consecutive values. ``ss`` is a
        list of the values that have no adjacent neighbour. Both lists are in
        ascending order, which is what the generated binary search requires.
    """
    rs = []
    ss = []

    def flush(first, last):
        # A run of length one goes to the singleton table, anything longer
        # to the range table.
        if first == last:
            ss.append(first)
        else:
            rs.append((first, last))

    b, c = None, None
    for elt in sorted(set(s)):
        if b is None:
            b, c = elt, elt
        elif elt == c + 1:
            c = elt
        else:
            flush(b, c)
            b, c = elt, elt

    # The run still open when the input is exhausted must be emitted too;
    # without this the largest range/singleton of every set is lost.
    if b is not None:
        flush(b, c)

    return rs, ss
-
def puttab(s, fd, name):
    """Write the C lookup tables and membership predicate for one rune set.

    The set is split with findrange() into inclusive ranges and isolated
    values. Up to two static ``Rune`` arrays are emitted, ``<name>_rtab``
    (pairs of range bounds) and ``<name>_stab`` (single values), with
    ``<name>`` lower-cased, followed by a C function ``utf8\u00b7<name>(Rune c)``
    that returns 1 when ``c`` is found in either table and 0 otherwise.
    A table that would be empty is not emitted, and neither is its lookup.

    Args:
        s: collection of integer code points belonging to the class.
        fd: writable text file object receiving the C source.
        name: suffix of the generated predicate, e.g. ``"IsSpace"``.
    """
    lname = name.lower()
    rs, ss = findrange(s)

    rtabn = "%s_rtab" % lname
    stabn = "%s_stab" % lname

    # An empty brace initializer is not valid ISO C before C23, so the range
    # table is only emitted when there is something to put in it.
    if rs:
        fd.write("static Rune %s[] = {" % rtabn)
        # rbsearch() needs the entries in ascending order.
        for first, last in sorted(rs):
            fd.write("0x%04x, 0x%04x,\n" % (first, last))
        fd.write("};\n\n")

    if ss:
        fd.write("static Rune %s[] = {" % stabn)
        for i, elt in enumerate(sorted(ss)):
            # Two values per output line.
            if i % 2 == 0:
                fd.write("\n")
            fd.write("0x%04x, " % elt)
        fd.write("\n};\n\n")

    fd.write("int\n")
    # \u00b7 is MIDDLE DOT, the namespace separator used in the C identifier.
    fd.write("utf8\u00b7%s(Rune c)\n" % name)
    fd.write("{\n")
    fd.write("\tRune *p;\n\n")
    if rs:
        # Range table: entries are 2 Runes wide, so there are arrlen/2 of them.
        fd.write("\tp = rbsearch(c, %s, arrlen(%s)/2, 2);\n" % (rtabn, rtabn))
        fd.write("\tif (p && c >= p[0] && c <= p[1])\n")
        fd.write("\t\t\treturn 1;\n")
    if ss:
        fd.write("\tp = rbsearch(c, %s, arrlen(%s), 1);\n" % (stabn, stabn))
        fd.write("\tif (p && c == p[0])\n")
        fd.write("\t\treturn 1;\n")
    fd.write("\n\treturn 0;\n")
    fd.write("}\n\n")
-
# Largest valid Unicode code point.
RUNEMAX = 0x10FFFF


def readrunes(lines):
    """Yield ``(rune, fields)`` for every code point described by *lines*.

    Args:
        lines: iterable of UnicodeData.txt records (semicolon-separated
            text lines, with or without a trailing newline). Blank lines
            are skipped.

    Yields:
        ``(rune, fields)`` where ``rune`` is the integer code point and
        ``fields`` is the list of record fields. A ``<..., First>`` /
        ``<..., Last>`` pair of records stands for a whole block of code
        points; every code point of the block is yielded, each with the
        fields of the ``Last`` record.

    Raises:
        ValueError: if a code point exceeds RUNEMAX or a First/Last pair
            is malformed.
    """
    first = None
    for line in lines:
        line = line.rstrip("\r\n")
        if not line:
            continue

        fields = line.split(";")
        rune = int(fields[0], 16)
        if rune > RUNEMAX:
            raise ValueError("code point out of range: %s" % fields[0])

        label = fields[1]
        if label.endswith(", First>"):
            if first is not None:
                raise ValueError("nested range start at %s" % fields[0])
            first = rune
            continue

        if first is not None:
            if not label.endswith(", Last>"):
                raise ValueError("unterminated range before %s" % fields[0])
            for r in range(first, rune + 1):
                yield r, fields
            first = None
            continue

        yield rune, fields

    if first is not None:
        raise ValueError("unterminated range at end of input")


if __name__ == "__main__":
    alphas = set()
    lowers = set()
    uppers = set()
    titles = set()
    digits = set()
    spaces = set()

    tolower = {}
    toupper = {}
    totitle = {}

    with open("data/UnicodeData.txt", encoding="utf-8") as data:
        for rune, fields in readrunes(data):
            # Field 2 is the two-letter general category (Lu, Zs, Nd, ...).
            kind = fields[2]

            if kind[0] == 'Z':
                spaces.add(rune)
            elif kind[0] == 'L':
                alphas.add(rune)

                if kind[1] == "l":
                    lowers.add(rune)
                elif kind[1] == "u":
                    uppers.add(rune)
                elif kind[1] == "t":
                    titles.add(rune)

                # Fields 12/13/14 hold the simple upper/lower/title case
                # mappings.
                # NOTE(review): some non-letter code points (e.g. category
                # Nl or So) carry case mappings too; they are not collected
                # here.
                if fields[12] != "":
                    toupper[rune] = int(fields[12], 16)

                if fields[13] != "":
                    tolower[rune] = int(fields[13], 16)

                if fields[14] != "":
                    totitle[rune] = int(fields[14], 16)

            elif kind == 'Nd':
                digits.add(rune)

    # Some annoying exceptions...
    # UnicodeData.txt lists these as control/format characters rather than
    # separators, so they have to be added to the space set by hand.
    spaces.add(ord('\t'))
    spaces.add(ord('\n'))
    spaces.add(ord('\f'))
    spaces.add(ord('\v'))
    spaces.add(ord('\r'))
    spaces.add(0x85)
    spaces.add(0xfeff)

    # TODO: Add toLower/toUpper/toTitle...
    # The generated identifiers contain a non-ASCII character, so the output
    # encoding is pinned instead of depending on the locale.
    with open("utf8_gen.c", "w", encoding="utf-8") as out:
        out.write(preamble)
        puttab(spaces, out, "IsSpace")
        puttab(lowers, out, "IsLower")
        puttab(uppers, out, "IsUpper")
        puttab(titles, out, "IsTitle")
        puttab(alphas, out, "IsLetter")
        puttab(digits, out, "IsDigit")