From c8e1e71eb526475dd431443345262c2e5a627831 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sat, 23 Oct 2021 11:17:25 -0700 Subject: chore(rename): libn -> base --- sys/base/.generated/utf8.c | 391 +++++++++++++++++++++++++++++++ sys/base/arg.c | 1 + sys/base/bufio.c | 190 +++++++++++++++ sys/base/coro.c | 66 ++++++ sys/base/coro_unix_x64.s | 113 +++++++++ sys/base/error.c | 57 +++++ sys/base/flate.c | 243 ++++++++++++++++++++ sys/base/fs.c | 192 ++++++++++++++++ sys/base/genutf8.py | 140 ++++++++++++ sys/base/gz.c | 108 +++++++++ sys/base/io.c | 176 ++++++++++++++ sys/base/memory.c | 216 +++++++++++++++++ sys/base/mmap.c | 43 ++++ sys/base/os.c | 30 +++ sys/base/random.c | 303 ++++++++++++++++++++++++ sys/base/rules.mk | 38 +++ sys/base/sort.c | 155 +++++++++++++ sys/base/string.c | 560 +++++++++++++++++++++++++++++++++++++++++++++ sys/base/test.c | 170 ++++++++++++++ 19 files changed, 3192 insertions(+) create mode 100644 sys/base/.generated/utf8.c create mode 100644 sys/base/arg.c create mode 100644 sys/base/bufio.c create mode 100644 sys/base/coro.c create mode 100644 sys/base/coro_unix_x64.s create mode 100644 sys/base/error.c create mode 100644 sys/base/flate.c create mode 100644 sys/base/fs.c create mode 100755 sys/base/genutf8.py create mode 100644 sys/base/gz.c create mode 100644 sys/base/io.c create mode 100644 sys/base/memory.c create mode 100644 sys/base/mmap.c create mode 100644 sys/base/os.c create mode 100644 sys/base/random.c create mode 100644 sys/base/rules.mk create mode 100644 sys/base/sort.c create mode 100644 sys/base/string.c create mode 100644 sys/base/test.c (limited to 'sys/base') diff --git a/sys/base/.generated/utf8.c b/sys/base/.generated/utf8.c new file mode 100644 index 0000000..e101e1a --- /dev/null +++ b/sys/base/.generated/utf8.c @@ -0,0 +1,391 @@ +rune* +rbsearch(rune c, rune* t, int n, int nelem) +{ + rune* p; + int m; + + while (n > 1) { + m = n >> 1; + p = t + m * nelem; + if (c >= p[0]) { + t = p; + n -= m; + } else + n = m; + } + + if (n && c >= t[0]) + return t; + else + return 0; +} +static rune isspace_rtab[] = { + 0x0009, 0x000c, 0x2000, 0x200a, 0x2028, 0x2029, +}; + +static rune isspace_stab[] = { + 0x0020, 0x0085, 0x00a0, 0x1680, 0x202f, 0x205f, 0x3000, +}; + +int +utf8·isspace(rune c) +{ + rune* p; + + p = rbsearch(c, isspace_rtab, arrlen(isspace_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + p = rbsearch(c, isspace_stab, arrlen(isspace_stab), 1); + if (p && c == p[0]) return 1; + + return 0; +} + +static rune islower_rtab[] = { + 0x0061, 0x007a, 0x00df, 0x00f6, 0x00f8, 0x00ff, 0x0137, 0x0138, 0x0148, 0x0149, + 0x017e, 0x0180, 0x018c, 0x018d, 0x0199, 0x019b, 0x01aa, 0x01ab, 0x01b9, 0x01ba, + 0x01bd, 0x01bf, 0x01dc, 0x01dd, 0x01ef, 0x01f0, 0x0233, 0x0239, 0x023f, 0x0240, + 0x024f, 0x0293, 0x0295, 0x02af, 0x037b, 0x037d, 0x03ac, 0x03ce, 0x03d0, 0x03d1, + 0x03d5, 0x03d7, 0x03ef, 0x03f3, 0x03fb, 0x03fc, 0x0430, 0x045f, 0x04ce, 0x04cf, + 0x0560, 0x0588, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x13f8, 0x13fd, 0x1c80, 0x1c88, + 0x1d00, 0x1d2b, 0x1d6b, 0x1d77, 0x1d79, 0x1d9a, 0x1e95, 0x1e9d, 0x1eff, 0x1f07, + 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f50, 0x1f57, + 0x1f60, 0x1f67, 0x1f70, 0x1f7d, 0x1f80, 0x1f87, 0x1f90, 0x1f97, 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fc2, 0x1fc4, 0x1fc6, 0x1fc7, 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, 0x1fe0, 0x1fe7, 0x1ff2, 0x1ff4, 0x1ff6, 0x1ff7, 0x210e, 0x210f, + 0x213c, 0x213d, 0x2146, 0x2149, 0x2c30, 0x2c5e, 0x2c65, 0x2c66, 0x2c73, 0x2c74, + 0x2c76, 0x2c7b, 0x2ce3, 0x2ce4, 0x2d00, 0x2d25, 0xa72f, 0xa731, 0xa771, 0xa778, + 0xa793, 0xa795, 0xab30, 0xab5a, 0xab60, 0xab65, 0xab70, 0xabbf, 0xfb00, 0xfb06, + 0xfb13, 0xfb17, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, + 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1d41a, 0x1d433, 0x1d44e, 0x1d454, 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, 0x1d4b6, 0x1d4b9, 0x1d4bd, 0x1d4c3, 0x1d4c5, 0x1d4cf, 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, 0x1d552, 0x1d56b, 0x1d586, 0x1d59f, 0x1d5ba, 0x1d5d3, 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, 0x1d656, 0x1d66f, 0x1d68a, 0x1d6a5, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, 0x1d716, 0x1d71b, 0x1d736, 0x1d74e, 0x1d750, 0x1d755, 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7c9, +}; + +static rune islower_stab[] = { + 0x00b5, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, 0x010d, 0x010f, 0x0111, 0x0113, + 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, + 0x012b, 0x012d, 0x012f, 0x0131, 0x0133, 0x0135, 0x013a, 0x013c, 0x013e, 0x0140, 0x0142, + 0x0144, 0x0146, 0x014b, 0x014d, 0x014f, 0x0151, 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, + 0x015d, 0x015f, 0x0161, 0x0163, 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, + 0x0173, 0x0175, 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x0192, 0x0195, 0x019e, + 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01ad, 0x01b0, 0x01b4, 0x01b6, 0x01c6, 0x01c9, 0x01cc, + 0x01ce, 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01df, 0x01e1, 0x01e3, 0x01e5, + 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, + 0x0203, 0x0205, 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, + 0x0219, 0x021b, 0x021d, 0x021f, 0x0221, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, 0x022d, + 0x022f, 0x0231, 0x023c, 0x0242, 0x0247, 0x0249, 0x024b, 0x024d, 0x0371, 0x0373, 0x0377, + 0x0390, 0x03d9, 0x03db, 0x03dd, 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, + 0x03ed, 0x03f5, 0x03f8, 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, + 0x0471, 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048b, 0x048d, + 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, 0x04a1, 0x04a3, + 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, 0x04b3, 0x04b5, 0x04b7, 0x04b9, + 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, 0x04c6, 0x04c8, 0x04ca, 0x04cc, 0x04d1, 0x04d3, + 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, + 0x04eb, 0x04ed, 0x04ef, 0x04f1, 0x04f3, 0x04f5, 0x04f7, 0x04f9, 0x04fb, 0x04fd, 0x04ff, + 0x0501, 0x0503, 0x0505, 0x0507, 0x0509, 0x050b, 0x050d, 0x050f, 0x0511, 0x0513, 0x0515, + 0x0517, 0x0519, 0x051b, 0x051d, 0x051f, 0x0521, 0x0523, 0x0525, 0x0527, 0x0529, 0x052b, + 0x052d, 0x052f, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09, 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, + 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, + 0x1e29, 0x1e2b, 0x1e2d, 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, + 0x1e3f, 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, 0x1e53, + 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, 0x1e65, 0x1e67, 0x1e69, + 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, + 0x1e81, 0x1e83, 0x1e85, 0x1e87, 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1e9f, + 0x1ea1, 0x1ea3, 0x1ea5, 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, + 0x1eb7, 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, 0x1ecb, + 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, 0x1edd, 0x1edf, 0x1ee1, + 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, + 0x1ef9, 0x1efb, 0x1efd, 0x1fbe, 0x210a, 0x2113, 0x212f, 0x2134, 0x2139, 0x214e, 0x2184, + 0x2c61, 0x2c68, 0x2c6a, 0x2c6c, 0x2c71, 0x2c81, 0x2c83, 0x2c85, 0x2c87, 0x2c89, 0x2c8b, + 0x2c8d, 0x2c8f, 0x2c91, 0x2c93, 0x2c95, 0x2c97, 0x2c99, 0x2c9b, 0x2c9d, 0x2c9f, 0x2ca1, + 0x2ca3, 0x2ca5, 0x2ca7, 0x2ca9, 0x2cab, 0x2cad, 0x2caf, 0x2cb1, 0x2cb3, 0x2cb5, 0x2cb7, + 0x2cb9, 0x2cbb, 0x2cbd, 0x2cbf, 0x2cc1, 0x2cc3, 0x2cc5, 0x2cc7, 0x2cc9, 0x2ccb, 0x2ccd, + 0x2ccf, 0x2cd1, 0x2cd3, 0x2cd5, 0x2cd7, 0x2cd9, 0x2cdb, 0x2cdd, 0x2cdf, 0x2ce1, 0x2cec, + 0x2cee, 0x2cf3, 0x2d27, 0x2d2d, 0xa641, 0xa643, 0xa645, 0xa647, 0xa649, 0xa64b, 0xa64d, + 0xa64f, 0xa651, 0xa653, 0xa655, 0xa657, 0xa659, 0xa65b, 0xa65d, 0xa65f, 0xa661, 0xa663, + 0xa665, 0xa667, 0xa669, 0xa66b, 0xa66d, 0xa681, 0xa683, 0xa685, 0xa687, 0xa689, 0xa68b, + 0xa68d, 0xa68f, 0xa691, 0xa693, 0xa695, 0xa697, 0xa699, 0xa69b, 0xa723, 0xa725, 0xa727, + 0xa729, 0xa72b, 0xa72d, 0xa733, 0xa735, 0xa737, 0xa739, 0xa73b, 0xa73d, 0xa73f, 0xa741, + 0xa743, 0xa745, 0xa747, 0xa749, 0xa74b, 0xa74d, 0xa74f, 0xa751, 0xa753, 0xa755, 0xa757, + 0xa759, 0xa75b, 0xa75d, 0xa75f, 0xa761, 0xa763, 0xa765, 0xa767, 0xa769, 0xa76b, 0xa76d, + 0xa76f, 0xa77a, 0xa77c, 0xa77f, 0xa781, 0xa783, 0xa785, 0xa787, 0xa78c, 0xa78e, 0xa791, + 0xa797, 0xa799, 0xa79b, 0xa79d, 0xa79f, 0xa7a1, 0xa7a3, 0xa7a5, 0xa7a7, 0xa7a9, 0xa7af, + 0xa7b5, 0xa7b7, 0xa7b9, 0xa7fa, 0x1d4bb, 0x1d7cb, +}; + +int +utf8·islower(rune c) +{ + rune* p; + + p = rbsearch(c, islower_rtab, arrlen(islower_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + p = rbsearch(c, islower_stab, arrlen(islower_stab), 1); + if (p && c == p[0]) return 1; + + return 0; +} + +static rune isupper_rtab[] = { + 0x0041, 0x005a, 0x00c0, 0x00d6, 0x00d8, 0x00de, 0x0178, 0x0179, 0x0181, 0x0182, + 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x0191, 0x0193, 0x0194, 0x0196, 0x0198, + 0x019c, 0x019d, 0x019f, 0x01a0, 0x01a6, 0x01a7, 0x01ae, 0x01af, 0x01b1, 0x01b3, + 0x01b7, 0x01b8, 0x01f6, 0x01f8, 0x023a, 0x023b, 0x023d, 0x023e, 0x0243, 0x0246, + 0x0388, 0x038a, 0x038e, 0x038f, 0x0391, 0x03a1, 0x03a3, 0x03ab, 0x03d2, 0x03d4, + 0x03f9, 0x03fa, 0x03fd, 0x042f, 0x04c0, 0x04c1, 0x0531, 0x0556, 0x10a0, 0x10c5, + 0x13a0, 0x13f5, 0x1c90, 0x1cba, 0x1cbd, 0x1cbf, 0x1f08, 0x1f0f, 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, 0x1f38, 0x1f3f, 0x1f48, 0x1f4d, 0x1f68, 0x1f6f, 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, 0x1fd8, 0x1fdb, 0x1fe8, 0x1fec, 0x1ff8, 0x1ffb, 0x210b, 0x210d, + 0x2110, 0x2112, 0x2119, 0x211d, 0x212a, 0x212d, 0x2130, 0x2133, 0x213e, 0x213f, + 0x2c00, 0x2c2e, 0x2c62, 0x2c64, 0x2c6d, 0x2c70, 0x2c7e, 0x2c80, 0xa77d, 0xa77e, + 0xa7aa, 0xa7ae, 0xa7b0, 0xa7b4, 0xff21, 0xff3a, 0x10400, 0x10427, 0x104b0, 0x104d3, + 0x10c80, 0x10cb2, 0x118a0, 0x118bf, 0x16e40, 0x16e5f, 0x1d400, 0x1d419, 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, 0x1d49e, 0x1d49f, 0x1d4a5, 0x1d4a6, 0x1d4a9, 0x1d4ac, 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, 0x1d504, 0x1d505, 0x1d507, 0x1d50a, 0x1d50d, 0x1d514, 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, 0x1d53b, 0x1d53e, 0x1d540, 0x1d544, 0x1d54a, 0x1d550, 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, 0x1d5d4, 0x1d5ed, 0x1d608, 0x1d621, 0x1d63c, 0x1d655, 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, 0x1d6e2, 0x1d6fa, 0x1d71c, 0x1d734, 0x1d756, 0x1d76e, 0x1d790, 0x1d7a8, +}; + +static rune isupper_stab[] = { + 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, 0x0112, 0x0114, + 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012a, + 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, + 0x0143, 0x0145, 0x0147, 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, + 0x015a, 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, 0x016e, + 0x0170, 0x0172, 0x0174, 0x0176, 0x017b, 0x017d, 0x0184, 0x01a2, 0x01a4, 0x01a9, 0x01ac, + 0x01b5, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, + 0x01d9, 0x01db, 0x01de, 0x01e0, 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, + 0x01f1, 0x01f4, 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, + 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, 0x021e, 0x0220, + 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, 0x0232, 0x0241, 0x0248, + 0x024a, 0x024c, 0x024e, 0x0370, 0x0372, 0x0376, 0x037f, 0x0386, 0x038c, 0x03cf, 0x03d8, + 0x03da, 0x03dc, 0x03de, 0x03e0, 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, + 0x03f4, 0x03f7, 0x0460, 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, + 0x0472, 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048a, 0x048c, 0x048e, + 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, 0x04a2, 0x04a4, + 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, 0x04b4, 0x04b6, 0x04b8, 0x04ba, + 0x04bc, 0x04be, 0x04c3, 0x04c5, 0x04c7, 0x04c9, 0x04cb, 0x04cd, 0x04d0, 0x04d2, 0x04d4, + 0x04d6, 0x04d8, 0x04da, 0x04dc, 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, + 0x04ec, 0x04ee, 0x04f0, 0x04f2, 0x04f4, 0x04f6, 0x04f8, 0x04fa, 0x04fc, 0x04fe, 0x0500, + 0x0502, 0x0504, 0x0506, 0x0508, 0x050a, 0x050c, 0x050e, 0x0510, 0x0512, 0x0514, 0x0516, + 0x0518, 0x051a, 0x051c, 0x051e, 0x0520, 0x0522, 0x0524, 0x0526, 0x0528, 0x052a, 0x052c, + 0x052e, 0x10c7, 0x10cd, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, 0x1e0a, 0x1e0c, 0x1e0e, + 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, + 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, + 0x1e3c, 0x1e3e, 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, + 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, 0x1e64, 0x1e66, + 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, + 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, + 0x1e94, 0x1e9e, 0x1ea0, 0x1ea2, 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, + 0x1eb2, 0x1eb4, 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, + 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8, 0x1eda, 0x1edc, + 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, + 0x1ef4, 0x1ef6, 0x1ef8, 0x1efa, 0x1efc, 0x1efe, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 0x2102, + 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2145, 0x2183, 0x2c60, 0x2c67, 0x2c69, 0x2c6b, + 0x2c72, 0x2c75, 0x2c82, 0x2c84, 0x2c86, 0x2c88, 0x2c8a, 0x2c8c, 0x2c8e, 0x2c90, 0x2c92, + 0x2c94, 0x2c96, 0x2c98, 0x2c9a, 0x2c9c, 0x2c9e, 0x2ca0, 0x2ca2, 0x2ca4, 0x2ca6, 0x2ca8, + 0x2caa, 0x2cac, 0x2cae, 0x2cb0, 0x2cb2, 0x2cb4, 0x2cb6, 0x2cb8, 0x2cba, 0x2cbc, 0x2cbe, + 0x2cc0, 0x2cc2, 0x2cc4, 0x2cc6, 0x2cc8, 0x2cca, 0x2ccc, 0x2cce, 0x2cd0, 0x2cd2, 0x2cd4, + 0x2cd6, 0x2cd8, 0x2cda, 0x2cdc, 0x2cde, 0x2ce0, 0x2ce2, 0x2ceb, 0x2ced, 0x2cf2, 0xa640, + 0xa642, 0xa644, 0xa646, 0xa648, 0xa64a, 0xa64c, 0xa64e, 0xa650, 0xa652, 0xa654, 0xa656, + 0xa658, 0xa65a, 0xa65c, 0xa65e, 0xa660, 0xa662, 0xa664, 0xa666, 0xa668, 0xa66a, 0xa66c, + 0xa680, 0xa682, 0xa684, 0xa686, 0xa688, 0xa68a, 0xa68c, 0xa68e, 0xa690, 0xa692, 0xa694, + 0xa696, 0xa698, 0xa69a, 0xa722, 0xa724, 0xa726, 0xa728, 0xa72a, 0xa72c, 0xa72e, 0xa732, + 0xa734, 0xa736, 0xa738, 0xa73a, 0xa73c, 0xa73e, 0xa740, 0xa742, 0xa744, 0xa746, 0xa748, + 0xa74a, 0xa74c, 0xa74e, 0xa750, 0xa752, 0xa754, 0xa756, 0xa758, 0xa75a, 0xa75c, 0xa75e, + 0xa760, 0xa762, 0xa764, 0xa766, 0xa768, 0xa76a, 0xa76c, 0xa76e, 0xa779, 0xa77b, 0xa780, + 0xa782, 0xa784, 0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798, 0xa79a, 0xa79c, + 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7b6, 0xa7b8, 0x1d49c, 0x1d4a2, 0x1d546, + 0x1d7ca, +}; + +int +utf8·isupper(rune c) +{ + rune* p; + + p = rbsearch(c, isupper_rtab, arrlen(isupper_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + p = rbsearch(c, isupper_stab, arrlen(isupper_stab), 1); + if (p && c == p[0]) return 1; + + return 0; +} + +static rune istitle_rtab[] = { + 0x1f88, 0x1f8f, 0x1f98, 0x1f9f, 0x1fa8, 0x1faf, +}; + +static rune istitle_stab[] = { + 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1fbc, 0x1fcc, +}; + +int +utf8·istitle(rune c) +{ + rune* p; + + p = rbsearch(c, istitle_rtab, arrlen(istitle_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + p = rbsearch(c, istitle_stab, arrlen(istitle_stab), 1); + if (p && c == p[0]) return 1; + + return 0; +} + +static rune isletter_rtab[] = { + 0x0041, 0x005a, 0x0061, 0x007a, 0x00c0, 0x00d6, 0x00d8, 0x00f6, 0x00f8, 0x02c1, + 0x02c6, 0x02d1, 0x02e0, 0x02e4, 0x0370, 0x0374, 0x0376, 0x0377, 0x037a, 0x037d, + 0x0388, 0x038a, 0x038e, 0x03a1, 0x03a3, 0x03f5, 0x03f7, 0x0481, 0x048a, 0x052f, + 0x0531, 0x0556, 0x0560, 0x0588, 0x05d0, 0x05ea, 0x05ef, 0x05f2, 0x0620, 0x064a, + 0x066e, 0x066f, 0x0671, 0x06d3, 0x06e5, 0x06e6, 0x06ee, 0x06ef, 0x06fa, 0x06fc, + 0x0712, 0x072f, 0x074d, 0x07a5, 0x07ca, 0x07ea, 0x07f4, 0x07f5, 0x0800, 0x0815, + 0x0840, 0x0858, 0x0860, 0x086a, 0x08a0, 0x08b4, 0x08b6, 0x08bd, 0x0904, 0x0939, + 0x0958, 0x0961, 0x0971, 0x0980, 0x0985, 0x098c, 0x098f, 0x0990, 0x0993, 0x09a8, + 0x09aa, 0x09b0, 0x09b6, 0x09b9, 0x09dc, 0x09dd, 0x09df, 0x09e1, 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, 0x0a0f, 0x0a10, 0x0a13, 0x0a28, 0x0a2a, 0x0a30, 0x0a32, 0x0a33, + 0x0a35, 0x0a36, 0x0a38, 0x0a39, 0x0a59, 0x0a5c, 0x0a72, 0x0a74, 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, 0x0a93, 0x0aa8, 0x0aaa, 0x0ab0, 0x0ab2, 0x0ab3, 0x0ab5, 0x0ab9, + 0x0ae0, 0x0ae1, 0x0b05, 0x0b0c, 0x0b0f, 0x0b10, 0x0b13, 0x0b28, 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, 0x0b35, 0x0b39, 0x0b5c, 0x0b5d, 0x0b5f, 0x0b61, 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, 0x0b92, 0x0b95, 0x0b99, 0x0b9a, 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, 0x0bae, 0x0bb9, 0x0c05, 0x0c0c, 0x0c0e, 0x0c10, 0x0c12, 0x0c28, + 0x0c2a, 0x0c39, 0x0c58, 0x0c5a, 0x0c60, 0x0c61, 0x0c85, 0x0c8c, 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, 0x0caa, 0x0cb3, 0x0cb5, 0x0cb9, 0x0ce0, 0x0ce1, 0x0cf1, 0x0cf2, + 0x0d05, 0x0d0c, 0x0d0e, 0x0d10, 0x0d12, 0x0d3a, 0x0d54, 0x0d56, 0x0d5f, 0x0d61, + 0x0d7a, 0x0d7f, 0x0d85, 0x0d96, 0x0d9a, 0x0db1, 0x0db3, 0x0dbb, 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, 0x0e32, 0x0e33, 0x0e40, 0x0e46, 0x0e81, 0x0e82, 0x0e87, 0x0e88, + 0x0e94, 0x0e97, 0x0e99, 0x0e9f, 0x0ea1, 0x0ea3, 0x0eaa, 0x0eab, 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, 0x0ec0, 0x0ec4, 0x0edc, 0x0edf, 0x0f40, 0x0f47, 0x0f49, 0x0f6c, + 0x0f88, 0x0f8c, 0x1000, 0x102a, 0x1050, 0x1055, 0x105a, 0x105d, 0x1065, 0x1066, + 0x106e, 0x1070, 0x1075, 0x1081, 0x10a0, 0x10c5, 0x10d0, 0x10fa, 0x10fc, 0x1248, + 0x124a, 0x124d, 0x1250, 0x1256, 0x125a, 0x125d, 0x1260, 0x1288, 0x128a, 0x128d, + 0x1290, 0x12b0, 0x12b2, 0x12b5, 0x12b8, 0x12be, 0x12c2, 0x12c5, 0x12c8, 0x12d6, + 0x12d8, 0x1310, 0x1312, 0x1315, 0x1318, 0x135a, 0x1380, 0x138f, 0x13a0, 0x13f5, + 0x13f8, 0x13fd, 0x1401, 0x166c, 0x166f, 0x167f, 0x1681, 0x169a, 0x16a0, 0x16ea, + 0x16f1, 0x16f8, 0x1700, 0x170c, 0x170e, 0x1711, 0x1720, 0x1731, 0x1740, 0x1751, + 0x1760, 0x176c, 0x176e, 0x1770, 0x1780, 0x17b3, 0x1820, 0x1878, 0x1880, 0x1884, + 0x1887, 0x18a8, 0x18b0, 0x18f5, 0x1900, 0x191e, 0x1950, 0x196d, 0x1970, 0x1974, + 0x1980, 0x19ab, 0x19b0, 0x19c9, 0x1a00, 0x1a16, 0x1a20, 0x1a54, 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, 0x1b83, 0x1ba0, 0x1bae, 0x1baf, 0x1bba, 0x1be5, 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, 0x1c5a, 0x1c7d, 0x1c80, 0x1c88, 0x1c90, 0x1cba, 0x1cbd, 0x1cbf, + 0x1ce9, 0x1cec, 0x1cee, 0x1cf1, 0x1cf5, 0x1cf6, 0x1d00, 0x1dbf, 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, 0x1f20, 0x1f45, 0x1f48, 0x1f4d, 0x1f50, 0x1f57, 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, 0x1fb6, 0x1fbc, 0x1fc2, 0x1fc4, 0x1fc6, 0x1fcc, 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, 0x1fe0, 0x1fec, 0x1ff2, 0x1ff4, 0x1ff6, 0x1ffc, 0x2090, 0x209c, + 0x210a, 0x2113, 0x2119, 0x211d, 0x212a, 0x212d, 0x212f, 0x2139, 0x213c, 0x213f, + 0x2145, 0x2149, 0x2183, 0x2184, 0x2c00, 0x2c2e, 0x2c30, 0x2c5e, 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, 0x2cf2, 0x2cf3, 0x2d00, 0x2d25, 0x2d30, 0x2d67, 0x2d80, 0x2d96, + 0x2da0, 0x2da6, 0x2da8, 0x2dae, 0x2db0, 0x2db6, 0x2db8, 0x2dbe, 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, 0x2dd0, 0x2dd6, 0x2dd8, 0x2dde, 0x3005, 0x3006, 0x3031, 0x3035, + 0x303b, 0x303c, 0x3041, 0x3096, 0x309d, 0x309f, 0x30a1, 0x30fa, 0x30fc, 0x30ff, + 0x3105, 0x312f, 0x3131, 0x318e, 0x31a0, 0x31ba, 0x31f0, 0x31ff, 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, 0xa500, 0xa60c, 0xa610, 0xa61f, 0xa62a, 0xa62b, 0xa640, 0xa66e, + 0xa67f, 0xa69d, 0xa6a0, 0xa6e5, 0xa717, 0xa71f, 0xa722, 0xa788, 0xa78b, 0xa7b9, + 0xa7f7, 0xa801, 0xa803, 0xa805, 0xa807, 0xa80a, 0xa80c, 0xa822, 0xa840, 0xa873, + 0xa882, 0xa8b3, 0xa8f2, 0xa8f7, 0xa8fd, 0xa8fe, 0xa90a, 0xa925, 0xa930, 0xa946, + 0xa960, 0xa97c, 0xa984, 0xa9b2, 0xa9e0, 0xa9e4, 0xa9e6, 0xa9ef, 0xa9fa, 0xa9fe, + 0xaa00, 0xaa28, 0xaa40, 0xaa42, 0xaa44, 0xaa4b, 0xaa60, 0xaa76, 0xaa7e, 0xaaaf, + 0xaab5, 0xaab6, 0xaab9, 0xaabd, 0xaadb, 0xaadd, 0xaae0, 0xaaea, 0xaaf2, 0xaaf4, + 0xab01, 0xab06, 0xab09, 0xab0e, 0xab11, 0xab16, 0xab20, 0xab26, 0xab28, 0xab2e, + 0xab30, 0xab5a, 0xab5c, 0xab65, 0xab70, 0xabe2, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, 0xfa70, 0xfad9, 0xfb00, 0xfb06, 0xfb13, 0xfb17, 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, 0xfb38, 0xfb3c, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, 0xfd50, 0xfd8f, 0xfd92, 0xfdc7, 0xfdf0, 0xfdfb, 0xfe70, 0xfe74, + 0xfe76, 0xfefc, 0xff21, 0xff3a, 0xff41, 0xff5a, 0xff66, 0xffbe, 0xffc2, 0xffc7, + 0xffca, 0xffcf, 0xffd2, 0xffd7, 0xffda, 0xffdc, 0x10000, 0x1000b, 0x1000d, 0x10026, + 0x10028, 0x1003a, 0x1003c, 0x1003d, 0x1003f, 0x1004d, 0x10050, 0x1005d, 0x10080, 0x100fa, + 0x10280, 0x1029c, 0x102a0, 0x102d0, 0x10300, 0x1031f, 0x1032d, 0x10340, 0x10342, 0x10349, + 0x10350, 0x10375, 0x10380, 0x1039d, 0x103a0, 0x103c3, 0x103c8, 0x103cf, 0x10400, 0x1049d, + 0x104b0, 0x104d3, 0x104d8, 0x104fb, 0x10500, 0x10527, 0x10530, 0x10563, 0x10600, 0x10736, + 0x10740, 0x10755, 0x10760, 0x10767, 0x10800, 0x10805, 0x1080a, 0x10835, 0x10837, 0x10838, + 0x1083f, 0x10855, 0x10860, 0x10876, 0x10880, 0x1089e, 0x108e0, 0x108f2, 0x108f4, 0x108f5, + 0x10900, 0x10915, 0x10920, 0x10939, 0x10980, 0x109b7, 0x109be, 0x109bf, 0x10a10, 0x10a13, + 0x10a15, 0x10a17, 0x10a19, 0x10a35, 0x10a60, 0x10a7c, 0x10a80, 0x10a9c, 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, 0x10b00, 0x10b35, 0x10b40, 0x10b55, 0x10b60, 0x10b72, 0x10b80, 0x10b91, + 0x10c00, 0x10c48, 0x10c80, 0x10cb2, 0x10cc0, 0x10cf2, 0x10d00, 0x10d23, 0x10f00, 0x10f1c, + 0x10f30, 0x10f45, 0x11003, 0x11037, 0x11083, 0x110af, 0x110d0, 0x110e8, 0x11103, 0x11126, + 0x11150, 0x11172, 0x11183, 0x111b2, 0x111c1, 0x111c4, 0x11200, 0x11211, 0x11213, 0x1122b, + 0x11280, 0x11286, 0x1128a, 0x1128d, 0x1128f, 0x1129d, 0x1129f, 0x112a8, 0x112b0, 0x112de, + 0x11305, 0x1130c, 0x1130f, 0x11310, 0x11313, 0x11328, 0x1132a, 0x11330, 0x11332, 0x11333, + 0x11335, 0x11339, 0x1135d, 0x11361, 0x11400, 0x11434, 0x11447, 0x1144a, 0x11480, 0x114af, + 0x114c4, 0x114c5, 0x11580, 0x115ae, 0x115d8, 0x115db, 0x11600, 0x1162f, 0x11680, 0x116aa, + 0x11700, 0x1171a, 0x11800, 0x1182b, 0x118a0, 0x118df, 0x11a0b, 0x11a32, 0x11a5c, 0x11a83, + 0x11a86, 0x11a89, 0x11ac0, 0x11af8, 0x11c00, 0x11c08, 0x11c0a, 0x11c2e, 0x11c72, 0x11c8f, + 0x11d00, 0x11d06, 0x11d08, 0x11d09, 0x11d0b, 0x11d30, 0x11d60, 0x11d65, 0x11d67, 0x11d68, + 0x11d6a, 0x11d89, 0x11ee0, 0x11ef2, 0x12000, 0x12399, 0x12480, 0x12543, 0x13000, 0x1342e, + 0x14400, 0x14646, 0x16800, 0x16a38, 0x16a40, 0x16a5e, 0x16ad0, 0x16aed, 0x16b00, 0x16b2f, + 0x16b40, 0x16b43, 0x16b63, 0x16b77, 0x16b7d, 0x16b8f, 0x16e40, 0x16e7f, 0x16f00, 0x16f44, + 0x16f93, 0x16f9f, 0x16fe0, 0x16fe1, 0x18800, 0x18af2, 0x1b000, 0x1b11e, 0x1b170, 0x1b2fb, + 0x1bc00, 0x1bc6a, 0x1bc70, 0x1bc7c, 0x1bc80, 0x1bc88, 0x1bc90, 0x1bc99, 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, 0x1d49e, 0x1d49f, 0x1d4a5, 0x1d4a6, 0x1d4a9, 0x1d4ac, 0x1d4ae, 0x1d4b9, + 0x1d4bd, 0x1d4c3, 0x1d4c5, 0x1d505, 0x1d507, 0x1d50a, 0x1d50d, 0x1d514, 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, 0x1d53b, 0x1d53e, 0x1d540, 0x1d544, 0x1d54a, 0x1d550, 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6fa, 0x1d6fc, 0x1d714, 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, 0x1d750, 0x1d76e, 0x1d770, 0x1d788, 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, 0x1e800, 0x1e8c4, 0x1e900, 0x1e943, 0x1ee00, 0x1ee03, 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, 0x1ee29, 0x1ee32, 0x1ee34, 0x1ee37, 0x1ee4d, 0x1ee4f, 0x1ee51, 0x1ee52, + 0x1ee61, 0x1ee62, 0x1ee67, 0x1ee6a, 0x1ee6c, 0x1ee72, 0x1ee74, 0x1ee77, 0x1ee79, 0x1ee7c, + 0x1ee80, 0x1ee89, 0x1ee8b, 0x1ee9b, 0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, +}; + +static rune isletter_stab[] = { + 0x00aa, 0x00b5, 0x00ba, 0x02ec, 0x02ee, 0x037f, 0x0386, 0x038c, 0x0559, 0x06d5, + 0x06ff, 0x0710, 0x07b1, 0x07fa, 0x081a, 0x0824, 0x0828, 0x093d, 0x0950, 0x09b2, + 0x09bd, 0x09ce, 0x09fc, 0x0a5e, 0x0abd, 0x0ad0, 0x0af9, 0x0b3d, 0x0b71, 0x0b83, + 0x0b9c, 0x0bd0, 0x0c3d, 0x0c80, 0x0cbd, 0x0cde, 0x0d3d, 0x0d4e, 0x0dbd, 0x0e84, + 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0ebd, 0x0ec6, 0x0f00, 0x103f, 0x1061, 0x108e, + 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x17d7, 0x17dc, 0x18aa, 0x1aa7, 0x1f59, 0x1f5b, + 0x1f5d, 0x1fbe, 0x2071, 0x207f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, + 0x214e, 0x2d27, 0x2d2d, 0x2d6f, 0x2e2f, 0x3400, 0x4db5, 0x4e00, 0x9fef, 0xa8fb, + 0xa9cf, 0xaa7a, 0xaab1, 0xaac0, 0xaac2, 0xac00, 0xd7a3, 0xfb1d, 0xfb3e, 0x10808, + 0x1083c, 0x10a00, 0x10f27, 0x11144, 0x11176, 0x111da, 0x111dc, 0x11288, 0x1133d, 0x11350, + 0x114c7, 0x11644, 0x118ff, 0x11a00, 0x11a3a, 0x11a50, 0x11a9d, 0x11c40, 0x11d46, 0x11d98, + 0x16f50, 0x17000, 0x187f1, 0x1d4a2, 0x1d4bb, 0x1d546, 0x1ee24, 0x1ee27, 0x1ee39, 0x1ee3b, + 0x1ee42, 0x1ee47, 0x1ee49, 0x1ee4b, 0x1ee54, 0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f, + 0x1ee64, 0x1ee7e, 0x20000, 0x2a6d6, 0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2b820, 0x2cea1, + 0x2ceb0, 0x2ebe0, +}; + +int +utf8·isletter(rune c) +{ + rune* p; + + p = rbsearch(c, isletter_rtab, arrlen(isletter_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + p = rbsearch(c, isletter_stab, arrlen(isletter_stab), 1); + if (p && c == p[0]) return 1; + + return 0; +} + +static rune isdigit_rtab[] = { + 0x0030, 0x0039, 0x0660, 0x0669, 0x06f0, 0x06f9, 0x07c0, 0x07c9, 0x0966, 0x096f, + 0x09e6, 0x09ef, 0x0a66, 0x0a6f, 0x0ae6, 0x0aef, 0x0b66, 0x0b6f, 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, 0x0ce6, 0x0cef, 0x0d66, 0x0d6f, 0x0de6, 0x0def, 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, 0x0f20, 0x0f29, 0x1040, 0x1049, 0x1090, 0x1099, 0x17e0, 0x17e9, + 0x1810, 0x1819, 0x1946, 0x194f, 0x19d0, 0x19d9, 0x1a80, 0x1a89, 0x1a90, 0x1a99, + 0x1b50, 0x1b59, 0x1bb0, 0x1bb9, 0x1c40, 0x1c49, 0x1c50, 0x1c59, 0xa620, 0xa629, + 0xa8d0, 0xa8d9, 0xa900, 0xa909, 0xa9d0, 0xa9d9, 0xa9f0, 0xa9f9, 0xaa50, 0xaa59, + 0xabf0, 0xabf9, 0xff10, 0xff19, 0x104a0, 0x104a9, 0x10d30, 0x10d39, 0x11066, 0x1106f, + 0x110f0, 0x110f9, 0x11136, 0x1113f, 0x111d0, 0x111d9, 0x112f0, 0x112f9, 0x11450, 0x11459, + 0x114d0, 0x114d9, 0x11650, 0x11659, 0x116c0, 0x116c9, 0x11730, 0x11739, 0x118e0, 0x118e9, + 0x11c50, 0x11c59, 0x11d50, 0x11d59, 0x11da0, 0x11da9, 0x16a60, 0x16a69, 0x16b50, 0x16b59, + 0x1d7ce, 0x1d7ff, +}; + +int +utf8·isdigit(rune c) +{ + rune* p; + + p = rbsearch(c, isdigit_rtab, arrlen(isdigit_rtab) / 2, 2); + if (p && c >= p[0] && c <= p[1]) return 1; + + return 0; +} diff --git a/sys/base/arg.c b/sys/base/arg.c new file mode 100644 index 0000000..64e4dd6 --- /dev/null +++ b/sys/base/arg.c @@ -0,0 +1 @@ +char *argv0; diff --git a/sys/base/bufio.c b/sys/base/bufio.c new file mode 100644 index 0000000..7ebee68 --- /dev/null +++ b/sys/base/bufio.c @@ -0,0 +1,190 @@ +#include +#include + +// ----------------------------------------------------------------------- +// reader + +error +bufio·initreader(io·Buffer *buf, io·Reader rdr, void *h) +{ + if (buf->state) { + errorf("attemped to initialize an active buffer, state is '%d'", buf->state); + return bufio·err; + } + buf->state = bufio·rdr; + buf->runesize = 0; + buf->h = h; + buf->rdr = rdr; + buf->beg = buf->buf + bufio·ungets; + buf->pos = buf->beg; + buf->end = buf->pos; + buf->size = bufio·size - bufio·ungets; + + return 0; +} + +void +bufio·finireader(io·Buffer *buf) +{ + buf->state = bufio·nil; + buf->runesize = 0; + buf->rdr = (io·Reader){ .read = nil }; +} + +static +int +refill(io·Buffer *buf) +{ + int n; + + if (buf->state & bufio·end) { + return bufio·err; + } + memcpy(buf->buf, buf->pos - bufio·ungets, bufio·ungets); + + n = buf->rdr.read(buf->h, 1, buf->size, buf->beg); + if (n < 0) + return bufio·err; + if (n == 0) { + buf->state |= bufio·end; + return 0; + } + + buf->pos = buf->beg; + buf->end = buf->pos + n; + + // TEST: put a physical EOF byte at the end + // this would allow for an unget operation + if (n < buf->size) + *buf->end++ = EOF; + + return n; +} + +int +bufio·getbyte(io·Buffer *buf) +{ +getbyte: + if (buf->pos < buf->end) { + return *buf->pos++; + } + + memmove(buf->buf, buf->end - bufio·ungets, bufio·ungets); + + if (refill(buf) <= 0) + return bufio·eof; + + goto getbyte; +} + +error +bufio·ungetbyte(io·Buffer *buf, byte c) +{ + if (!(buf->state & bufio·rdr)) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } + + if (buf->pos == buf->buf) { + errorf("attempted to unget past end of buffer"); + return bufio·err; + } + + buf->pos--; + return 0; +} + +rune +bufio·getrune(io·Buffer *buf) +{ + ubyte b; + int i; + byte str[UTFmax+1]; + rune r; + + // NOTE: I'm worried about the sign here... + b = bufio·getbyte(buf); + if (b < RuneSelf) { + buf->runesize = 1; + return b; + } + + i = 0; + str[i++] = b; + +nextbyte: + b = bufio·getbyte(buf); + if (b < 0) return b; + if (i >= arrlen(str)) return RuneErr; + str[i++] = b; + if (!utf8·fullrune(str, i)) + goto nextbyte; + + buf->runesize = utf8·bytetorune(&r, str); + if (r == RuneErr && b == 1) { + errorf("illegal UTF-8 sequence"); + for (; i >= 0; i--) + errorf("%s%.2x", i > 0 ? " " : "", *(ubyte*)(str+i)); + errorf("\n"); + + buf->runesize = 0; + } else + for (; i > buf->runesize; i--) + bufio·ungetbyte(buf, str[i]); + + return r; +} + +// TODO: Check that we are given the correct rune! +error +bufio·ungetrune(io·Buffer *buf, rune r) +{ + if (buf->state & bufio·rdr) { + errorf("attempted to unget on non-active reader"); + return bufio·err; + } + + if (buf->pos == buf->buf) { + errorf("attempted to unget past end of buffer"); + return bufio·err; + } + + buf->pos -= buf->runesize; + return 0; +} + +int +bufio·read(io·Buffer *buf, int sz, int n, void *out) +{ + byte *wtr; + int nr, rem, diff; + + if (n == 0 || buf->state & bufio·end) + return bufio·err; + + assert(buf->state & bufio·rdr); + + wtr = out; + rem = n*sz; + while (rem > 0) { + diff = buf->end - buf->pos; + nr = MIN(diff, rem); + if (!nr) { + if (buf->state & bufio·end) + break; + if (refill(buf) <= 0) + break; + + continue; + } + memmove(wtr, buf->pos, nr); + wtr += nr; + buf->pos += nr; + rem -= nr; + } + + return n - rem/sz; +} + +// ----------------------------------------------------------------------- +// writer diff --git a/sys/base/coro.c b/sys/base/coro.c new file mode 100644 index 0000000..953dee0 --- /dev/null +++ b/sys/base/coro.c @@ -0,0 +1,66 @@ +#include +#include + +// ----------------------------------------------------------------------- +// Assembly routines + +extern void _newcoro(Coro *co, uintptr (*func)(Coro*, uintptr), void *stk); +extern uintptr _coroyield(Coro *co, uintptr arg); + +// ----------------------------------------------------------------------- +// Globals + +// static thread_local coro *CONTEXT; + +// ----------------------------------------------------------------------- +// C interface + +/* Co-routine context */ +struct Coro +{ + void* sp; + void* bp; + uintptr size; + void* user; +}; + +Coro* +coro·make(uintptr stk, uintptr (*func)(Coro*, uintptr)) +{ + if (!func) return nil; + if (stk == 0) stk = 8192; + + byte *block = malloc(stk); + Coro *co = (Coro*)&block[stk - sizeof(Coro)]; + co->bp = block; + co->size = stk; + + _newcoro(co, func, co); + return co; +} + +error +coro·free(Coro *co) +{ + enum + { + NIL, + GOOD, + EMPTY, + LOST, + }; + + if (!co) return NIL; + if (!co->bp) return LOST; + if (co->size == 0) return EMPTY; + + free(co->bp); + + return GOOD; +} + +uintptr +coro·yield(Coro *c, uintptr arg) +{ + return _coroyield(c, arg); +} diff --git a/sys/base/coro_unix_x64.s b/sys/base/coro_unix_x64.s new file mode 100644 index 0000000..d7de2a2 --- /dev/null +++ b/sys/base/coro_unix_x64.s @@ -0,0 +1,113 @@ +; Nicholas Noll 2019 +; +; =================================================================== +%use altreg + + bits 64 + default rel + global _newcoro + global _coroyield + +; =================================================================== + section .text +; ------------------------------------------------------------------- + +%assign L.coro -8 +%assign L.func -16 + +coroinit: + mov R7, [RBP + L.coro] + mov R6, R0 + call [RBP + L.func] + +rerun: + mov R7, [RBP + L.coro] + mov R6, R0 + call _coroyield + jmp rerun + +; ------------------------------------------------------------------- +; # Register Mapping +; +; R0 R1 R2 R3 R4 R5 R6 R7 R8 ... +; RAX RCX RDX RBX RSP RBP RSI RDI R8 ... +; +; # Sys V calling convention +; func(R7, R6, R2, R1, R8, R9, Z0-7): R0 +; +; # Stack layout of an in-flight coro +; *coro +; *func +; *bp (base pointer of stack) +; ....... STACK ......... +; Saved Clobbers +; +; ### +; Stack layout of an init coro +; Stores the func pointer to init +; Stores the clobber registers. +; +; L.coro [8] +; L.func [7] +; coroinit [6] +; RBP [5] +; R3 [4] +; R12 [3] +; R13 [2] +; R14 [1] +; R15 [0] + +%define WORDSZ 8 +%define NSAVES 9 + +; coro *coro·new(co *coro, fn func, bp *stack) +_newcoro: + lea R0, [coroinit] ; Store address of init function + lea R1, [R2 - NSAVES*WORDSZ] ; Store offset address of stack + + mov [R1 + 8*WORDSZ], R7 ; Store context pointer + mov [R1 + 7*WORDSZ], R6 ; Store function pointer + mov [R1 + 6*WORDSZ], R0 ; Store initializer pointer + mov [R1 + 5*WORDSZ], R2 ; Store stack base pointer + + xor R0, R0 + + ; Start of mutable stack + ; Blank out the clobbers + mov [R1 + 4*WORDSZ], R0 ; R3 + mov [R1 + 3*WORDSZ], R0 ; R12 + mov [R1 + 2*WORDSZ], R0 ; R13 + mov [R1 + 1*WORDSZ], R0 ; R14 + mov [R1 + 0*WORDSZ], R0 ; R15 + + mov [R7], R1 + ret + +; Saves register state +%macro pushclobs 0 + push RBP + push R3 + push R12 + push R13 + push R14 + push R15 +%endmacro + +; Restores register state +%macro popclobs 0 + pop R15 + pop R14 + pop R13 + pop R12 + pop R3 + pop RBP +%endmacro + +; uintptr coro.yield(co *coro, data uintptr) +_coroyield: + pushclobs + mov R0, R6 ; Move return value into return register. + xchg RSP, [R7] ; Atomically swap the stack pointer with the yieldee. + popclobs + + ret diff --git a/sys/base/error.c b/sys/base/error.c new file mode 100644 index 0000000..dc6421d --- /dev/null +++ b/sys/base/error.c @@ -0,0 +1,57 @@ +#include +#include + +void +exits(char *s) +{ + if (s == nil || *s == 0) + exit(0); + + fputs(s, stderr); + exit(1); +} + +void +errorf(byte* fmt, ...) +{ + va_list args; + va_start(args, fmt); + + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + + va_end(args); +} + +void +verrorf(byte* fmt, va_list args) +{ + printf("error: "); + vprintf(fmt, args); + printf("\n"); +} + +void +panicf(byte* fmt, ...) +{ + va_list args; + va_start(args, fmt); + + printf("panic: "); + vprintf(fmt, args); + printf("\n"); + + va_end(args); + + exit(1); +} + +void +vpanicf(byte* fmt, va_list args) +{ + printf("panic: "); + vprintf(fmt, args); + printf("\n"); + + exit(1); +} diff --git a/sys/base/flate.c b/sys/base/flate.c new file mode 100644 index 0000000..5930fd1 --- /dev/null +++ b/sys/base/flate.c @@ -0,0 +1,243 @@ +#include +#include + +#include + +typedef struct buffer +{ + union { + struct z_stream_s; + z_stream z; + }; + + ubyte buf[4098]; +} buffer; + +// ----------------------------------------------------------------------- +// Reading + +typedef struct flate·Reader +{ + io·Reader rdr; + void* impl; + + union { + struct buffer; + buffer b; + }; +} flate·Reader; + +flate·Reader* +flate·openreader(io·Reader rdr, void* r, mem·Allocator mem, void* m) +{ + error err; + flate·Reader *zrdr; + + zrdr = mem.alloc(m, 1, sizeof(*zrdr)); + + zrdr->zalloc = (void *(*)(void *, unsigned int, unsigned int))mem.alloc; + zrdr->zfree = mem.free; + zrdr->opaque = m; + zrdr->avail_in = rdr.read(r, 1, arrlen(zrdr->buf), zrdr->buf); + zrdr->next_in = zrdr->buf; + + err = inflateInit(&zrdr->b.z); + + switch (err) { + case Z_OK: + return zrdr; + + case Z_MEM_ERROR: + errorf("zlib: not enough memory"); + goto ERROR; + + case Z_VERSION_ERROR: + errorf("zlib: incompatible version"); + goto ERROR; + + case Z_STREAM_ERROR: + errorf("zlib: incorrect input parameters"); + goto ERROR; + + default: + errorf("zlib: unrecognized error code"); + } +ERROR: + errorf("zlib: msg: %s", zrdr->msg); + mem.free(m, zrdr); + return nil; +} + +error +flate·closereader(flate·Reader *rdr) +{ + int err; + flate·Reader zrdr; + + zrdr = *rdr; + err = inflateEnd(&zrdr.b.z); + if (err != Z_OK) { + errorf("zlib: failed to cleanup"); + return err; + } + rdr->zfree(rdr->opaque, rdr); + + return 0; +} + +int +flate·read(flate·Reader *rdr, int sz, int n, void *buf) +{ + int r; + int err; + flate·Reader zrdr; + + zrdr = *rdr; + zrdr.next_out = buf; + zrdr.avail_out = n*sz; + +READ: + err = inflate(&zrdr.b.z, Z_STREAM_END); + switch (err) { + case Z_OK: + return n; + + case Z_STREAM_END: + r = zrdr.next_out - (ubyte*)buf; + n -= r; + zrdr.avail_in = zrdr.rdr.read(zrdr.impl, 1, arrlen(zrdr.buf), zrdr.buf); + if (!zrdr.avail_in) { + return r; + } + zrdr.next_in = zrdr.buf; + goto READ; + + case Z_NEED_DICT: + errorf("zlib: need input dictionary"); + goto ERROR; + + case Z_STREAM_ERROR: + errorf("zlib: inconsistent stream structure"); + goto ERROR; + } +ERROR: + flate·closereader(rdr); + return -1; +} + +// ----------------------------------------------------------------------- +// Writing + +struct flate·Writer +{ + io·Writer wtr; + void* impl; + + union { + struct buffer; + buffer b; + }; +}; + +flate·Writer* +flate·openwriter(io·Writer wtr, void* w, mem·Allocator mem, void* m) +{ + error err; + flate·Writer *zwtr; + + zwtr = mem.alloc(m, 1, sizeof(*zwtr)); + zwtr->zalloc = (void *(*)(void *, unsigned int, unsigned int))mem.alloc; + zwtr->zfree = mem.free; + zwtr->opaque = m; + zwtr->avail_in = 0; + + err = deflateInit(&zwtr->b.z, Z_DEFAULT_COMPRESSION); + + switch (err) { + case Z_OK: + return zwtr; + + case Z_MEM_ERROR: + errorf("zlib: not enough memory"); + goto ERROR; + + case Z_VERSION_ERROR: + errorf("zlib: incompatible version"); + goto ERROR; + + case Z_STREAM_ERROR: + errorf("zlib: incorrect compression level"); + goto ERROR; + + default: + errorf("zlib: unrecognized error code"); + } +ERROR: + errorf("zlib: msg: %s", zwtr->msg); + mem.free(m, zwtr); + return nil; +} + +error +flate·closewriter(flate·Writer *wtr) +{ + int err; + flate·Writer zwtr; + + zwtr = *wtr; + err = deflateEnd(&zwtr.b.z); + if (err != Z_OK) { + errorf("zlib: failed to cleanup"); + return err; + } + zwtr.zfree(zwtr.opaque, wtr); + + return 0; +} + +int +flate·write(flate·Writer *wtr, int sz, int n, void *buf) +{ + int r; + int err; + flate·Writer zwtr; + + zwtr = *wtr; + zwtr.next_out = buf; +DEFLATE: + zwtr.avail_out = n*sz; + err = deflate(&zwtr.z, Z_NO_FLUSH); + + switch (err) { + case Z_STREAM_END: + return n; + + case Z_OK: + r = (zwtr.next_out - (ubyte*)buf)/sz; + n -= r; + if (!n) { + return r; + } + buf += n; + goto DEFLATE; + + case Z_STREAM_ERROR: + errorf("zlib: bad input"); + goto ERROR; + + case Z_BUF_ERROR: + if (!zwtr.avail_in) { + zwtr.avail_in += zwtr.wtr.write(zwtr.impl, 1, arrlen(zwtr.buf), buf); + if (!zwtr.avail_in) { + errorf("reader: failed read"); + goto ERROR; + } + goto DEFLATE; + } + } + + return 0; +ERROR: + errorf("zlib: %s", zwtr.msg); + return -1; +} diff --git a/sys/base/fs.c b/sys/base/fs.c new file mode 100644 index 0000000..6d5ee0f --- /dev/null +++ b/sys/base/fs.c @@ -0,0 +1,192 @@ +#include +#include +#include +#include + +/* + * path history + */ +struct Key +{ + ino_t ino; + dev_t dev; +}; + +#define hash(k) ((int32)k.ino ^ (int32)k.dev) +#define equal(k1, k2) (k1.ino == k2.ino && k1.dev == k2.dev) + +struct fs·History +{ + SET_STRUCT_BODY(struct Key); +}; + +static +int +morehistory(fs·History *h, int n) +{ + SET_GROW(h, struct Key, n, hash, sys·Memory, nil); +} + +static +int +addentry(fs·History *h, struct Key key, int *err) +{ + SET_PUT(h, key, hash, equal, morehistory, err); +} + +static +void +forget(fs·History *h) +{ + if (!h) + return; + + SET_RESET(h); +} + +static +void +delete(fs·History *h) +{ + SET_FREE(h, sys·Memory, nil); +} + +#undef hash +#undef equal + +static +char * +strcpyn(char *dst, char *src, int n) +{ + while(*src && n-- > 0) + *dst++ = *src++; + + *dst = 0; + return dst; +} + +/* + * main functions + */ + +int +fs·init(fs·Walker *fs, char *path) +{ + fs->base = fs->end = fs->path; + + if (!path || !path[0]) { + path = getcwd(fs->path, arrlen(fs->path)); + if (!path) + return 1; + fs->end += strlen(path); + } else + fs->end = strcpyn(fs->base, path, arrlen(fs->path)); + + if (fs->path[0] != '/') + fs->fd = AT_FDCWD; + + if (!fs->hist && !(fs->flags & fs·nolinks)) + fs->hist = calloc(1, sizeof(*fs->hist)); + + return 0; +} + +void +fs·fini(fs·Walker *fs) +{ + if (fs->hist) { + delete(fs->hist); + free(fs->hist); + } +} + +void +fs·walk(fs·Walker *fs) +{ + char *e, *b; + DIR *dir; + int new, fd, ofd, flags; + fs·History *h; + struct dirent *d; + io·Stat cwd; + struct fs·Entry *it; + + flags = 0; + if(fs->flags & fs·nolinks) + flags |= AT_SYMLINK_NOFOLLOW; + + /* get info for base relative to current fd */ + if(fstatat(fs->fd, fs->base, &cwd, flags) < 0){ + if(fs->flags & fs·verbose) + errorf("stat: %s", fs->path); + return; + } + + /* if we hit a file, finish! */ + if(!S_ISDIR(cwd.st_mode)) { + fs->func(fs->data, fs->base, fs->path, &cwd); + return; + } + + /* have we been here before? (cycle detection) */ + /* if not, add to our path history */ + if (!(fs->flags & fs·nolinks)) { + addentry(fs->hist, (struct Key){.dev=cwd.st_dev, .ino=cwd.st_ino}, &new); + if (!new) + return; + } + + /* + * operate on directory first if preorder traversal + * truncate recursion if callback returns an error code + */ + if (fs->flags & fs·preorder) { + if (fs->func(fs->data, fs->base, fs->path, &cwd)) + return; + } + + /* open directory */ + if(!fs->max || fs->lev + 1 < fs->max) { + fd = openat(fs->fd, fs->base, O_RDONLY | O_CLOEXEC | O_DIRECTORY); + if (fd < 0) + errorf("open %s:", fs->path); + + if (!(dir=fdopendir(fd))) { + if(fs->flags & fs·verbose) + errorf("fdopendir: %s", fs->path); + return; + } + + ofd = fs->fd, fs->fd = fd; + + /* traverse children */ + e = fs->end, b = fs->base; + if (fs->end[-1] != '/') + *fs->end++ = '/'; + + fs->base = fs->end; + while((d = readdir(dir))) { + if (*d->d_name == '.') + if (d->d_name[1] == 0 || /* . */ + (d->d_name[1] == '.' && d->d_name[2] == 0)) /* .. */ + continue; + + fs->end = strcpyn(fs->base, d->d_name, arrend(fs->path) - fs->base); + + fs->lev++; + fs·walk(fs); + fs->lev--; + } + *e = 0; + fs->fd = ofd; + fs->end = e, fs->base = b; + closedir(dir); + } + + /* operate on directory if postorder (default) traversal */ + if (!(fs->flags & fs·preorder)) + fs->func(fs->data, fs->base, fs->path, &cwd); + + if (!fs->lev) + forget(fs->hist); +} diff --git a/sys/base/genutf8.py b/sys/base/genutf8.py new file mode 100755 index 0000000..9ce9975 --- /dev/null +++ b/sys/base/genutf8.py @@ -0,0 +1,140 @@ +#!/bin/python + +preamble = """ +Rune* +rbsearch(Rune c, Rune *t, int n, int nelem) +{ + Rune *p; + int m; + + while (n > 1) { + m = n >> 1; + p = t + m*nelem; + if (c >= p[0]) { + t = p; + n -= m; + } else n = m; + } + + if (n && c >= t[0]) return t; + else return 0; +} +""" + +def findrange(s): + rs = [] + ss = [] + b, c = None, None + for elt in sorted(s): + if b is None: + b, c = elt, elt + continue + if elt == c + 1: + c = elt + continue + else: + if b == c: + ss.append(b) + else: + rs.append( (b, c) ) + b, c = elt, elt + return rs, ss + +def puttab(s, fd, name): + lname = name.lower() + rs, ss = findrange(s) + + rtabn = "%s_rtab"%lname + stabn = "%s_stab"%lname + + fd.write("static Rune %s[] = {" % rtabn) + for i, elts in enumerate(sorted(rs)): + fd.write("0x%04x, 0x%04x,\n" % (elts[0], elts[1])) + fd.write("};\n\n") + + if len(ss) > 0: + fd.write("static Rune %s[] = {" % stabn) + for i, elt in enumerate(sorted(ss)): + if i % 2 == 0: + fd.write("\n") + fd.write("0x%04x, "%(elt)) + fd.write("\n};\n\n") + + fd.write("int\n") + fd.write("utf8·%s(Rune c)\n"%name) + fd.write("{\n") + fd.write("\tRune *p;\n\n") + fd.write("\tp = rbsearch(c, %s, arrlen(%s)/2, 2);\n"%(rtabn,rtabn)) + fd.write("\tif (p && c >= p[0] && c <= p[1])\n") + fd.write("\t\t\treturn 1;\n") + if len(ss) > 0: + fd.write("\tp = rbsearch(c, %s, arrlen(%s), 1);\n"%(stabn,stabn)) + fd.write("\tif (p && c == p[0])\n") + fd.write("\t\treturn 1;\n") + fd.write("\n\treturn 0;\n") + fd.write("}\n\n") + +RUNEMAX = 0x10FFFF + +if __name__ == "__main__": + alphas = set() + lowers = set() + uppers = set() + titles = set() + digits = set() + spaces = set() + + tolower = {} + toupper = {} + totitle = {} + + with open("data/UnicodeData.txt") as utf8: + for runedata in utf8: + fields = runedata[:-1].split(";") + rune = int(fields[0], 16) + kind = fields[2] + + assert rune < RUNEMAX + + if kind[0] == 'Z': + spaces.add(rune) + elif kind[0] == 'L': + alphas.add(rune) + + if kind[1] == "l": + lowers.add(rune) + elif kind[1] == "u": + uppers.add(rune) + elif kind[1] == "t": + titles.add(rune) + + if (fields[12] != ""): + toupper[rune] = int(fields[12], 16) + + if (fields[13] != ""): + tolower[rune] = int(fields[13], 16) + + if (fields[14] != ""): + totitle[rune] = int(fields[14], 16) + + elif kind == 'Nd': + digits.add(rune) + + # Some annoying exceptions... + # These are counted as control characters + spaces.add(ord('\t')) + spaces.add(ord('\n')) + spaces.add(ord('\f')) + spaces.add(ord('\v')) + spaces.add(0x85) + spaces.add(0xfeff) + + # TODO: Add toLower/toUpper/toTitle... + with open("utf8_gen.c", "w") as utf8: + utf8.write(preamble) + puttab(spaces, utf8, "IsSpace") + puttab(lowers, utf8, "IsLower") + puttab(uppers, utf8, "IsUpper") + puttab(titles, utf8, "IsTitle") + puttab(alphas, utf8, "IsLetter") + puttab(digits, utf8, "IsDigit") diff --git a/sys/base/gz.c b/sys/base/gz.c new file mode 100644 index 0000000..d84397f --- /dev/null +++ b/sys/base/gz.c @@ -0,0 +1,108 @@ +#include +#include + +#include + +// ----------------------------------------------------------------------- +// interface implementations + +/* actual interfaces */ +io·Reader gz·Reader = (io·Reader){ gz·read }; +io·Peeker gz·Peeker = (io·Peeker){ gz·getbyte, gz·ungetbyte }; +io·Seeker gz·Seeker = (io·Seeker){ gz·seek, gz·tell }; +io·PeekReader gz·Peekreader = (io·PeekReader){ gz·read, gz·getbyte, gz·ungetbyte }; + +io·Writer gz·Writer = (io·Writer){ gz·write }; +io·Putter gz·Putter = (io·Putter){ gz·putbyte, gz·putstring }; +io·PutWriter gz·PutWriter = (io·PutWriter){ gz·write, gz·putbyte, gz·putstring }; + +io·ReadWriter gz·ReadWriter = (io·ReadWriter){ gz·read, gz·write }; + +// ----------------------------------------------------------------------- +// functions implementations + +gz·Stream* +gz·open(byte *path, byte *mode) +{ + return gzopen(path, mode); +} + +error +gz·close(gz·Stream* s) +{ + return gzclose(s); +} + +int +gz·read(gz·Stream *s, int sz, int n, void* buf) +{ + return gzread(s, buf, n*sz); +} + +int +gz·readln(gz·Stream *s, int n, byte *buf) +{ + byte* b; + b = gzgets(s, buf, n); + + return strlen(b); +} + +byte +gz·getbyte(gz·Stream *s) +{ + // NOTE: Can't call macro + byte b[2]; + gzread(s, b, 1); + + return b[0]; +} + +error +gz·ungetbyte(gz·Stream *s, byte c) +{ + return gzungetc(c, s); +} + +int +gz·write(gz·Stream *s, int sz, int n, void* buf) +{ + return gzwrite(s, buf, n*sz); +} + +error +gz·putbyte(gz·Stream *s, byte c) +{ + return gzputc(s, c); +} + +error +gz·putstring(gz·Stream *s, byte *str) +{ + return gzputs(s, str); +} + +int +gz·printf(gz·Stream *s, byte *fmt, ...) +{ + error err; + + va_list args; + va_start(args, fmt); + err = gzprintf(s, fmt, args); + va_end(args); + + return err; +} + +error +gz·flush(gz·Stream *s) +{ + return gzflush(s, Z_FINISH); +} + +int +gz·seek(gz·Stream *s, long off, enum SeekPos whence) +{ + return gzseek(s, off, whence); +} diff --git a/sys/base/io.c b/sys/base/io.c new file mode 100644 index 0000000..f34a10f --- /dev/null +++ b/sys/base/io.c @@ -0,0 +1,176 @@ +#include +#include + +// ----------------------------------------------------------------------- +// interface implementations + +/* casting functions */ +static +int +·read(void *rdr, int size, int n, void *buf) +{ + return io·read((io·Stream *)rdr, size, n, buf); +} + +static +byte +·get(void *rdr) +{ + return io·getbyte((io·Stream *)rdr); +} + +static +error +·unget(void *rdr, byte c) +{ + return io·ungetbyte((io·Stream *)rdr, c); +} + +static +int +·write(void *wtr, int sz, int n, void *buf) +{ + return io·write((io·Stream *)wtr, sz, n, buf); +} + +static +error +·put(void *wtr, byte c) +{ + return io·putbyte((io·Stream *)wtr, c); +} + +static +int +·puts(void *wtr, string s) +{ + return io·putstring((io·Stream *)wtr, s); +} + +static +int +·seek(void *skr, long off, enum SeekPos whence) +{ + return io·seek((io·Stream *)skr, off, whence); +} + +static +long +·tell(void *skr) +{ + return io·tell((io·Stream *)skr); +} + +/* actual interfaces */ +io·Reader sys·Reader = (io·Reader){ ·read }; +io·Seeker sys·Seeker = (io·Seeker){ ·seek, ·tell }; +io·Peeker sys·Peeker = (io·Peeker){ ·get, ·unget }; +io·SeekReader sys·SeekReader = (io·SeekReader){ ·seek, ·tell, ·read }; +io·PeekReader sys·PeekReader = (io·PeekReader){ ·read, ·get, ·unget }; + +io·Writer sys·Writer = (io·Writer){ ·write }; +io·Putter sys·Putter = (io·Putter){ ·put, ·puts }; +io·PutWriter sys·PutWriter = (io·PutWriter){ ·write, ·put, ·puts }; + +io·ReadWriter sys·ReadWriter = (io·ReadWriter){ ·read, ·write }; + +// ----------------------------------------------------------------------- +// open/close + +io·Stream* +io·open(byte *name, byte *mode) +{ + return fopen(name, mode); +} + +int +io·fd(io·Stream *s) +{ + return fileno(s); +} + +error +io·stat(io·Stream *s, io·Stat *buf) +{ + return fstat(fileno(s), buf); +} + +error +io·close(io·Stream *s) +{ + return fclose(s); +} + +// ----------------------------------------------------------------------- +// reading + +byte +io·getbyte(io·Stream *s) +{ + return fgetc(s); +} + +error +io·ungetbyte(io·Stream *s, byte c) +{ + return ungetc(c, s); +} + +int +io·read(io·Stream *s, int sz, int n, void *buf) +{ + return fread(buf, sz, n, s); +} + +int +io·readln(io·Stream *s, int n, byte* buf) +{ + byte* b; + b = fgets(buf, n+1, s); + if (b == nil) + return -1; + + return strlen(buf); +} + +// ----------------------------------------------------------------------- +// writing + +error +io·putbyte(io·Stream *s, byte c) +{ + return fputc(c, s); +} + +int +io·putstring(io·Stream *s, string str) +{ + return fputs(str, s); +} + +int +io·write(io·Stream *s, int sz, int n, void *buf) +{ + return fwrite(buf, sz, n, s); +} + +int +io·flush(io·Stream *s) +{ + return fflush(s); +} + +// ----------------------------------------------------------------------- +// seek + +int +io·seek(io·Stream *s, long off, enum SeekPos origin) +{ + return fseek(s, off, origin); +} + +long +io·tell(io·Stream *s) +{ + return ftell(s); +} diff --git a/sys/base/memory.c b/sys/base/memory.c new file mode 100644 index 0000000..a1a47c5 --- /dev/null +++ b/sys/base/memory.c @@ -0,0 +1,216 @@ +#include +#include + +static +void +·free(void *_, void *ptr) { + return free(ptr); +} + +static +void * +·alloc(void *_, uint n, ulong size) { + return malloc(n*size); +} + +static +void * +·calloc(void *_, uint n, ulong size) { + return calloc(n, size); +} + +static +void * +·realloc(void *_, void *ptr, uint n, ulong size) { + return realloc(ptr, n*size); +} + +mem·Allocator sys·Memory = { + .alloc = ·calloc, + .free = ·free +}; + + +// ------------------------------------------------------------------------- +// Dynamic buffer. + +/* Grow to particular size */ +void* +·bufgrow(void* buf, vlong newLen, vlong eltsize) +{ + assert(bufcap(buf) <= (SIZE_MAX - 1) / 2); + + vlong newCap = MAX(16, MAX(1 + 2 * bufcap(buf), newLen)); + + assert(newLen <= newCap); + assert(newCap <= (SIZE_MAX - offsetof(BufHdr, buf)) / eltsize); + + vlong newSize = offsetof(BufHdr, buf) + newCap * eltsize; + + BufHdr* newHdr; + if (buf) { + newHdr = bufhdr(buf); + newHdr = (BufHdr*)realloc((void*)newHdr, newSize); + } else { + newHdr = (BufHdr*)malloc(newSize); + newHdr->len = 0; + } + + newHdr->cap = newCap; + return (void*)newHdr->buf; +} + +/* Pop out a value */ +void +·bufdel(void *buf, int i, vlong eltsize) +{ + int n; + byte *b; + byte stk[1024]; + assert(eltsize < sizeof(stk)); + + b = (byte*)buf; + if(n = buflen(buf), i < n) { + memcpy(stk, b+eltsize*i, eltsize); + memcpy(b+eltsize*i, b+eltsize*(i+1), eltsize*(n-i-1)); + memcpy(b+eltsize*(n-1), stk, eltsize); + } + bufhdr(buf)->len--; +} + +// ------------------------------------------------------------------------- +// Arena allocator + +#define ARENA_ALIGN 8 +#define ARENA_BLOCK_SIZE 1024 * 1024 + +#define ALIGN_DOWN(n, a) ((n) & ~((a)-1)) +#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a)) +#define ALIGN_DOWN_PTR(p, a) ((void*)ALIGN_DOWN((uintptr)(p), (a))) +#define ALIGN_UP_PTR(p, a) ((void*)ALIGN_UP((uintptr)(p), (a))) + +struct Block +{ + struct Block *next; + byte buf[]; +}; + +struct mem·Arena +{ + void *heap; + mem·Allocator mem; + + byte *off; + byte *end; + struct Block *curr; + struct Block first; +}; + +mem·Arena* +mem·makearena(mem·Allocator from, void *impl) +{ + mem·Arena *a = from.alloc(impl, 1, sizeof(*a) + ARENA_BLOCK_SIZE); + a->mem = from; + a->heap = impl; + a->off = a->first.buf; + a->end = a->first.buf + ARENA_BLOCK_SIZE; + a->curr = &a->first; + a->first.next = nil; + + return a; +} + +static +void +grow(mem·Arena *a, vlong min) +{ + uintptr size; + struct Block *blk; + + size = ALIGN_UP(MAX(min, ARENA_BLOCK_SIZE), ARENA_ALIGN); + blk = a->mem.alloc(a->heap, 1, sizeof(*blk) + size); + a->off = blk->buf; + a->end = a->off + size; + + assert(a->curr->next == nil); + assert(a->off == ALIGN_DOWN_PTR(a->off, ARENA_ALIGN)); + + a->curr->next = blk; + a->curr = blk; +} + +void* +mem·arenaalloc(mem·Arena *a, uint n, ulong size) +{ + if(!n) { + return nil; + } + + void *ptr; + // TODO(nnoll): check for overflow + size = n * size; + + if (size > (ulong)(a->end - a->off)) { + grow(a, size); + assert(size <= (uintptr)(a->end - a->off)); + } + + ptr = a->off; + a->off = ALIGN_UP_PTR(a->off + size, ARENA_ALIGN); + + assert(a->off <= a->end); + assert(ptr == ALIGN_DOWN_PTR(ptr, ARENA_ALIGN)); + + return ptr; +} + +void +mem·freearena(mem·Arena *a) +{ + struct Block *it, *next; + + it = a->first.next; + while (it != nil) { + next = it->next; + a->mem.free(a->heap, it); + it = next; + } + + a->mem.free(a->heap, a); +} + +static +void* +·arenaalloc(void *heap, uint n, ulong size) +{ + return mem·arenaalloc(heap, n, size); +} + +static +void +·arenafree(void *heap, void *ptr) +{ + /* no-op */ +} + +mem·Allocator mem·ArenaAllocator = { + .alloc = ·arenaalloc, + .free = ·arenafree, +}; + +// ------------------------------------------------------------------------- +// Generalized memory helpers + +void +memset64(void *dst, uint64 val, uintptr size) +{ + intptr i; + + for (i = 0; i < (size & (~7)); i += 8) { + memcpy((byte*)dst + i, &val, 8); + } + + for (; i < size; i++) { + ((byte*)dst)[i] = ((byte*)&val)[i&7]; + } +} diff --git a/sys/base/mmap.c b/sys/base/mmap.c new file mode 100644 index 0000000..6edfe28 --- /dev/null +++ b/sys/base/mmap.c @@ -0,0 +1,43 @@ +#include +#include + +#include + +mmap·Reader +mmap·open(byte *filename) +{ + int fd; + int err; + void *buf; + io·Stream *s; + io·Stat st; + + s = io·open(filename, "r"); + fd = io·fd(s); + err = io·stat(s, &st); + if (err) { + errorf("file stat: error code %d", err); + goto ERROR; + } + + buf = mmap(nil, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (!buf) { + errorf("mmap: failed"); + goto ERROR; + } + // NOTE: posix systems require that reference kept to mmap file after fd is closed + io·close(s); + return (mmap·Reader){.len=st.st_size, .buf=buf}; + +ERROR: + io·close(s); + return (mmap·Reader){ 0 }; +} + +error +mmap·close(mmap·Reader rdr) +{ + munmap(rdr.buf, rdr.len); + + return 0; +} diff --git a/sys/base/os.c b/sys/base/os.c new file mode 100644 index 0000000..d5277b5 --- /dev/null +++ b/sys/base/os.c @@ -0,0 +1,30 @@ +#include +#include + +int +os·exists(byte *path, int flag) +{ + return access(path, flag) == 0; +} + +int +os·sep(void) +{ +#if defined(UNIX) || defined(__linux__) + return '/'; +#elif defined(WIN32) + return '\\'; +#else + panicf("unrecognized operating system"); + return '\0'; +#endif +} + +byte* +os·basename(byte *path) +{ + byte *sep; + + sep = utf8·findrrune(path, os·sep()); + return (sep == nil) ? path : sep+1; +} diff --git a/sys/base/random.c b/sys/base/random.c new file mode 100644 index 0000000..16a8737 --- /dev/null +++ b/sys/base/random.c @@ -0,0 +1,303 @@ +#include +#include + +// ---------------------------------------------------------------------------- +// Internal structure + +uint64 +rol64(uint64 x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +typedef struct Rng { + uint64 s[4]; +} Rng; + +uint64 +xoshiro256ss(Rng *state) +{ + uint64 *s = state->s; + uint64 result = rol64(s[1] * 5, 7) * 9; + uint64 t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + s[3] = rol64(s[3], 45); + + return result; +} + +typedef struct Mix +{ + uint64 s; +} Mix; + +uint64 +splitmix64(struct Mix *state) { + uint64 result = state->s; + + state->s = result + 0x9E3779B97f4A7C15; + result = (result ^ (result >> 30)) * 0xBF58476D1CE4E5B9; + result = (result ^ (result >> 27)) * 0x94D049BB133111EB; + return result ^ (result >> 31); +} + +static Rng RNG; + +// ---------------------------------------------------------------------------- +// Exported functions + +/* Initializes the global RNG */ + +error +rng·init(uint64 seed) +{ + Mix smstate = {seed}; + + for (int i=0; i < 4; i++) + RNG.s[i] = splitmix64(&smstate); + + return 0; +} + +/* Returns a random float64 between 0 and 1 */ +double +rng·random(void) +{ + uint64 r = xoshiro256ss(&RNG); + return (double)r / (double)UINT64_MAX; +} + +double +rng·exponential(double lambda) +{ + double f; + + f = rng·random(); + return -log(1 - f)/lambda; +} + +static inline +double +erfinv(double x) +{ + /* useful constants */ + static double + a0 = 1.1975323115670912564578e0, a1 = 4.7072688112383978012285e1, + a2 = 6.9706266534389598238465e2, a3 = 4.8548868893843886794648e3, + a4 = 1.6235862515167575384252e4, a5 = 2.3782041382114385731252e4, + a6 = 1.1819493347062294404278e4, a7 = 8.8709406962545514830200e2, + + b0 = 1.0000000000000000000e0, b1 = 4.2313330701600911252e1, + b2 = 6.8718700749205790830e2, b3 = 5.3941960214247511077e3, + b4 = 2.1213794301586595867e4, b5 = 3.9307895800092710610e4, + b6 = 2.8729085735721942674e4, b7 = 5.2264952788528545610e3, + + c0 = 1.42343711074968357734e0, c1 = 4.63033784615654529590e0, + c2 = 5.76949722146069140550e0, c3 = 3.64784832476320460504e0, + c4 = 1.27045825245236838258e0, c5 = 2.41780725177450611770e-1, + c6 = 2.27238449892691845833e-2, c7 = 7.74545014278341407640e-4, + + d0 = 1.4142135623730950488016887e0, d1 = 2.9036514445419946173133295e0, + d2 = 2.3707661626024532365971225e0, d3 = 9.7547832001787427186894837e-1, + d4 = 2.0945065210512749128288442e-1, d5 = 2.1494160384252876777097297e-2, + d6 = 7.7441459065157709165577218e-4, d7 = 1.4859850019840355905497876e-9, + + e0 = 6.65790464350110377720e0, e1 = 5.46378491116411436990e0, + e2 = 1.78482653991729133580e0, e3 = 2.96560571828504891230e-1, + e4 = 2.65321895265761230930e-2, e5 = 1.24266094738807843860e-3, + e6 = 2.71155556874348757815e-5, e7 = 2.01033439929228813265e-7, + + f0 = 1.414213562373095048801689e0, f1 = 8.482908416595164588112026e-1, + f2 = 1.936480946950659106176712e-1, f3 = 2.103693768272068968719679e-2, + f4 = 1.112800997078859844711555e-3, f5 = 2.611088405080593625138020e-5, + f6 = 2.010321207683943062279931e-7, f7 = 2.891024605872965461538222e-15, + + Ln2 = 0.693147180559945309417232121458176568075500134360255254120680009; + + int s; + double r, z1, z2; + + if(x < 0) { + s = -1; + x = -x; + } else { + s = +1; + } + + if(x <= 0.85) { + r = 0.180625 - 0.25*x*x; + z1 = ((((((a7*r+a6)*r+a5)*r+a4)*r+a3)*r+a2)*r+a1)*r + a0; + z2 = ((((((b7*r+b6)*r+b5)*r+b4)*r+b3)*r+b2)*r+b1)*r + b0; + return s*(x*z1) / z2; + } + r = sqrt(Ln2 - log(1.0-x)); + if(r <= 5.0) { + r -= 1.6; + z1 = ((((((c7*r+c6)*r+c5)*r+c4)*r+c3)*r+c2)*r+c1)*r + c0; + z2 = ((((((d7*r+d6)*r+d5)*r+d4)*r+d3)*r+d2)*r+d1)*r + d0; + } else { + r -= 5.0; + z1 = ((((((e7*r+e6)*r+e5)*r+e4)*r+e3)*r+e2)*r+e1)*r + e0; + z2 = ((((((f7*r+f6)*r+f5)*r+f4)*r+f3)*r+f2)*r+f1)*r + f0; + } + + return s*z1/z2; +} + + +double +rng·normal(void) +{ + double f; + f = rng·random(); + + return sqrt(2)*erfinv(2*f-1); +} + +/* Returns true or false on success of trial */ +bool +rng·bernoulli(double f) +{ + return rng·random() < f; +} + +/* Returns a random integer between 0 and max + * TODO: Modulo throws off uniformity + */ +uint64 +rng·randi(int max) +{ + uint64 r = xoshiro256ss(&RNG); + return r % max; +} + +/* + * Ahrens, J. H., & Dieter, U. (1982). + * Computer Generation of Poisson Deviates from Modified Normal Distributions. + */ +static double factorial[10] = {1., 1., 2., 6., 24., 120., 720., 5040., 40320., 362880.}; +static double coeffs[9] = { + -.500000000, +.333333333, -.249999856, + +.200011780, -.166684875, +.142187833, + -.124196313, +.125005956, -.114265030, +}; + +static inline +double +log1pmx(double x, double off) +{ + int i; + double r, t; + + if(-0.25 < x && x < 0.25) { + r = 0; + t = 1; + for(i=0;i=L) + return K; +stepS: + U = rng·random(); + if(d*U >= (mu-K)*(mu-K)*(mu-K)) + return K; +stepP: + if(G < 0) + goto stepE; +stepQ: + c = procf(mu, s, K, &px, &py, &fx, &fy); +stepE: + E = rng·exponential(1.0); + U = rng·random(); + U = U + U - 1; + T = 1.8 + copysign(E,U); + if(T < 0.6744) + goto stepE; + K = floor(mu + s*T); + c = procf(mu, s, K, &px, &py, &fx, &fy); +stepH: + if(c*fabs(U) > (py*exp(px + E) - fy*exp(fx + E))) + goto stepE; + return K; +} + +uint64 +rng·poisson(double mean) +{ + int64 n; + double z; + + if(mean<10.0) { + for(n=0, z=rng·exponential(1.0); z +#include +#include + +// ----------------------------------------------------------------------- +// ints + +void +sort·ints(uintptr sz, int arr[]) +{ + int tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·int8s(uintptr sz, int8 arr[]) +{ + int8 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·int16s(uintptr sz, int16 arr[]) +{ + int16 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·int32s(uintptr sz, int32 arr[]) +{ + int32 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·int64s(uintptr sz, int64 arr[]) +{ + int64 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·uints(uintptr sz, uint arr[]) +{ + uint tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·uint8s(uintptr sz, uint8 arr[]) +{ + uint8 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·uint16s(uintptr sz, uint16 arr[]) +{ + uint16 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·uint32s(uintptr sz, uint32 arr[]) +{ + uint32 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·uint64s(uintptr sz, uint64 arr[]) +{ + uint64 tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +// ----------------------------------------------------------------------- +// floats + +void +sort·floats(uintptr sz, float arr[]) +{ + float tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +void +sort·doubles(uintptr sz, double arr[]) +{ + double tmp; +#define LESS(i, j) (arr[i] < arr[j]) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} + +// ----------------------------------------------------------------------- +// strings + +void +sort·strings(uintptr sz, byte* arr[]) +{ + byte *tmp; +#define LESS(i, j) (strcmp(arr[i], arr[j]) < 0) +#define SWAP(i, j) (tmp = arr[i], arr[i] = arr[j], arr[j] = tmp) + QSORT(sz, LESS, SWAP); +#undef SWAP +#undef LESS +} diff --git a/sys/base/string.c b/sys/base/string.c new file mode 100644 index 0000000..8973a4e --- /dev/null +++ b/sys/base/string.c @@ -0,0 +1,560 @@ +#include +#include + +#define MAX_STRING_ALLOC 1024 * 1024 + +typedef struct Hdr +{ + vlong len; + vlong cap; + byte buf[]; +} Hdr; + +// ------------------------------------------------------------------------- +// UTF-8 functions + +#define Bit(i) (7-(i)) +/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */ +#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) +/* 0000 0000 0000 0111 1111 1111 */ +#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) + +enum +{ + Bitx = Bit(1), + Tx = Tbyte(1), + Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, + + Maskx = (1 << Bitx) - 1, /* 0011 1111 */ + Testx = Maskx ^ 0xff, /* 1100 0000 */ + + SurrogateMin = 0xD800, + SurrogateMax = 0xDFFF, + Bad = RuneErr, +}; + +int +utf8·bytetorune(rune* r, byte* s) +{ + int c[UTFmax], i; + rune l; + + c[0] = *(ubyte*)(s); + if(c[0] < Tx) { + *r = c[0]; + return 1; + } + + l = c[0]; + for(i = 1; i < UTFmax; i++) { + c[i] = *(ubyte*)(s+i); + c[i] ^= Tx; + if (c[i] & Testx) goto bad; + + l = (l << Bitx) | c[i]; + if(c[0] < Tbyte(i + 2)) { + l &= RuneX(i + 1); + if (i == 1) { + if (c[0] < Tbyte(2) || l <= Rune1) + goto bad; + } else if (l <= RuneX(i) || l > RuneMax) + goto bad; + if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) + goto bad; + + *r = l; + return i + 1; + } + } +bad: + *r = RuneErr; + return 1; +} + +int +utf8·runetobyte(byte* s, rune* r) +{ + int i, j; + rune c; + + c = *r; + if (c <= Rune1) { + s[0] = c; + return 1; + } + + for (i = 2; i < UTFmax + 1; i++){ + if (i == 3){ + if (c > RuneMax) + c = RuneErr; + if (SurrogateMin <= c && c <= SurrogateMax) + c = RuneErr; + } + if (c <= RuneX(i) || i == UTFmax) { + s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); + for(j = 1; j < i; j++) + s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); + return i; + } + } + + return UTFmax; +} + +int +utf8·runelen(rune r) +{ + byte s[10]; + return utf8·runetobyte(s, &r); +} + +int +utf8·fullrune(byte* s, int n) +{ + int i; + rune c; + + if (n <= 0) return 0; + c = *(ubyte*) s; + if (c < Tx) return 1; + + for (i = 3; i < UTFmax + 1; i++) { + if (c < Tbyte(i)) return n >= i - 1; + } + + return n >= UTFmax; +} + +byte* +utf8·findrune(byte* s, long c) +{ + long c1; + rune r; + int n; + + if (c < RuneSync) return strchr(s, c); + + for (;;) { + c1 = *(ubyte*)s; + if (c1 < RuneSelf) { + if (c1 == 0) return nil; + if (c1 == c) return s; + s++; + continue; + } + n = utf8·bytetorune(&r, s); + if (r == c) return s; + s += n; + } + + return nil; +} + +byte* +utf8·findrrune(byte* s, long c) +{ + long c1; + rune r; + byte *l; + + if (c < RuneSync) + return strrchr(s, c); + + l = nil; + for (;;) { + c1 = *(ubyte*)s; + if (c1 < RuneSelf) { + if (c1 == 0) return l; + if (c1 == c) l = s; + s++; + continue; + } + c1 = utf8·bytetorune(&r, s); + if (r == c) + l = s; + s += c1; + } + + return nil; +} + +#undef Bit +#undef Tbyte +#undef RuneX + +#include ".generated/utf8.c" + +// ------------------------------------------------------------------------- +// Dynamic string functions + +// New returns a new dynamic string object, initialized from the given C string. +// len defines the length of the C substring that we will copy into our buffer. +// The backing buffer will have capacity cap. +string +str·makecap(const byte *s, vlong len, vlong cap) +{ + struct Hdr* h; + + h = malloc(sizeof(*h) + cap + 1); + if (s == nil) memset(h, 0, sizeof(*h)); + + if (h == nil) return nil; // Allocation failed. + + h->len = (s == nil) ? 0 : len; + h->cap = cap; + + if (cap < h->len) goto cleanup; + + if (s != nil && cap > 0) { + memcpy(h->buf, s, h->len); + memset(h->buf + h->len, '\0', h->cap - h->len + 1); + } + + return h->buf; + +cleanup: + free(h); + panicf("Attempted to create a string with less capacity than length"); + return nil; +} + +// New returns a new dynamic string object, initialized from the given C string. +// The backing buffer capacity is equivalent to the string length. +string +str·makelen(const byte *s, vlong len) +{ + vlong sl = (!s) ? 0 : strlen(s); + if (sl < len) panicf("attempted to take a bigger substring than string length"); + + vlong cap = (len == 0) ? 1 : len; + return str·makecap(s, len, cap); +} + +// New returns a new dynamic string object, initialized from the given C string. +// The backing buffer capacity is equivalent to the string length. +string +str·make(const byte *s) +{ + vlong len = (!s) ? 0 : strlen(s); + return str·makelen(s, len); +} + +// Newf returns a new dynamic string object +string +str·makef(const byte *fmt, ...) +{ + vlong n; + string s; + va_list args; + + va_start(args, fmt); + n = vsnprintf(nil, 0, fmt, args); + va_end(args); + + s = str·makecap(nil, 0, n); + + va_start(args, fmt); + vsnprintf(s, n + 1, fmt, args); + va_end(args); + + Hdr* h = (Hdr*)(s - sizeof(Hdr)); + h->len = n; + + return s; +} + +// Free returns memory associated to the buffer. +void +str·free(string s) +{ + free(s - sizeof(Hdr)); +} + +// Len returns the length of the string. +int +str·len(const string s) +{ + Hdr* h = (Hdr*)(s - sizeof(Hdr)); + return h->len; +} + +// Cap returns the capacity of the string buffer. +int +str·cap(const string s) +{ + Hdr* h = (Hdr*)(s - sizeof(Hdr)); + return h->cap; +} + +void +str·clear(string *s) +{ + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); + h->len = 0; + *s[0] = '\0'; +} + +// Grow ensures that the string can encompass AT LEAST delta bytes. +// If it already can, this is a NO OP. +// If it can't, the string will be reallocated. +void +str·grow(string *s, vlong delta) +{ + Hdr *h, *newh; + vlong cap = str·cap(*s); + vlong len = str·len(*s); + assert(cap >= len); // To prevent unsigned behavior + + if (cap - len >= delta) return; + + h = (Hdr*)(*s - sizeof(Hdr)); + + vlong newCap = cap + delta; + assert(newCap >= cap); // To prevent unsigned behavior + if (newCap < MAX_STRING_ALLOC) { + newCap *= 2; + } else + newCap += MAX_STRING_ALLOC; + + newh = (Hdr*)realloc(h, sizeof(*h) + newCap + 1); + if (newh == nil) return; + + memset(newh->buf + len, '\0', newCap - len); + newh->cap = newCap; + newh->len = len; + + *s = newh->buf; +} + +// Fit reallocates the string such that the buffer is exactly sized for the +// buffer. If the capacity equals the length, then the function is a NOOP. The +// byte array is unchanged. +void +str·fit(string *s) +{ + Hdr* h; + vlong cap = str·cap(*s); + vlong len = str·len(*s); + + if (cap == len) return; + + h = (Hdr*)(s - sizeof(Hdr)); + h = realloc(h, sizeof(*h) + len + 1); + h->cap = len; + + *s = h->buf; +} + +// Append will append the given null terminated C string to the string data +// structure. This variant can append a substring of length len of the given +// string to our buffer. The result is reallocated if not enough room is present +// in the buffer. +int +str·appendlen(string *s, vlong n, const byte* b) +{ + /* + bl = strlen(b); + if (n > bl) panicf("attempted to make a substring longer than string"); + */ + + str·grow(s, n); + if (*s == nil) return 0; + + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); + + memcpy(*s + str·len(*s), b, n); + h->len += n; + (*s)[h->len] = '\0'; + + return n; +} + +// Append will append the given null terminated C string to the string data +// structure. This variant will append the entire string. +int +str·append(string *s, const byte* b) +{ + return str·appendlen(s, strlen(b), b); +} + +// AppendByte will append the given byte to our string. +// NOTE: As the byte is on the stack, it is not null-terminated. +// Can not pass to the above functions. +int +str·appendbyte(string *s, const byte b) +{ + str·grow(s, 1); + if (*s == nil) return 0; + + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); + + *(*s + str·len(*s)) = b; + h->len++; + (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? + + return 1; +} + +/* + * Appendf will append the given formatted string to our buffer. + * Returns the newly minted string + */ + +int +str·appendf(string *s, const byte* fmt, ...) +{ + va_list args; + va_start(args, fmt); + int remain = str·cap(*s) - str·len(*s); + int n = vsnprintf(*s + str·len(*s), remain + 1, fmt, args); + va_end(args); + + if (n > remain) { + // If the first write was incomplete, we overwite the data again. + str·grow(s, n); + va_list args; + va_start(args, fmt); + n = vsnprintf(*s + str·len(*s), n + 1, fmt, args); + assert(n - remain <= str·cap(*s)); + va_end(args); + } + + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); + h->len += n; + + return n; +} + +// Equals returns true if string s and t are equivalent. +bool +str·equals(const string s, const string t) +{ + vlong sL = str·len(s); + vlong tL = str·len(t); + if (sL != tL) return false; + + return memcmp(s, t, sL) == 0; +} + +//------------------------------------------------------------------------ +// Utility Methods + +int +str·read(string s, int size, int n, void *buf) +{ + int len; + + len = MIN(n * size, str·len(s)); + memcpy(buf, s, len); + + return len; +} + +// Find will find the first occurence of +// substr in the string Returns -1 if nothing was found. +int +str·find(string s, const byte* substr) +{ + byte* loc = strstr(s, substr); + if (loc == nil) return -1; + return (int)(loc - s); +} + +// +// Lower will force all runes in the string to be lowercase. +void +str·lower(string s) +{ + byte *b, *e; + b = s; + e = b + str·len(s); + while (b++ != e) + *b = tolower(*b); +} + +// Upper will force all runes in the string to be uppercase. +void +str·upper(string s) +{ + byte *b, *e; + b = s; + e = b + str·len(s); + while (b++ != e) + *b = toupper(*b); +} + +// Replace will replace all occurences of the given bytes 'from' to bytes 'to' +// Edits are done in place and modify the string. +// NOTE: As of now strings from and to must be the same size. +void +str·replace(string s, const byte* from, const byte* to) +{ + vlong fromL = strlen(from); + vlong toL = strlen(to); + if (toL != fromL) { panicf("different sized replacement string not supported"); } + + vlong l = str·len(s); + vlong i = l; + vlong j = l; + + for (i = 0; i < l; i++) { + for (j = 0; j < toL; j++) { + if (s[i] == from[j]) { + s[i] = to[j]; + break; + } + } + } +} + +// Split will split the string by the given token. +// Returns a stretchy buffer of strings that result from the partition. +// It is the caller's responsibility to clean the memory. +string* +str·split(string s, const byte* tok) +{ + string* fields = nil; + vlong start = 0; + + vlong sL = str·len(s); + vlong tokL = strlen(tok); + if (sL == 0 || tokL == 0) return nil; + + buffit(fields, 5); + + for (vlong i = 0; i < sL - tokL; i++) { + if ((tokL == 1 && s[i] == tokL) || !memcmp(s + i, tok, tokL)) { + bufpush(fields, str·makelen(s + start, i - start)); + if (fields[buflen(fields) - 1] == nil) goto cleanup; + + start = i + tokL; + i += tokL - 1; + } + } + + bufpush(fields, str·makelen(s + start, sL - start)); + + return fields; + +cleanup: + for (vlong i = 0; i < buflen(fields); i++) { + str·free(fields[i]); + } + buffree(fields); + return nil; +} + +string +str·join(vlong len, byte** fields, const byte* sep) +{ + string s = str·makecap("", 0, 10); + int j = 0; + + for (j = 0; j < len; j++) { + str·append(&s, fields[j]); + if (j < len - 1) + str·appendlen(&s, 1, sep); + } + + return s; +} diff --git a/sys/base/test.c b/sys/base/test.c new file mode 100644 index 0000000..972f25a --- /dev/null +++ b/sys/base/test.c @@ -0,0 +1,170 @@ +#include +#include +#include + +#include + +uintptr +printtest(Coro *c, uintptr d) +{ + printf("--> Recieved %lu\n", d); + d = coro·yield(c, d+10); + printf("--> Now %lu\n", d); + + return d; +} + +uintptr +sequence(Coro *c, uintptr start) +{ + int d = start; + for (;;) { + coro·yield(c, d++); + } + + return d; +} + +struct PrimeMsg +{ + Coro *seq; + int p; +}; + +uintptr +filter(Coro *c, uintptr data) +{ + int x, p; + Coro *seq; + struct PrimeMsg *msg; + + // Need to copy relevant variables onto the local stack + // Data is volatile. + msg = (struct PrimeMsg*)data; + seq = msg->seq; + p = msg->p; + + for (;;) { + x = coro·yield(seq, x); + if (x % p != 0) { + x = coro·yield(c, x); + } + } + + return 0; +} + +error +test·coro() +{ + int i; + Coro *c[4]; + uintptr d; + + printf("Starting singleton test\n"); + + for (i = 0; i < arrlen(c); i++) { + c[i] = coro·make(0, &printtest); + } + + /* Singleton test */ + d = 0; + for (i = 0; i < 10; i++) { + d = coro·yield(c[0], d); + } + + printf("Starting triplet test\n"); + + /* Triplet test */ + for (i = 0; i < 10; i++) { + d = coro·yield(c[1], d); + d = coro·yield(c[2], d+100); + d = coro·yield(c[3], d+200); + } + + for (i = 0; i < arrlen(c); i++) { + coro·free(c[i]); + } + + /* Prime sieve */ + printf("Starting prime test\n"); + uintptr num; + Coro *cur, *seq[50]; + + num = 2; + seq[0] = coro·make(4096, &sequence); + cur = *seq; + + num = coro·yield(cur, num); + for (i = 1; i < arrlen(seq); i++) { + seq[i] = coro·make(4096, &filter); + struct PrimeMsg msg = { + .seq = cur, + .p = num, + }; + cur = seq[i]; + num = coro·yield(cur, (uintptr)&msg); + printf("--> prime number %lu\n", num); + } + return 0; +} + +int +less(void* a, void* b) +{ + int ai, bi; + ai = *(int*)a; + bi = *(int*)b; + + return ai - bi; +} + +error +test·sort() +{ + clock_t t; + int i, test[10000]; + for (i = 0; i < arrlen(test); i++) { + test[i] = rand(); + } + + t = clock(); + sort·ints(arrlen(test), test); + t = clock() - t; + printf("inlined code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); + + for (i = 0; i < arrlen(test); i++) { + test[i] = rand(); + } + + t = clock(); + qsort(test, arrlen(test), sizeof(int), (int (*)(const void *, const void *))less); + t = clock() - t; + printf("std qsort code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); + + /* + for (i = 1; i < arrlen(test); i++) { + if (test[i] >= test[i-1]) { + printf("%d is less that %d\n", test[i], test[i-1]); + } else { + printf("ERROR: %d is NOT less that %d\n", test[i], test[i-1]); + } + } + */ + + return 0; +} + +error +main() +{ + error err; +#if 0 + if (err = test·coro(), err) { + errorf("test fail: coroutine"); + } +#endif + if (err = test·sort(), err) { + errorf("test fail: coroutine"); + } +} -- cgit v1.2.1