aboutsummaryrefslogtreecommitdiff
path: root/sys/base/string.c
diff options
context:
space:
mode:
authorNicholas Noll <nnoll523@gmail.com>2021-10-23 11:17:25 -0700
committerNicholas Noll <nnoll523@gmail.com>2021-10-26 11:11:57 -0700
commitc8e1e71eb526475dd431443345262c2e5a627831 (patch)
treeea9f7bcbba18a13f7ba8b32fcb1433ac2f4dd8b4 /sys/base/string.c
parent40f4c7305a041d4214df117491593898d04d0134 (diff)
chore(rename): libn -> base
Diffstat (limited to 'sys/base/string.c')
-rw-r--r--sys/base/string.c560
1 files changed, 560 insertions, 0 deletions
diff --git a/sys/base/string.c b/sys/base/string.c
new file mode 100644
index 0000000..8973a4e
--- /dev/null
+++ b/sys/base/string.c
@@ -0,0 +1,560 @@
+#include <u.h>
+#include <base.h>
+
+#define MAX_STRING_ALLOC 1024 * 1024
+
+typedef struct Hdr
+{
+ vlong len;
+ vlong cap;
+ byte buf[];
+} Hdr;
+
+// -------------------------------------------------------------------------
+// UTF-8 functions
+
+#define Bit(i) (7-(i))
+/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */
+#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF)
+/* 0000 0000 0000 0111 1111 1111 */
+#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1)
+
+enum
+{
+ Bitx = Bit(1),
+ Tx = Tbyte(1),
+ Rune1 = (1 << (Bit(0)+0*Bitx)) - 1,
+
+ Maskx = (1 << Bitx) - 1, /* 0011 1111 */
+ Testx = Maskx ^ 0xff, /* 1100 0000 */
+
+ SurrogateMin = 0xD800,
+ SurrogateMax = 0xDFFF,
+ Bad = RuneErr,
+};
+
+int
+utf8·bytetorune(rune* r, byte* s)
+{
+ int c[UTFmax], i;
+ rune l;
+
+ c[0] = *(ubyte*)(s);
+ if(c[0] < Tx) {
+ *r = c[0];
+ return 1;
+ }
+
+ l = c[0];
+ for(i = 1; i < UTFmax; i++) {
+ c[i] = *(ubyte*)(s+i);
+ c[i] ^= Tx;
+ if (c[i] & Testx) goto bad;
+
+ l = (l << Bitx) | c[i];
+ if(c[0] < Tbyte(i + 2)) {
+ l &= RuneX(i + 1);
+ if (i == 1) {
+ if (c[0] < Tbyte(2) || l <= Rune1)
+ goto bad;
+ } else if (l <= RuneX(i) || l > RuneMax)
+ goto bad;
+ if (i == 2 && SurrogateMin <= l && l <= SurrogateMax)
+ goto bad;
+
+ *r = l;
+ return i + 1;
+ }
+ }
+bad:
+ *r = RuneErr;
+ return 1;
+}
+
+int
+utf8·runetobyte(byte* s, rune* r)
+{
+ int i, j;
+ rune c;
+
+ c = *r;
+ if (c <= Rune1) {
+ s[0] = c;
+ return 1;
+ }
+
+ for (i = 2; i < UTFmax + 1; i++){
+ if (i == 3){
+ if (c > RuneMax)
+ c = RuneErr;
+ if (SurrogateMin <= c && c <= SurrogateMax)
+ c = RuneErr;
+ }
+ if (c <= RuneX(i) || i == UTFmax) {
+ s[0] = Tbyte(i) | (c >> (i - 1)*Bitx);
+ for(j = 1; j < i; j++)
+ s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx);
+ return i;
+ }
+ }
+
+ return UTFmax;
+}
+
+int
+utf8·runelen(rune r)
+{
+ byte s[10];
+ return utf8·runetobyte(s, &r);
+}
+
+int
+utf8·fullrune(byte* s, int n)
+{
+ int i;
+ rune c;
+
+ if (n <= 0) return 0;
+ c = *(ubyte*) s;
+ if (c < Tx) return 1;
+
+ for (i = 3; i < UTFmax + 1; i++) {
+ if (c < Tbyte(i)) return n >= i - 1;
+ }
+
+ return n >= UTFmax;
+}
+
+byte*
+utf8·findrune(byte* s, long c)
+{
+ long c1;
+ rune r;
+ int n;
+
+ if (c < RuneSync) return strchr(s, c);
+
+ for (;;) {
+ c1 = *(ubyte*)s;
+ if (c1 < RuneSelf) {
+ if (c1 == 0) return nil;
+ if (c1 == c) return s;
+ s++;
+ continue;
+ }
+ n = utf8·bytetorune(&r, s);
+ if (r == c) return s;
+ s += n;
+ }
+
+ return nil;
+}
+
+byte*
+utf8·findrrune(byte* s, long c)
+{
+ long c1;
+ rune r;
+ byte *l;
+
+ if (c < RuneSync)
+ return strrchr(s, c);
+
+ l = nil;
+ for (;;) {
+ c1 = *(ubyte*)s;
+ if (c1 < RuneSelf) {
+ if (c1 == 0) return l;
+ if (c1 == c) l = s;
+ s++;
+ continue;
+ }
+ c1 = utf8·bytetorune(&r, s);
+ if (r == c)
+ l = s;
+ s += c1;
+ }
+
+ return nil;
+}
+
+#undef Bit
+#undef Tbyte
+#undef RuneX
+
+#include ".generated/utf8.c"
+
+// -------------------------------------------------------------------------
+// Dynamic string functions
+
+// New returns a new dynamic string object, initialized from the given C string.
+// len defines the length of the C substring that we will copy into our buffer.
+// The backing buffer will have capacity cap.
+string
+str·makecap(const byte *s, vlong len, vlong cap)
+{
+ struct Hdr* h;
+
+ h = malloc(sizeof(*h) + cap + 1);
+ if (s == nil) memset(h, 0, sizeof(*h));
+
+ if (h == nil) return nil; // Allocation failed.
+
+ h->len = (s == nil) ? 0 : len;
+ h->cap = cap;
+
+ if (cap < h->len) goto cleanup;
+
+ if (s != nil && cap > 0) {
+ memcpy(h->buf, s, h->len);
+ memset(h->buf + h->len, '\0', h->cap - h->len + 1);
+ }
+
+ return h->buf;
+
+cleanup:
+ free(h);
+ panicf("Attempted to create a string with less capacity than length");
+ return nil;
+}
+
+// New returns a new dynamic string object, initialized from the given C string.
+// The backing buffer capacity is equivalent to the string length.
+string
+str·makelen(const byte *s, vlong len)
+{
+ vlong sl = (!s) ? 0 : strlen(s);
+ if (sl < len) panicf("attempted to take a bigger substring than string length");
+
+ vlong cap = (len == 0) ? 1 : len;
+ return str·makecap(s, len, cap);
+}
+
+// New returns a new dynamic string object, initialized from the given C string.
+// The backing buffer capacity is equivalent to the string length.
+string
+str·make(const byte *s)
+{
+ vlong len = (!s) ? 0 : strlen(s);
+ return str·makelen(s, len);
+}
+
+// Newf returns a new dynamic string object
+string
+str·makef(const byte *fmt, ...)
+{
+ vlong n;
+ string s;
+ va_list args;
+
+ va_start(args, fmt);
+ n = vsnprintf(nil, 0, fmt, args);
+ va_end(args);
+
+ s = str·makecap(nil, 0, n);
+
+ va_start(args, fmt);
+ vsnprintf(s, n + 1, fmt, args);
+ va_end(args);
+
+ Hdr* h = (Hdr*)(s - sizeof(Hdr));
+ h->len = n;
+
+ return s;
+}
+
+// Free returns memory associated to the buffer.
+void
+str·free(string s)
+{
+ free(s - sizeof(Hdr));
+}
+
+// Len returns the length of the string.
+int
+str·len(const string s)
+{
+ Hdr* h = (Hdr*)(s - sizeof(Hdr));
+ return h->len;
+}
+
+// Cap returns the capacity of the string buffer.
+int
+str·cap(const string s)
+{
+ Hdr* h = (Hdr*)(s - sizeof(Hdr));
+ return h->cap;
+}
+
+void
+str·clear(string *s)
+{
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
+ h->len = 0;
+ *s[0] = '\0';
+}
+
+// Grow ensures that the string can encompass AT LEAST delta bytes.
+// If it already can, this is a NO OP.
+// If it can't, the string will be reallocated.
+void
+str·grow(string *s, vlong delta)
+{
+ Hdr *h, *newh;
+ vlong cap = str·cap(*s);
+ vlong len = str·len(*s);
+ assert(cap >= len); // To prevent unsigned behavior
+
+ if (cap - len >= delta) return;
+
+ h = (Hdr*)(*s - sizeof(Hdr));
+
+ vlong newCap = cap + delta;
+ assert(newCap >= cap); // To prevent unsigned behavior
+ if (newCap < MAX_STRING_ALLOC) {
+ newCap *= 2;
+ } else
+ newCap += MAX_STRING_ALLOC;
+
+ newh = (Hdr*)realloc(h, sizeof(*h) + newCap + 1);
+ if (newh == nil) return;
+
+ memset(newh->buf + len, '\0', newCap - len);
+ newh->cap = newCap;
+ newh->len = len;
+
+ *s = newh->buf;
+}
+
+// Fit reallocates the string such that the buffer is exactly sized for the
+// buffer. If the capacity equals the length, then the function is a NOOP. The
+// byte array is unchanged.
+void
+str·fit(string *s)
+{
+ Hdr* h;
+ vlong cap = str·cap(*s);
+ vlong len = str·len(*s);
+
+ if (cap == len) return;
+
+ h = (Hdr*)(s - sizeof(Hdr));
+ h = realloc(h, sizeof(*h) + len + 1);
+ h->cap = len;
+
+ *s = h->buf;
+}
+
+// Append will append the given null terminated C string to the string data
+// structure. This variant can append a substring of length len of the given
+// string to our buffer. The result is reallocated if not enough room is present
+// in the buffer.
+int
+str·appendlen(string *s, vlong n, const byte* b)
+{
+ /*
+ bl = strlen(b);
+ if (n > bl) panicf("attempted to make a substring longer than string");
+ */
+
+ str·grow(s, n);
+ if (*s == nil) return 0;
+
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
+
+ memcpy(*s + str·len(*s), b, n);
+ h->len += n;
+ (*s)[h->len] = '\0';
+
+ return n;
+}
+
+// Append will append the given null terminated C string to the string data
+// structure. This variant will append the entire string.
+int
+str·append(string *s, const byte* b)
+{
+ return str·appendlen(s, strlen(b), b);
+}
+
+// AppendByte will append the given byte to our string.
+// NOTE: As the byte is on the stack, it is not null-terminated.
+// Can not pass to the above functions.
+int
+str·appendbyte(string *s, const byte b)
+{
+ str·grow(s, 1);
+ if (*s == nil) return 0;
+
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
+
+ *(*s + str·len(*s)) = b;
+ h->len++;
+ (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
+
+ return 1;
+}
+
+/*
+ * Appendf will append the given formatted string to our buffer.
+ * Returns the newly minted string
+ */
+
+int
+str·appendf(string *s, const byte* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ int remain = str·cap(*s) - str·len(*s);
+ int n = vsnprintf(*s + str·len(*s), remain + 1, fmt, args);
+ va_end(args);
+
+ if (n > remain) {
+ // If the first write was incomplete, we overwite the data again.
+ str·grow(s, n);
+ va_list args;
+ va_start(args, fmt);
+ n = vsnprintf(*s + str·len(*s), n + 1, fmt, args);
+ assert(n - remain <= str·cap(*s));
+ va_end(args);
+ }
+
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
+ h->len += n;
+
+ return n;
+}
+
+// Equals returns true if string s and t are equivalent.
+bool
+str·equals(const string s, const string t)
+{
+ vlong sL = str·len(s);
+ vlong tL = str·len(t);
+ if (sL != tL) return false;
+
+ return memcmp(s, t, sL) == 0;
+}
+
+//------------------------------------------------------------------------
+// Utility Methods
+
+int
+str·read(string s, int size, int n, void *buf)
+{
+ int len;
+
+ len = MIN(n * size, str·len(s));
+ memcpy(buf, s, len);
+
+ return len;
+}
+
+// Find will find the first occurence of
+// substr in the string Returns -1 if nothing was found.
+int
+str·find(string s, const byte* substr)
+{
+ byte* loc = strstr(s, substr);
+ if (loc == nil) return -1;
+ return (int)(loc - s);
+}
+
+//
+// Lower will force all runes in the string to be lowercase.
+void
+str·lower(string s)
+{
+ byte *b, *e;
+ b = s;
+ e = b + str·len(s);
+ while (b++ != e)
+ *b = tolower(*b);
+}
+
+// Upper will force all runes in the string to be uppercase.
+void
+str·upper(string s)
+{
+ byte *b, *e;
+ b = s;
+ e = b + str·len(s);
+ while (b++ != e)
+ *b = toupper(*b);
+}
+
+// Replace will replace all occurences of the given bytes 'from' to bytes 'to'
+// Edits are done in place and modify the string.
+// NOTE: As of now strings from and to must be the same size.
+void
+str·replace(string s, const byte* from, const byte* to)
+{
+ vlong fromL = strlen(from);
+ vlong toL = strlen(to);
+ if (toL != fromL) { panicf("different sized replacement string not supported"); }
+
+ vlong l = str·len(s);
+ vlong i = l;
+ vlong j = l;
+
+ for (i = 0; i < l; i++) {
+ for (j = 0; j < toL; j++) {
+ if (s[i] == from[j]) {
+ s[i] = to[j];
+ break;
+ }
+ }
+ }
+}
+
+// Split will split the string by the given token.
+// Returns a stretchy buffer of strings that result from the partition.
+// It is the caller's responsibility to clean the memory.
+string*
+str·split(string s, const byte* tok)
+{
+ string* fields = nil;
+ vlong start = 0;
+
+ vlong sL = str·len(s);
+ vlong tokL = strlen(tok);
+ if (sL == 0 || tokL == 0) return nil;
+
+ buffit(fields, 5);
+
+ for (vlong i = 0; i < sL - tokL; i++) {
+ if ((tokL == 1 && s[i] == tokL) || !memcmp(s + i, tok, tokL)) {
+ bufpush(fields, str·makelen(s + start, i - start));
+ if (fields[buflen(fields) - 1] == nil) goto cleanup;
+
+ start = i + tokL;
+ i += tokL - 1;
+ }
+ }
+
+ bufpush(fields, str·makelen(s + start, sL - start));
+
+ return fields;
+
+cleanup:
+ for (vlong i = 0; i < buflen(fields); i++) {
+ str·free(fields[i]);
+ }
+ buffree(fields);
+ return nil;
+}
+
+string
+str·join(vlong len, byte** fields, const byte* sep)
+{
+ string s = str·makecap("", 0, 10);
+ int j = 0;
+
+ for (j = 0; j < len; j++) {
+ str·append(&s, fields[j]);
+ if (j < len - 1)
+ str·appendlen(&s, 1, sep);
+ }
+
+ return s;
+}