diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-04-17 18:14:35 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-04-17 18:14:35 -0700 |
commit | dca72268ae6bcbd411aa8660604451226f76b4eb (patch) | |
tree | 741be97c97ef4fe1a023882267b06ec7eac78527 /src/str.c | |
parent | 998ee2ce7110987bb5cbeb11a55781a271a1ca47 (diff) |
chore: basic maintenance
Diffstat (limited to 'src/str.c')
-rw-r--r-- | src/str.c | 504 |
1 files changed, 0 insertions, 504 deletions
diff --git a/src/str.c b/src/str.c deleted file mode 100644 index 9aa29b3..0000000 --- a/src/str.c +++ /dev/null @@ -1,504 +0,0 @@ -#include <u.h> - -#define MAX_STRING_ALLOC 1024 * 1024 - -// ------------------------------------------------------------------------- -// UTF-8 functions - -#define Bit(i) (7-(i)) -/* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */ -#define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) -/* 0000 0000 0000 0111 1111 1111 */ -#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) - -enum -{ - Bitx = Bit(1), - Tx = Tbyte(1), - Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, - - Maskx = (1 << Bitx) - 1, /* 0011 1111 */ - Testx = Maskx ^ 0xff, /* 1100 0000 */ - - SurrogateMin = 0xD800, - SurrogateMax = 0xDFFF, - Bad = RuneErr, -}; - -int -utf8·CharToRune(Rune* r, byte* s) -{ - int c[UTFmax], i; - Rune l; - - c[0] = *(ubyte*)(s); - if (c[0] < Tx) { - *r = c[0]; - return 1; - } - - l = c[0]; - for (i = 1; i < UTFmax; i++) { - c[i] = *(ubyte*)(s+i); - c[i] ^= Tx; - if (c[i] & Testx) goto bad; - - l = (l << Bitx) | c[i]; - if (c[0] < Tbyte(i + 2)) { - l &= RuneX(i + 1); - if (i == 1) { - if (c[0] < Tbyte(2) || l <= Rune1) - goto bad; - } else if (l <= RuneX(i) || l > RuneMax) - goto bad; - if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) - goto bad; - - *r = l; - return i + 1; - } - } -bad: - *r = RuneErr; - return 1; -} - -int -utf8·RuneToChar(byte* s, Rune* r) -{ - int i, j; - Rune c; - - c = *r; - if (c <= Rune1) { - s[0] = c; - return 1; - } - - for (i = 2; i < UTFmax + 1; i++){ - if (i == 3){ - if (c > RuneMax) - c = RuneErr; - if (SurrogateMin <= c && c <= SurrogateMax) - c = RuneErr; - } - if (c <= RuneX(i) || i == UTFmax) { - s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); - for(j = 1; j < i; j++) - s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); - return i; - } - } - - return UTFmax; -} - -int -utf8·RuneLen(Rune r) -{ - byte s[10]; - return utf8·RuneToChar(s, &r); -} - -int -utf8·FullRune(byte* s, int n) -{ - int i; - Rune c; - - if (n <= 0) return 0; - c = *(ubyte*) s; - if (c < Tx) return 1; - - for (i = 3; i < UTFmax + 1; i++) { - if (c < Tbyte(i)) return n >= i - 1; - } - - return n >= UTFmax; -} - -byte* -utf8·FindRune(byte* s, long c) -{ - long c1; - Rune r; - int n; - - if (c < RuneSync) return strchr(s, c); - - for (;;) { - c1 = *(ubyte*)s; - if (c1 < RuneSelf) { - if (c1 == 0) return nil; - if (c1 == c) return s; - s++; - continue; - } - n = utf8·CharToRune(&r, s); - if (r == c) return s; - s += n; - } - - return nil; -} - -#undef Bit -#undef Tbyte -#undef RuneX - -#include "generated/utf8.c" - -// ------------------------------------------------------------------------- -// Dynamic string functions - -// New returns a new dynamic string object, initialized from the given C string. -// len defines the length of the C substring that we will copy into our buffer. -// The backing buffer will have capacity cap. -string -str·NewCap(const byte* s, vlong len, vlong cap) -{ - struct str·Hdr* h; - - h = malloc(sizeof(*h) + cap + 1); - if (s == nil) memset(h, 0, sizeof(*h)); - - if (h == nil) return nil; // Allocation failed. - - h->len = (s == nil) ? 0 : len; - h->cap = cap; - - if (cap < h->len) goto cleanup; - - if (s != nil && cap > 0) { - memcpy(h->buf, s, h->len); - memset(h->buf + h->len, '\0', h->cap - h->len + 1); - } - - return h->buf; - -cleanup: - free(h); - panicf("Attempted to create a string with less capacity than length"); -} - -// New returns a new dynamic string object, initialized from the given C string. -// The backing buffer capacity is equivalent to the string length. -string -str·NewLen(const byte* s, vlong len) -{ - vlong sl = (s == nil) ? 0 : strlen(s); - if (sl < len) panicf("attempted to take a bigger substring than string length"); - - vlong cap = (len == 0) ? 1 : len; - return str·NewCap(s, len, cap); -} - -// New returns a new dynamic string object, initialized from the given C string. -// The backing buffer capacity is equivalent to the string length. -string -str·New(const byte* s) -{ - vlong len = (s == nil) ? 0 : strlen(s); - return str·NewLen(s, len); -} - -// Newf returns a new dynamic string object -string -str·Newf(const byte* fmt, ...) -{ - va_list args; - va_start(args, fmt); - vlong n = vsnprintf(nil, 0, fmt, args); - va_end(args); - - string s = str·NewCap(nil, 0, n); - - va_start(args, fmt); - vsnprintf(s, n + 1, fmt, args); - va_end(args); - - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - h->len = n; - - return s; -} - -// Free returns memory associated to the buffer. -void -str·Free(string s) -{ - free(s - sizeof(str·Hdr)); -} - -// Len returns the length of the string. -int -str·Len(const string s) -{ - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - return h->len; -} - -// Cap returns the capacity of the string buffer. -int -str·Cap(const string s) -{ - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - return h->cap; -} - -string -str·Clear(string s) -{ - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - h->len = 0; - *s = 0; - - return s; -} - -// Grow ensures that the string can encompass AT LEAST delta bytes. -// If it already can, this is a NO OP. -// If it can't, the string will be reallocated. -string -str·Grow(string s, vlong delta) -{ - str·Hdr *h, *newh; - vlong cap = str·Cap(s); - vlong len = str·Len(s); - Assert(cap >= len); // To prevent unsigned behavior - - if (cap - len >= delta) return s; - - h = (str·Hdr*)(s - sizeof(str·Hdr)); - - vlong newCap = cap + delta; - Assert(newCap >= cap); // To prevent unsigned behavior - if (newCap < MAX_STRING_ALLOC) { - newCap *= 2; - } else - newCap += MAX_STRING_ALLOC; - - newh = (str·Hdr*)realloc(h, sizeof(*h) + newCap + 1); - if (newh == nil) return nil; - - memset(newh->buf + len, '\0', newCap - len); - newh->cap = newCap; - newh->len = len; - - return newh->buf; -} - -// Fit reallocates the string such that the buffer is exactly sized for the -// buffer. If the capacity equals the length, then the function is a NOOP. The -// byte array is unchanged. -string -str·Fit(string s) -{ - str·Hdr* h; - vlong cap = str·Cap(s); - vlong len = str·Len(s); - - if (cap == len) return s; - - h = (str·Hdr*)(s - sizeof(str·Hdr)); - h = realloc(h, sizeof(*h) + len + 1); - h->cap = len; - - return h->buf; -} - -// Append will append the given null terminated C string to the string data -// structure. This variant can append a substring of length len of the given -// string to our buffer. The result is reallocated if not enough room is present -// in the buffer. -string -str·AppendCount(string s, const byte* b, vlong n) -{ - vlong bl = strlen(b); - if (n > bl) panicf("attempted to make a substring longer than string"); - - s = str·Grow(s, n); - if (s == nil) return nil; - - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - - memcpy(s + str·Len(s), b, n); - h->len += n; - s[h->len] = '\0'; - - return s; -} - -// Append will append the given null terminated C string to the string data -// structure. This variant will append the entire string. -string -str·Append(string s, const byte* b) -{ - return str·AppendCount(s, b, strlen(b)); -} - -// AppendByte will append the given byte to our string. -// NOTE: As the byte is on the stack, it is not null-terminated. -// Can not pass to the above functions. -string -str·AppendByte(string s, const byte b) -{ - s = str·Grow(s, 1); - if (s == nil) return nil; - - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - - *(s + str·Len(s)) = b; - h->len++; - s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? - - return s; -} - -// Equals returns true if string s and t are equivalent. -bool -str·Equals(const string s, const string t) -{ - vlong sL = str·Len(s); - vlong tL = str·Len(t); - if (sL != tL) return false; - - return memcmp(s, t, sL) == 0; -} - -// Utility Methods ------------------------------------ -// -// Appendf will append the given formatted string to our buffer. -// Returns the newly minted string. -string -str·Appendf(string s, const byte* fmt, ...) -{ - va_list args; - va_start(args, fmt); - int remain = str·Cap(s) - str·Len(s); - int n = vsnprintf(s + str·Len(s), remain + 1, fmt, args); - va_end(args); - - if (n > remain) { - // If the first write was incomplete, we overwite the data again. - s = str·Grow(s, n); - va_list args; - va_start(args, fmt); - n = vsnprintf(s + str·Len(s), n + 1, fmt, args); - Assert(n - remain <= str·Cap(s)); - va_end(args); - } - - str·Hdr* h = (str·Hdr*)(s - sizeof(str·Hdr)); - h->len += n; - - return s; -} - -// Find will find the first occurence of -// substr in the string Returns -1 if nothing was found. -int -str·Find(string s, const byte* substr) -{ - byte* loc = strstr(s, substr); - if (loc == nil) return -1; - return (int)(loc - s); -} - -// -// Lower will force all runes in the string to be lowercase. -void -str·Lower(string s) -{ - byte *b, *e; - b = s; - e = b + str·Len(s); - while (b++ != e) - *b = tolower(*b); -} - -// Upper will force all runes in the string to be uppercase. -void -str·Upper(string s) -{ - byte *b, *e; - b = s; - e = b + str·Len(s); - while (b++ != e) - *b = toupper(*b); -} - -// Replace will replace all occurences of the given bytes 'from' to bytes 'to' -// Edits are done in place and modify the string. -// NOTE: As of now strings from and to must be the same size. -void -str·Replace(string s, const byte* from, const byte* to) -{ - vlong fromL = strlen(from); - vlong toL = strlen(to); - if (toL != fromL) { panicf("different sized replacement string not supported"); } - - vlong l = str·Len(s); - vlong i = l; - vlong j = l; - - for (i = 0; i < l; i++) { - for (j = 0; j < toL; j++) { - if (s[i] == from[j]) { - s[i] = to[j]; - break; - } - } - } -} - -// Split will split the string by the given token. -// Returns a stretchy buffer of strings that result from the partition. -// It is the caller's responsibility to clean the memory. -string* -str·Split(string s, const byte* tok) -{ - string* fields = nil; - vlong start = 0; - - vlong sL = str·Len(s); - vlong tokL = strlen(tok); - if (sL == 0 || tokL == 0) return nil; - - buffit(fields, 5); - - for (vlong i = 0; i < sL - tokL; i++) { - if ((tokL == 1 && s[i] == tokL) || !memcmp(s + i, tok, tokL)) { - bufpush(fields, str·NewLen(s + start, i - start)); - if (fields[buflen(fields) - 1] == nil) goto cleanup; - - start = i + tokL; - i += tokL - 1; - } - } - - bufpush(fields, str·NewLen(s + start, sL - start)); - - return fields; - -cleanup: - for (vlong i = 0; i < buflen(fields); i++) { - str·Free(fields[i]); - } - buffree(fields); - return nil; -} - -string -str·Join(byte** fields, vlong numFields, const byte* sep) -{ - string s = str·NewCap(nil, 0, 10); - int j = 0; - - for (j = 0; j < numFields; j++) { - s = str·Append(s, fields[j]); - if (j < numFields - 1) { s = str·AppendCount(s, sep, 1); } - } - - return s; -} |