#include #include #define MAX_STRING_ALLOC 1024 * 1024 typedef struct Hdr { vlong len; vlong cap; byte buf[]; } Hdr; // ------------------------------------------------------------------------- // UTF-8 functions #define Bit(i) (7-(i)) /* N 0's preceded by i 1's e.g. T(Bit(2)) is 1100 0000 */ #define Tbyte(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) /* 0000 0000 0000 0111 1111 1111 */ #define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) enum { Bitx = Bit(1), Tx = Tbyte(1), Rune1 = (1 << (Bit(0)+0*Bitx)) - 1, Maskx = (1 << Bitx) - 1, /* 0011 1111 */ Testx = Maskx ^ 0xff, /* 1100 0000 */ SurrogateMin = 0xD800, SurrogateMax = 0xDFFF, Bad = RuneErr, }; int utf8·chartorune(rune* r, byte* s) { int c[UTFmax], i; rune l; c[0] = *(ubyte*)(s); if (c[0] < Tx) { *r = c[0]; return 1; } l = c[0]; for (i = 1; i < UTFmax; i++) { c[i] = *(ubyte*)(s+i); c[i] ^= Tx; if (c[i] & Testx) goto bad; l = (l << Bitx) | c[i]; if (c[0] < Tbyte(i + 2)) { l &= RuneX(i + 1); if (i == 1) { if (c[0] < Tbyte(2) || l <= Rune1) goto bad; } else if (l <= RuneX(i) || l > RuneMax) goto bad; if (i == 2 && SurrogateMin <= l && l <= SurrogateMax) goto bad; *r = l; return i + 1; } } bad: *r = RuneErr; return 1; } int utf8·runetochar(byte* s, rune* r) { int i, j; rune c; c = *r; if (c <= Rune1) { s[0] = c; return 1; } for (i = 2; i < UTFmax + 1; i++){ if (i == 3){ if (c > RuneMax) c = RuneErr; if (SurrogateMin <= c && c <= SurrogateMax) c = RuneErr; } if (c <= RuneX(i) || i == UTFmax) { s[0] = Tbyte(i) | (c >> (i - 1)*Bitx); for(j = 1; j < i; j++) s[j] = Tx | ((c >> (i - j - 1)*Bitx) & Maskx); return i; } } return UTFmax; } int utf8·runelen(rune r) { byte s[10]; return utf8·runetochar(s, &r); } int utf8·fullrune(byte* s, int n) { int i; rune c; if (n <= 0) return 0; c = *(ubyte*) s; if (c < Tx) return 1; for (i = 3; i < UTFmax + 1; i++) { if (c < Tbyte(i)) return n >= i - 1; } return n >= UTFmax; } byte* utf8·findrune(byte* s, long c) { long c1; rune r; int n; if (c < RuneSync) return strchr(s, c); for (;;) { c1 = *(ubyte*)s; if (c1 < RuneSelf) { if (c1 == 0) return nil; if (c1 == c) return s; s++; continue; } n = utf8·chartorune(&r, s); if (r == c) return s; s += n; } return nil; } byte* utf8·findrrune(byte* s, long c) { long c1; rune r; byte *l; if (c < RuneSync) return strrchr(s, c); l = nil; for (;;) { c1 = *(ubyte*)s; if (c1 < RuneSelf) { if (c1 == 0) return l; if (c1 == c) l = s; s++; continue; } c1 = utf8·chartorune(&r, s); if (r == c) l = s; s += c1; } return nil; } #undef Bit #undef Tbyte #undef RuneX #include ".generated/utf8.c" // ------------------------------------------------------------------------- // Dynamic string functions // New returns a new dynamic string object, initialized from the given C string. // len defines the length of the C substring that we will copy into our buffer. // The backing buffer will have capacity cap. string str·makecap(const byte *s, vlong len, vlong cap) { struct Hdr* h; h = malloc(sizeof(*h) + cap + 1); if (s == nil) memset(h, 0, sizeof(*h)); if (h == nil) return nil; // Allocation failed. h->len = (s == nil) ? 0 : len; h->cap = cap; if (cap < h->len) goto cleanup; if (s != nil && cap > 0) { memcpy(h->buf, s, h->len); memset(h->buf + h->len, '\0', h->cap - h->len + 1); } return h->buf; cleanup: free(h); panicf("Attempted to create a string with less capacity than length"); return nil; } // New returns a new dynamic string object, initialized from the given C string. // The backing buffer capacity is equivalent to the string length. string str·makelen(const byte *s, vlong len) { vlong sl = (!s) ? 0 : strlen(s); if (sl < len) panicf("attempted to take a bigger substring than string length"); vlong cap = (len == 0) ? 1 : len; return str·makecap(s, len, cap); } // New returns a new dynamic string object, initialized from the given C string. // The backing buffer capacity is equivalent to the string length. string str·make(const byte *s) { vlong len = (!s) ? 0 : strlen(s); return str·makelen(s, len); } // Newf returns a new dynamic string object string str·makef(const byte *fmt, ...) { va_list args; va_start(args, fmt); vlong n = vsnprintf(nil, 0, fmt, args); va_end(args); string s = str·makecap(nil, 0, n); va_start(args, fmt); vsnprintf(s, n + 1, fmt, args); va_end(args); Hdr* h = (Hdr*)(s - sizeof(Hdr)); h->len = n; return s; } // Free returns memory associated to the buffer. void str·free(string s) { free(s - sizeof(Hdr)); } // Len returns the length of the string. int str·len(const string s) { Hdr* h = (Hdr*)(s - sizeof(Hdr)); return h->len; } // Cap returns the capacity of the string buffer. int str·cap(const string s) { Hdr* h = (Hdr*)(s - sizeof(Hdr)); return h->cap; } void str·clear(string *s) { Hdr* h = (Hdr*)(*s - sizeof(Hdr)); h->len = 0; *s[0] = '\0'; } // Grow ensures that the string can encompass AT LEAST delta bytes. // If it already can, this is a NO OP. // If it can't, the string will be reallocated. void str·grow(string *s, vlong delta) { Hdr *h, *newh; vlong cap = str·cap(*s); vlong len = str·len(*s); assert(cap >= len); // To prevent unsigned behavior if (cap - len >= delta) return; h = (Hdr*)(*s - sizeof(Hdr)); vlong newCap = cap + delta; assert(newCap >= cap); // To prevent unsigned behavior if (newCap < MAX_STRING_ALLOC) { newCap *= 2; } else newCap += MAX_STRING_ALLOC; newh = (Hdr*)realloc(h, sizeof(*h) + newCap + 1); if (newh == nil) return; memset(newh->buf + len, '\0', newCap - len); newh->cap = newCap; newh->len = len; *s = newh->buf; } // Fit reallocates the string such that the buffer is exactly sized for the // buffer. If the capacity equals the length, then the function is a NOOP. The // byte array is unchanged. void str·fit(string *s) { Hdr* h; vlong cap = str·cap(*s); vlong len = str·len(*s); if (cap == len) return; h = (Hdr*)(s - sizeof(Hdr)); h = realloc(h, sizeof(*h) + len + 1); h->cap = len; *s = h->buf; } // Append will append the given null terminated C string to the string data // structure. This variant can append a substring of length len of the given // string to our buffer. The result is reallocated if not enough room is present // in the buffer. void str·appendlen(string *s, vlong n, const byte* b) { vlong bl = strlen(b); if (n > bl) panicf("attempted to make a substring longer than string"); str·grow(s, n); if (*s == nil) return; Hdr* h = (Hdr*)(*s - sizeof(Hdr)); memcpy(*s + str·len(*s), b, n); h->len += n; (*s)[h->len] = '\0'; } // Append will append the given null terminated C string to the string data // structure. This variant will append the entire string. void str·append(string *s, const byte* b) { return str·appendlen(s, strlen(b), b); } // AppendByte will append the given byte to our string. // NOTE: As the byte is on the stack, it is not null-terminated. // Can not pass to the above functions. void str·appendbyte(string *s, const byte b) { str·grow(s, 1); if (*s == nil) return; Hdr* h = (Hdr*)(*s - sizeof(Hdr)); *(*s + str·len(*s)) = b; h->len++; (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? } /* * Appendf will append the given formatted string to our buffer. * Returns the newly minted string */ void str·appendf(string *s, const byte* fmt, ...) { va_list args; va_start(args, fmt); int remain = str·cap(*s) - str·len(*s); int n = vsnprintf(*s + str·len(*s), remain + 1, fmt, args); va_end(args); if (n > remain) { // If the first write was incomplete, we overwite the data again. str·grow(s, n); va_list args; va_start(args, fmt); n = vsnprintf(*s + str·len(*s), n + 1, fmt, args); assert(n - remain <= str·cap(*s)); va_end(args); } Hdr* h = (Hdr*)(*s - sizeof(Hdr)); h->len += n; } // Equals returns true if string s and t are equivalent. bool str·equals(const string s, const string t) { vlong sL = str·len(s); vlong tL = str·len(t); if (sL != tL) return false; return memcmp(s, t, sL) == 0; } //------------------------------------------------------------------------ // Utility Methods int str·read(string s, int size, int n, void *buf) { int len; len = MIN(n * size, str·len(s)); memcpy(buf, s, len); return len; } // Find will find the first occurence of // substr in the string Returns -1 if nothing was found. int str·find(string s, const byte* substr) { byte* loc = strstr(s, substr); if (loc == nil) return -1; return (int)(loc - s); } // // Lower will force all runes in the string to be lowercase. void str·lower(string s) { byte *b, *e; b = s; e = b + str·len(s); while (b++ != e) *b = tolower(*b); } // Upper will force all runes in the string to be uppercase. void str·upper(string s) { byte *b, *e; b = s; e = b + str·len(s); while (b++ != e) *b = toupper(*b); } // Replace will replace all occurences of the given bytes 'from' to bytes 'to' // Edits are done in place and modify the string. // NOTE: As of now strings from and to must be the same size. void str·replace(string s, const byte* from, const byte* to) { vlong fromL = strlen(from); vlong toL = strlen(to); if (toL != fromL) { panicf("different sized replacement string not supported"); } vlong l = str·len(s); vlong i = l; vlong j = l; for (i = 0; i < l; i++) { for (j = 0; j < toL; j++) { if (s[i] == from[j]) { s[i] = to[j]; break; } } } } // Split will split the string by the given token. // Returns a stretchy buffer of strings that result from the partition. // It is the caller's responsibility to clean the memory. string* str·split(string s, const byte* tok) { string* fields = nil; vlong start = 0; vlong sL = str·len(s); vlong tokL = strlen(tok); if (sL == 0 || tokL == 0) return nil; buffit(fields, 5); for (vlong i = 0; i < sL - tokL; i++) { if ((tokL == 1 && s[i] == tokL) || !memcmp(s + i, tok, tokL)) { bufpush(fields, str·makelen(s + start, i - start)); if (fields[buflen(fields) - 1] == nil) goto cleanup; start = i + tokL; i += tokL - 1; } } bufpush(fields, str·makelen(s + start, sL - start)); return fields; cleanup: for (vlong i = 0; i < buflen(fields); i++) { str·free(fields[i]); } buffree(fields); return nil; } string str·join(vlong len, byte** fields, const byte* sep) { string s = str·makecap("", 0, 10); int j = 0; for (j = 0; j < len; j++) { str·append(&s, fields[j]); if (j < len - 1) str·appendlen(&s, 1, sep); } return s; }