From ec21325b36adc7f52179ea010ff7bb19d121a6c1 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 26 Apr 2020 17:36:49 -0700 Subject: chore: moved string into main libn header --- include/.include/str.h | 62 --------------------------- include/libn.h | 60 +++++++++++++++++++++++++- sys/libbio/test.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++- sys/libn/memory.c | 17 ++++++++ 4 files changed, 187 insertions(+), 64 deletions(-) delete mode 100644 include/.include/str.h diff --git a/include/.include/str.h b/include/.include/str.h deleted file mode 100644 index 29b5e1e..0000000 --- a/include/.include/str.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -typedef byte* string; - -typedef struct str·Hdr -{ - vlong len; - vlong cap; - byte buf[]; -} str·Hdr; - -// ------------------------------------------------------------------------- -// UTF-8 functions. -// Perhaps break into own unit -// TODO: Add to(upper|lower|title) - -typedef uint32 Rune; - -enum -{ - UTFmax = 4, - RuneSync = 0x80, - RuneSelf = 0x80, - RuneErr = 0xFFFD, - RuneMax = 0x10FFFF, -}; - -int utf8·fullrune(byte *s, int n); -byte *utf8·findrune(byte *s, long i); -int utf8·chartorune(Rune *r, byte *s); -int utf8·runetochar(byte *s, Rune *r); -int utf8·len(byte *s); -int utf8·runelen(Rune r); -int utf8·isletter(Rune r); -int utf8·isdigit(Rune r); -int utf8·isspace(Rune r); -int utf8·istitle(Rune r); - -// ------------------------------------------------------------------------- -// Dynamic string functions - -string str·newcap(const byte *s, vlong len, vlong cap); -string str·newlen(const byte *s, vlong len); -string str·new(const byte *s); -string str·newf(const byte *fmt, ...); -void str·free(string s); -int str·len(const string s); -int str·cap(const string s); -string str·clear(string s); -string str·grow(string s, vlong delta); -string str·fit(string s); -string str·appendcount(string s, vlong len, const byte *b); -string str·append(string s, const byte* b); -string str·appendf(string s, 
const byte* fmt, ...); -string str·appendbyte(string s, const byte b); -bool str·equals(const string s, const string t); -int str·find(string s, const byte* substr); -void str·lower(string s); -void str·upper(string s); -void str·replace(string s, const byte* from, const byte* to); -string* str·split(string s, const byte* tok); -string str·join(vlong len, byte** fields, const byte* sep); diff --git a/include/libn.h b/include/libn.h index 66eabaf..b994159 100644 --- a/include/libn.h +++ b/include/libn.h @@ -106,7 +106,65 @@ error coro·free(Coro *c); // Strings // TODO(nnoll): Move here? -#include ".include/str.h" +typedef byte* string; + +typedef struct str·Hdr +{ + vlong len; + vlong cap; + byte buf[]; +} str·Hdr; + +// ------------------------------------------------------------------------- +// UTF-8 functions. +// Perhaps break into own unit +// TODO: Add to(upper|lower|title) + +typedef uint32 Rune; + +enum +{ + UTFmax = 4, + RuneSync = 0x80, + RuneSelf = 0x80, + RuneErr = 0xFFFD, + RuneMax = 0x10FFFF, +}; + +/* utf8 helpers */ +int utf8·fullrune(byte *s, int n); +byte *utf8·findrune(byte *s, long i); +int utf8·chartorune(Rune *r, byte *s); +int utf8·runetochar(byte *s, Rune *r); +int utf8·len(byte *s); +int utf8·runelen(Rune r); +int utf8·isletter(Rune r); +int utf8·isdigit(Rune r); +int utf8·isspace(Rune r); +int utf8·istitle(Rune r); + +/* string helpers */ +string str·newcap(const byte *s, vlong len, vlong cap); +string str·newlen(const byte *s, vlong len); +string str·new(const byte *s); +string str·newf(const byte *fmt, ...); +void str·free(string s); +int str·len(const string s); +int str·cap(const string s); +string str·clear(string s); +string str·grow(string s, vlong delta); +string str·fit(string s); +string str·appendcount(string s, vlong len, const byte *b); +string str·append(string s, const byte* b); +string str·appendf(string s, const byte* fmt, ...); +string str·appendbyte(string s, const byte b); +bool str·equals(const string s, const string 
t); +int str·find(string s, const byte* substr); +void str·lower(string s); +void str·upper(string s); +void str·replace(string s, const byte* from, const byte* to); +string* str·split(string s, const byte* tok); +string str·join(vlong len, byte** fields, const byte* sep); // ----------------------------------------------------------------------------- // I/O diff --git a/sys/libbio/test.c b/sys/libbio/test.c index e943153..fec0a0c 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -140,10 +140,119 @@ test·fastq() return err <= 0 ? 0 : 1; } +static byte *SEQ[] = { +"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC" +"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" +"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" +"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" +"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA" +"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" +"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" +"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA" +"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" +"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC" +"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" +"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" +"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT" +"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" +"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" +"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" 
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" +"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" +"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" +"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" +"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" +"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT" +"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" +"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC" +"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" +"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" +"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" +"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" +"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" +"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" +"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" +"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT", + +"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAAATAC" +"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" +"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" +"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" +"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCAAAGTCA" +"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" +"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" 
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" +"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCCCCTGCC" +"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" +"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" +"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGCGGCCTT" +"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" +"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" +"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" +"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" +"ACAGTCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" +"ATCCTCTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" +"CCGCTGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" +"CTGTTAATCGCAAGCTCCAGCAGCCCCCCCCCCCCTTTTCTGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" +"TCCATGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATAAAAAGAGCTCAGT" +"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" +"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATTATTAGCGAAGAAC" +"AAGGTTCCAACTCTCATTCCCGCCATTTACTGGGAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" +"CTCTTCAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" +"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" +"TGCATTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" +"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" +"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" +"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" 
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT",
+};
+/* main: run aln·sketch on every SEQ entry, then report for each of the aln·N rows whether entries 0 and 1 agree. */
 error
 main()
 {
-    error err;
+    double f;
+    error err;
+    int i, l, n;
+    /* mem[n][i] and loc[n][i] each hold aln·L slots for row n of sequence i; presumably filled by aln·sketch -- confirm. */
+    uint64 mem[aln·N][arrlen(SEQ)][aln·L];
+    uint64 *phi[aln·N];  /* phi[n] points at mem[n][i] for the sequence being sketched */
+    int loc[aln·N][arrlen(SEQ)][aln·L];
+    int *pos[aln·N];     /* pos[n] points at loc[n][i] for the sequence being sketched */
+    /* Sketch each sequence; bail out on the first non-zero status so a later success cannot mask it. */
+    for (i = 0; i < arrlen(SEQ); i++) {
+        for (n = 0; n < aln·N; n++) {
+            phi[n] = mem[n][i];
+            pos[n] = loc[n][i];
+        }
+
+        if (err = aln·sketch(SEQ[i], aln·L, phi, pos), err) return err;
+    }
+
+    f = 0;  /* number of rows on which the two sequences compare equal */
+    for (n = 0; n < aln·N; n++) {
+        aln·sort(arrlen(SEQ), aln·L, (uint64*)mem[n]);
+
+        if (!memcmp(mem[n][0], mem[n][1], sizeof(uint64)*aln·L)) {  /* only entries 0 and 1 are compared */
+            f += 1.;
+            printf("True : ");
+        } else {
+            printf("False: ");
+        }
+        for (i = 0; i < arrlen(SEQ); i++) {
+            printf("[");
+            for (l = 0; l < aln·L; l++) {
+                printf("%d,", loc[n][i][l]);
+            }
+            printf("]");
+            if (i == 0) printf(" ~ ");
+        }
+        printf("\n");
+    }
+
+    printf("Fraction hits %f\n", f/aln·N);
+    return err;
+#if 0 /* the format tests below are compiled out; main has already returned above */
+
     if (err = test·newick(), err) {
         errorf("test fail: newick");
     }
@@ -155,5 +264,6 @@ main()
     if (err = test·fastq(), err) {
         errorf("test fail: fastq");
     }
+#endif
 }
 
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index 8081569..7a0ca30 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -147,3 +147,20 @@ mem·freearena(mem·Arena *a)
 
     a->heap.free(a->impl, a);
 }
+
+// -------------------------------------------------------------------------
+// Generalized memory helpers
+// memset64: fill size bytes at dst with the 8-byte in-memory image of val, repeated; a short tail gets its leading bytes.
+void
+memset64(void *dst, uint64 val, uintptr size)
+{
+    uintptr i;  // unsigned, matching size, so both loop comparisons stay same-signed
+
+    for (i = 0; i < (size & ~(uintptr)7); i += 8) {  // whole 8-byte words
+        memcpy((byte*)dst + i, &val, 8);  // memcpy, so dst need not be 8-byte aligned
+    }
+
+    for (; i < size; i++) {  // remaining size % 8 bytes
+        ((byte*)dst)[i] = ((byte*)&val)[i&7];
+    }
+}
-- 
cgit v1.2.1