aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2021-09-08 15:51:40 -0700
committerNicholas Noll <nbnoll@eml.cc>2021-09-08 15:51:53 -0700
commitc0a7b53baf2a6e7bf9bc1fbec7ac05e43ac59154 (patch)
tree385b83f4d505da960820932f7357eb46b31abac3
parent8c0475f97675245d1bcbb112dc79c9f490fad361 (diff)
checkin
-rw-r--r--Makefile4
-rw-r--r--include/libbio.h19
-rw-r--r--include/libn.h39
-rw-r--r--sys/libbio/fasta.c95
-rw-r--r--sys/libbio/newick.c1
-rw-r--r--sys/libbio/phylo.c32
-rw-r--r--sys/libn/error.c5
-rw-r--r--sys/libn/memory.c22
-rw-r--r--sys/libn/random.c35
9 files changed, 158 insertions, 94 deletions
diff --git a/Makefile b/Makefile
index 19db73a..deac25a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
# Compiler, Linker, and Assembler
-CC := clang
+CC := gcc
AR := ar
AS := nasm
PKG := pkg-config
@@ -19,7 +19,7 @@ CFINI := `gcc --print-file-name=crtendS.o` $(LIB_DIR)/crt/x86_64/crtn.o
# Flags, Libraries and Includes
CFLAGS := -g -march=native -fno-strict-aliasing -fwrapv -fms-extensions -Wno-microsoft-anon-tag
-STATIC := -nodefaultlibs -nostartfiles -static
+STATIC := -nodefaultlibs -nostartfiles -nostdinc -static
AFLAGS := -f elf64
INCS := -I $(INC_DIR) -isystem $(INC_DIR)/vendor/libc
ELIBS := -L$(LIB_DIR) -lc
diff --git a/include/libbio.h b/include/libbio.h
index 558c163..b3d0426 100644
--- a/include/libbio.h
+++ b/include/libbio.h
@@ -5,15 +5,12 @@
typedef struct bio·Node
{
- string name;
- string comment;
- double dist;
- double support;
- int nnode;
- int nchild;
- struct bio·Node *parent;
- struct bio·Node *sibling;
- struct bio·Node *child;
+ string name, comment;
+ double dist, support;
+ int nnode, nchild;
+ struct bio·Node *parent, *sibling, *child;
+
+ void *data;
} bio·Node;
typedef struct bio·Tree
@@ -53,7 +50,6 @@ error bio·writenewick(bio·Tree tree, io·Putter out, void*);
/* i/o */
typedef struct bio·SeqReader bio·SeqReader;
-typedef struct bio·SeqWriter bio·SeqWriter;
typedef struct bio·Seq
{
@@ -68,3 +64,6 @@ error bio·closeseq(bio·SeqReader *rdr);
error bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq);
error bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq);
+
+error bio·writefasta(io·Writer io, void *wtr, bio·Seq seq);
+error bio·writefastq(io·Writer io, void *wtr, bio·Seq seq);
diff --git a/include/libn.h b/include/libn.h
index 2b8dc4c..5d557ce 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -22,34 +22,31 @@ typedef wchar_t wchar;
// ----------------------------------------------------------------------------
// dynamic array
-typedef struct bufHdr
+typedef struct BufHdr
{
vlong len;
vlong cap;
byte buf[];
-} bufHdr;
+} BufHdr;
-#define _bufHdr(s) ((bufHdr*)((uint8*)(s)-offsetof(bufHdr, buf)))
-#define buflen(s) ((s) ? (_bufHdr(s)->len) : 0)
-#define bufcap(s) ((s) ? (_bufHdr(s)->cap) : 0)
-#define bufend(s) ((s) + buflen(s))
-#define bufsize(s) ((s) ? (buflen(s) * sizeof((s)[0])) : 0)
+#define bufhdr(b) ((BufHdr*)((uint8*)(b)-offsetof(BufHdr, buf)))
+#define buflen(b) ((b) ? (bufhdr(b)->len) : 0)
+#define bufcap(b) ((b) ? (bufhdr(b)->cap) : 0)
+#define bufend(b) ((b) + buflen(b))
+#define bufsize(b) ((b) ? (buflen(b) * sizeof((b)[0])) : 0)
-#define buffree(s) ((s) ? (free(_bufHdr(s)), (s) = nil) : 0)
-#define buffit(s, n) ((n) <= bufcap(s) ? 0 : ((s) = bufgrow((s), (n), sizeof(*(s)))))
+#define buffree(b) ((b) ? (free(bufhdr(b)), (b) = nil) : 0)
+#define buffit(b, n) ((n) <= bufcap(b) ? 0 : ((b) = ·bufgrow((b), (n), sizeof(*(b)))))
-#define bufresize(s, n) \
- do { \
- (buffit(s, n)); \
- ((_bufHdr(s)->len) = (n)); \
- } while (0)
+#define bufpush(b, ...) (buffit((b), 1 + buflen(b)), (b)[bufhdr(b)->len++] = (__VA_ARGS__))
+#define bufaddn(b, n) (buffit(b, buflen(b)+n), bufhdr(b)->len += n, b+bufhdr(b)->len-n)
-#define bufpush(s, ...) (buffit((s), 1 + buflen(s)), (s)[_bufHdr(s)->len++] = (__VA_ARGS__))
+#define bufpop(b) ((b)[--bufhdr(b)->len])
+#define bufdel(b, i) bufdeln((b), (i), 1)
+#define bufdeln(b, i, n) (memmove((b)+(i), (b)+(i)+(n), sizeof(*(b))*(bufhdr(b)->len-(n)-(i)), bufhdr(b)->len -= (n))
+#define bufdelswap(b, i) ((b)[i] = bufend(b)[-1], bufhdr(b)->len-=1)
-#define bufpop(s, i) (_bufpop((s), (i), sizeof(*(s))), (s)[_bufHdr(s)->len])
-
-void* bufgrow(void*, vlong, vlong);
-void _bufpop(void*, int, vlong);
+void* ·bufgrow(void*, vlong, vlong);
// -----------------------------------------------------------------------------
// memory allocation
@@ -408,9 +405,11 @@ void sort·strings(uintptr n, byte* arr[]);
// fast random number generation
error rng·init(uint64 seed);
-double rng·random();
+double rng·random(void);
+double rng·exponential(double lambda);
bool rng·bernoulli(double f);
uint64 rng·randi(int max);
+uint64 rng·poisson(double mean);
// -----------------------------------------------------------------------------
// variable arguments
diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c
index 6b40e99..078325a 100644
--- a/sys/libbio/fasta.c
+++ b/sys/libbio/fasta.c
@@ -40,8 +40,8 @@ grow(struct SeqBuf **sb, int min)
newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min));
assert(newcap >= (*sb)->cap+min);
- if (new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) {
- errorf("memory: could not allocate new buffer");
+ if(new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) {
+ errorf("memory: could not allocate new buffer\n");
return 1;
}
@@ -63,13 +63,13 @@ put(struct SeqBuf **sb, byte c)
struct SeqBuf *sq;
sq = *sb;
- if (sq->it < (sq->b + sq->cap)) {
+ if(sq->it < (sq->b + sq->cap)) {
*sq->it++ = c;
return 0;
}
- if (err = grow(sb, 1), err) {
- errorf("memory fail: could not allocate more buffer");
+ if(err = grow(sb, 1), err) {
+ errorf("memory fail: could not allocate more buffer\n");
sq->mem.free(sq->heap, sq);
return 1;
}
@@ -85,21 +85,11 @@ push(struct SeqBuf **sb, int n, void *buf)
int d, err;
struct SeqBuf *seq;
- char *cb = buf;
- for(d=0; d < n; d++) {
- if(cb[d] == 0) {
- printf("ERROR: zero byte being copied @ pos %d/%d\n", d, n);
- printf("ERROR: string afterwards is %s\n", cb+n+1);
- printf("ERROR: string afterwards is %s\n", cb+n+2);
- exit(1);
- }
- }
-
seq = *sb;
- if(d = seq->cap - (seq->it - seq->b), d < n) {
- assert(d > 0);
+ if(d = (seq->cap - (seq->it - seq->b)), d < n) {
+ assert(d >= 0);
if (err = grow(sb, n-d), err) {
- errorf("memory fail: could not allocate more buffer");
+ errorf("memory fail: could not allocate more buffer\n");
seq->mem.free(seq->heap, seq);
return 1;
}
@@ -137,7 +127,7 @@ fill(bio·SeqReader *rdr)
n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf);
if(n < 0) {
- errorf("read: no data obtained from reader");
+ errorf("read: no data obtained from reader\n");
return 1;
}
rdr->b = rdr->buf;
@@ -166,7 +156,7 @@ bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap)
r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE;
if (err=fill(r), err) {
- errorf("fill: could not populate buffer");
+ errorf("fill: could not populate buffer\n");
goto ERROR;
}
@@ -209,7 +199,7 @@ readfasta(bio·SeqReader *rdr, bio·Seq *seq, byte hdr, byte stop)
// NOTE: Can this case happen?
assert(rdr->b != rdr->bend);
if(*rdr->b++ != hdr) {
- errorf("fasta/q format: expected '%c', found '%c'", hdr, *rdr->b--);
+ errorf("fasta/q format: expected '%c', found '%c'\n", hdr, *rdr->b--);
return 1;
}
@@ -224,7 +214,7 @@ NAME:
push(&rdr->seq, rdr->b - beg, beg);
if(err=fill(rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto NAME;
@@ -246,16 +236,18 @@ SEQLOOP:
rdr->b++;
}
+#if 0
for(byte *cb = rdr->seq->b+rdr->seq->off; cb != rdr->seq->it; ++cb) {
if(*cb == 0) {
printf("ERROR @ pos=%ld: Found zero byte\n", cb - rdr->seq->b+rdr->seq->off);
}
}
+#endif
push(&rdr->seq, rdr->b - beg, beg);
if(err=fill(rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto SEQLOOP;
@@ -278,13 +270,13 @@ bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq)
err = readfasta(rdr, seq, '>', '>');
if(err && err != EOF) {
- errorf("parse fail: could not read sequence of record");
+ errorf("parse fail: could not read sequence of record\n");
return err;
}
seq->name = rdr->seq->b;
seq->s = rdr->seq->b + rdr->seq->off;
- seq->len = rdr->seq->it - seq->s - 1;
+ seq->len = rdr->seq->it - seq->s - 1; // shift by 1 as we pushed a '0' to end
seq->q = nil;
return err;
@@ -303,14 +295,14 @@ bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq)
err = readfasta(rdr, seq, '@', '+');
if(err) {
- errorf("parse fail: could not read sequence of record");
+ errorf("parse fail: could not read sequence of record\n");
return err;
}
seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off);
if(*rdr->b++ != '+') {
- errorf("format error: no '+' character seperator found");
+ errorf("format error: no '+' character seperator found\n");
return -1;
}
@@ -323,7 +315,7 @@ EATLN:
}
if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto EATLN;
@@ -347,7 +339,7 @@ QUAL:
push(&rdr->seq, rdr->b - beg, beg);
if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto QUAL;
@@ -358,8 +350,51 @@ SUCCESS:
put(&rdr->seq, '\0');
seq->name = rdr->seq->b;
- seq->s = rdr->seq->b + rdr->seq->off;
+ seq->s = rdr->seq->b + rdr->seq->off - 1;
seq->q = seq->s + seq->len + 1;
return err;
}
+
+// -----------------------------------------------------------------------
+// sequence writing
+
+error
+bio·writefasta(io·Writer io, void *wtr, bio·Seq seq)
+{
+ int i, j, d;
+ char buf[2048], *b = buf, *e = arrend(buf);
+
+ *b++ = '>';
+ while(*seq.name) {
+ *b++ = *seq.name++;
+ if(b == e) {
+ io.write(wtr, 1, arrlen(buf), buf);
+ b = buf;
+ }
+ }
+
+ for(i=0; i<seq.len; i = j) {
+ j = MIN(i+70, seq.len);
+ d = j - i;
+ if((e-b) <= d+1) {
+ io.write(wtr, 1, b-buf, buf);
+ b = buf;
+ }
+ *b++ = '\n';
+ memcpy(b, seq.s+i, d);
+ b += d;
+ }
+
+ *b++ = '\n';
+ io.write(wtr, 1, b-buf, buf);
+
+ return 0;
+}
+
+error
+bio·writefastq(io·Writer io, void *wtr, bio·Seq seq)
+{
+ panicf("need to implement");
+ return 1;
+}
diff --git a/sys/libbio/newick.c b/sys/libbio/newick.c
index a838278..98d30f2 100644
--- a/sys/libbio/newick.c
+++ b/sys/libbio/newick.c
@@ -282,6 +282,7 @@ parse(struct Parser *p)
node = p->heap.alloc(p->himpl, 1, sizeof(*node));
memset(node, 0, sizeof(*node));
+
node->name = str·make(tok.lit.s);
phylo·addchild(p->root, node);
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
index 1bb5bc7..920102b 100644
--- a/sys/libbio/phylo.c
+++ b/sys/libbio/phylo.c
@@ -107,7 +107,7 @@ phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
{
bio·Node *it;
- for (it = clade->child; it != nil; it = it->sibling) {
+ for(it = clade->child; it != nil; it = it->sibling) {
ctx = phylo·postorder(it, op, ctx);
}
@@ -120,13 +120,26 @@ phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
bio·Node *it;
ctx = op(clade, ctx);
- for (it = clade->child; it != nil; it = it->sibling) {
+ for(it = clade->child; it != nil; it = it->sibling) {
ctx = phylo·preorder(it, op, ctx);
}
return ctx;
}
+int
+phylo·collectpostorder(bio·Node *clade, bio·Node **list)
+{
+ bio·Node *it;
+ int n;
+
+ for(n = 0, it = clade->child; it != nil; it = it->sibling) {
+ n += phylo·collectpostorder(it, list+n);
+ }
+
+ return n;
+}
+
static
inline
void*
@@ -412,18 +425,3 @@ phylo·reroot(bio·Tree *tree, bio·Node *node, double d)
return 0;
}
#undef PREC
-
-// -----------------------------------------------------------------------
-// ancestral inference
-
-struct phylo·InferOpts
-{
- int nstates;
- double *Q;
-};
-
-error
-phylo·inferancestral(bio·Tree *tree, struct phylo·InferOpts opts)
-{
- return 0;
-}
diff --git a/sys/libn/error.c b/sys/libn/error.c
index a9d684c..508136c 100644
--- a/sys/libn/error.c
+++ b/sys/libn/error.c
@@ -17,9 +17,8 @@ errorf(byte* fmt, ...)
va_list args;
va_start(args, fmt);
- printf("error: ");
- vprintf(fmt, args);
- printf("\n");
+ fprintf(stderr, "error: ");
+ vfprintf(stderr, fmt, args);
va_end(args);
}
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index d7df8e3..1c7ab07 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -36,23 +36,23 @@ mem·Allocator sys·Memory = {
/* Grow to particular size */
void*
-bufgrow(void* buf, vlong newLen, vlong eltsize)
+·bufgrow(void* buf, vlong newLen, vlong eltsize)
{
assert(bufcap(buf) <= (SIZE_MAX - 1) / 2);
vlong newCap = MAX(16, MAX(1 + 2 * bufcap(buf), newLen));
assert(newLen <= newCap);
- assert(newCap <= (SIZE_MAX - offsetof(bufHdr, buf)) / eltsize);
+ assert(newCap <= (SIZE_MAX - offsetof(BufHdr, buf)) / eltsize);
- vlong newSize = offsetof(bufHdr, buf) + newCap * eltsize;
+ vlong newSize = offsetof(BufHdr, buf) + newCap * eltsize;
- bufHdr* newHdr;
+ BufHdr* newHdr;
if (buf) {
- newHdr = _bufHdr(buf);
- newHdr = (bufHdr*)realloc((void*)newHdr, newSize);
+ newHdr = bufhdr(buf);
+ newHdr = (BufHdr*)realloc((void*)newHdr, newSize);
} else {
- newHdr = (bufHdr*)malloc(newSize);
+ newHdr = (BufHdr*)malloc(newSize);
newHdr->len = 0;
}
@@ -62,20 +62,20 @@ bufgrow(void* buf, vlong newLen, vlong eltsize)
/* Pop out a value */
void
-_bufpop(void *buf, int i, vlong eltsize)
+·bufdel(void *buf, int i, vlong eltsize)
{
int n;
byte *b;
byte stk[1024];
assert(eltsize < sizeof(stk));
- b = (byte*) buf;
- if (n = buflen(buf), i < n) {
+ b = (byte*)buf;
+ if(n = buflen(buf), i < n) {
memcpy(stk, b+eltsize*i, eltsize);
memcpy(b+eltsize*i, b+eltsize*(i+1), eltsize*(n-i-1));
memcpy(b+eltsize*(n-1), stk, eltsize);
}
- _bufHdr(buf)->len--;
+ bufhdr(buf)->len--;
}
// -------------------------------------------------------------------------
diff --git a/sys/libn/random.c b/sys/libn/random.c
index 558641e..551d1e9 100644
--- a/sys/libn/random.c
+++ b/sys/libn/random.c
@@ -1,4 +1,5 @@
#include <u.h>
+#include <libn.h>
// ----------------------------------------------------------------------------
// Internal structure
@@ -66,12 +67,29 @@ rng·init(uint64 seed)
/* Returns a random float64 between 0 and 1 */
double
-rng·random()
+rng·random(void)
{
uint64 r = xoshiro256ss(&RNG);
return (double)r / (double)UINT64_MAX;
}
+double
+rng·exponential(double lambda)
+{
+ double f;
+
+ f = rng·random();
+ return -log(1 - f)/lambda;
+}
+
+double
+rng·normal(void)
+{
+ double f;
+ f = rng·random();
+
+}
+
/* Returns true or false on success of trial */
bool
rng·bernoulli(double f)
@@ -88,3 +106,18 @@ rng·randi(int max)
uint64 r = xoshiro256ss(&RNG);
return r % max;
}
+
+uint64
+rng·poisson(double mean)
+{
+ uint64 n;
+ double c;
+
+ if(mean<10.0) {
+ for(n=0, c=rng·exponential(1.0); c<mean; ++n, c+=rng·exponential(1.0))
+ ;
+ return n;
+ }
+
+ return 0;
+}