diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-11 11:34:33 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-11 11:34:33 -0700 |
commit | eddc70e299ca2822680f8b4ac9f277d88af002e5 (patch) | |
tree | a2d9d021d6d4eee43ff04997f5815d795eb10720 | |
parent | e1fe2c46651bc53b5ab0006cca07d446b357c79a (diff) |
feat: blocks now store their position (not modulo length) along the path
-rw-r--r-- | pangraph/block.py | 9 | ||||
-rw-r--r-- | pangraph/graph.py | 18 |
2 files changed, 14 insertions, 13 deletions
```diff
diff --git a/pangraph/block.py b/pangraph/block.py
index 05b7806..e3953bd 100644
--- a/pangraph/block.py
+++ b/pangraph/block.py
@@ -26,6 +26,7 @@ class Block(object):
         super(Block, self).__init__()
         self.id = randomid() if gen else 0
         self.seq = None
+        self.pos = {}
         self.muts = {}
 
     def __str__(self):
@@ -49,6 +50,10 @@ class Block(object):
     def isolates(self):
         return dict(Counter([k[0] for k in self.muts]))
 
+    @property
+    def positions(self):
+        return { tag:(pos, pos+self.len_of(*tag)) for tag, pos in self.pos.items() }
+
     # ------------------
     # static methods
 
@@ -56,6 +61,7 @@ class Block(object):
     def from_seq(cls, name, seq):
         new_blk = cls()
         new_blk.seq = as_array(seq)
+        new_blk.pos = {(name, 0): 0}
         new_blk.muts = {(name, 0):{}}
 
         return new_blk
@@ -69,6 +75,7 @@ class Block(object):
         B = Block()
         B.id = d['id']
         B.seq = as_array(d['seq'])
+        B.pos = d['pos']
         B.muts = {unpack(k):v for k, v in d['muts'].items()}
 
         return B
@@ -222,6 +229,7 @@ class Block(object):
 
         return {'id' : self.id,
                 'seq' : "".join(str(n) for n in self.seq),
+                'pos' : self.pos,
                 'muts' : {pack(k) : fix(v) for k, v in self.muts.items()}}
 
     def __len__(self):
@@ -233,6 +241,7 @@ class Block(object):
             start = val.start or 0
             stop = val.stop or len(self.seq)
             b.seq = self.seq[start:stop]
+            b.pos = { iso : start+val.start for iso,start in self.pos.items() }
             for s, _ in self.muts.items():
                 b.muts[s] = {p-start:c for p,c in self.muts[s].items() if p>=start and p<stop}
             return b
diff --git a/pangraph/graph.py b/pangraph/graph.py
index bc1e9d9..70fd102 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -319,19 +319,11 @@ class Graph(object):
                 or not accepted(hit):
                     continue
 
-                merged = True
-                new_blks = self.merge(proc(hit))
+                merged = True
+                self.merge(proc(hit))
                 merged_blks.add(hit['ref']['name'])
                 merged_blks.add(hit['qry']['name'])
 
-                # for blk in new_blks:
-                # for iso in blk.isolates:
-                # path = self.seqs[iso]
-                # x, n = path.position_of(blk)
-                # lb, ub = max(0, x-EXTEND), min(x+blk.len_of(iso, n)+EXTEND, len(path))
-                # subpath = path[lb:ub]
-                # print(subpath, file=sys.stderr)
-                # breakpoint("stop")
 
         self.remove_transitives()
         for path in self.seqs.values():
@@ -407,9 +399,6 @@ class Graph(object):
             # TODO: check that isos is constant along the chain
             for iso in self.blks[c[0][0]].isolates.keys():
                 self.seqs[iso].merge(c[0], c[-1], new_blk)
-                # for n in self.seqs[iso].nodes:
-                # if n.blk.id in [e[0] for e in c]:
-                # breakpoint("bad deletion")
 
             self.blks[new_blk.id] = new_blk
             for b, _ in c:
@@ -429,6 +418,9 @@ class Graph(object):
         # This is why in from_aln(aln) we set the start index to 0
         ref = old_ref[hit['ref']['start']:hit['ref']['end']]
         qry = old_qry[hit['qry']['start']:hit['qry']['end']]
+        print(ref.positions)
+        print(qry.positions)
+        breakpoint("test positions")
 
         if hit["orientation"] == Strand.Minus:
             qry = qry.rev_cmpl()
```