about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-08-11 11:34:33 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-08-11 11:34:33 -0700
commit eddc70e299ca2822680f8b4ac9f277d88af002e5 (patch)
tree a2d9d021d6d4eee43ff04997f5815d795eb10720
parent e1fe2c46651bc53b5ab0006cca07d446b357c79a (diff)
feat: blocks now store their position (not modulo length) along the path
-rw-r--r-- pangraph/block.py 9
-rw-r--r-- pangraph/graph.py 18
2 files changed, 14 insertions, 13 deletions
diff --git a/pangraph/block.py b/pangraph/block.py
index 05b7806..e3953bd 100644
--- a/pangraph/block.py
+++ b/pangraph/block.py
@@ -26,6 +26,7 @@ class Block(object):
super(Block, self).__init__()
self.id = randomid() if gen else 0
self.seq = None
+ self.pos = {}
self.muts = {}
def __str__(self):
@@ -49,6 +50,10 @@ class Block(object):
def isolates(self):
return dict(Counter([k[0] for k in self.muts]))
+ @property
+ def positions(self):
+ return { tag:(pos, pos+self.len_of(*tag)) for tag, pos in self.pos.items() }
+
# ------------------
# static methods
@@ -56,6 +61,7 @@ class Block(object):
def from_seq(cls, name, seq):
new_blk = cls()
new_blk.seq = as_array(seq)
+ new_blk.pos = {(name, 0): 0}
new_blk.muts = {(name, 0):{}}
return new_blk
@@ -69,6 +75,7 @@ class Block(object):
B = Block()
B.id = d['id']
B.seq = as_array(d['seq'])
+ B.pos = d['pos']
B.muts = {unpack(k):v for k, v in d['muts'].items()}
return B
@@ -222,6 +229,7 @@ class Block(object):
return {'id' : self.id,
'seq' : "".join(str(n) for n in self.seq),
+ 'pos' : self.pos,
'muts' : {pack(k) : fix(v) for k, v in self.muts.items()}}
def __len__(self):
@@ -233,6 +241,7 @@ class Block(object):
start = val.start or 0
stop = val.stop or len(self.seq)
b.seq = self.seq[start:stop]
+ b.pos = { iso : start+val.start for iso,start in self.pos.items() }
for s, _ in self.muts.items():
b.muts[s] = {p-start:c for p,c in self.muts[s].items() if p>=start and p<stop}
return b
diff --git a/pangraph/graph.py b/pangraph/graph.py
index bc1e9d9..70fd102 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -319,19 +319,11 @@ class Graph(object):
or not accepted(hit):
continue
- merged = True
- new_blks = self.merge(proc(hit))
+ merged = True
+ self.merge(proc(hit))
merged_blks.add(hit['ref']['name'])
merged_blks.add(hit['qry']['name'])
- # for blk in new_blks:
- # for iso in blk.isolates:
- # path = self.seqs[iso]
- # x, n = path.position_of(blk)
- # lb, ub = max(0, x-EXTEND), min(x+blk.len_of(iso, n)+EXTEND, len(path))
- # subpath = path[lb:ub]
- # print(subpath, file=sys.stderr)
- # breakpoint("stop")
self.remove_transitives()
for path in self.seqs.values():
@@ -407,9 +399,6 @@ class Graph(object):
# TODO: check that isos is constant along the chain
for iso in self.blks[c[0][0]].isolates.keys():
self.seqs[iso].merge(c[0], c[-1], new_blk)
- # for n in self.seqs[iso].nodes:
- # if n.blk.id in [e[0] for e in c]:
- # breakpoint("bad deletion")
self.blks[new_blk.id] = new_blk
for b, _ in c:
@@ -429,6 +418,9 @@ class Graph(object):
# This is why in from_aln(aln) we set the start index to 0
ref = old_ref[hit['ref']['start']:hit['ref']['end']]
qry = old_qry[hit['qry']['start']:hit['qry']['end']]
+ print(ref.positions)
+ print(qry.positions)
+ breakpoint("test positions")
if hit["orientation"] == Strand.Minus:
qry = qry.rev_cmpl()