aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-11 15:57:50 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-11 15:57:50 -0700
commitc796f164ea6856af354b879abefceb342de2f27c (patch)
treea9d0e3f5d06774f96ee0f08f50dbc412f3131151
parente5c6778ca74ac0e8eb5bc09d6e7ddc54e460e951 (diff)
fix: compute position of each block using path class to ensure its accurate
-rw-r--r--pangraph/block.py1
-rw-r--r--pangraph/graph.py31
-rw-r--r--pangraph/sequence.py27
3 files changed, 44 insertions, 15 deletions
diff --git a/pangraph/block.py b/pangraph/block.py
index ad4cd7c..5c649b9 100644
--- a/pangraph/block.py
+++ b/pangraph/block.py
@@ -92,6 +92,7 @@ class Block(object):
for s in nblk.muts:
nblk.muts[s].update({p+offset:c for p,c in b.muts[s].items()})
offset += len(b)
+ nblk.pos = { k:v for k,v in blks[0].pos.items() }
return nblk
diff --git a/pangraph/graph.py b/pangraph/graph.py
index 6b29b1e..c2c3cdb 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -419,14 +419,6 @@ class Graph(object):
ref = old_ref[hit['ref']['start']:hit['ref']['end']]
qry = old_qry[hit['qry']['start']:hit['qry']['end']]
- for tag, item in ref.positions.items():
- blks = self.seqs[tag[0]][item[0]-EXTEND:item[1]+EXTEND]
- if len(blks) >= 3:
- print(ref.positions)
- print(qry.positions)
- print(f"Isolate: {tag[0]} blocks: {','.join(b.id for b in blks)}")
- breakpoint("test positions")
-
if hit["orientation"] == Strand.Minus:
qry = qry.rev_cmpl()
@@ -473,6 +465,29 @@ class Graph(object):
new_blocks.extend(update(old_qry, new_qrys, hit['qry'], hit['orientation']))
self.prune_blks()
+ shared_blks = set()
+ first = True
+ for tag in ref.muts.keys():
+ pos = self.seqs[tag[0]].position_of(new_refs[0], tag[1])
+ blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND]
+ if first:
+ shared_blks = set([b.id for b in blks])
+ first = False
+ else:
+ shared_blks.intersection_update(set([b.id for b in blks]))
+ first = True
+ for tag in qry.muts.keys():
+ pos = self.seqs[tag[0]].position_of(new_qrys[0], tag[1])
+ blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND]
+ if first:
+ shared_blks = set([b.id for b in blks])
+ first = False
+ else:
+ shared_blks.intersection_update(set([b.id for b in blks]))
+
+ print(f"LEN: {len(shared_blks)}")
+ if len(shared_blks) > 20:
+ breakpoint("big number of blocks")
return [b[0] for b in new_blocks]
def extract(self, name, strip_gaps=True, verbose=False):
diff --git a/pangraph/sequence.py b/pangraph/sequence.py
index baff6a2..185dd38 100644
--- a/pangraph/sequence.py
+++ b/pangraph/sequence.py
@@ -88,11 +88,11 @@ class Path(object):
return seq
- def position_of(self, blk):
- for i, n in enumerate(self.nodes):
- if n.blk == blk:
- return i, n.num
- raise ValueError("block not found in path")
+ # def position_of(self, blk):
+ # for i, n in enumerate(self.nodes):
+ # if n.blk == blk:
+ # return i, n.num
+ # raise ValueError("block not found in path")
def rm_nil_blks(self):
good, popped = [], set()
@@ -153,6 +153,17 @@ class Path(object):
self.nodes = new
self.position = np.cumsum([0] + [n.length(self.name) for n in self.nodes])
+ def position_of(self, blk, num):
+ index = [i for i, n in enumerate(self.nodes) if n.blk == blk]
+ if len(index) <= num:
+ # print(f"NODES: {self.nodes}")
+ # print(f"BLOCK: {blk}")
+ # print(f"POSITION: {self.position}")
+ # print(f"INDEX: {index}")
+ # breakpoint("break")
+ return None
+ return (self.position[index[num]], self.position[index[num]+1])
+
def __getitem__(self, index):
if isinstance(index, slice):
beg = index.start or 0
@@ -164,12 +175,14 @@ class Path(object):
beg = 0
if end > self.position[-1]:
if len(self.nodes) > 1:
- r = self[0:end-self.position[-1]]
+ r = self[0:(end-self.position[-1])]
end = self.position[-1]
+ if beg > end:
+ beg, end = end, beg
i = np.searchsorted(self.position, beg, side='right') - 1
j = np.searchsorted(self.position, end, side='left')
- assert i < j, "sorted"
+ assert i < j, f"not sorted, {beg}-{end}"
return l + [n.blk for n in self.nodes[i:j]] + r
elif isinstance(index, int):
i = np.searchsorted(self.position, index, side='right') - 1