diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-11 15:57:50 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-11 15:57:50 -0700 |
commit | c796f164ea6856af354b879abefceb342de2f27c (patch) | |
tree | a9d0e3f5d06774f96ee0f08f50dbc412f3131151 | |
parent | e5c6778ca74ac0e8eb5bc09d6e7ddc54e460e951 (diff) |
fix: compute position of each block using path class to ensure its accurate
-rw-r--r-- | pangraph/block.py | 1 | ||||
-rw-r--r-- | pangraph/graph.py | 31 | ||||
-rw-r--r-- | pangraph/sequence.py | 27 |
3 files changed, 44 insertions, 15 deletions
diff --git a/pangraph/block.py b/pangraph/block.py index ad4cd7c..5c649b9 100644 --- a/pangraph/block.py +++ b/pangraph/block.py @@ -92,6 +92,7 @@ class Block(object): for s in nblk.muts: nblk.muts[s].update({p+offset:c for p,c in b.muts[s].items()}) offset += len(b) + nblk.pos = { k:v for k,v in blks[0].pos.items() } return nblk diff --git a/pangraph/graph.py b/pangraph/graph.py index 6b29b1e..c2c3cdb 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -419,14 +419,6 @@ class Graph(object): ref = old_ref[hit['ref']['start']:hit['ref']['end']] qry = old_qry[hit['qry']['start']:hit['qry']['end']] - for tag, item in ref.positions.items(): - blks = self.seqs[tag[0]][item[0]-EXTEND:item[1]+EXTEND] - if len(blks) >= 3: - print(ref.positions) - print(qry.positions) - print(f"Isolate: {tag[0]} blocks: {','.join(b.id for b in blks)}") - breakpoint("test positions") - if hit["orientation"] == Strand.Minus: qry = qry.rev_cmpl() @@ -473,6 +465,29 @@ class Graph(object): new_blocks.extend(update(old_qry, new_qrys, hit['qry'], hit['orientation'])) self.prune_blks() + shared_blks = set() + first = True + for tag in ref.muts.keys(): + pos = self.seqs[tag[0]].position_of(new_refs[0], tag[1]) + blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND] + if first: + shared_blks = set([b.id for b in blks]) + first = False + else: + shared_blks.intersection_update(set([b.id for b in blks])) + first = True + for tag in qry.muts.keys(): + pos = self.seqs[tag[0]].position_of(new_qrys[0], tag[1]) + blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND] + if first: + shared_blks = set([b.id for b in blks]) + first = False + else: + shared_blks.intersection_update(set([b.id for b in blks])) + + print(f"LEN: {len(shared_blks)}") + if len(shared_blks) > 20: + breakpoint("big number of blocks") return [b[0] for b in new_blocks] def extract(self, name, strip_gaps=True, verbose=False): diff --git a/pangraph/sequence.py b/pangraph/sequence.py index baff6a2..185dd38 100644 --- a/pangraph/sequence.py +++ b/pangraph/sequence.py @@ -88,11 +88,11 @@ class Path(object): return seq - def position_of(self, blk): - for i, n in enumerate(self.nodes): - if n.blk == blk: - return i, n.num - raise ValueError("block not found in path") + # def position_of(self, blk): + # for i, n in enumerate(self.nodes): + # if n.blk == blk: + # return i, n.num + # raise ValueError("block not found in path") def rm_nil_blks(self): good, popped = [], set() @@ -153,6 +153,17 @@ class Path(object): self.nodes = new self.position = np.cumsum([0] + [n.length(self.name) for n in self.nodes]) + def position_of(self, blk, num): + index = [i for i, n in enumerate(self.nodes) if n.blk == blk] + if len(index) <= num: + # print(f"NODES: {self.nodes}") + # print(f"BLOCK: {blk}") + # print(f"POSITION: {self.position}") + # print(f"INDEX: {index}") + # breakpoint("break") + return None + return (self.position[index[num]], self.position[index[num]+1]) + def __getitem__(self, index): if isinstance(index, slice): beg = index.start or 0 @@ -164,12 +175,14 @@ class Path(object): beg = 0 if end > self.position[-1]: if len(self.nodes) > 1: - r = self[0:end-self.position[-1]] + r = self[0:(end-self.position[-1])] end = self.position[-1] + if beg > end: + beg, end = end, beg i = np.searchsorted(self.position, beg, side='right') - 1 j = np.searchsorted(self.position, end, side='left') - assert i < j, "sorted" + assert i < j, f"not sorted, {beg}-{end}" return l + [n.blk for n in self.nodes[i:j]] + r elif isinstance(index, int): i = np.searchsorted(self.position, index, side='right') - 1 |