From 02d03656460253609daf2883de5518582b8b7af7 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 10 Sep 2020 15:04:40 -0700 Subject: fix: properly deal with precondition for edge repair. now only if block lists show diversity --- Makefile | 2 +- pangraph/block.py | 2 +- pangraph/graph.py | 68 +++++++++++++++++++++++++++++++++------------------- pangraph/sequence.py | 2 -- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 85c1b24..d1a401d 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 -e 25 -w 1000 data/staph/guide.json + pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json # 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log # figures diff --git a/pangraph/block.py b/pangraph/block.py index 1bc501d..92f9107 100644 --- a/pangraph/block.py +++ b/pangraph/block.py @@ -146,8 +146,8 @@ class Block(object): qryblks = [nb for i, nb in enumerate(newblks) if qrys[i] is not None] if aln['orientation'] == -1: qryblks = qryblks[::-1] - refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None] + refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None] sharedblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None and qrys[i] is not None] return newblks, qryblks, refblks, sharedblks, isomap diff --git a/pangraph/graph.py b/pangraph/graph.py index 96cb721..096d62a 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -475,22 +475,24 @@ class Graph(object): for tag in shared_blks[0].muts.keys(): pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks] strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks] + beg, end = pos[0], pos[-1] if strand[0] == Strand.Plus: - beg, end = pos[0], pos[-1] + lwindow = min(window, shared_blks[0].len_of(*tag)) + rwindow = min(window, shared_blks[-1].len_of(*tag)) - lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window] - rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend] + lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+lwindow] + rblks_x = self.seqs[tag[0]][end[1]-rwindow:end[1]+extend] - lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] - rblks_s = self.seqs[tag[0]][end[1]-window:end[1]] + # lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] + # rblks_s = self.seqs[tag[0]][end[1]-window:end[1]] elif strand[0] == Strand.Minus: - beg, end = pos[0], pos[-1] + lwindow = min(window, shared_blks[-1].len_of(*tag)) + rwindow = min(window, shared_blks[0].len_of(*tag)) + rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+rwindow] + lblks_x = self.seqs[tag[0]][end[1]-lwindow:end[1]+extend] - rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window] - lblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend] - - rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] - lblks_s = self.seqs[tag[0]][end[1]-window:end[1]] + # rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] + # lblks_s = self.seqs[tag[0]][end[1]-window:end[1]] else: raise ValueError("unrecognized strand polarity") @@ -498,23 +500,28 @@ class Graph(object): lblks_set_x = set([b.id for b in lblks_x]) rblks_set_x = set([b.id for b in rblks_x]) - lblks_set_s = set([b.id for b in lblks_s]) - rblks_set_s = set([b.id for b in rblks_s]) + # lblks_set_s = set([b.id for b in lblks_s]) + # rblks_set_s = set([b.id for b in rblks_s]) + + lblks_set_s = set([b.id for b in lblks_x]) + rblks_set_s = set([b.id for b in rblks_x]) first = False else: lblks_set_x.intersection_update(set([b.id for b in lblks_x])) rblks_set_x.intersection_update(set([b.id for b in rblks_x])) - lblks_set_s.intersection_update(set([b.id for b in lblks_s])) - rblks_set_s.intersection_update(set([b.id for b in rblks_s])) + lblks_set_s.update(set([b.id for b in lblks_x])) + rblks_set_s.update(set([b.id for b in rblks_x])) + # lblks_set_s.intersection_update(set([b.id for b in lblks_s])) + # rblks_set_s.intersection_update(set([b.id for b in rblks_s])) num_seqs += 1 def emit(side): if side == 'left': - delta = len(lblks_set_x)-len(lblks_set_s) + delta = len(lblks_set_s)-len(lblks_set_x) elif side == 'right': - delta = len(rblks_set_x)-len(rblks_set_s) + delta = len(lblks_set_s)-len(rblks_set_x) else: raise ValueError(f"unrecognized argument '{side}' for side") @@ -528,18 +535,29 @@ class Graph(object): strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks] beg, end = pos[0], pos[-1] - if side == 'left': - left, right = beg[0]-extend,beg[0]+window - elif side == 'right': - left, right = end[1]-window,end[1]+extend - else: - raise ValueError(f"unrecognized argument '{side}' for side") + if strand[0] == Strand.Plus: + if side == 'left': + left, right = beg[0]-extend,beg[0]+min(window,shared_blks[0].len_of(*tag)) + elif side == 'right': + left, right = end[1]-min(window,shared_blks[-1].len_of(*tag)),end[1]+extend + else: + raise ValueError(f"unrecognized argument '{side}' for side") + + elif strand[0] == Strand.Minus: + if side == 'left': + left, right = end[1]-min(window,shared_blks[-1].len_of(*tag)),end[1]+extend + elif side == 'right': + left, right = beg[0]-extend,beg[0]+min(window, shared_blks[0].len_of(*tag)) + else: + raise ValueError(f"unrecognized argument '{side}' for side") iso_blks = self.seqs[tag[0]][left:right] print("POSITIONS", pos) print("STRAND", strand) - print("LIST", lblks_set_x if side == 'left' else rblks_set_x) - print("SHARED", lblks_set_s if side == 'left' else rblks_set_s) + print("LIST", shared_blks) + print("MERGED", merged_blks) + print("INTERSECTION", lblks_set_x if side == 'left' else rblks_set_x) + print("UNION", lblks_set_s if side == 'left' else rblks_set_s) print("ISO", iso_blks) breakpoint("stop") tmp.write(f">isolate_{i:04d} {','.join(b.id for b in iso_blks)}\n") diff --git a/pangraph/sequence.py b/pangraph/sequence.py index 98170b4..db2b3e6 100644 --- a/pangraph/sequence.py +++ b/pangraph/sequence.py @@ -167,8 +167,6 @@ class Path(object): beg = start or 0 end = stop or self.position[-1] l, r = "", "" - # if start == 159 and stop == 3659: - # breakpoint("test") if beg < 0: if len(self.nodes) > 1: l = self.sequence_range(self.position[-1]+beg,self.position[-1]) -- cgit v1.2.1