aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-09-10 15:04:40 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-09-10 15:04:40 -0700
commit02d03656460253609daf2883de5518582b8b7af7 (patch)
tree8f2be999fad9c78ae19cfa8302653f6b2b9019c4
parente24f680eeb4aecefec3e06f4e9857657fc9e8d1c (diff)
fix: properly deal with precondition for edge repair. now only if block lists show diversity
-rw-r--r--Makefile2
-rw-r--r--pangraph/block.py2
-rw-r--r--pangraph/graph.py68
-rw-r--r--pangraph/sequence.py2
4 files changed, 45 insertions, 29 deletions
diff --git a/Makefile b/Makefile
index 85c1b24..d1a401d 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 -e 25 -w 1000 data/staph/guide.json
+ pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json
# 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log
# figures
diff --git a/pangraph/block.py b/pangraph/block.py
index 1bc501d..92f9107 100644
--- a/pangraph/block.py
+++ b/pangraph/block.py
@@ -146,8 +146,8 @@ class Block(object):
qryblks = [nb for i, nb in enumerate(newblks) if qrys[i] is not None]
if aln['orientation'] == -1:
qryblks = qryblks[::-1]
- refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None]
+ refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None]
sharedblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None and qrys[i] is not None]
return newblks, qryblks, refblks, sharedblks, isomap
diff --git a/pangraph/graph.py b/pangraph/graph.py
index 96cb721..096d62a 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -475,22 +475,24 @@ class Graph(object):
for tag in shared_blks[0].muts.keys():
pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks]
strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks]
+ beg, end = pos[0], pos[-1]
if strand[0] == Strand.Plus:
- beg, end = pos[0], pos[-1]
+ lwindow = min(window, shared_blks[0].len_of(*tag))
+ rwindow = min(window, shared_blks[-1].len_of(*tag))
- lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window]
- rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend]
+ lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+lwindow]
+ rblks_x = self.seqs[tag[0]][end[1]-rwindow:end[1]+extend]
- lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
- rblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
+ # lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
+ # rblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
elif strand[0] == Strand.Minus:
- beg, end = pos[0], pos[-1]
+ lwindow = min(window, shared_blks[-1].len_of(*tag))
+ rwindow = min(window, shared_blks[0].len_of(*tag))
+ rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+rwindow]
+ lblks_x = self.seqs[tag[0]][end[1]-lwindow:end[1]+extend]
- rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window]
- lblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend]
-
- rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
- lblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
+ # rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
+ # lblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
else:
raise ValueError("unrecognized strand polarity")
@@ -498,23 +500,28 @@ class Graph(object):
lblks_set_x = set([b.id for b in lblks_x])
rblks_set_x = set([b.id for b in rblks_x])
- lblks_set_s = set([b.id for b in lblks_s])
- rblks_set_s = set([b.id for b in rblks_s])
+ # lblks_set_s = set([b.id for b in lblks_s])
+ # rblks_set_s = set([b.id for b in rblks_s])
+
+ lblks_set_s = set([b.id for b in lblks_x])
+ rblks_set_s = set([b.id for b in rblks_x])
first = False
else:
lblks_set_x.intersection_update(set([b.id for b in lblks_x]))
rblks_set_x.intersection_update(set([b.id for b in rblks_x]))
- lblks_set_s.intersection_update(set([b.id for b in lblks_s]))
- rblks_set_s.intersection_update(set([b.id for b in rblks_s]))
+ lblks_set_s.update(set([b.id for b in lblks_x]))
+ rblks_set_s.update(set([b.id for b in rblks_x]))
+ # lblks_set_s.intersection_update(set([b.id for b in lblks_s]))
+ # rblks_set_s.intersection_update(set([b.id for b in rblks_s]))
num_seqs += 1
def emit(side):
if side == 'left':
- delta = len(lblks_set_x)-len(lblks_set_s)
+ delta = len(lblks_set_s)-len(lblks_set_x)
elif side == 'right':
- delta = len(rblks_set_x)-len(rblks_set_s)
+ delta = len(lblks_set_s)-len(rblks_set_x)
else:
raise ValueError(f"unrecognized argument '{side}' for side")
@@ -528,18 +535,29 @@ class Graph(object):
strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks]
beg, end = pos[0], pos[-1]
- if side == 'left':
- left, right = beg[0]-extend,beg[0]+window
- elif side == 'right':
- left, right = end[1]-window,end[1]+extend
- else:
- raise ValueError(f"unrecognized argument '{side}' for side")
+ if strand[0] == Strand.Plus:
+ if side == 'left':
+ left, right = beg[0]-extend,beg[0]+min(window,shared_blks[0].len_of(*tag))
+ elif side == 'right':
+ left, right = end[1]-min(window,shared_blks[-1].len_of(*tag)),end[1]+extend
+ else:
+ raise ValueError(f"unrecognized argument '{side}' for side")
+
+ elif strand[0] == Strand.Minus:
+ if side == 'left':
+ left, right = end[1]-min(window,shared_blks[-1].len_of(*tag)),end[1]+extend
+ elif side == 'right':
+ left, right = beg[0]-extend,beg[0]+min(window, shared_blks[0].len_of(*tag))
+ else:
+ raise ValueError(f"unrecognized argument '{side}' for side")
iso_blks = self.seqs[tag[0]][left:right]
print("POSITIONS", pos)
print("STRAND", strand)
- print("LIST", lblks_set_x if side == 'left' else rblks_set_x)
- print("SHARED", lblks_set_s if side == 'left' else rblks_set_s)
+ print("LIST", shared_blks)
+ print("MERGED", merged_blks)
+ print("INTERSECTION", lblks_set_x if side == 'left' else rblks_set_x)
+ print("UNION", lblks_set_s if side == 'left' else rblks_set_s)
print("ISO", iso_blks)
breakpoint("stop")
tmp.write(f">isolate_{i:04d} {','.join(b.id for b in iso_blks)}\n")
diff --git a/pangraph/sequence.py b/pangraph/sequence.py
index 98170b4..db2b3e6 100644
--- a/pangraph/sequence.py
+++ b/pangraph/sequence.py
@@ -167,8 +167,6 @@ class Path(object):
beg = start or 0
end = stop or self.position[-1]
l, r = "", ""
- # if start == 159 and stop == 3659:
- # breakpoint("test")
if beg < 0:
if len(self.nodes) > 1:
l = self.sequence_range(self.position[-1]+beg,self.position[-1])