about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-07-28 16:39:07 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-07-28 16:39:07 -0700
commit e1fe2c46651bc53b5ab0006cca07d446b357c79a (patch)
tree aa5c91c61edb39d67d1bbbb27c992a66b6ad1681
parent 8e60f8b5688404fc75d91a6716494782a50b93a8 (diff)
fix: must strip gaps upon sequence extraction to have offset correct
-rw-r--r-- pangraph/graph.py 6
-rw-r--r-- pangraph/sequence.py 6
-rw-r--r-- pangraph/tree.py 1
3 files changed, 7 insertions, 6 deletions
diff --git a/pangraph/graph.py b/pangraph/graph.py
index 58cb4cc..bc1e9d9 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -407,9 +407,9 @@ class Graph(object):
# TODO: check that isos is constant along the chain
for iso in self.blks[c[0][0]].isolates.keys():
self.seqs[iso].merge(c[0], c[-1], new_blk)
- for n in self.seqs[iso].nodes:
- if n.blk.id in [e[0] for e in c]:
- breakpoint("bad deletion")
+ # for n in self.seqs[iso].nodes:
+ # if n.blk.id in [e[0] for e in c]:
+ # breakpoint("bad deletion")
self.blks[new_blk.id] = new_blk
for b, _ in c:
diff --git a/pangraph/sequence.py b/pangraph/sequence.py
index 9374335..b2b5ce7 100644
--- a/pangraph/sequence.py
+++ b/pangraph/sequence.py
@@ -77,7 +77,7 @@ class Path(object):
def sequence(self, verbose=False):
seq = ""
for n in self.nodes:
- s = n.blk.extract(self.name, n.num, strip_gaps=False, verbose=verbose)
+ s = n.blk.extract(self.name, n.num, strip_gaps=True, verbose=verbose)
if n.strand == Strand.Plus:
seq += s
else:
@@ -120,8 +120,6 @@ class Path(object):
ids = [n.blk.id for n in self.nodes]
try:
i, j = ids.index(start[0]), ids.index(stop[0])
- except:
- return
if self.nodes[i].strand == start[1]:
beg, end, s = i, j, Strand.Plus
@@ -136,6 +134,8 @@ class Path(object):
self.position = np.cumsum([0] + [n.length(self.name) for n in self.nodes])
N += 1
+ except:
+ return
def replace(self, blk, tag, new_blks, blk_map):
new = []
diff --git a/pangraph/tree.py b/pangraph/tree.py
index ba03694..605bac3 100644
--- a/pangraph/tree.py
+++ b/pangraph/tree.py
@@ -304,6 +304,7 @@ class Tree(object):
rec = G.extract(n.name)
uncompressed_length += len(orig)
if orig != rec:
+ breakpoint("inconsistency")
nerror += 1
with open("test.fa", "w+") as out: