aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-18 11:55:01 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-18 11:55:01 -0700
commit6fed97e86a7e07713884efc9c485b2b7c2b66b68 (patch)
tree9ba58f1436c60bd760248891d5d107fa765846c2
parentda2f92077e0d773b065cea1b576d9f5171cd0091 (diff)
fix: correctly deal with block offsets when slicing for sequences
-rw-r--r--pangraph/graph.py6
-rw-r--r--pangraph/sequence.py35
2 files changed, 36 insertions, 5 deletions
diff --git a/pangraph/graph.py b/pangraph/graph.py
index a38a4e3..3395891 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -504,13 +504,11 @@ class Graph(object):
tmp.write(f">isolate_{i:04d}\n")
if left == right:
breakpoint("no difference")
- blks = self.seqs[tag[0]][left:right]
- for b in blks:
- tmp.write(b.extract(*tag))
+ tmp.write(self.seqs[tag[0]].sequence_range(left,right))
tmp.write('\n')
tmp.flush()
print(f"aligning {num_seqs} seqs")
- proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree",
+ proc = subprocess.Popen(f"mafft --auto {path} | fasttree",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
diff --git a/pangraph/sequence.py b/pangraph/sequence.py
index c437602..62bfd32 100644
--- a/pangraph/sequence.py
+++ b/pangraph/sequence.py
@@ -155,6 +155,39 @@ class Path(object):
return None
return (self.position[index[num]], self.position[index[num]+1])
+ # TODO: pull out common functionality into a helper function
+ # TODO: merge with other sequence function
+ def sequence_range(self, start=None, stop=None):
+ beg = start or 0
+ end = stop or self.position[-1]
+ l, r = "", ""
+ if beg < 0:
+ if len(self.nodes) > 1:
+ l = self.sequence_range(self.position[-1]+beg,self.position[-1])
+ beg = 0
+ if end > self.position[-1]:
+ if len(self.nodes) > 1:
+ r = self.sequence_range(0,end-self.position[-1])
+ end = self.position[-1]
+ if beg > end:
+ beg, end = end, beg
+
+ i = np.searchsorted(self.position, beg, side='right') - 1
+ j = np.searchsorted(self.position, end, side='left')
+ m = ""
+ if i < j:
+ if beg < self.position[i]:
+ breakpoint("bad start search")
+ if end > self.position[j]:
+ breakpoint("bad end search")
+ m = self.nodes[i].blk.extract(self.name, self.nodes[i].num)[(beg-self.position[i]):]
+ for n in self.nodes[i+1:j-1]:
+ m += n.blk.extract(self.name, n.num)
+ if j < len(self.nodes):
+ b = self.nodes[j].blk.extract(self.name, self.nodes[j].num)
+ m += b[0:(len(b)+self.position[j]-end)]
+ return l + m + r
+
def __getitem__(self, index):
if isinstance(index, slice):
beg = index.start or 0
@@ -162,7 +195,7 @@ class Path(object):
l, r = [], []
if beg < 0:
if len(self.nodes) > 1:
- l = self[self.position[-1] + beg:self.position[-1]]
+ l = self[(self.position[-1]+beg):self.position[-1]]
beg = 0
if end > self.position[-1]:
if len(self.nodes) > 1: