diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 11:55:01 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 11:55:01 -0700 |
commit | 6fed97e86a7e07713884efc9c485b2b7c2b66b68 (patch) | |
tree | 9ba58f1436c60bd760248891d5d107fa765846c2 | |
parent | da2f92077e0d773b065cea1b576d9f5171cd0091 (diff) |
fix: correctly deal with block offsets when slicing for sequences
-rw-r--r-- | pangraph/graph.py | 6 | ||||
-rw-r--r-- | pangraph/sequence.py | 35 |
2 files changed, 36 insertions, 5 deletions
diff --git a/pangraph/graph.py b/pangraph/graph.py index a38a4e3..3395891 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -504,13 +504,11 @@ class Graph(object): tmp.write(f">isolate_{i:04d}\n") if left == right: breakpoint("no difference") - blks = self.seqs[tag[0]][left:right] - for b in blks: - tmp.write(b.extract(*tag)) + tmp.write(self.seqs[tag[0]].sequence_range(left,right)) tmp.write('\n') tmp.flush() print(f"aligning {num_seqs} seqs") - proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree", + proc = subprocess.Popen(f"mafft --auto {path} | fasttree", stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) diff --git a/pangraph/sequence.py b/pangraph/sequence.py index c437602..62bfd32 100644 --- a/pangraph/sequence.py +++ b/pangraph/sequence.py @@ -155,6 +155,39 @@ class Path(object): return None return (self.position[index[num]], self.position[index[num]+1]) + # TODO: pull out common functionality into a helper function + # TODO: merge with other sequence function + def sequence_range(self, start=None, stop=None): + beg = start or 0 + end = stop or self.position[-1] + l, r = "", "" + if beg < 0: + if len(self.nodes) > 1: + l = self.sequence_range(self.position[-1]+beg,self.position[-1]) + beg = 0 + if end > self.position[-1]: + if len(self.nodes) > 1: + r = self.sequence_range(0,end-self.position[-1]) + end = self.position[-1] + if beg > end: + beg, end = end, beg + + i = np.searchsorted(self.position, beg, side='right') - 1 + j = np.searchsorted(self.position, end, side='left') + m = "" + if i < j: + if beg < self.position[i]: + breakpoint("bad start search") + if end > self.position[j]: + breakpoint("bad end search") + m = self.nodes[i].blk.extract(self.name, self.nodes[i].num)[(beg-self.position[i]):] + for n in self.nodes[i+1:j-1]: + m += n.blk.extract(self.name, n.num) + if j < len(self.nodes): + b = self.nodes[j].blk.extract(self.name, self.nodes[j].num) + m += b[0:(len(b)+self.position[j]-end)] + return l + m + r + def __getitem__(self, index): if isinstance(index, slice): beg = index.start or 0 @@ -162,7 +195,7 @@ class Path(object): l, r = [], [] if beg < 0: if len(self.nodes) > 1: - l = self[self.position[-1] + beg:self.position[-1]] + l = self[(self.position[-1]+beg):self.position[-1]] beg = 0 if end > self.position[-1]: if len(self.nodes) > 1: |