aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-09-10 14:37:21 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-09-10 14:37:21 -0700
commite24f680eeb4aecefec3e06f4e9857657fc9e8d1c (patch)
tree7d37bd186460255c7afecab2166ddc660a62c86f
parentd1712b5a2bbeb61c0660c57e76ccb0e62230edee (diff)
fix: proper handling of Z2 flipped orientations
-rw-r--r--Makefile2
-rw-r--r--pangraph/graph.py33
-rw-r--r--pangraph/sequence.py6
3 files changed, 31 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index d1a401d..85c1b24 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json
+ pangraph build -d data/staph -m 500 -b 0 -e 25 -w 1000 data/staph/guide.json
# 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log
# figures
diff --git a/pangraph/graph.py b/pangraph/graph.py
index cc0c8e7..96cb721 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -473,14 +473,27 @@ class Graph(object):
first = True
num_seqs = 0
for tag in shared_blks[0].muts.keys():
- # NOTE: this is a hack to deal with flipped orientations
- pos = sorted([self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks], key=lambda x: x[0])
- beg, end = pos[0], pos[-1]
- lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window]
- rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend]
-
- lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
- rblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
+ pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks]
+ strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks]
+ if strand[0] == Strand.Plus:
+ beg, end = pos[0], pos[-1]
+
+ lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window]
+ rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend]
+
+ lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
+ rblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
+ elif strand[0] == Strand.Minus:
+ beg, end = pos[0], pos[-1]
+
+ rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window]
+ lblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend]
+
+ rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window]
+ lblks_s = self.seqs[tag[0]][end[1]-window:end[1]]
+ else:
+ raise ValueError("unrecognized strand polarity")
+
if first:
lblks_set_x = set([b.id for b in lblks_x])
rblks_set_x = set([b.id for b in rblks_x])
@@ -511,7 +524,8 @@ class Graph(object):
fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'w') as tmp:
for i, tag in enumerate(merged_blks[0].muts.keys()):
- pos = sorted([self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks], key=lambda x: x[0])
+ pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks]
+ strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks]
beg, end = pos[0], pos[-1]
if side == 'left':
@@ -523,6 +537,7 @@ class Graph(object):
iso_blks = self.seqs[tag[0]][left:right]
print("POSITIONS", pos)
+ print("STRAND", strand)
print("LIST", lblks_set_x if side == 'left' else rblks_set_x)
print("SHARED", lblks_set_s if side == 'left' else rblks_set_s)
print("ISO", iso_blks)
diff --git a/pangraph/sequence.py b/pangraph/sequence.py
index 3660b71..98170b4 100644
--- a/pangraph/sequence.py
+++ b/pangraph/sequence.py
@@ -155,6 +155,12 @@ class Path(object):
return None
return (self.position[index[num]], self.position[index[num]+1])
+ def orientation_of(self, blk, num):
+ orientation = { n.num:n.strand for i, n in enumerate(self.nodes) if n.blk == blk }
+ if not num in orientation:
+ return None
+ return orientation[num]
+
# TODO: pull out common functionality into a helper function
# TODO: merge with other sequence function
def sequence_range(self, start=None, stop=None):