From e24f680eeb4aecefec3e06f4e9857657fc9e8d1c Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 10 Sep 2020 14:37:21 -0700 Subject: fix: proper handling of Z2 flipped orientations --- Makefile | 2 +- pangraph/graph.py | 33 ++++++++++++++++++++++++--------- pangraph/sequence.py | 6 ++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index d1a401d..85c1b24 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json + pangraph build -d data/staph -m 500 -b 0 -e 25 -w 1000 data/staph/guide.json # 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log # figures diff --git a/pangraph/graph.py b/pangraph/graph.py index cc0c8e7..96cb721 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -473,14 +473,27 @@ class Graph(object): first = True num_seqs = 0 for tag in shared_blks[0].muts.keys(): - # NOTE: this is a hack to deal with flipped orientations - pos = sorted([self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks], key=lambda x: x[0]) - beg, end = pos[0], pos[-1] - lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window] - rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend] - - lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] - rblks_s = self.seqs[tag[0]][end[1]-window:end[1]] + pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks] + strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks] + if strand[0] == Strand.Plus: + beg, end = pos[0], pos[-1] + + lblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window] + rblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend] + + lblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] + rblks_s = self.seqs[tag[0]][end[1]-window:end[1]] + elif strand[0] == Strand.Minus: + beg, end = pos[0], pos[-1] + + rblks_x = self.seqs[tag[0]][beg[0]-extend:beg[0]+window] + lblks_x = self.seqs[tag[0]][end[1]-window:end[1]+extend] + + rblks_s = self.seqs[tag[0]][beg[0]:beg[0]+window] + lblks_s = self.seqs[tag[0]][end[1]-window:end[1]] + else: + raise ValueError("unrecognized strand polarity") + if first: lblks_set_x = set([b.id for b in lblks_x]) rblks_set_x = set([b.id for b in rblks_x]) @@ -511,7 +524,8 @@ class Graph(object): fd, path = tempfile.mkstemp() with os.fdopen(fd, 'w') as tmp: for i, tag in enumerate(merged_blks[0].muts.keys()): - pos = sorted([self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks], key=lambda x: x[0]) + pos = [self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks] + strand = [self.seqs[tag[0]].orientation_of(b, tag[1]) for b in shared_blks] beg, end = pos[0], pos[-1] if side == 'left': @@ -523,6 +537,7 @@ class Graph(object): iso_blks = self.seqs[tag[0]][left:right] print("POSITIONS", pos) + print("STRAND", strand) print("LIST", lblks_set_x if side == 'left' else rblks_set_x) print("SHARED", lblks_set_s if side == 'left' else rblks_set_s) print("ISO", iso_blks) diff --git a/pangraph/sequence.py b/pangraph/sequence.py index 3660b71..98170b4 100644 --- a/pangraph/sequence.py +++ b/pangraph/sequence.py @@ -155,6 +155,12 @@ class Path(object): return None return (self.position[index[num]], self.position[index[num]+1]) + def orientation_of(self, blk, num): + orientation = { n.num:n.strand for i, n in enumerate(self.nodes) if n.blk == blk } + if not num in orientation: + return None + return orientation[num] + # TODO: pull out common functionality into a helper function # TODO: merge with other sequence function def sequence_range(self, start=None, stop=None): -- cgit v1.2.1