diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 15:42:54 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 15:42:54 -0700 |
commit | 9fa0dc81e1a6162736fbefe7ffffde79f9f808ce (patch) | |
tree | 1a8d011752b7653b902178f4926a26ed5f6fce0d | |
parent | 66bd4dc85b165756f4f1924ff953813c899a9782 (diff) |
fix: added left and right extension back in
-rw-r--r-- | pangraph/graph.py | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/pangraph/graph.py b/pangraph/graph.py index 89ebadc..2f6572a 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -496,30 +496,41 @@ class Graph(object): delta = len(blk_list)-len(shared_blks) if delta > 0 and num_seqs > 1: print(f"LEN: {delta}", end="\t") + fd = [None, None] + path = [None, None] try: - fd, path = tempfile.mkstemp() - with os.fdopen(fd, 'w') as tmp: + fd[0], path[0] = tempfile.mkstemp() + fd[1], path[1] = tempfile.mkstemp() + with os.fdopen(fd[0], 'w') as tmp1, os.fdopen(fd[1], 'w') as tmp2: + tmp = [tmp1, tmp2] for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())): pos = sorted([self.seqs[tag[0]].position_of(b, tag[1]) for b in shared_blks], key=lambda x: x[0]) beg, end = pos[0], pos[-1] - tmp.write(f">isolate_{i:04d}\n") - for n, (left, right) in enumerate([(beg[0]-EXTEND,beg[0]+WINDOW)]): #, (end[1]-WINDOW,end[1]+EXTEND)]): + for n, (left, right) in enumerate([(beg[0]-EXTEND,beg[0]+WINDOW), (end[1]-WINDOW,end[1]+EXTEND)]): + tmp[n].write(f">isolate_{i:04d}\n") s = self.seqs[tag[0]].sequence_range(left,right) if len(s) > EXTEND + WINDOW: breakpoint(f"bad sequence slicing: {len(s)}") - tmp.write(s + '\n') - tmp.flush() - print(f"aligning {num_seqs} seqs") - proc = subprocess.Popen(f"mafft --auto {path} | fasttree", - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True) - out, err = proc.communicate() - tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') - print(f"-> NUM {n}: {tree.total_branch_length()}") + tmp[n].write(s + '\n') + + tmp[0].flush() + tmp[1].flush() + + def make_tree(n): + proc = subprocess.Popen(f"mafft --auto {path[n]} | fasttree", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True) + out, err = proc.communicate() + tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') + print(f"-> {n} SCORE: {tree.total_branch_length()/(2*{num_seqs})}") + + make_tree(0) + make_tree(1) finally: - os.remove(path) + os.remove(path[0]) + os.remove(path[1]) else: print(f"NO MATCH") |