diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 11:07:31 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-18 11:07:31 -0700 |
commit | fe39e4fa983ace442e99a0e8811e69fef18c48d9 (patch) | |
tree | 4370f21cf5e2d85ac95a03c9a23937c0bfdb6c38 | |
parent | 25b974618ec3242a9d7d7373b5c9a3a8bf0f0681 (diff) |
feat: use fixed width window into each block for end alignment instead of entire block
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | pangraph/graph.py | 92 |
2 files changed, 48 insertions, 46 deletions
@@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json 2>/dev/null #1>staph.log + pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log # figures diff --git a/pangraph/graph.py b/pangraph/graph.py index 5a3c618..1664d03 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -495,55 +495,57 @@ class Graph(object): delta = len(blk_list)-len(shared_blks) if delta > 0 and num_seqs > 1: print(f"LEN: {delta}", end="\t") - fd, path = tempfile.mkstemp() - try: - with os.fdopen(fd, 'w') as tmp: - for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())): - tmp.write(f">isolate_{i:04d}\n") - blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] - for b in blks: - tmp.write(b.extract(*tag)) - tmp.write('\n') - tmp.flush() - proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree", - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True) - out, err = proc.communicate() - tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') - print(tree.total_branch_length()) - finally: - os.remove(path) + for i, (left, right) in enumerate([(beg[0]-EXTEND,beg[0]), (end[1],end[1]+EXTEND)]): + fd, path = tempfile.mkstemp() + try: + with os.fdopen(fd, 'w') as tmp: + for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())): + tmp.write(f">isolate_{i:04d}\n") + blks = self.seqs[tag[0]][left:right] + for b in blks: + tmp.write(b.extract(*tag)) + tmp.write('\n') + tmp.flush() + print(f"aligning {num_seqs} seqs") + proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True) + out, err = proc.communicate() + tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') + print(f"-> NUM {i}: {tree.total_branch_length()}") + finally: + os.remove(path) else: print(f"NO MATCH") # NOTE: debugging code - if len(blk_list) < len(shared_blks): - ref_list = set() - first = True - for tag in ref.muts.keys(): - beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1]) - end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1]) - blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] - if first: - ref_list = set([b.id for b in blks]) - first = False - else: - ref_list.intersection_update(set([b.id for b in blks])) - - qry_list = set() - first = True - for tag in qry.muts.keys(): - beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1]) - end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1]) - blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] - if first: - qry_list = set([b.id for b in blks]) - first = False - else: - qry_list.intersection_update(set([b.id for b in blks])) - - breakpoint("inconsistent number of blocks") + # if len(blk_list) < len(shared_blks): + # ref_list = set() + # first = True + # for tag in ref.muts.keys(): + # beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1]) + # end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1]) + # blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] + # if first: + # ref_list = set([b.id for b in blks]) + # first = False + # else: + # ref_list.intersection_update(set([b.id for b in blks])) + + # qry_list = set() + # first = True + # for tag in qry.muts.keys(): + # beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1]) + # end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1]) + # blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] + # if first: + # qry_list = set([b.id for b in blks]) + # first = False + # else: + # qry_list.intersection_update(set([b.id for b in blks])) + + # breakpoint("inconsistent number of blocks") self.prune_blks() |