aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-18 11:07:31 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-18 11:07:31 -0700
commitfe39e4fa983ace442e99a0e8811e69fef18c48d9 (patch)
tree4370f21cf5e2d85ac95a03c9a23937c0bfdb6c38
parent25b974618ec3242a9d7d7373b5c9a3a8bf0f0681 (diff)
feat: use fixed width window into each block for end alignment instead of entire block
-rw-r--r--Makefile2
-rw-r--r--pangraph/graph.py92
2 files changed, 48 insertions, 46 deletions
diff --git a/Makefile b/Makefile
index 6061cfc..60266a5 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json 2>/dev/null #1>staph.log
+ pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log
# figures
diff --git a/pangraph/graph.py b/pangraph/graph.py
index 5a3c618..1664d03 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -495,55 +495,57 @@ class Graph(object):
delta = len(blk_list)-len(shared_blks)
if delta > 0 and num_seqs > 1:
print(f"LEN: {delta}", end="\t")
- fd, path = tempfile.mkstemp()
- try:
- with os.fdopen(fd, 'w') as tmp:
- for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())):
- tmp.write(f">isolate_{i:04d}\n")
- blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
- for b in blks:
- tmp.write(b.extract(*tag))
- tmp.write('\n')
- tmp.flush()
- proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree",
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- shell=True)
- out, err = proc.communicate()
- tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick')
- print(tree.total_branch_length())
- finally:
- os.remove(path)
+ for i, (left, right) in enumerate([(beg[0]-EXTEND,beg[0]), (end[1],end[1]+EXTEND)]):
+ fd, path = tempfile.mkstemp()
+ try:
+ with os.fdopen(fd, 'w') as tmp:
+ for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())):
+ tmp.write(f">isolate_{i:04d}\n")
+ blks = self.seqs[tag[0]][left:right]
+ for b in blks:
+ tmp.write(b.extract(*tag))
+ tmp.write('\n')
+ tmp.flush()
+ print(f"aligning {num_seqs} seqs")
+ proc = subprocess.Popen(f"mafft --auto --quiet {path} | fasttree",
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True)
+ out, err = proc.communicate()
+ tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick')
+ print(f"-> NUM {i}: {tree.total_branch_length()}")
+ finally:
+ os.remove(path)
else:
print(f"NO MATCH")
# NOTE: debugging code
- if len(blk_list) < len(shared_blks):
- ref_list = set()
- first = True
- for tag in ref.muts.keys():
- beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1])
- end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1])
- blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
- if first:
- ref_list = set([b.id for b in blks])
- first = False
- else:
- ref_list.intersection_update(set([b.id for b in blks]))
-
- qry_list = set()
- first = True
- for tag in qry.muts.keys():
- beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1])
- end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1])
- blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
- if first:
- qry_list = set([b.id for b in blks])
- first = False
- else:
- qry_list.intersection_update(set([b.id for b in blks]))
-
- breakpoint("inconsistent number of blocks")
+ # if len(blk_list) < len(shared_blks):
+ # ref_list = set()
+ # first = True
+ # for tag in ref.muts.keys():
+ # beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1])
+ # end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1])
+ # blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
+ # if first:
+ # ref_list = set([b.id for b in blks])
+ # first = False
+ # else:
+ # ref_list.intersection_update(set([b.id for b in blks]))
+
+ # qry_list = set()
+ # first = True
+ # for tag in qry.muts.keys():
+ # beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1])
+ # end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1])
+ # blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
+ # if first:
+ # qry_list = set([b.id for b in blks])
+ # first = False
+ # else:
+ # qry_list.intersection_update(set([b.id for b in blks]))
+
+ # breakpoint("inconsistent number of blocks")
self.prune_blks()