diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-13 14:40:16 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-13 14:40:16 -0700 |
commit | 967b6412dddc99de429e692973819bb7147b0354 (patch) | |
tree | 5cfde74874125f5796b117e1c565b6335e9957f5 | |
parent | b1af35822273ebe52f6fa089cfb5bc5f8ef542a3 (diff) |
feat: prototype of mafft alignment of blocks
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | pangraph/build.py | 3 | ||||
-rw-r--r-- | pangraph/graph.py | 21 |
3 files changed, 23 insertions, 3 deletions
@@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log 2>/dev/null + pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json 1>staph.log 2>/dev/null # figures diff --git a/pangraph/build.py b/pangraph/build.py index 14f96d3..4577732 100644 --- a/pangraph/build.py +++ b/pangraph/build.py @@ -85,6 +85,7 @@ def main(args): with open(f"{root}/graph_{i:03d}.fa", 'w') as fd: g.write_fasta(fd) - T.write_json(sys.stdout, no_seqs=True) + # NOTE: uncomment when done debugging + # T.write_json(sys.stdout, no_seqs=True) return 0 diff --git a/pangraph/graph.py b/pangraph/graph.py index afff779..f45cfd2 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -5,6 +5,8 @@ import pprint from glob import glob from collections import defaultdict, Counter +from itertools import chain +import subprocess from Bio import SeqIO, Phylo from Bio.Seq import Seq @@ -486,7 +488,24 @@ class Graph(object): blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] blk_list.intersection_update(set([b.id for b in blks])) - print(f"LEN: {len(blk_list)-len(shared_blks)}") + delta = len(blk_list)-len(shared_blks) + if delta > 0: + print(f"LEN: {delta}") + buf = "" + for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())): + buf += ">isolate_{i:04d}\n" + blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] + for b in blks: + buf += b.extract(*tag) + buf += '\n' + proc = subprocess.run(["mafft", "-"], stdout=subprocess.PIPE, input=buf, encoding='ascii') + print(proc.stdout) + print(proc.stderr) + breakpoint('test') + else: + print(f"NO MATCH") + + # NOTE: debugging code if len(blk_list) < len(shared_blks): ref_list = set() first = True |