aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-13 14:40:16 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-13 14:40:16 -0700
commit967b6412dddc99de429e692973819bb7147b0354 (patch)
tree5cfde74874125f5796b117e1c565b6335e9957f5
parentb1af35822273ebe52f6fa089cfb5bc5f8ef542a3 (diff)
feat: prototype of mafft alignment of blocks
-rw-r--r--Makefile2
-rw-r--r--pangraph/build.py3
-rw-r--r--pangraph/graph.py21
3 files changed, 23 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 89f9e94..117dbc9 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log 2>/dev/null
+ pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json 1>staph.log 2>/dev/null
# figures
diff --git a/pangraph/build.py b/pangraph/build.py
index 14f96d3..4577732 100644
--- a/pangraph/build.py
+++ b/pangraph/build.py
@@ -85,6 +85,7 @@ def main(args):
with open(f"{root}/graph_{i:03d}.fa", 'w') as fd:
g.write_fasta(fd)
- T.write_json(sys.stdout, no_seqs=True)
+ # NOTE: uncomment when done debugging
+ # T.write_json(sys.stdout, no_seqs=True)
return 0
diff --git a/pangraph/graph.py b/pangraph/graph.py
index afff779..f45cfd2 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -5,6 +5,8 @@ import pprint
from glob import glob
from collections import defaultdict, Counter
+from itertools import chain
+import subprocess
from Bio import SeqIO, Phylo
from Bio.Seq import Seq
@@ -486,7 +488,24 @@ class Graph(object):
blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
blk_list.intersection_update(set([b.id for b in blks]))
- print(f"LEN: {len(blk_list)-len(shared_blks)}")
+ delta = len(blk_list)-len(shared_blks)
+ if delta > 0:
+ print(f"LEN: {delta}")
+ buf = ""
+ for i, tag in enumerate(chain(ref.muts.keys(), qry.muts.keys())):
+ buf += ">isolate_{i:04d}\n"
+ blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
+ for b in blks:
+ buf += b.extract(*tag)
+ buf += '\n'
+ proc = subprocess.run(["mafft", "-"], stdout=subprocess.PIPE, input=buf, encoding='ascii')
+ print(proc.stdout)
+ print(proc.stderr)
+ breakpoint('test')
+ else:
+ print(f"NO MATCH")
+
+ # NOTE: debugging code
if len(blk_list) < len(shared_blks):
ref_list = set()
first = True