about summary refs log tree commit diff
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-09-02 12:22:23 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-09-02 12:22:23 -0700
commit 78639f76e10810296607df2e9b3f839117185cde (patch)
tree 5bd3a8530c84cfd28afad1f65a896518ecc03906
parent f90622c44bc0fe71a1e5a12a3dbc22d19ee5f770 (diff)
feat: added output of alignments to logging
-rw-r--r-- Makefile 2
-rw-r--r-- pangraph/graph.py 16
-rwxr-xr-x scripts/parse_log.py 12
3 files changed, 23 insertions, 7 deletions
diff --git a/Makefile b/Makefile
index b3f1d20..b0b9ffb 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>/dev/null 1>staph-e2500-w1000.log
+ pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log
# figures
diff --git a/pangraph/graph.py b/pangraph/graph.py
index c4e3f83..d154f74 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -518,12 +518,22 @@ class Graph(object):
tmp[1].flush()
def make_tree(n):
- proc = subprocess.Popen(f"mafft --auto {path[n]} | fasttree",
+ proc = [None, None]
+ out = [None, None]
+ err = [None, None]
+ proc[0] = subprocess.Popen(f"mafft --auto {path[n]}",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
- out, err = proc.communicate()
- tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick')
+ proc[1] = subprocess.Popen(f"fasttree",
+ stdin =subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True)
+ out[0], err[0] = proc[0].communicate()
+ out[1], err[1] = proc[1].communicate(input=out[0])
+ tree = Phylo.read(io.StringIO(out[1].decode('utf-8')), format='newick')
+ print(f"ALIGNMENT=\n{out[0]}")
print(f"SCORE={tree.total_branch_length()/(2*num_seqs)}", end=";")
make_tree(0)
diff --git a/scripts/parse_log.py b/scripts/parse_log.py
index bbca0ea..3a0aed0 100755
--- a/scripts/parse_log.py
+++ b/scripts/parse_log.py
@@ -12,10 +12,11 @@ score_preset = "SCORE="
score_offset = len(score_preset)
def main(args):
+ results = {}
for log_path in args:
+ stats = defaultdict(lambda: {'hits':[], 'miss': 0})
+ level = -1
with open(log_path) as log:
- level = -1
- stats = defaultdict(lambda: {'hits':[], 'miss': 0})
for line in log:
line.rstrip('\n')
if line[0] == "+":
@@ -37,10 +38,15 @@ def main(args):
stats[level]['hits'].extend(score)
continue
raise ValueError(f"invalid syntax: {line[1:]}")
+ if len(stats) > 0:
+ path = log_path.replace(".log", "").split("-")
+ e, w = int(path[1][1:]), int(path[2][1:])
+ results[(e,w)] = dict(stats)
+ return results
parser = argparse.ArgumentParser(description='process our data log files on end repair')
parser.add_argument('files', type=str, nargs='+')
if __name__ == "__main__":
args = parser.parse_args()
- main(args.files)
+ results = main(args.files)