From 78639f76e10810296607df2e9b3f839117185cde Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 2 Sep 2020 12:22:23 -0700 Subject: feat: added output of alignments to logging --- Makefile | 2 +- pangraph/graph.py | 16 +++++++++++++--- scripts/parse_log.py | 12 +++++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b3f1d20..b0b9ffb 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>/dev/null 1>staph-e2500-w1000.log + pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>staph-e2500-w1000.err 1>staph-e2500-w1000.log # figures diff --git a/pangraph/graph.py b/pangraph/graph.py index c4e3f83..d154f74 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -518,12 +518,22 @@ class Graph(object): tmp[1].flush() def make_tree(n): - proc = subprocess.Popen(f"mafft --auto {path[n]} | fasttree", + proc = [None, None] + out = [None, None] + err = [None, None] + proc[0] = subprocess.Popen(f"mafft --auto {path[n]}", stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - out, err = proc.communicate() - tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') + proc[1] = subprocess.Popen(f"fasttree", + stdin =subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True) + out[0], err[0] = proc[0].communicate() + out[1], err[1] = proc[1].communicate(input=out[0]) + tree = Phylo.read(io.StringIO(out[1].decode('utf-8')), format='newick') + print(f"ALIGNMENT=\n{out[0]}") print(f"SCORE={tree.total_branch_length()/(2*num_seqs)}", end=";") make_tree(0) diff --git a/scripts/parse_log.py b/scripts/parse_log.py index bbca0ea..3a0aed0 100755 --- a/scripts/parse_log.py +++ b/scripts/parse_log.py @@ -12,10 +12,11 @@ score_preset = "SCORE=" score_offset = len(score_preset) def main(args): + results = {} for log_path in args: + stats = defaultdict(lambda: {'hits':[], 'miss': 0}) + level = -1 with open(log_path) as log: - level = -1 - stats = defaultdict(lambda: {'hits':[], 'miss': 0}) for line in log: line.rstrip('\n') if line[0] == "+": @@ -37,10 +38,15 @@ def main(args): stats[level]['hits'].extend(score) continue raise ValueError(f"invalid syntax: {line[1:]}") + if len(stats) > 0: + path = log_path.replace(".log", "").split("-") + e, w = int(path[1][1:]), int(path[2][1:]) + results[(e,w)] = dict(stats) + return results parser = argparse.ArgumentParser(description='process our data log files on end repair') parser.add_argument('files', type=str, nargs='+') if __name__ == "__main__": args = parser.parse_args() - main(args.files) + results = main(args.files) -- cgit v1.2.1