diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-20 13:32:02 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-20 13:32:02 -0700 |
commit | 53dbbf03451cf9495fb9602257ecadf2e1f843e5 (patch) | |
tree | 1d48c4b1e57816db401ad79c67aab5486b040e56 | |
parent | dd0d0856d4def973512019b0e17afd43dc042706 (diff) |
feat: added prototype for plotting log file data
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | pangraph/graph.py | 7 | ||||
-rw-r--r-- | pangraph/tree.py | 2 | ||||
-rw-r--r-- | scripts/parse_log.py | 44 |
4 files changed, 50 insertions, 5 deletions
@@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 100 data/staph/guide.json + pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>/dev/null 1>staph-e2500-w1000.log # figures diff --git a/pangraph/graph.py b/pangraph/graph.py index f4afa97..235ef31 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -495,7 +495,7 @@ class Graph(object): delta = len(blk_list)-len(shared_blks) if delta > 0 and num_seqs > 1: - print(f"LEN: {delta}", end="\t") + print(f">LEN: {delta}", end=';') fd = [None, None] path = [None, None] try: @@ -524,15 +524,16 @@ class Graph(object): shell=True) out, err = proc.communicate() tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick') - print(f"-> {n} SCORE: {tree.total_branch_length()/(2*num_seqs)}") + print(f"-> {n} SCORE={tree.total_branch_length()/(2*num_seqs)}", end=";") make_tree(0) make_tree(1) + print("\n", end="") finally: os.remove(path[0]) os.remove(path[1]) else: - print(f"NO MATCH") + print(f">NO MATCH") # NOTE: debugging code # if len(blk_list) < len(shared_blks): diff --git a/pangraph/tree.py b/pangraph/tree.py index eeb52c8..d36b77d 100644 --- a/pangraph/tree.py +++ b/pangraph/tree.py @@ -383,7 +383,7 @@ class Tree(object): for n in self.postorder(): if n.is_leaf(): continue - print(f"---NODE LEVEL {n.level}---") + print(f"+++LEVEL={n.level}+++") n.fapath = f"{tmpdir}/{n.name}" log(f"fusing {n.child[0].name} with {n.child[1].name} @ {n.name}") n.graph = merge(*n.child) diff --git a/scripts/parse_log.py b/scripts/parse_log.py new file mode 100644 index 0000000..1caae07 --- /dev/null +++ b/scripts/parse_log.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +""" +script to process our end repair log files for plotting +""" +import argparse +from collections import defaultdict + +level_preset = "+++LEVEL=" +level_offset = len(level_preset) + +score_preset = "SCORE=" +score_offset = len(score_preset) + +def main(args): + for log_path in args: + with open(log_path) as log: + level = -1 + stats = defaultdict(lambda: {'hits':[], 'miss': 0}) + for line in log: + line.rstrip('\n') + if line[0] == "+": + assert line.startwith(level_preset), "check syntax in log file" + level = int(line[level_offset:line.find("+++", level_offset)]) + continue + if line[0] == ">": + if line[1:] == "NO MATCH": + stats[level]['miss'] += 1 + continue + if line[1:].startswith("LEN="): + offset = [line.find(";")] + offset.append(line.find(";", offset[0])) + score[0] = int(line[line.find(score_preset, offset[0])+1+score_offset:offset[1]) + score[1] = int(line[line.find(score_preset, offset[1])+1+score_offset:) + stats[level]['hits'].extend(score) + + raise ValueError(f"invalid syntax: {line}") + print(stats) + +parser = argparse.ArgumentParser(description='process our data log files on end repair') +parser.add_argument('files', type=str, nargs='+') + +if __name__ == "__main__": + args = parser.parse_args() + main(args.files) |