aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-20 13:32:02 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-20 13:32:02 -0700
commit53dbbf03451cf9495fb9602257ecadf2e1f843e5 (patch)
tree1d48c4b1e57816db401ad79c67aab5486b040e56
parentdd0d0856d4def973512019b0e17afd43dc042706 (diff)
feat: added prototype for plotting log file data
-rw-r--r--Makefile2
-rw-r--r--pangraph/graph.py7
-rw-r--r--pangraph/tree.py2
-rw-r--r--scripts/parse_log.py44
4 files changed, 50 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 0fe7c0a..b3f1d20 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 100 data/staph/guide.json
+ pangraph build -d data/staph -m 500 -b 0 -e 2500 -w 1000 data/staph/guide.json 2>/dev/null 1>staph-e2500-w1000.log
# figures
diff --git a/pangraph/graph.py b/pangraph/graph.py
index f4afa97..235ef31 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -495,7 +495,7 @@ class Graph(object):
delta = len(blk_list)-len(shared_blks)
if delta > 0 and num_seqs > 1:
- print(f"LEN: {delta}", end="\t")
+ print(f">LEN: {delta}", end=';')
fd = [None, None]
path = [None, None]
try:
@@ -524,15 +524,16 @@ class Graph(object):
shell=True)
out, err = proc.communicate()
tree = Phylo.read(io.StringIO(out.decode('utf-8')), format='newick')
- print(f"-> {n} SCORE: {tree.total_branch_length()/(2*num_seqs)}")
+ print(f"-> {n} SCORE={tree.total_branch_length()/(2*num_seqs)}", end=";")
make_tree(0)
make_tree(1)
+ print("\n", end="")
finally:
os.remove(path[0])
os.remove(path[1])
else:
- print(f"NO MATCH")
+ print(f">NO MATCH")
# NOTE: debugging code
# if len(blk_list) < len(shared_blks):
diff --git a/pangraph/tree.py b/pangraph/tree.py
index eeb52c8..d36b77d 100644
--- a/pangraph/tree.py
+++ b/pangraph/tree.py
@@ -383,7 +383,7 @@ class Tree(object):
for n in self.postorder():
if n.is_leaf():
continue
- print(f"---NODE LEVEL {n.level}---")
+ print(f"+++LEVEL={n.level}+++")
n.fapath = f"{tmpdir}/{n.name}"
log(f"fusing {n.child[0].name} with {n.child[1].name} @ {n.name}")
n.graph = merge(*n.child)
diff --git a/scripts/parse_log.py b/scripts/parse_log.py
new file mode 100644
index 0000000..1caae07
--- /dev/null
+++ b/scripts/parse_log.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""
+script to process our end repair log files for plotting
+"""
+import argparse
+from collections import defaultdict
+
+# prefix of the level marker lines; pangraph/tree.py prints "+++LEVEL=<level>+++"
+level_preset = "+++LEVEL="
+# index within a marker line at which the level number starts
+level_offset = len(level_preset)
+
+# token printed by pangraph/graph.py directly in front of each score value
+score_preset = "SCORE="
+# number of characters to skip from the start of the token to reach the value
+score_offset = len(score_preset)
+
+def main(args):
+    """
+    parse end repair log files and print the statistics gathered from each
+
+    recognized lines:
+      "+++LEVEL=<int>+++"  sets the level all following records belong to
+      ">NO MATCH"          counts one miss at the current level
+      ">LEN..."            a hit; every ';' separated field that contains
+                           "SCORE=" adds one float score at the current level
+
+    args: iterable of paths to log files
+
+    raises ValueError on any line that matches none of the forms above
+    (including a level marker or score whose value is not a number)
+    """
+    for log_path in args:
+        with open(log_path) as log:
+            # records seen before the first level marker are filed under -1
+            level = -1
+            stats = defaultdict(lambda: {'hits':[], 'miss': 0})
+            for line in log:
+                # rstrip returns a new string, so the result has to be rebound
+                line = line.rstrip('\n')
+                if line.startswith(level_preset):
+                    # keep only the digits between the prefix and the closing "+++"
+                    level = int(line[level_offset:].partition("+++")[0])
+                    continue
+                if line == ">NO MATCH":
+                    stats[level]['miss'] += 1
+                    continue
+                if line.startswith(">LEN"):
+                    # hit lines look like ">LEN: 3;-> 0 SCORE=0.12;-> 1 SCORE=0.34;"
+                    # scores are ratios of branch lengths, hence float and not int
+                    scores = [
+                        float(field.partition(score_preset)[2])
+                        for field in line.split(";")
+                        if score_preset in field
+                    ]
+                    stats[level]['hits'].extend(scores)
+                    continue
+
+                raise ValueError(f"invalid syntax: {line}")
+        print(stats)
+
+# command line interface: takes one or more paths to log files
+parser = argparse.ArgumentParser(description='process our data log files on end repair')
+parser.add_argument('files', type=str, nargs='+')
+
+# only parse arguments and run when executed as a script
+if __name__ == "__main__":
+ args = parser.parse_args()
+ main(args.files)