aboutsummaryrefslogtreecommitdiff
path: root/scripts/parse_log.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/parse_log.py')
-rwxr-xr-xscripts/parse_log.py50
1 files changed, 42 insertions, 8 deletions
diff --git a/scripts/parse_log.py b/scripts/parse_log.py
index 3a0aed0..8532d3d 100755
--- a/scripts/parse_log.py
+++ b/scripts/parse_log.py
@@ -2,7 +2,11 @@
"""
script to process our end repair log files for plotting
"""
+import ast
import argparse
+
+from io import StringIO
+from Bio import AlignIO
from collections import defaultdict
level_preset = "+++LEVEL="
@@ -11,6 +15,40 @@ level_offset = len(level_preset)
score_preset = "SCORE="
score_offset = len(score_preset)
+msaln_preset = "ALIGNMENT="
+msaln_offset = len(msaln_preset)
+
+def unpack(line):
+ offset = [line.find(";")]
+ offset.append(line.find(";", offset[0]+1))
+
+ align = StringIO(ast.literal_eval(line[msaln_offset:offset[0]]).decode('utf-8'))
+ align = next(AlignIO.parse(align, 'fasta'))
+ score = float(line[offset[0]+1+score_offset:offset[1]])
+
+ return align, score
+
+# TODO: remove hardcoded numbers
+def save_aln_examples(results):
+ stats = results[(2500,1000)]
+ nums = [0, 0, 0]
+ for score, aln in stats[1]['hits']:
+ if len(aln) < 10:
+ continue
+
+ if score < 1e-4:
+ with open(f"scratch/1/eg_{nums[0]}.fna", 'w') as fd:
+ AlignIO.write(aln, fd, "fasta")
+ nums[0] += 1
+ elif score < 1e-2:
+ with open(f"scratch/2/eg_{nums[1]}.fna", 'w') as fd:
+ AlignIO.write(aln, fd, "fasta")
+ nums[1] += 1
+ else:
+ with open(f"scratch/3/eg_{nums[2]}.fna", 'w') as fd:
+ AlignIO.write(aln, fd, "fasta")
+ nums[2] += 1
+
def main(args):
results = {}
for log_path in args:
@@ -28,14 +66,9 @@ def main(args):
stats[level]['miss'] += 1
continue
if line[1:].startswith("LEN="):
- offset = [line.find(";")]
- offset.append(line.find(";", offset[0]+1))
- offset.append(line.find(";", offset[1]+1))
-
- score = [None, None]
- score[0] = float(line[offset[0]+1+score_offset:offset[1]])
- score[1] = float(line[offset[1]+1+score_offset:offset[2]])
- stats[level]['hits'].extend(score)
+ laln, lscore = unpack(log.readline())
+ raln, rscore = unpack(log.readline())
+ stats[level]['hits'].extend([(lscore, laln), (rscore, raln)])
continue
raise ValueError(f"invalid syntax: {line[1:]}")
if len(stats) > 0:
@@ -50,3 +83,4 @@ parser.add_argument('files', type=str, nargs='+')
if __name__ == "__main__":
args = parser.parse_args()
results = main(args.files)
+ save_aln_examples(results)