diff options
Diffstat (limited to 'scripts/parse_log.py')
-rwxr-xr-x | scripts/parse_log.py | 50 |
1 files changed, 42 insertions, 8 deletions
diff --git a/scripts/parse_log.py b/scripts/parse_log.py index 3a0aed0..8532d3d 100755 --- a/scripts/parse_log.py +++ b/scripts/parse_log.py @@ -2,7 +2,11 @@ """ script to process our end repair log files for plotting """ +import ast import argparse + +from io import StringIO +from Bio import AlignIO from collections import defaultdict level_preset = "+++LEVEL=" @@ -11,6 +15,40 @@ level_offset = len(level_preset) score_preset = "SCORE=" score_offset = len(score_preset) +msaln_preset = "ALIGNMENT=" +msaln_offset = len(msaln_preset) + +def unpack(line): + offset = [line.find(";")] + offset.append(line.find(";", offset[0]+1)) + + align = StringIO(ast.literal_eval(line[msaln_offset:offset[0]]).decode('utf-8')) + align = next(AlignIO.parse(align, 'fasta')) + score = float(line[offset[0]+1+score_offset:offset[1]]) + + return align, score + +# TODO: remove hardcoded numbers +def save_aln_examples(results): + stats = results[(2500,1000)] + nums = [0, 0, 0] + for score, aln in stats[1]['hits']: + if len(aln) < 10: + continue + + if score < 1e-4: + with open(f"scratch/1/eg_{nums[0]}.fna", 'w') as fd: + AlignIO.write(aln, fd, "fasta") + nums[0] += 1 + elif score < 1e-2: + with open(f"scratch/2/eg_{nums[1]}.fna", 'w') as fd: + AlignIO.write(aln, fd, "fasta") + nums[1] += 1 + else: + with open(f"scratch/3/eg_{nums[2]}.fna", 'w') as fd: + AlignIO.write(aln, fd, "fasta") + nums[2] += 1 + def main(args): results = {} for log_path in args: @@ -28,14 +66,9 @@ def main(args): stats[level]['miss'] += 1 continue if line[1:].startswith("LEN="): - offset = [line.find(";")] - offset.append(line.find(";", offset[0]+1)) - offset.append(line.find(";", offset[1]+1)) - - score = [None, None] - score[0] = float(line[offset[0]+1+score_offset:offset[1]]) - score[1] = float(line[offset[1]+1+score_offset:offset[2]]) - stats[level]['hits'].extend(score) + laln, lscore = unpack(log.readline()) + raln, rscore = unpack(log.readline()) + stats[level]['hits'].extend([(lscore, laln), (rscore, raln)]) continue raise ValueError(f"invalid syntax: {line[1:]}") if len(stats) > 0: @@ -50,3 +83,4 @@ parser.add_argument('files', type=str, nargs='+') if __name__ == "__main__": args = parser.parse_args() results = main(args.files) + save_aln_examples(results) |