diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 11:02:50 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 11:02:50 -0700 |
commit | d1ac5cedcc325be21de8e75bed39ef7741a76a19 (patch) | |
tree | 6561d1865b82e8a3907c288f1c5fffce32b5ddae | |
parent | bd21a4412a3e35f32ad84cf19328b7c89e0ed6e5 (diff) |
feat: added simple fasta parser
-rw-r--r-- | pangraph/utils.py | 24 |
1 files changed, 24 insertions, 0 deletions
# ------------------------------------------------------------------------
# parsers

def parse_fasta(fh):
    """Lazily parse FASTA-formatted text, yielding one record per entry.

    Parameters
    ----------
    fh : text file-like object
        Must support ``readline()`` and line iteration (e.g. an open text
        file or ``io.StringIO``). It is consumed as records are requested.

    Yields
    ------
    Record
        Object with three attributes:
        ``name`` - first whitespace-delimited token after ``>`` (``""`` if
        the header is empty),
        ``meta`` - the remaining header tokens joined by single spaces,
        ``seq``  - the entry's sequence lines concatenated with their line
        terminators removed.

    Returns
    -------
    tuple
        The generator's return value (``StopIteration.value``) is
        ``(1, None)`` once the input is exhausted, or
        ``(0, "improper fasta file syntax")`` if the first line is
        non-empty but does not start with ``>``. A plain ``for`` loop never
        sees this value; malformed input simply produces no records.
    """
    class Record:
        """Container for a single FASTA entry."""

        def __init__(self, name=None, meta=None, seq=None):
            self.seq = seq
            self.name = name
            self.meta = meta

    header = fh.readline()
    while header != "":
        if not header.startswith('>'):
            return 0, "improper fasta file syntax"

        fields = header[1:].split()
        # A bare ">" header has no tokens; fall back to an empty name
        # instead of raising IndexError.
        name = fields[0] if fields else ""
        meta = " ".join(fields[1:])

        # Collect sequence lines in a list and join once, rather than
        # growing a string with repeated concatenation.
        chunks = []
        # Stays "" unless another header is found, so hitting end-of-file
        # terminates the outer loop instead of reusing a stale line.
        next_header = ""
        for line in fh:
            if line.startswith(">"):
                next_header = line
                break
            # Drop only the line terminator so the sequence is contiguous.
            chunks.append(line.rstrip("\r\n"))

        yield Record(name=name, meta=meta, seq="".join(chunks))
        header = next_header

    return 1, None