aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:02:50 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:02:50 -0700
commit d1ac5cedcc325be21de8e75bed39ef7741a76a19 (patch)
tree 6561d1865b82e8a3907c288f1c5fffce32b5ddae
parent bd21a4412a3e35f32ad84cf19328b7c89e0ed6e5 (diff)
feat: added simple fasta parser
-rw-r--r-- pangraph/utils.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/pangraph/utils.py b/pangraph/utils.py
index 5d5e888..82e0e3c 100644
--- a/pangraph/utils.py
+++ b/pangraph/utils.py
@@ -160,6 +160,30 @@ def getnwk(node, newick, parentdist, leaf_names):
# ------------------------------------------------------------------------
# parsers
def parse_fasta(fh):
    """Lazily parse FASTA records from an open text handle.

    This is a generator: each iteration yields one ``Record`` with

    * ``name`` -- the first whitespace-delimited token after ``>`` on the
      header line (``""`` if the header carries no text),
    * ``meta`` -- the remaining header tokens joined by single spaces,
    * ``seq``  -- the record's sequence lines concatenated, with line
      terminators and surrounding whitespace removed.

    Args:
        fh: A readable text handle supporting ``readline()`` and line
            iteration, positioned at the first header line.

    Yields:
        Record: One object per FASTA entry, in file order.

    Returns:
        The generator's return value (exposed as ``StopIteration.value``) is
        ``(1, None)`` when the input is exhausted, or
        ``(0, "improper fasta file syntax")`` when the first line read is
        not a ``>`` header.  A plain ``for`` loop over the generator simply
        stops in either case.
    """
    class Record:
        """One FASTA entry: identifier, header description and sequence."""

        def __init__(self, name=None, meta=None, seq=None):
            self.seq = seq
            self.name = name
            self.meta = meta

    header = fh.readline()
    while True:
        if header == "":
            return 1, None
        if header[0] != '>':
            return 0, "improper fasta file syntax"

        fields = header[1:].split()

        # Default the next header to "" so that running off the end of the
        # file terminates the outer loop cleanly.  Re-using the last line
        # read here would either replay a stale header forever or mistake
        # the final sequence line for a malformed header.
        header = ""
        chunks = []
        for line in fh:
            if line.startswith(">"):
                header = line
                break
            # Drop the line terminator so the sequence is contiguous.
            chunks.append(line.strip())

        yield Record(
            name=fields[0] if fields else "",
            meta=" ".join(fields[1:]),
            seq="".join(chunks),
        )
+
def parse_paf(fh):
hits = []
for line in fh: