about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:40:11 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:40:11 -0700
commit: 91e389d6e3d0c33f02474804802b053bb67cc48d (patch)
tree: b40390a3e95a418041c146c298f1970e94b3ad28
parent: d1ac5cedcc325be21de8e75bed39ef7741a76a19 (diff)
fix: corrected the iterator
-rw-r--r-- pangraph/utils.py 21
1 files changed, 14 insertions, 7 deletions
diff --git a/pangraph/utils.py b/pangraph/utils.py
index 82e0e3c..eff9432 100644
--- a/pangraph/utils.py
+++ b/pangraph/utils.py
@@ -157,6 +157,11 @@ def getnwk(node, newick, parentdist, leaf_names):
newick = "(%s" % (newick)
return newick
+def as_str(s):
+ if isinstance(s, bytes):
+ return s.decode('utf-8')
+ return s
+
# ------------------------------------------------------------------------
# parsers
@@ -167,21 +172,23 @@ def parse_fasta(fh):
self.name = name
self.meta = meta
- header = fh.readline()
- while True:
- if header == "":
- return 1, None
- if header[0] != '>':
- return 0, "improper fasta file syntax"
+ def __str__(self):
+ return f">{self.name} {self.meta}\n{self.seq[:77]}...\n"
+
+ def __repr__(self):
+ return str(self)
+ header = as_str(fh.readline())
+ while header != "" and header[0] == ">":
name = header[1:].split()
seq = ""
for line in fh:
+ line = as_str(line)
if line == "" or line[0] == ">":
break
seq += line
- header = line
+ header = as_str(line)
yield Record(name=name[0], meta=" ".join(name[1:]), seq=seq)
def parse_paf(fh):