fix: corrected the iterator

author: Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:40:11 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2020-08-19 11:40:11 -0700
commit: 91e389d6e3d0c33f02474804802b053bb67cc48d (patch)
tree: b40390a3e95a418041c146c298f1970e94b3ad28
parent: d1ac5cedcc325be21de8e75bed39ef7741a76a19 (diff)
1 files changed, 14 insertions, 7 deletions
diff --git a/pangraph/utils.py b/pangraph/utils.py
index 82e0e3c..eff9432 100644
--- a/pangraph/utils.py
+++ b/pangraph/utils.py
@@ -157,6 +157,11 @@ def getnwk(node, newick, parentdist, leaf_names):
         newick = "(%s" % (newick)
         return newick
 
+def as_str(s):
+    if isinstance(s, bytes):
+        return s.decode('utf-8')
+    return s
+
 # ------------------------------------------------------------------------
 # parsers
 
@@ -167,21 +172,23 @@ def parse_fasta(fh):
             self.name = name
             self.meta = meta
 
-    header = fh.readline()
-    while True:
-        if header == "":
-            return 1, None
-        if header[0] != '>':
-            return 0, "improper fasta file syntax"
+        def __str__(self):
+            return f">{self.name} {self.meta}\n{self.seq[:77]}...\n"
+
+        def __repr__(self):
+            return str(self)
 
+    header = as_str(fh.readline())
+    while header != "" and header[0] == ">":
         name = header[1:].split()
         seq  = ""
         for line in fh:
+            line = as_str(line)
             if line == "" or line[0] == ">":
                 break
             seq += line
 
-        header = line
+        header = as_str(line)
         yield Record(name=name[0], meta=" ".join(name[1:]), seq=seq)
 
 def parse_paf(fh):
author	Nicholas Noll <nbnoll@eml.cc>	2020-08-19 11:40:11 -0700
committer	Nicholas Noll <nbnoll@eml.cc>	2020-08-19 11:40:11 -0700
commit	91e389d6e3d0c33f02474804802b053bb67cc48d (patch)
tree	b40390a3e95a418041c146c298f1970e94b3ad28
parent	d1ac5cedcc325be21de8e75bed39ef7741a76a19 (diff)