diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 11:40:11 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 11:40:11 -0700 |
commit | 91e389d6e3d0c33f02474804802b053bb67cc48d (patch) | |
tree | b40390a3e95a418041c146c298f1970e94b3ad28 | |
parent | d1ac5cedcc325be21de8e75bed39ef7741a76a19 (diff) |
fix: corrected the iterator
-rw-r--r-- | pangraph/utils.py | 21 |
1 files changed, 14 insertions, 7 deletions
```diff
diff --git a/pangraph/utils.py b/pangraph/utils.py
index 82e0e3c..eff9432 100644
--- a/pangraph/utils.py
+++ b/pangraph/utils.py
@@ -157,6 +157,11 @@ def getnwk(node, newick, parentdist, leaf_names):
         newick = "(%s" % (newick)
         return newick
 
+def as_str(s):
+    if isinstance(s, bytes):
+        return s.decode('utf-8')
+    return s
+
 # ------------------------------------------------------------------------
 # parsers
 
@@ -167,21 +172,23 @@ def parse_fasta(fh):
             self.name = name
             self.meta = meta
 
-    header = fh.readline()
-    while True:
-        if header == "":
-            return 1, None
-        if header[0] != '>':
-            return 0, "improper fasta file syntax"
+        def __str__(self):
+            return f">{self.name} {self.meta}\n{self.seq[:77]}...\n"
+
+        def __repr__(self):
+            return str(self)
 
+    header = as_str(fh.readline())
+    while header != "" and header[0] == ">":
         name = header[1:].split()
         seq = ""
         for line in fh:
+            line = as_str(line)
             if line == "" or line[0] == ">":
                 break
             seq += line
 
-        header = line
+        header = as_str(line)
         yield Record(name=name[0], meta=" ".join(name[1:]), seq=seq)
 
 def parse_paf(fh):
```