aboutsummaryrefslogtreecommitdiff
path: root/pangraph/graph.py
diff options
context:
space:
mode:
Diffstat (limited to 'pangraph/graph.py')
-rw-r--r--pangraph/graph.py51
1 files changed, 26 insertions, 25 deletions
diff --git a/pangraph/graph.py b/pangraph/graph.py
index d561e6c..fbbb1e7 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -36,11 +36,15 @@ def entropy(s):
return S
def alignment_entropy(rdr):
- aln = AlignIO.read(rdr, 'fasta')
- S = 0
- for i in range(aln.get_alignment_length()):
- S += entropy(aln[:,i])
- return S/aln.get_alignment_length()
+ try:
+ aln = np.array([list(rec) for rec in AlignIO.read(rdr, 'fasta')], np.character)
+ S = 0
+ for i in range(aln.shape[1]):
+ S += entropy(aln[:,i])
+ return S/aln.shape[1]
+ except Exception as msg:
+ print(f"ERROR: {msg}")
+ return None
# ------------------------------------------------------------------------
# Junction class
@@ -569,14 +573,14 @@ class Graph(object):
raise ValueError(f"unrecognized argument '{side}' for side")
iso_blks = self.seqs[tag[0]][left:right]
- print("POSITIONS", pos)
- print("STRAND", strand)
- print("LIST", shared_blks)
- print("MERGED", merged_blks)
- print("INTERSECTION", lblks_set_x if side == 'left' else rblks_set_x)
- print("UNION", lblks_set_s if side == 'left' else rblks_set_s)
- print("ISO", iso_blks)
- breakpoint("stop")
+ # print("POSITIONS", pos)
+ # print("STRAND", strand)
+ # print("LIST", shared_blks)
+ # print("MERGED", merged_blks)
+ # print("INTERSECTION", lblks_set_x if side == 'left' else rblks_set_x)
+ # print("UNION", lblks_set_s if side == 'left' else rblks_set_s)
+ # print("ISO", iso_blks)
+ # breakpoint("stop")
tmp.write(f">isolate_{i:04d} {','.join(b.id for b in iso_blks)}\n")
s = self.seqs[tag[0]].sequence_range(left,right)
if len(s) > extend + window:
@@ -585,23 +589,20 @@ class Graph(object):
tmp.flush()
- proc = [None, None]
- out = [None, None]
- err = [None, None]
- proc[0] = subprocess.Popen(f"mafft --auto {path}",
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- shell=True)
- proc[1] = subprocess.Popen(f"fasttree",
- stdin =subprocess.PIPE,
+ proc = subprocess.Popen(f"mafft --auto {path}",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
- out[0], err[0] = proc[0].communicate()
+ # proc[1] = subprocess.Popen(f"fasttree",
+ # stdin =subprocess.PIPE,
+ # stdout=subprocess.PIPE,
+ # stderr=subprocess.PIPE,
+ # shell=True)
+ out, err = proc.communicate()
# out[1], err[1] = proc[1].communicate(input=out[0])
# tree = Phylo.read(io.StringIO(out[1].decode('utf-8')), format='newick')
- print(f"ALIGNMENT=\n{out[0].decode('utf-8')}", end=";")
- rdr = StringIO(out[0].decode('utf-8'))
+ print(f"ALIGNMENT={out}", end=";")
+ rdr = StringIO(out.decode('utf-8'))
print(f"SCORE={alignment_entropy(rdr)}", end=";")
rdr.close()
# print(f"SCORE={tree.total_branch_length()/(2*num_seqs)}", end=";")