aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-08-13 11:37:05 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-08-13 11:37:05 -0700
commit9deb0978ba029fc59ee699e8280dd60fb47bee1b (patch)
tree22ea138ce8b0fcb4380101e8309846f77f2f88eb
parentc796f164ea6856af354b879abefceb342de2f27c (diff)
fix: pull out interval of merged blocks and not just the first one
-rw-r--r--Makefile2
-rw-r--r--pangraph/block.py4
-rw-r--r--pangraph/graph.py38
-rw-r--r--pangraph/tree.py7
4 files changed, 30 insertions, 21 deletions
diff --git a/Makefile b/Makefile
index 9681f3b..89f9e94 100644
--- a/Makefile
+++ b/Makefile
@@ -55,7 +55,7 @@ staph:
@echo "cluster staph"; \
pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz
@echo "build staph"; \
- pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json # 1>data/staph/pangraph.json
+ pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log 2>/dev/null
# figures
diff --git a/pangraph/block.py b/pangraph/block.py
index 5c649b9..1bc501d 100644
--- a/pangraph/block.py
+++ b/pangraph/block.py
@@ -148,7 +148,9 @@ class Block(object):
qryblks = qryblks[::-1]
refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None]
- return newblks, qryblks, refblks, isomap
+ sharedblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None and qrys[i] is not None]
+
+ return newblks, qryblks, refblks, sharedblks, isomap
# --------------
# methods
diff --git a/pangraph/graph.py b/pangraph/graph.py
index c2c3cdb..fb46be9 100644
--- a/pangraph/graph.py
+++ b/pangraph/graph.py
@@ -433,7 +433,7 @@ class Graph(object):
"qry_name" : hit["qry"]["name"],
"orientation" : hit["orientation"]}
- merged_blks, new_qrys, new_refs, blk_map = Block.from_aln(aln)
+ merged_blks, new_qrys, new_refs, shared_blks, blk_map = Block.from_aln(aln)
for merged_blk in merged_blks:
self.blks[merged_blk.id] = merged_blk
@@ -463,31 +463,31 @@ class Graph(object):
new_blocks = []
new_blocks.extend(update(old_ref, new_refs, hit['ref'], Strand.Plus))
new_blocks.extend(update(old_qry, new_qrys, hit['qry'], hit['orientation']))
- self.prune_blks()
- shared_blks = set()
- first = True
+ blk_list = set()
+ first = True
for tag in ref.muts.keys():
- pos = self.seqs[tag[0]].position_of(new_refs[0], tag[1])
- blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND]
+ beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1])
+ end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1])
+ blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
if first:
- shared_blks = set([b.id for b in blks])
+ blk_list = set([b.id for b in blks])
first = False
else:
- shared_blks.intersection_update(set([b.id for b in blks]))
- first = True
+ blk_list.intersection_update(set([b.id for b in blks]))
+
for tag in qry.muts.keys():
- pos = self.seqs[tag[0]].position_of(new_qrys[0], tag[1])
- blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND]
- if first:
- shared_blks = set([b.id for b in blks])
- first = False
- else:
- shared_blks.intersection_update(set([b.id for b in blks]))
+ beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1])
+ end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1])
+ blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND]
+ blk_list.intersection_update(set([b.id for b in blks]))
+
+ print(f"LEN: {len(blk_list)-len(shared_blks)}")
+ if len(blk_list) < len(shared_blks):
+ breakpoint("inconsistent number of blocks")
+
+ self.prune_blks()
- print(f"LEN: {len(shared_blks)}")
- if len(shared_blks) > 20:
- breakpoint("big number of blocks")
return [b[0] for b in new_blocks]
def extract(self, name, strip_gaps=True, verbose=False):
diff --git a/pangraph/tree.py b/pangraph/tree.py
index 605bac3..64ac119 100644
--- a/pangraph/tree.py
+++ b/pangraph/tree.py
@@ -139,6 +139,11 @@ class Clade(object):
'fapath' : self.fapath,
'graph' : serialize(self.graph) if self.graph is not None else None }
+ def set_level(self, level):
+ for c in self.child:
+ c.set_level(level+1)
+ self.level = level
+
class Tree(object):
# -------------------
# Class constructor
@@ -288,6 +293,7 @@ class Tree(object):
self.seqs = {leafs[name]:seq for name,seq in seqs.items()}
def align(self, tmpdir, min_blk_len, mu, beta, extensive, log_stats=False, verbose=False):
+ self.root.set_level(0) # NOTE: for debug logging
stats = {}
# ---------------------------------------------
# internal functions
@@ -377,6 +383,7 @@ class Tree(object):
for n in self.postorder():
if n.is_leaf():
continue
+ print(f"---NODE LEVEL {n.level}---")
n.fapath = f"{tmpdir}/{n.name}"
log(f"fusing {n.child[0].name} with {n.child[1].name} @ {n.name}")
n.graph = merge(*n.child)