diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-13 11:37:05 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-13 11:37:05 -0700 |
commit | 9deb0978ba029fc59ee699e8280dd60fb47bee1b (patch) | |
tree | 22ea138ce8b0fcb4380101e8309846f77f2f88eb | |
parent | c796f164ea6856af354b879abefceb342de2f27c (diff) |
fix: pull out interval of merged blocks and not just the first one
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | pangraph/block.py | 4 | ||||
-rw-r--r-- | pangraph/graph.py | 38 | ||||
-rw-r--r-- | pangraph/tree.py | 7 |
4 files changed, 30 insertions, 21 deletions
@@ -55,7 +55,7 @@ staph: @echo "cluster staph"; \ pangraph cluster -d data/staph data/staph/assemblies/*.fna.gz @echo "build staph"; \ - pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json # 1>data/staph/pangraph.json + pangraph build -d data/staph -m 500 -b 0 data/staph/guide.json #1>staph.log 2>/dev/null # figures diff --git a/pangraph/block.py b/pangraph/block.py index 5c649b9..1bc501d 100644 --- a/pangraph/block.py +++ b/pangraph/block.py @@ -148,7 +148,9 @@ class Block(object): qryblks = qryblks[::-1] refblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None] - return newblks, qryblks, refblks, isomap + sharedblks = [nb for i, nb in enumerate(newblks) if refs[i] is not None and qrys[i] is not None] + + return newblks, qryblks, refblks, sharedblks, isomap # -------------- # methods diff --git a/pangraph/graph.py b/pangraph/graph.py index c2c3cdb..fb46be9 100644 --- a/pangraph/graph.py +++ b/pangraph/graph.py @@ -433,7 +433,7 @@ class Graph(object): "qry_name" : hit["qry"]["name"], "orientation" : hit["orientation"]} - merged_blks, new_qrys, new_refs, blk_map = Block.from_aln(aln) + merged_blks, new_qrys, new_refs, shared_blks, blk_map = Block.from_aln(aln) for merged_blk in merged_blks: self.blks[merged_blk.id] = merged_blk @@ -463,31 +463,31 @@ class Graph(object): new_blocks = [] new_blocks.extend(update(old_ref, new_refs, hit['ref'], Strand.Plus)) new_blocks.extend(update(old_qry, new_qrys, hit['qry'], hit['orientation'])) - self.prune_blks() - shared_blks = set() - first = True + blk_list = set() + first = True for tag in ref.muts.keys(): - pos = self.seqs[tag[0]].position_of(new_refs[0], tag[1]) - blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND] + beg = self.seqs[tag[0]].position_of(new_refs[0], tag[1]) + end = self.seqs[tag[0]].position_of(new_refs[-1], tag[1]) + blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] if first: - shared_blks = set([b.id for b in blks]) + blk_list = set([b.id for b in blks]) first = False else: - shared_blks.intersection_update(set([b.id for b in blks])) - first = True + blk_list.intersection_update(set([b.id for b in blks])) + for tag in qry.muts.keys(): - pos = self.seqs[tag[0]].position_of(new_qrys[0], tag[1]) - blks = self.seqs[tag[0]][pos[0]-EXTEND:pos[1]+EXTEND] - if first: - shared_blks = set([b.id for b in blks]) - first = False - else: - shared_blks.intersection_update(set([b.id for b in blks])) + beg = self.seqs[tag[0]].position_of(new_qrys[0], tag[1]) + end = self.seqs[tag[0]].position_of(new_qrys[-1], tag[1]) + blks = self.seqs[tag[0]][beg[0]-EXTEND:end[1]+EXTEND] + blk_list.intersection_update(set([b.id for b in blks])) + + print(f"LEN: {len(blk_list)-len(shared_blks)}") + if len(blk_list) < len(shared_blks): + breakpoint("inconsistent number of blocks") + + self.prune_blks() - print(f"LEN: {len(shared_blks)}") - if len(shared_blks) > 20: - breakpoint("big number of blocks") return [b[0] for b in new_blocks] def extract(self, name, strip_gaps=True, verbose=False): diff --git a/pangraph/tree.py b/pangraph/tree.py index 605bac3..64ac119 100644 --- a/pangraph/tree.py +++ b/pangraph/tree.py @@ -139,6 +139,11 @@ class Clade(object): 'fapath' : self.fapath, 'graph' : serialize(self.graph) if self.graph is not None else None } + def set_level(self, level): + for c in self.child: + c.set_level(level+1) + self.level = level + class Tree(object): # ------------------- # Class constructor @@ -288,6 +293,7 @@ class Tree(object): self.seqs = {leafs[name]:seq for name,seq in seqs.items()} def align(self, tmpdir, min_blk_len, mu, beta, extensive, log_stats=False, verbose=False): + self.root.set_level(0) # NOTE: for debug logging stats = {} # --------------------------------------------- # internal functions @@ -377,6 +383,7 @@ class Tree(object): for n in self.postorder(): if n.is_leaf(): continue + print(f"---NODE LEVEL {n.level}---") n.fapath = f"{tmpdir}/{n.name}" log(f"fusing {n.child[0].name} with {n.child[1].name} @ {n.name}") n.graph = merge(*n.child) |