Get branch support using bootstrap

Contents

Get branch support using bootstrap#

Bootstrapping is the most common method for assessing branch support and is applicable to distance-based, parsimony, and maximum likelihood trees. It’s a resampling technique that gauges how consistently the data supports each clade. We can use Felsenstein’s bootstrapping method to determine the tree’s branch confidence. See this paper for details.

Example 1#

Load our toy example and run ScisTree2.

import scistree2 as s2
import numpy as np 
import pandas as pd
from phytreeviz import TreeViz # for tree visualization


# Load the toy input from CSV (presumably per-cell genotype probabilities,
# going by the file name -- confirm against the scistree2 docs).
gp = s2.probability.from_csv('./data/toy_probs.csv')
caller = s2.ScisTree2(threads=8) # use 8 threads
# infer() returns three values, unpacked here as the inferred tree,
# the imputed genotypes, and a likelihood.
tree, imputed_genotype, likelihood = caller.infer(gp) # run ScisTree2 inference

Run bootstrapping on the inferred tree (100 replicates).

# Run the bootstrap with num_bootstrap=100 on the inferred tree and the same
# input data; the returned tree is rebound to `tree`.
tree = caller.bootstrap(tree, gp, num_bootstrap=100)
# Print the tree, using each node's branch_confidence as the value passed to
# branch_length_func.
print(tree.output(branch_length_func=lambda x: x.branch_confidence))
(((cell1:1.0,cell3:1.0):0.98,(cell2:1.0,cell4:1.0):0.61):0.0,cell5:1.0):1.0;

Plot the bootstrapped tree, labelling each branch with its number of mutations and its branch confidence.

# Build the tree string for plotting: branch length is the number of mutations
# on the node (len(x.mutations)); confidence is the node's branch_confidence.
tv = TreeViz(tree.output(branch_length_func=lambda x: len(x.mutations), confidence_func=lambda x: x.branch_confidence))
# Label branch lengths (the mutation counts) in red.
tv.show_branch_length(color='red', label_formatter=lambda v: f"#mut: {v:.2f}")
# Label confidence values in blue.
tv.show_confidence(color='blue', label_formatter=lambda v: f"conf: {v:.2f}")
tv.show_scale_axis()
fig = tv.plotfig()
../_images/c4de752a4b6121e7755c5b3827557f7c4436801a493c05e7775a88fa3abaa845.png

Example 2#

Load a larger dataset and run ScisTree2.

# Load the larger dataset. NOTE: this rebinds `gp` from Example 1.
# source='read' presumably tells from_csv that the CSV holds raw read data
# (the file is toy_raw_reads.csv) -- confirm against the scistree2 docs.
gp = s2.probability.from_csv('./data/toy_raw_reads.csv', source='read')
caller_spr = s2.ScisTree2(threads=8)
# infer() returns three values, unpacked here as the inferred tree,
# the imputed genotypes, and a likelihood.
tree_spr, imputed_genotype_spr, likelihood_spr = caller_spr.infer(gp)

Evaluate branch confidence.

# num_bootstrap is not passed here, so the number of replicates is whatever
# bootstrap() uses by default.
tree_spr_bootstrap = caller_spr.bootstrap(tree_spr, gp)
# Print the tree, using each node's branch_confidence as the value passed to
# branch_length_func.
print(tree_spr_bootstrap.output(branch_length_func=lambda x: x.branch_confidence))
((((((((((((cell24:1.0,cell43:1.0):0.73,cell22:1.0):0.18,cell39:1.0):0.08,((cell15:1.0,cell46:1.0):0.12,cell40:1.0):0.02):0.07,((((cell49:1.0,cell5:1.0):0.23,cell28:1.0):0.18,cell3:1.0):0.11,(cell32:1.0,cell4:1.0):0.4):0.0):0.0,cell19:1.0):0.0,cell8:1.0):0.01,((((cell13:1.0,cell38:1.0):0.58,cell21:1.0):0.29,cell20:1.0):0.22,cell42:1.0):0.5):0.01,(((cell31:1.0,cell41:1.0):0.29,cell25:1.0):0.11,cell23:1.0):0.24):0.11,cell44:1.0):0.37,cell29:1.0):1.0,(((((((((cell16:1.0,cell17:1.0):0.18,(cell27:1.0,cell2:1.0):0.11):0.1,cell30:1.0):0.09,cell11:1.0):0.17,((cell14:1.0,cell26:1.0):0.51,cell47:1.0):0.27):0.29,cell48:1.0):0.18,((((cell36:1.0,cell9:1.0):0.41,cell1:1.0):0.09,cell6:1.0):0.13,((cell0:1.0,cell34:1.0):0.51,(cell33:1.0,cell45:1.0):0.48):0.39):0.01):0.01,((((cell10:1.0,cell12:1.0):0.58,cell37:1.0):0.85,cell35:1.0):0.56,cell18:1.0):0.57):0.22,cell7:1.0):0.99):1.0;

Plot the tree; darker nodes indicate higher confidence.

# Add 0.5 to every branch length (number of mutations) so that no branch
# ends up with zero length.
tv = TreeViz(tree_spr_bootstrap.output(branch_length_func=lambda x: len(x.mutations)+0.5, confidence_func=lambda x: x.branch_confidence), width=15)
# Alternative (disabled): draw confidence values as text labels instead of markers.
# tv.show_confidence(bbox=dict(facecolor='red', alpha=0.5))
# For every node, collect the names of the leaves below it and draw a red
# diamond marker for that group of leaves, using the node's branch_confidence
# as the marker opacity (higher confidence -> more opaque, i.e. darker).
for node in tree_spr_bootstrap.get_all_nodes():
    leaf_names = [leaf.name for leaf in tree_spr_bootstrap[node].get_leaves()]
    tv.marker(leaf_names, marker="D", color='red', alpha=tree_spr_bootstrap[node].branch_confidence)
fig = tv.plotfig()
../_images/30645eecdbd4263516e3985e8d5d01f762fc79454485fd76dae0d05b028baaaa.png