Commit 2e428b54 by Edoardo Sarti

tree similarity final notebook

parent df3fb1f0
{
"cells": [
{
"cell_type": "code",
"execution_count": 82,
"id": "edb197fc",
"metadata": {},
"outputs": [],
"source": [
"from ete3 import Tree\n",
"import math\n",
"import random\n",
"import numpy as np\n",
"import scipy.stats"
]
},
{
"cell_type": "markdown",
"id": "38abced4",
"metadata": {},
"source": [
"# Toy models"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "55e52783",
"metadata": {},
"outputs": [],
"source": [
"def permutations(h,d=1):\n",
" \"\"\"Lists all binary permutations of histogram h (must have a length of a power of 2)\"\"\"\n",
" hlen = len(h)\n",
" if hlen == d:\n",
" return [h]\n",
" if hlen > 16:\n",
" return None\n",
" \n",
" iblist = []\n",
" for i in range(2**d):\n",
" ib = [int(x) for x in ('{:b}'.format(i)).zfill(d)]\n",
" iblist.append(ib)\n",
" res = []\n",
" binlength = int(hlen/d)\n",
" sme = []\n",
" for i in range(0,hlen,binlength):\n",
" sme.append((i,i+int(binlength/2),i+binlength))\n",
" hlist = []\n",
" for ib in iblist:\n",
" hnew = h[:]\n",
" for i, tripl in enumerate(sme):\n",
" if ib[i]:\n",
" s, m, e = tripl\n",
" hnew = hnew[:s] + hnew[m:e] + hnew[s:m] + hnew[e:]\n",
" hlist.append(hnew)\n",
" newlist = []\n",
" for hh in hlist:\n",
" newlist += permutations(hh,d*2)\n",
" return newlist"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "7a9df981",
"metadata": {},
"outputs": [],
"source": [
"def probhist(hh1, hh2, symm=True, v=False):\n",
" if v:\n",
" print(hh1)\n",
" print(hh2)\n",
" # Pseudocounts\n",
" eps = 0.00001\n",
" h1 = [x+(max(hh1)-min(hh1)+eps)/100 for x in hh1]\n",
" h2 = [x+(max(hh2)-min(hh2)+eps)/100 for x in hh2]\n",
" \n",
" # Normalize histos\n",
" s1, s2 = sum(h1), sum(h2)\n",
" nh1, nh2 = [x/s1 for x in h1], [x/s2 for x in h2]\n",
" if v:\n",
" print(\"Normalized histos\")\n",
" print(nh1)\n",
" print(nh2)\n",
" \n",
" # Couple histos and sort second\n",
" twonhs = list(zip(nh1,nh2))\n",
" twonhs = sorted(twonhs, key= lambda x:x[1])\n",
" if v:\n",
" print(\"TWONHS\")\n",
" print(twonhs)\n",
" \n",
" # Sort both histos and then couple\n",
" twosortednhs = list(zip(sorted(nh1), sorted(nh2)))\n",
" if v:\n",
" print(\"TWOSORTEDNHS\")\n",
" print(twosortednhs)\n",
" \n",
" # Probability model\n",
" dp = []\n",
" for i in range(len(nh1)):\n",
" for j in range(i, len(nh2)):\n",
" dp.append(abs(nh1[i] - nh2[j]))\n",
" mu = sum(dp)/len(dp)\n",
" mu2 = sum([x**2 for x in dp])/len(dp)\n",
" sigma = (mu2 - mu**2)**0.5\n",
" model = scipy.stats.norm(mu, sigma)\n",
" binsize = 0.01\n",
" \n",
" # Cross-mutual information\n",
" # SUM f(h1,h2) log2 (f(h1,h2) / f(h1',h2')) with h2 = h2'\n",
" res = 0\n",
" for i in range(len(h1)):\n",
" dif = abs(twonhs[i][0]-twonhs[i][1])\n",
" srtdif = abs(twosortednhs[i][0]-twosortednhs[i][1])\n",
" p = model.cdf(dif+binsize) - model.cdf(dif-binsize)\n",
" sp = model.cdf(srtdif+binsize) - model.cdf(srtdif-binsize)\n",
" res += p * math.log(p/sp, 2)\n",
" if v:\n",
" print(h1[i], p * math.log(p/sp, 2), p, sp, math.log(p/sp, 2), dif, srtdif)\n",
"\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "caf1a874",
"metadata": {},
"outputs": [],
"source": [
"def agg_vector(h):\n",
" l = len(h)\n",
" hlist = [h]\n",
" newh = [] # list of all the upper histos\n",
" while len(hlist[-1]) > 2:\n",
" for i, v in enumerate(hlist[-1]):\n",
" if i%2==0:\n",
" newh.append(v)\n",
" else:\n",
" newh[-1] += v\n",
" hlist.append(newh)\n",
" newh = []\n",
" res = []\n",
" for hh in hlist:\n",
" res += hh\n",
" return res\n",
"\n",
"def aggregated_permutations(h1):\n",
" ps = permutations(h1)\n",
" for ips in range(len(ps)):\n",
" ps[ips] = agg_vector(ps[ips])\n",
" return ps\n",
"\n",
"def aggmaxprobhist(h1, h2, allout=False):\n",
" aggps = aggregated_permutations(h1)\n",
" am = np.argmin([probhist(h,agg_vector(h2)) for h in aggps])\n",
" if allout:\n",
" return am, aggps[am], probhist(aggps[am],agg_vector(h2))\n",
" return probhist(aggps[am],agg_vector(h2), v=False)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "dff1ab66",
"metadata": {},
"outputs": [],
"source": [
"def tests(f):\n",
" answ = []\n",
" \n",
" # Reference\n",
" h1 = [10,9,8,7,12,10,7,5]\n",
" answ.append((h1, True, 'Reference'))\n",
" \n",
" # h1-h2 are similar (organisation problem) \n",
" h2 = [10,9,8,7,13,10,7,5]\n",
" answ.append((h2, True, 'Almost identical'))\n",
"\n",
" # h1-h3 are not similar (null hypothesis problem)\n",
" h3 = [10,5,8,7,12,10,7,9]\n",
" answ.append((h3, False, 'Identical sorting but not permutable'))\n",
" \n",
" # h1-h4 are similar (noise problem)\n",
" h4 = [11,8,7,8,11,9,8,6]\n",
" answ.append((h4, True, 'noise'))\n",
" \n",
" # h1-h5 are similar (topology problem)\n",
" h5 = [12,10,7,5,10,9,8,7]\n",
" answ.append((h5, True, 'Permutable'))\n",
"\n",
" # h1-h6 are similar (non-proportional incrementation problem)\n",
" h6 = [x+100000 for x in h1]\n",
" answ.append((h6, False, 'non-proportional increment'))\n",
"\n",
" # h1-h7 are not similar (null case)\n",
" h7 = [max(0,random.uniform(-1,1))*512 for x in h1]\n",
" answ.append((h7, False, 'Random big numbers or zero'))\n",
"\n",
" # h1-h8 are not similar (unbalance case)\n",
" h8 = [0, 0, 0, 0, 10000, 10000, 10000, 10000]\n",
" answ.append((h8, False, 'max unbalance'))\n",
" \n",
" # h1-h9 are similar (proportional incrementation problem)\n",
" h9 = [x*100 for x in h1]\n",
" answ.append((h9, True, 'proportional increment'))\n",
" \n",
" print(\"#### \" + f.__name__ + \" ####\")\n",
" for h, a, c in answ:\n",
" print(\"{0:50s} {1} {2:20.6f} {3}\".format(c, h, f(h1, h), a))\n",
" print(\"\\n\") \n",
" \n",
" print(\"--- Incremental noise on signal ---\")\n",
" for magn in range(11):\n",
" sph = 0\n",
" sqsph = 0\n",
" for rep in range(10):\n",
" coef = 3*magn/10\n",
" hr = [max(0,random.gauss(x, coef)) for x in h1]\n",
" ph = f(h1,hr)\n",
" sph += ph\n",
" sqsph += ph**2\n",
" #if ph > 50:\n",
" # print(h1, hr, magn, ph)\n",
" print(\"{0:<30d} {1:15.6f} +/- {2:10.6f}\".format(magn, sph/100, (sqsph/100 - (sph/100)**2)**0.5))\n",
" print(\"\\n\")\n",
" \n",
" print(\"--- Incremental random noise ---\")\n",
" for magn in range(11):\n",
" sph = 0\n",
" sqsph = 0\n",
" for rep in range(10):\n",
" hr = [random.random()*10*magn for x in h1]\n",
" ph = f(h1,hr)\n",
" sph += ph\n",
" sqsph += ph**2\n",
" #if ph > 50:\n",
" # print(h1, hr, magn, ph)\n",
" print(\"{0:<30d} {1:15.6f} +/- {2:10.6f}\".format(magn, sph/100, (sqsph/100 - (sph/100)**2)**0.5))\n",
" print(\"\\n\")\n",
" \n",
" print(\"--- Incremental unbalanced distribution ---\")\n",
" hrr = [0]*int(len(h1)/2) + [1]*int(len(h1)/2)\n",
" for magn in range(1,11):\n",
" hr = [1+x*10*magn for x in hrr]\n",
" print(\"{0:<30d} {1:15.6f}\".format(magn, f(h1,hr)))\n",
" print(\"---\")\n",
" print(\"\\n\\n\")"
]
},
{
"cell_type": "markdown",
"id": "a7f41851",
"metadata": {},
"source": [
"## Tests"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "9eade642",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#### aggmaxprobhist ####\n",
"Reference [10, 9, 8, 7, 12, 10, 7, 5] 0.000000 True\n",
"Almost identical [10, 9, 8, 7, 13, 10, 7, 5] 0.000000 True\n",
"Identical sorting but not permutable [10, 5, 8, 7, 12, 10, 7, 9] 0.189489 False\n",
"noise [11, 8, 7, 8, 11, 9, 8, 6] 0.000000 True\n",
"Permutable [12, 10, 7, 5, 10, 9, 8, 7] 0.000000 True\n",
"non-proportional increment [100010, 100009, 100008, 100007, 100012, 100010, 100007, 100005] 0.000000 False\n",
"Random big numbers or zero [0, 0, 197.98556516655424, 172.2728994418659, 457.79335075776396, 152.4892738791217, 0, 225.23236097569952] 0.309266 False\n",
"max unbalance [0, 0, 0, 0, 10000, 10000, 10000, 10000] 0.263501 False\n",
"proportional increment [1000, 900, 800, 700, 1200, 1000, 700, 500] 0.000000 True\n",
"\n",
"\n",
"--- Incremental noise on signal ---\n",
"0 0.000000 +/- 0.000000\n",
"1 0.000000 +/- 0.000000\n",
"2 0.000749 +/- 0.004237\n",
"3 0.001959 +/- 0.008455\n",
"4 0.003080 +/- 0.013375\n",
"5 0.008437 +/- 0.033195\n",
"6 0.013145 +/- 0.051032\n",
"7 0.016885 +/- 0.059035\n",
"8 0.012466 +/- 0.043463\n",
"9 0.015080 +/- 0.053964\n",
"10 0.015644 +/- 0.059220\n",
"\n",
"\n",
"--- Incremental random noise ---\n",
"0 0.018806 +/- 0.056417\n",
"1 0.027545 +/- 0.088254\n",
"2 0.022396 +/- 0.071375\n",
"3 0.029703 +/- 0.091366\n",
"4 0.026686 +/- 0.085925\n",
"5 0.022741 +/- 0.073748\n",
"6 0.023412 +/- 0.073059\n",
"7 0.024663 +/- 0.084127\n",
"8 0.022264 +/- 0.075595\n",
"9 0.022961 +/- 0.070557\n",
"10 0.025343 +/- 0.080337\n",
"\n",
"\n",
"--- Incremental unbalanced distribution ---\n",
"1 0.230790\n",
"2 0.243396\n",
"3 0.249401\n",
"4 0.252661\n",
"5 0.254701\n",
"6 0.256097\n",
"7 0.257111\n",
"8 0.257881\n",
"9 0.258486\n",
"10 0.258973\n",
"---\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"tests(aggmaxprobhist)"
]
},
{
"cell_type": "markdown",
"id": "9fd5e884",
"metadata": {},
"source": [
"# Real trees "
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "7641e158",
"metadata": {},
"outputs": [],
"source": [
"def generate_histo(input_file,v):\n",
" t = Tree(input_file) # ouverture du fichier\n",
" l=[]\n",
" for n in t.get_descendants():\n",
" if t.get_distance(n,topology_only=True)==v:# test sur la distance\n",
" l.append(n)\n",
" histof=[]\n",
"\n",
" for x in range(len(l)):# pour toute les occurences dans l\n",
" histo=[]\n",
" for leaf in l[x]: # pour toute les feuilles dans L \n",
" histo.append(leaf)\n",
" histof.append(len(histo))# on recupere le nombre de feuille de chaque sous arbre\n",
" return histof"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "dc029957",
"metadata": {},
"outputs": [],
"source": [
"histo70k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.7_k20.nhx\",3)\n",
"histo80k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.8_k20.nhx\",3)\n",
"histo90k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.9_k20.nhx\",3)\n",
"agghisto70k20 = agg_vector(histo70k20)\n",
"agghisto80k20 = agg_vector(histo80k20)\n",
"agghisto90k20 = agg_vector(histo90k20)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "ae740395",
"metadata": {},
"outputs": [],
"source": [
"histo80k3=generate_histo(\"results_djeser/CrFBA3_PV00274_0.8_k3.nhx\",3)\n",
"histo90k3=generate_histo(\"results_djeser/CrFBA3_PV00274_0.9_k3.nhx\",3)\n",
"histo100k3=generate_histo(\"results_djeser/CrFBA3_PV00274_1.0_k3.nhx\",3)\n",
"agghisto80k3 = agg_vector(histo80k3)\n",
"agghisto90k3 = agg_vector(histo90k3)\n",
"agghisto100k3 = agg_vector(histo100k3)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "f95a30d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<BarContainer object of 16 artists>"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQDklEQVR4nO3df6zddX3H8edrrb/AGUp6y2qLKywVRaLD3DmUzDgrsZuE8sdYatTcTJZmCyoaNy0zGfEPFjKN02TTpQGkiQTWII5miY6m6sySibvgL0pFiLhSqPQ6549oglbf++N8Gy+3t/Tec86959xPn4+kOef7Od9zzgva+7qf8/11UlVIktryG6MOIEkaPstdkhpkuUtSgyx3SWqQ5S5JDVo96gAAa9eurU2bNo06hiStKPfdd9/3q2pivsfGotw3bdrE9PT0qGNI0oqS5H9O9pibZSSpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUFjcYaqpJUnH8wJY3W9X/4zLpy5S1KDLHdJapDlLkkNstwlqUGnLPcktyQ5muSBWWMfSvKtJN9I8pkkZ8167LokjyR5KMkblyi3JOkZLGTmfiuwdc7YPuCiqno58G3gOoAkFwLbgZd1z/l4klVDSytJWpBTlntVfQn4wZyxe6rqWLf4ZWBjd38bcEdVPVVVjwKPAK8aYl5J0gIMY5v724HPdvc3AI/NeuxwN3aCJDuSTCeZnpmZGUIMSdJxA5V7kg8Ax4Dbjg/Ns9q8ZzVU1a6qmqyqyYmJeb8CUJLUp77LPckUcDnwlqo6XuCHgXNnrbYReKL/eJLGQnLiH421vso9yVbg/cAVVfWzWQ/tBbYneU6S84DNwFcGjylJWoxTXlsmye3A64C1SQ4D19M7OuY5wL70foN/uar+oqoOJNkDPEhvc801VfXLpQovSZrfKcu9qt48z/DNz7D+DcANg4SSJA3GM1QlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNOmW5J7klydEkD8waOzvJviQPd7drZj12XZJHkjyU5I1LFVySdHILmbnfCmydM7YT2F9Vm4H93TJJLgS2Ay/rnvPxJKuGllaStCCnLPeq+hLwgznD24Dd3f3dwJWzxu+oqqeq6lHgEeBVw4kqSVqofre5n1NVRwC623Xd+AbgsVnrHe7GTpBkR5LpJNMzMzN9xpAkzWfYO1Qzz1jNt2JV7aqqyaqanJiYGHIMSTq99VvuTyZZD9DdHu3GDwPnzlpvI/BE//EkSf3ot9z3AlPd/Sng7lnj25M8J8l5wGbgK4NFlCQt1upTrZDkduB1wNokh4HrgRuBPUmuBg4BVwFU1YEke4AHgWPANVX1yyXKLkk6iVOWe1W9+SQPbTnJ+jcANwwSSpI0GM9QlaQGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAad8puYJKlFyYljVcufY6k4c5ekBlnuktQgy12SGmS5S1KD3KEqNaD1nYNaPGfuktSggco9yXuSHEjyQJLbkzw3ydlJ9iV5uLtdM6ywkqSF6bvck2wA3gVMVtVFwCpgO7AT2F9Vm4H93bIkaRkNullmNfC8JKuBM4AngG3A7u7x3cCVA76HJGmR+i73qnoc+DBwCDgC/Kiq7gHOqaoj3TpHgHXzPT/JjiTTSaZnZmb6jSFJmscgm2XW0Julnwe8EDgzyVsX+vyq2lVVk1U1OTEx0W8MSdI8Btks8wbg0aqaqapfAHcBrwGeTLIeoLs9OnhMSdJiDFLuh4BLkpyRJMAW4CCwF5jq1pkC7h4soiRpsfo+iamq7k1yJ3A/cAz4KrALeD6wJ8nV9H4BXDWMoJKkhRvoDNWquh64fs7wU/Rm8ZKkEfEMVUlqkOUuSQ2y3CWpQZa7JDXIS/5qaXgNWmmknLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUFeOEw6TeSDJ17Mra73Ym6tcuYuSQ2y3CWpQZa7JDXIbe6NcbuqJHDmLklNstwlqUEDlXuSs5LcmeRbSQ4meXWSs5PsS/Jwd7tmWGElLb3kxD9aeQaduX8M+FxVvQR4BXAQ2Ansr6rNwP5uWZK0jPou9yQvAF4L3AxQVT+vqh8C24Dd3Wq7gSsHiyj1z1moTleDzNzPB2aATyb5apKbkpwJnFNVRwC623XzPTnJjiTTSaZnZmYGiCFJmmuQcl8NvBL4RFVdDPyURWyCqapdVTVZVZMTExMDxJAkzTVIuR8GDlfVvd3ynfTK/skk6wG626ODRZQkLVbf5V5V3wMeS3JBN7QFeBDYC0x1Y1PA3QMllCQt2qBnqL4TuC3Js4HvAH9G7xfGniRXA4eAqwZ8D0nSIg1U7lX1NWBynoe2DPK6kqTBeIaqJDXIC4dJWtnmnrxQXigPnLlLUpMsd6lVnpp7WrPcJalBlrskNchyl6QGWe6S1CAPhdRpz++dVYss9z7NLQTLQIsx38ErHp6tYXKzjCQ1yHKXpAZZ7pLUIMt9JfMLQiWdhOUuSQ2y3CWpQR4KqV8b1+PzxjWXtEjLeU6FM3dJapAz9zFwupwhebr8d2q0RvHvbBw/XDpzl6SlMOKj2Sx3SWqQ5S5JDbLcJalBlrskNWjgo2WSrAKmgcer6vIkZwP/AmwCvgv8aVX936DvI83mkTfSMxvGzP1a4OCs5Z3A/qraDOzvliVJy2igck+yEXgTcNOs4W3A7u7+buDKQd5DkrR4g87cPwq8D/jVrLFzquoIQHe7bsD3kCQtUt/b3JNcDhytqvuSvK6P5+8AdgC86EUv6jeG1DT3Lahfg8zcLwWuSPJd4A7g9Uk+BTyZZD1Ad3t0vidX1a6qmqyqyYmJiQFiSJLm6rvcq+q6qtpYVZuA7cDnq+qtwF5gqlttCrh74JSSpEVZiuPcbwQuS/IwcFm3LElaRkO5KmRVfRH4Ynf/f4Etw3hdSVJ/PENVkhpkuUtSgyx3SWqQ5a7Tzwi/QEFaLn7NnsbKvF9XtvwxBuKJRxoHztwlqUGWuzQuRvydm2qL5S5JDbLcpUE429aYcofqHPPu0HNfmKQVxpm7JDXIcpekBlnuktQgy12SGuQOVUkrRgtnMC8XZ+6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQR4KqYF5eJo0fpy5S1KDLHdJapCbZfSM5n4fqN8FKq0Mfc/ck5yb5AtJDiY5kOTabvzsJPuSPNzdrhleXEnSQgyyWeYY8N6qeilwCXBNkguBncD+qtoM7O+Wl5ffjiPpNNd3uVfVkaq6v7v/E+AgsAHYBuzuVtsNXDlgRknSIg1lh2qSTcDFwL3AOVV1BHq/AIB1J3nOjiTTSaZnZmaGEWPp+ElA0goz8A7VJM8HPg28u6p+nAUWX1XtAnYBTE5OLvleurk7BsGdg9K4GOefz3HO9kwGmrkneRa9Yr+tqu7qhp9Msr57fD1wdLCIkqTFGuRomQA3Awer6iOzHtoLTHX3p4C7+48nSerHIJtlLgXeBnwzyde6sb8BbgT2JLkaOARcNVBCSdKi9V3uVfWfwMk2sG/p93UlSYPz8gOS1CDLXZIaZLlL0nENndNiuUtSg7wqpKQlM+8XuYz/+T9NcOa+3Ab42DfMT4sNffqUNA/LXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJalATFw6be10Ur0sk6XTnzF2SGtTEzH1czXu50+WPIek05MxdkhpkuUtSgyx3SWqQ29ylBXIfilaSJZu5J9ma5KEkjyTZuVTvI0k60ZKUe5JVwD8BfwRcCLw5yYVL8V6SVhi/wHdZLNXM/VXAI1X1nar6OXAHsG2J3kuSNEeqhr/VMMmfAFur6s+75bcBv19V75i1zg5gR7d4AfDQEN56LfD9IbzOUhjXbOOaC8zWj3HNBWbrx6ly/XZVTcz3wFLtUJ3vc9bTfotU1S5g11DfNJmuqslhvuawjGu2cc0FZuvHuOYCs/VjkFxLtVnmMHDurOWNwBNL9F6SpDmWqtz/G9ic5Lwkzwa2A3uX6L0kSXMsyWaZqjqW5B3AvwOrgFuq6sBSvNccQ93MM2Tjmm1cc4HZ+jGuucBs/eg715LsUJUkjZaXH5CkBlnuktSgJsp9XC91kOTcJF9IcjDJgSTXjjrTXElWJflqkn8bdZbjkpyV5M4k3+r+37161JmOS/Ke7u/ygSS3J3nuCLPckuRokgdmjZ2dZF+Sh7vbNWOU7UPd3+k3knwmyVnjkGvWY3+VpJKsXe5cz5QtyTu7fjuQ5O8X+norvtzH/FIHx4D3VtVLgUuAa8Yo23HXAgdHHWKOjwGfq6qXAK9gTPIl2QC8C5isqovoHSywfYSRbgW2zhnbCeyvqs3A/m55FG7lxGz7gIuq6uXAt4HrljsU8+ciybnAZcCh5Q40y63MyZbkD+md3f/yqnoZ8OGFvtiKL3fG+FIHVXWkqu7v7v+EXkltGG2qX0uyEXgTcNOosxyX5AXAa4GbAarq51X1w5GGerrVwPOSrAbOYITnb1TVl4AfzBneBuzu7u8GrlzOTMfNl62q7qmqY93il+md/zLyXJ1/AN7HCC/0eZJsfwncWFVPdescXejrtVDuG4DHZi0fZowK9Lgkm4CLgXtHHGW2j9L7B/2rEeeY7XxgBvhkt7nopiRnjjoUQFU9Tm/mdAg4Avyoqu4ZbaoTnFNVR6A3uQDWjTjPybwd+OyoQwAkuQJ4vKq+Puos83gx8AdJ7k3yH0l+b6FPbKHcT3mpg1FL8nzg08C7q+rHo84DkORy4GhV3TfqLHOsBl4JfKKqLgZ+yug2LTxNt/16G3Ae8ELgzCRvHW2qlSfJB+htsrxtDLKcAXwA+NtRZzmJ1cAaept1/xrYkyzsMpotlPtYX+ogybPoFfttVXXXqPPMcilwRZLv0tuU9foknxptJKD393m4qo5/wrmTXtmPgzcAj1bVTFX9ArgLeM2IM831ZJL1AN3tgj/GL4ckU8DlwFtqPE6y+R16v6y/3v0sbATuT/JbI031a4eBu6rnK/Q+ZS9oh28L5T62lzrofsPeDBysqo+MOs9sVXVdVW2sqk30/p99vqpGPgutqu8BjyW5oBvaAjw4wkizHQIuSXJG93e7hTHZ2TvLXmCquz8F3D3CLE+TZCvwfuCKqvrZqPMAVNU3q2pdVW3qfhYOA6/s/h2Og38FXg+Q5MXAs1ng1StXfLl3O2iOX+rgILBnmS51sBCXAm+jNyv+Wvfnj0cdagV4J3Bbkm8Avwv83Wjj9HSfJu4E7ge+Se/nZ2SnrSe5Hfgv4IIkh5NcDdwIXJbkYXpHf9w4Rtn+EfhNYF/3s/DPY5JrLJwk2y3A+d3hkXcAUwv9xOPlBySpQSt+5i5JOpHlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhr0//yGKcRs2GlBAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from matplotlib import pyplot as plt\n",
"X = np.arange(len(histo70k20))\n",
"plt.bar(X + 0.00, histo70k20, color = 'b', width = 0.25)\n",
"plt.bar(X + 0.25, histo80k20, color = 'r', width = 0.25)\n",
"plt.bar(X + 0.50, histo90k20, color = 'g', width = 0.25)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "8a4dd3b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8814593234696813\n",
"0.7869731847259129\n",
"0.4027629892694329\n"
]
}
],
"source": [
"print(aggmaxprobhist(histo70k20, histo80k20))\n",
"print(aggmaxprobhist(histo70k20, histo90k20))\n",
"print(aggmaxprobhist(histo80k20, histo90k20))"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "6c2ec90b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<BarContainer object of 16 artists>"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASPUlEQVR4nO3df6zldX3n8edrQd1Va8Tlwk6B7oAZ2aKxQ3OXdZdorNQ6dQ1os3aHdMnsyu7oBrq6td2CJqW2oSGtP9qkq2aUWdgUQSKwko26TlgjaVK1F0QcHCmgFC7MztxqW01t6A6++8f53ni491zumfNjzpnPPB/JyTnnc77fc15w577u937u90eqCklSW/7BrANIkibPcpekBlnuktQgy12SGmS5S1KDTp51AIBTTz21tm7dOusYknRcueeee/6iqhYGvTYX5b5161aWlpZmHUOSjitJ/nyj15yWkaQGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBs3FEaqSxpOsHyvWDHphnhOKW+6S1CDLXZIaZLlLUoMsd0lq0KblnuSsJF9IciDJA0ne2Y2/JMm+JA9196f0rXN1koeTPJjkDdP8D5AkrTfMlvsR4N1V9ZPAq4ArkpwHXAXcVVXbgLu653Sv7QReDuwAPpzkpGmElyQNtmm5V9XBqrq3e/x94ABwBnAJcGO32I3Am7vHlwC3VNVTVfVt4GHgggnnliQ9i6Oac0+yFTgf+DJwelUdhN4PAOC0brEzgMf7VlvuxiRJx8jQ5Z7khcBtwLuq6nvPtuiAsXVHTyTZnWQpydLKysqwMSRJQxiq3JM8h16x31RVt3fDh5Js6V7fAhzuxpeBs/pWPxN4cu17VtWeqlqsqsWFhYHXd5UkjWiYvWUCXA8cqKoP9r10J7Cre7wL+HTf+M4kz0tyNrAN+MrkIkuSNjPMuWUuBC4Dvp7kvm7sPcB1wK1JLgceA94KUFUPJLkV+Aa9PW2uqKqnJx1ckrSxTcu9qv6YwfPoABdtsM61wLVj5JIkjcEjVCWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUHDnH5Akk5Yed/6A/TrmnUnup07brlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBg1zmb29SQ4n2d839skk93W3R1ev0JRka5K/7Xvto1PMLknawDD7ud8A/CHwP1cHqurfrj5O8gHgr/uWf6Sqtk8onyRpBMNcZu/uJFsHvdZdPPsXgddNOJckaQzjzrm/GjhUVQ/1jZ2d5KtJvpjk1RutmGR3kqUkSysrK2PGkCT1G7fcLwVu7nt+EPiJqjof+BXgE0leNGjFqtpTVYtVtbiwsDBmDElSv5HLPcnJwC8An1wdq6qnquo73eN7gEeAl40bUpJ0dMbZcv9Z4JtVtbw6kGQhyUnd43OAbcC3xosoSTpaw+wKeTPwJ8C5SZaTXN69tJNnTskAvAa4P8nXgE8B76iq704ysCRpc8PsLXPpBuP/fsDYbcBt48eSJI3DI1QlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ0a5kpMe5McTrK/b+w3kzyR5L7u9sa+165O8nCSB5O8YVrBJUkbG2bL/QZgx4DxD1XV9u72GYAk59G7/N7Lu3U+vHpNVUnSsbNpuVfV3cCw10G9BLilqp6qqm8DDwMXjJFPkjSCcebcr0xyfzdtc0o3dgbweN8yy93YOkl2J1lKsrSysjJGDEnSWqOW+0eAlwLbgYPAB7rxDFi2Br1BVe2pqsWqWlxYWBgxhiRpkJHKvaoOVdXTVfVD4GP8aOplGTirb9EzgSfHiyhJOlojlXuSLX1P3wKs7klzJ7AzyfOSnA1sA74yXkRJ0tE6ebMFktwMvBY4NckycA3w2iTb6U25PAq8HaCqHkhyK/AN4AhwRVU9PZXkkqQNbVruVXXpgOHrn2X5a4FrxwklSRqPR6hKUoMsd0lqkOUuSQ2y3CWpQZv+QVWalLxv/TFudc3AY9wkjcktd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDPIjpOJI1xwDVoAtflQcFSXLLXZKaZLlLUoM2Lfcke5McTrK/b+z3knwzyf1J7kjy4m58a5K/TXJfd/voFLNLkjYwzJb7DcCONWP7gFdU1SuBPwOu7nvtkara3t3eMZmYkqSjsWm5V9XdwHfXjH2+qo50T78EnDmFbJKkEU1izv1twGf7np+d5KtJvpjk1RutlGR3kqUkSysrKxOIIUlaNVa5J3kvcAS4qRs6CPxEVZ0P/ArwiSQvGrRuVe2pqsWqWlxYWBgnhiRpjZHLPcku4E3AL1X1dq6uqqeq6jvd43uAR4CXTSKoJGl4I5V7kh3ArwMXV9UP+sYXkpzUPT4H2AZ8axJBJUnD2/QI1SQ3A68FTk2yDFxDb++Y5wH70jts8kvdnjGvAX4ryRHgaeAdVfXdgW8sSZqaTcu9qi4dMHz9BsveBtw2bihJ0ng8QlWSGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yCsxSdIxkvetv3paXTOdq6e55S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ3atNyT7E1yOMn+vrGXJNmX5KHu/pS+165O8nCSB5O8YVrBJUkbG2bL/QZgx5qxq4C7qmobcFf3nCTnATuBl3frfHj1mqqSpGNn03KvqruBtddBvQS4sXt8I/DmvvFbquqpqvo28DBwwWSiSpKGNeqc++lVdRCguz+tGz8DeLxvueVubJ0ku5MsJVlaWVkZMYYkaZBJ/0F1/SnPYOApz6pqT1UtVtXiwsLChGNI0olt1HI/lGQLQHd/uBtfBs7qW+5M4MnR40mSRjFqud8J7Ooe7wI+3Te+M8nzkpwNbAO+Ml5ESdLR2vRiHUluBl4LnJpkGbgGuA64NcnlwGPAWwGq6oEktwLfAI4AV1TV01PKLknawKblXlWXbvDSRRssfy1w7TihJEnj8QhVSWqQ5S5JDbLcJalBlrskjSlZfxs8eOxY7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBm55+QNKJZdAeezXobN418GzemhNuuUtSg9xy13Ep71u/JVnXuCUprXLLXZIaZLlLUoNGnpZJci7wyb6hc4DfAF4M/Cdg9arX76mqz4z6OZKkozdyuVfVg8B2gCQnAU8AdwD/AfhQVb1/EgElSUdvUtMyFwGPVNWfT+j9JEljmFS57wRu7nt+ZZL7k+xNcsqgFZLsTrKUZGllZWXQIpKkEY29K2SS5wIXA1d3Qx8Bfhuo7v4DwNvWrldVe4A9AIuLi+7Dpplxt0q1aBJb7j8P3FtVhwCq6lBVPV1VPwQ+Blwwgc+QJB2FSZT7pfRNySTZ0vfaW4D9E/gMSdJRGGtaJsnzgdcDb+8b/t0k2+lNyzy65jVJ0jEwVrlX1Q+Af7xm7LKxEkknAE/ONT3+DaXHI1QlqUGWuyQ1yHKXdNxI1t/WDwgsd0lqkuUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAaNeyWmR4HvA08DR6pqMclLgE8CW+ldiekXq+ovx4spSToak9hy/5mq2l5Vi93zq4C7qmobcFf3XA0b6jSsnopVOqamMS1zCXBj9/hG4M1T+AxJ0rMYt9wL+HySe5Ls7sZOr6qDAN39aYNWTLI7yVKSpZWVlTFjSNPnbyg6now15w5cWFVPJjkN2Jfkm8OuWFV7gD0Ai4uLJ97VayVpisYq96p6srs/nOQO4ALgUJItVXUwyRbg8ARyakhe+V0SjDEtk+QFSX5s9THwc8B+4E5gV7fYLuDT44aUJB2dcbbcTwfuSG+O8WTgE1X1uSR/Ctya5HLgMeCt48eUJB2Nkcu9qr4F/NSA8e8AF40TSpI0Ho9QlaQGWe6S1CDLXZIaNO5+7mrc2l0r3a1SOj645S5JDbLcJalBlrskNchyl6QGWe6S1CD3lpHmmCeC06gsd0kz5w+xyXNaRpIaZLlLUoMsd0lqkOV+gvJ6oFLbLHc1bagfYlKDxrnM3llJvpDkQJIHkryzG//NJE8kua+7vXFycafPLVpJLRhnV8gjwLur6t7uWqr3JNnXvfahqnr/+PEkSaMY5zJ7B4GD3ePvJzkAnDGpYJKk0U1kzj3JVuB84Mvd0JVJ7k+yN8kpG6yzO8lSkqWVlZVJxJAkdcYu9yQvBG4D3lVV3wM+ArwU2E5vy/4Dg9arqj1VtVhViwsLC+PGkCT1GavckzyHXrHfVFW3A1TVoap6uqp+CHwMuGD8mJKkozHO3jIBrgcOVNUH+8a39C32FmD/6PEkSaMYZ2+ZC4HLgK8nua8bew9waZLtQAGPAm8f4zMkSSMYZ2+ZPwYG7fD9mdHjSJImwVP+zgFPdypp0jz9gKQTUutHo7vlLmkk/sY539xyl6QGWe6S1CCnZaQJc7pC86CJcl/7N48atIdm+c2lE5s/dE4sTstIUoMsd0lqkOUuSQ2y3Keo9YMkJM0vy12SGmS5S5oaf3udHctdkhpkuWuuuKUnTUYTBzHNwtoDQjwYRNI8mVq5J9kB/AFwEvDxqrpuWp81VB6PzpN0ApnKtEySk4D/Dvw8cB69S++dN43PkiStN6059wuAh6vqW1X1d8AtwCVT+ixJ0hqpKZxQK8m/AXZU1X/snl8G/IuqurJvmd3A7u7pucCDE/joU4G/mMD7TMO8ZpvXXGC2UcxrLjDbKDbL9U+ramHQC9Oacx+0O8MzfopU1R5gz0Q/NFmqqsVJvuekzGu2ec0FZhvFvOYCs41inFzTmpZZBs7qe34m8OSUPkuStMa0yv1PgW1Jzk7yXGAncOeUPkuStMZUpmWq6kiSK4H/Q29XyL1V9cA0PmuNiU7zTNi8ZpvXXGC2UcxrLjDbKEbONZU/qEqSZsvTD0hSgyx3SWpQE+WeZEeSB5M8nOSqWedZleSsJF9IciDJA0neOetMayU5KclXk/zvWWdZleTFST6V5Jvd/7t/OetMq5L81+5ruT/JzUn+4Qyz7E1yOMn+vrGXJNmX5KHu/pQ5yvZ73df0/iR3JHnxPOTqe+1Xk1SSU491rmfLluSXu357IMnvDvt+x325z/mpDo4A766qnwReBVwxR9lWvRM4MOsQa/wB8Lmq+mfATzEn+ZKcAfwXYLGqXkFvZ4GdM4x0A7BjzdhVwF1VtQ24q3s+CzewPts+4BVV9Urgz4Crj3UoBuciyVnA64HHjnWgPjewJluSn6F3dP8rq+rlwPuHfbPjvtyZ41MdVNXBqrq3e/x9eiV1xmxT/UiSM4F/DXx81llWJXkR8BrgeoCq+ruq+quZhnqmk4F/lORk4PnM8PiNqrob+O6a4UuAG7vHNwJvPpaZVg3KVlWfr6oj3dMv0Tv+Zea5Oh8C/htrDrY8ljbI9p+B66rqqW6Zw8O+XwvlfgbweN/zZeaoQFcl2QqcD3x5xlH6/T69f9A/nHGOfucAK8D/6KaLPp7kBbMOBVBVT9DbcnoMOAj8dVV9frap1jm9qg5Cb+MCOG3GeTbyNuCzsw4BkORi4Imq+tqsswzwMuDVSb6c5ItJ/vmwK7ZQ7pue6mDWkrwQuA14V1V9b9Z5AJK8CThcVffMOssaJwM/DXykqs4H/obZTS08Qzd/fQlwNvDjwAuS/LvZpjr+JHkvvSnLm+Ygy/OB9wK/MessGzgZOIXetO6vAbcmw12tpoVyn+tTHSR5Dr1iv6mqbp91nj4XAhcneZTeVNbrkvzRbCMBva/nclWt/obzKXplPw9+Fvh2Va1U1f8Hbgf+1YwzrXUoyRaA7n7oX+OPhSS7gDcBv1TzcZDNS+n9sP5a971wJnBvkn8y01Q/sgzcXj1fofdb9lB/8G2h3Of2VAfdT9jrgQNV9cFZ5+lXVVdX1ZlVtZXe/7P/W1Uz3wqtqv8HPJ7k3G7oIuAbM4zU7zHgVUme331tL2JO/tjb505gV/d4F/DpGWZ5hu4CPr8OXFxVP5h1HoCq+npVnVZVW7vvhWXgp7t/h/PgfwGvA0jyMuC5DHn2yuO+3Ls/0Kye6uAAcOsxOtXBMC4ELqO3VXxfd3vjrEMdB34ZuCnJ/cB24HdmG6en+23iU8C9wNfpff/M7LD1JDcDfwKcm2Q5yeXAdcDrkzxEb++PmVwBbYNsfwj8GLCv+1746JzkmgsbZNsLnNPtHnkLsGvY33g8/YAkNei433KXJK1nuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QG/T0eEhX1C1t3fgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.bar(X + 0.00, histo80k3, color = 'b', width = 0.25)\n",
"plt.bar(X + 0.25, histo90k3, color = 'r', width = 0.25)\n",
"plt.bar(X + 0.50, histo100k3, color = 'g', width = 0.25)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "1540d594",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n",
"0.9798736319567721\n",
"0.9798736319567721\n"
]
}
],
"source": [
"print(aggmaxprobhist(histo80k3, histo90k3))\n",
"print(aggmaxprobhist(histo80k3, histo100k3))\n",
"print(aggmaxprobhist(histo90k3, histo100k3))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment