Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
profileview
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Edoardo SARTI
profileview
Commits
2e428b54
Commit
2e428b54
authored
May 18, 2022
by
Edoardo Sarti
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tree similarity final notebook
parent
df3fb1f0
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
532 additions
and
0 deletions
+532
-0
tree_similarity.ipynb
tree_similarity/tree_similarity.ipynb
+532
-0
No files found.
tree_similarity/tree_similarity.ipynb
0 → 100644
View file @
2e428b54
{
"cells": [
{
"cell_type": "code",
"execution_count": 82,
"id": "edb197fc",
"metadata": {},
"outputs": [],
"source": [
"from ete3 import Tree\n",
"import math\n",
"import random\n",
"import numpy as np\n",
"import scipy.stats"
]
},
{
"cell_type": "markdown",
"id": "38abced4",
"metadata": {},
"source": [
"# Toy models"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "55e52783",
"metadata": {},
"outputs": [],
"source": [
"def permutations(h,d=1):\n",
" \"\"\"Lists all binary permutations of histogram h (must have a length of a power of 2)\"\"\"\n",
" hlen = len(h)\n",
" if hlen == d:\n",
" return [h]\n",
" if hlen > 16:\n",
" return None\n",
" \n",
" iblist = []\n",
" for i in range(2**d):\n",
" ib = [int(x) for x in ('{:b}'.format(i)).zfill(d)]\n",
" iblist.append(ib)\n",
" res = []\n",
" binlength = int(hlen/d)\n",
" sme = []\n",
" for i in range(0,hlen,binlength):\n",
" sme.append((i,i+int(binlength/2),i+binlength))\n",
" hlist = []\n",
" for ib in iblist:\n",
" hnew = h[:]\n",
" for i, tripl in enumerate(sme):\n",
" if ib[i]:\n",
" s, m, e = tripl\n",
" hnew = hnew[:s] + hnew[m:e] + hnew[s:m] + hnew[e:]\n",
" hlist.append(hnew)\n",
" newlist = []\n",
" for hh in hlist:\n",
" newlist += permutations(hh,d*2)\n",
" return newlist"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "7a9df981",
"metadata": {},
"outputs": [],
"source": [
"def probhist(hh1, hh2, symm=True, v=False):\n",
" if v:\n",
" print(hh1)\n",
" print(hh2)\n",
" # Pseudocounts\n",
" eps = 0.00001\n",
" h1 = [x+(max(hh1)-min(hh1)+eps)/100 for x in hh1]\n",
" h2 = [x+(max(hh2)-min(hh2)+eps)/100 for x in hh2]\n",
" \n",
" # Normalize histos\n",
" s1, s2 = sum(h1), sum(h2)\n",
" nh1, nh2 = [x/s1 for x in h1], [x/s2 for x in h2]\n",
" if v:\n",
" print(\"Normalized histos\")\n",
" print(nh1)\n",
" print(nh2)\n",
" \n",
" # Couple histos and sort second\n",
" twonhs = list(zip(nh1,nh2))\n",
" twonhs = sorted(twonhs, key= lambda x:x[1])\n",
" if v:\n",
" print(\"TWONHS\")\n",
" print(twonhs)\n",
" \n",
" # Sort both histos and then couple\n",
" twosortednhs = list(zip(sorted(nh1), sorted(nh2)))\n",
" if v:\n",
" print(\"TWOSORTEDNHS\")\n",
" print(twosortednhs)\n",
" \n",
" # Probability model\n",
" dp = []\n",
" for i in range(len(nh1)):\n",
" for j in range(i, len(nh2)):\n",
" dp.append(abs(nh1[i] - nh2[j]))\n",
" mu = sum(dp)/len(dp)\n",
" mu2 = sum([x**2 for x in dp])/len(dp)\n",
" sigma = (mu2 - mu**2)**0.5\n",
" model = scipy.stats.norm(mu, sigma)\n",
" binsize = 0.01\n",
" \n",
" # Cross-mutual information\n",
" # SUM f(h1,h2) log2 (f(h1,h2) / f(h1',h2')) with h2 = h2'\n",
" res = 0\n",
" for i in range(len(h1)):\n",
" dif = abs(twonhs[i][0]-twonhs[i][1])\n",
" srtdif = abs(twosortednhs[i][0]-twosortednhs[i][1])\n",
" p = model.cdf(dif+binsize) - model.cdf(dif-binsize)\n",
" sp = model.cdf(srtdif+binsize) - model.cdf(srtdif-binsize)\n",
" res += p * math.log(p/sp, 2)\n",
" if v:\n",
" print(h1[i], p * math.log(p/sp, 2), p, sp, math.log(p/sp, 2), dif, srtdif)\n",
"\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "caf1a874",
"metadata": {},
"outputs": [],
"source": [
"def agg_vector(h):\n",
" l = len(h)\n",
" hlist = [h]\n",
" newh = [] # list of all the upper histos\n",
" while len(hlist[-1]) > 2:\n",
" for i, v in enumerate(hlist[-1]):\n",
" if i%2==0:\n",
" newh.append(v)\n",
" else:\n",
" newh[-1] += v\n",
" hlist.append(newh)\n",
" newh = []\n",
" res = []\n",
" for hh in hlist:\n",
" res += hh\n",
" return res\n",
"\n",
"def aggregated_permutations(h1):\n",
" ps = permutations(h1)\n",
" for ips in range(len(ps)):\n",
" ps[ips] = agg_vector(ps[ips])\n",
" return ps\n",
"\n",
"def aggmaxprobhist(h1, h2, allout=False):\n",
" aggps = aggregated_permutations(h1)\n",
" am = np.argmin([probhist(h,agg_vector(h2)) for h in aggps])\n",
" if allout:\n",
" return am, aggps[am], probhist(aggps[am],agg_vector(h2))\n",
" return probhist(aggps[am],agg_vector(h2), v=False)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "dff1ab66",
"metadata": {},
"outputs": [],
"source": [
"def tests(f):\n",
" answ = []\n",
" \n",
" # Reference\n",
" h1 = [10,9,8,7,12,10,7,5]\n",
" answ.append((h1, True, 'Reference'))\n",
" \n",
" # h1-h2 are similar (organisation problem) \n",
" h2 = [10,9,8,7,13,10,7,5]\n",
" answ.append((h2, True, 'Almost identical'))\n",
"\n",
" # h1-h3 are not similar (null hypothesis problem)\n",
" h3 = [10,5,8,7,12,10,7,9]\n",
" answ.append((h3, False, 'Identical sorting but not permutable'))\n",
" \n",
" # h1-h4 are similar (noise problem)\n",
" h4 = [11,8,7,8,11,9,8,6]\n",
" answ.append((h4, True, 'noise'))\n",
" \n",
" # h1-h5 are similar (topology problem)\n",
" h5 = [12,10,7,5,10,9,8,7]\n",
" answ.append((h5, True, 'Permutable'))\n",
"\n",
" # h1-h6 are similar (non-proportional incrementation problem)\n",
" h6 = [x+100000 for x in h1]\n",
" answ.append((h6, False, 'non-proportional increment'))\n",
"\n",
" # h1-h7 are not similar (null case)\n",
" h7 = [max(0,random.uniform(-1,1))*512 for x in h1]\n",
" answ.append((h7, False, 'Random big numbers or zero'))\n",
"\n",
" # h1-h8 are not similar (unbalance case)\n",
" h8 = [0, 0, 0, 0, 10000, 10000, 10000, 10000]\n",
" answ.append((h8, False, 'max unbalance'))\n",
" \n",
" # h1-h9 are similar (proportional incrementation problem)\n",
" h9 = [x*100 for x in h1]\n",
" answ.append((h9, True, 'proportional increment'))\n",
" \n",
" print(\"#### \" + f.__name__ + \" ####\")\n",
" for h, a, c in answ:\n",
" print(\"{0:50s} {1} {2:20.6f} {3}\".format(c, h, f(h1, h), a))\n",
" print(\"\\n\") \n",
" \n",
" print(\"--- Incremental noise on signal ---\")\n",
" for magn in range(11):\n",
" sph = 0\n",
" sqsph = 0\n",
" for rep in range(10):\n",
" coef = 3*magn/10\n",
" hr = [max(0,random.gauss(x, coef)) for x in h1]\n",
" ph = f(h1,hr)\n",
" sph += ph\n",
" sqsph += ph**2\n",
" #if ph > 50:\n",
" # print(h1, hr, magn, ph)\n",
" print(\"{0:<30d} {1:15.6f} +/- {2:10.6f}\".format(magn, sph/100, (sqsph/100 - (sph/100)**2)**0.5))\n",
" print(\"\\n\")\n",
" \n",
" print(\"--- Incremental random noise ---\")\n",
" for magn in range(11):\n",
" sph = 0\n",
" sqsph = 0\n",
" for rep in range(10):\n",
" hr = [random.random()*10*magn for x in h1]\n",
" ph = f(h1,hr)\n",
" sph += ph\n",
" sqsph += ph**2\n",
" #if ph > 50:\n",
" # print(h1, hr, magn, ph)\n",
" print(\"{0:<30d} {1:15.6f} +/- {2:10.6f}\".format(magn, sph/100, (sqsph/100 - (sph/100)**2)**0.5))\n",
" print(\"\\n\")\n",
" \n",
" print(\"--- Incremental unbalanced distribution ---\")\n",
" hrr = [0]*int(len(h1)/2) + [1]*int(len(h1)/2)\n",
" for magn in range(1,11):\n",
" hr = [1+x*10*magn for x in hrr]\n",
" print(\"{0:<30d} {1:15.6f}\".format(magn, f(h1,hr)))\n",
" print(\"---\")\n",
" print(\"\\n\\n\")"
]
},
{
"cell_type": "markdown",
"id": "a7f41851",
"metadata": {},
"source": [
"## Tests"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "9eade642",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#### aggmaxprobhist ####\n",
"Reference [10, 9, 8, 7, 12, 10, 7, 5] 0.000000 True\n",
"Almost identical [10, 9, 8, 7, 13, 10, 7, 5] 0.000000 True\n",
"Identical sorting but not permutable [10, 5, 8, 7, 12, 10, 7, 9] 0.189489 False\n",
"noise [11, 8, 7, 8, 11, 9, 8, 6] 0.000000 True\n",
"Permutable [12, 10, 7, 5, 10, 9, 8, 7] 0.000000 True\n",
"non-proportional increment [100010, 100009, 100008, 100007, 100012, 100010, 100007, 100005] 0.000000 False\n",
"Random big numbers or zero [0, 0, 197.98556516655424, 172.2728994418659, 457.79335075776396, 152.4892738791217, 0, 225.23236097569952] 0.309266 False\n",
"max unbalance [0, 0, 0, 0, 10000, 10000, 10000, 10000] 0.263501 False\n",
"proportional increment [1000, 900, 800, 700, 1200, 1000, 700, 500] 0.000000 True\n",
"\n",
"\n",
"--- Incremental noise on signal ---\n",
"0 0.000000 +/- 0.000000\n",
"1 0.000000 +/- 0.000000\n",
"2 0.000749 +/- 0.004237\n",
"3 0.001959 +/- 0.008455\n",
"4 0.003080 +/- 0.013375\n",
"5 0.008437 +/- 0.033195\n",
"6 0.013145 +/- 0.051032\n",
"7 0.016885 +/- 0.059035\n",
"8 0.012466 +/- 0.043463\n",
"9 0.015080 +/- 0.053964\n",
"10 0.015644 +/- 0.059220\n",
"\n",
"\n",
"--- Incremental random noise ---\n",
"0 0.018806 +/- 0.056417\n",
"1 0.027545 +/- 0.088254\n",
"2 0.022396 +/- 0.071375\n",
"3 0.029703 +/- 0.091366\n",
"4 0.026686 +/- 0.085925\n",
"5 0.022741 +/- 0.073748\n",
"6 0.023412 +/- 0.073059\n",
"7 0.024663 +/- 0.084127\n",
"8 0.022264 +/- 0.075595\n",
"9 0.022961 +/- 0.070557\n",
"10 0.025343 +/- 0.080337\n",
"\n",
"\n",
"--- Incremental unbalanced distribution ---\n",
"1 0.230790\n",
"2 0.243396\n",
"3 0.249401\n",
"4 0.252661\n",
"5 0.254701\n",
"6 0.256097\n",
"7 0.257111\n",
"8 0.257881\n",
"9 0.258486\n",
"10 0.258973\n",
"---\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"tests(aggmaxprobhist)"
]
},
{
"cell_type": "markdown",
"id": "9fd5e884",
"metadata": {},
"source": [
"# Real trees "
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "7641e158",
"metadata": {},
"outputs": [],
"source": [
"def generate_histo(input_file,v):\n",
" t = Tree(input_file) # ouverture du fichier\n",
" l=[]\n",
" for n in t.get_descendants():\n",
" if t.get_distance(n,topology_only=True)==v:# test sur la distance\n",
" l.append(n)\n",
" histof=[]\n",
"\n",
" for x in range(len(l)):# pour toute les occurences dans l\n",
" histo=[]\n",
" for leaf in l[x]: # pour toute les feuilles dans L \n",
" histo.append(leaf)\n",
" histof.append(len(histo))# on recupere le nombre de feuille de chaque sous arbre\n",
" return histof"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "dc029957",
"metadata": {},
"outputs": [],
"source": [
"histo70k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.7_k20.nhx\",3)\n",
"histo80k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.8_k20.nhx\",3)\n",
"histo90k20=generate_histo(\"results_djeser/CrFBA3_PV00274_0.9_k20.nhx\",3)\n",
"agghisto70k20 = agg_vector(histo70k20)\n",
"agghisto80k20 = agg_vector(histo80k20)\n",
"agghisto90k20 = agg_vector(histo90k20)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "ae740395",
"metadata": {},
"outputs": [],
"source": [
"histo80k3=generate_histo(\"results_djeser/CrFBA3_PV00274_0.8_k3.nhx\",3)\n",
"histo90k3=generate_histo(\"results_djeser/CrFBA3_PV00274_0.9_k3.nhx\",3)\n",
"histo100k3=generate_histo(\"results_djeser/CrFBA3_PV00274_1.0_k3.nhx\",3)\n",
"agghisto80k3 = agg_vector(histo80k3)\n",
"agghisto90k3 = agg_vector(histo90k3)\n",
"agghisto100k3 = agg_vector(histo100k3)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "f95a30d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<BarContainer object of 16 artists>"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQDklEQVR4nO3df6zddX3H8edrrb/AGUp6y2qLKywVRaLD3DmUzDgrsZuE8sdYatTcTJZmCyoaNy0zGfEPFjKN02TTpQGkiQTWII5miY6m6sySibvgL0pFiLhSqPQ6549oglbf++N8Gy+3t/Tec86959xPn4+kOef7Od9zzgva+7qf8/11UlVIktryG6MOIEkaPstdkhpkuUtSgyx3SWqQ5S5JDVo96gAAa9eurU2bNo06hiStKPfdd9/3q2pivsfGotw3bdrE9PT0qGNI0oqS5H9O9pibZSSpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUFjcYaqpJUnH8wJY3W9X/4zLpy5S1KDLHdJapDlLkkNstwlqUGnLPcktyQ5muSBWWMfSvKtJN9I8pkkZ8167LokjyR5KMkblyi3JOkZLGTmfiuwdc7YPuCiqno58G3gOoAkFwLbgZd1z/l4klVDSytJWpBTlntVfQn4wZyxe6rqWLf4ZWBjd38bcEdVPVVVjwKPAK8aYl5J0gIMY5v724HPdvc3AI/NeuxwN3aCJDuSTCeZnpmZGUIMSdJxA5V7kg8Ax4Dbjg/Ns9q8ZzVU1a6qmqyqyYmJeb8CUJLUp77LPckUcDnwlqo6XuCHgXNnrbYReKL/eJLGQnLiH421vso9yVbg/cAVVfWzWQ/tBbYneU6S84DNwFcGjylJWoxTXlsmye3A64C1SQ4D19M7OuY5wL70foN/uar+oqoOJNkDPEhvc801VfXLpQovSZrfKcu9qt48z/DNz7D+DcANg4SSJA3GM1QlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNOmW5J7klydEkD8waOzvJviQPd7drZj12XZJHkjyU5I1LFVySdHILmbnfCmydM7YT2F9Vm4H93TJJLgS2Ay/rnvPxJKuGllaStCCnLPeq+hLwgznD24Dd3f3dwJWzxu+oqqeq6lHgEeBVw4kqSVqofre5n1NVRwC623Xd+AbgsVnrHe7GTpBkR5LpJNMzMzN9xpAkzWfYO1Qzz1jNt2JV7aqqyaqanJiYGHIMSTq99VvuTyZZD9DdHu3GDwPnzlpvI/BE//EkSf3ot9z3AlPd/Sng7lnj25M8J8l5wGbgK4NFlCQt1upTrZDkduB1wNokh4HrgRuBPUmuBg4BVwFU1YEke4AHgWPANVX1yyXKLkk6iVOWe1W9+SQPbTnJ+jcANwwSSpI0GM9QlaQGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAad8puYJKlFyYljVcufY6k4c5ekBlnuktQgy12SGmS5S1KD3KEqNaD1nYNaPGfuktSggco9yXuSHEjyQJLbkzw3ydlJ9iV5uLtdM6ywkqSF6bvck2wA3gVMVtVFwCpgO7AT2F9Vm4H93bIkaRkNullmNfC8JKuBM4AngG3A7u7x3cCVA76HJGmR+i73qnoc+DBwCDgC/Kiq7gHOqaoj3TpHgHXzPT/JjiTTSaZnZmb6jSFJmscgm2XW0Julnwe8EDgzyVsX+vyq2lVVk1U1OTEx0W8MSdI8Btks8wbg0aqaqapfAHcBrwGeTLIeoLs9OnhMSdJiDFLuh4BLkpyRJMAW4CCwF5jq1pkC7h4soiRpsfo+iamq7k1yJ3A/cAz4KrALeD6wJ8nV9H4BXDWMoJKkhRvoDNWquh64fs7wU/Rm8ZKkEfEMVUlqkOUuSQ2y3CWpQZa7JDXIS/5qaXgNWmmknLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUFeOEw6TeSDJ17Mra73Ym6tcuYuSQ2y3CWpQZa7JDXIbe6NcbuqJHDmLklNstwlqUEDlXuSs5LcmeRbSQ4meXWSs5PsS/Jwd7tmWGElLb3kxD9aeQaduX8M+FxVvQR4BXAQ2Ansr6rNwP5uWZK0jPou9yQvAF4L3AxQVT+vqh8C24Dd3Wq7gSsHiyj1z1moTleDzNzPB2aATyb5apKbkpwJnFNVRwC623XzPTnJjiTTSaZnZmYGiCFJmmuQcl8NvBL4RFVdDPyURWyCqapdVTVZVZMTExMDxJAkzTVIuR8GDlfVvd3ynfTK/skk6wG626ODRZQkLVbf5V5V3wMeS3JBN7QFeBDYC0x1Y1PA3QMllCQt2qBnqL4TuC3Js4HvAH9G7xfGniRXA4eAqwZ8D0nSIg1U7lX1NWBynoe2DPK6kqTBeIaqJDXIC4dJWtnmnrxQXigPnLlLUpMsd6lVnpp7WrPcJalBlrskNchyl6QGWe6S1CAPhdRpz++dVYss9z7NLQTLQIsx38ErHp6tYXKzjCQ1yHKXpAZZ7pLUIMt9JfMLQiWdhOUuSQ2y3CWpQR4KqV8b1+PzxjWXtEjLeU6FM3dJapAz9zFwupwhebr8d2q0RvHvbBw/XDpzl6SlMOKj2Sx3SWqQ5S5JDbLcJalBlrskNWjgo2WSrAKmgcer6vIkZwP/AmwCvgv8aVX936DvI83mkTfSMxvGzP1a4OCs5Z3A/qraDOzvliVJy2igck+yEXgTcNOs4W3A7u7+buDKQd5DkrR4g87cPwq8D/jVrLFzquoIQHe7bsD3kCQtUt/b3JNcDhytqvuSvK6P5+8AdgC86EUv6jeG1DT3Lahfg8zcLwWuSPJd4A7g9Uk+BTyZZD1Ad3t0vidX1a6qmqyqyYmJiQFiSJLm6rvcq+q6qtpYVZuA7cDnq+qtwF5gqlttCrh74JSSpEVZiuPcbwQuS/IwcFm3LElaRkO5KmRVfRH4Ynf/f4Etw3hdSVJ/PENVkhpkuUtSgyx3SWqQ5a7Tzwi/QEFaLn7NnsbKvF9XtvwxBuKJRxoHztwlqUGWuzQuRvydm2qL5S5JDbLcpUE429aYcofqHPPu0HNfmKQVxpm7JDXIcpekBlnuktQgy12SGuQOVUkrRgtnMC8XZ+6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQR4KqYF5eJo0fpy5S1KDLHdJapCbZfSM5n4fqN8FKq0Mfc/ck5yb5AtJDiY5kOTabvzsJPuSPNzdrhleXEnSQgyyWeYY8N6qeilwCXBNkguBncD+qtoM7O+Wl5ffjiPpNNd3uVfVkaq6v7v/E+AgsAHYBuzuVtsNXDlgRknSIg1lh2qSTcDFwL3AOVV1BHq/AIB1J3nOjiTTSaZnZmaGEWPp+ElA0goz8A7VJM8HPg28u6p+nAUWX1XtAnYBTE5OLvleurk7BsGdg9K4GOefz3HO9kwGmrkneRa9Yr+tqu7qhp9Msr57fD1wdLCIkqTFGuRomQA3Awer6iOzHtoLTHX3p4C7+48nSerHIJtlLgXeBnwzyde6sb8BbgT2JLkaOARcNVBCSdKi9V3uVfWfwMk2sG/p93UlSYPz8gOS1CDLXZIaZLlL0nENndNiuUtSg7wqpKQlM+8XuYz/+T9NcOa+3Ab42DfMT4sNffqUNA/LXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJalATFw6be10Ur0sk6XTnzF2SGtTEzH1czXu50+WPIek05MxdkhpkuUtSgyx3SWqQ29ylBXIfilaSJZu5J9ma5KEkjyTZuVTvI0k60ZKUe5JVwD8BfwRcCLw5yYVL8V6SVhi/wHdZLNXM/VXAI1X1nar6OXAHsG2J3kuSNEeqhr/VMMmfAFur6s+75bcBv19V75i1zg5gR7d4AfDQEN56LfD9IbzOUhjXbOOaC8zWj3HNBWbrx6ly/XZVTcz3wFLtUJ3vc9bTfotU1S5g11DfNJmuqslhvuawjGu2cc0FZuvHuOYCs/VjkFxLtVnmMHDurOWNwBNL9F6SpDmWqtz/G9ic5Lwkzwa2A3uX6L0kSXMsyWaZqjqW5B3AvwOrgFuq6sBSvNccQ93MM2Tjmm1cc4HZ+jGuucBs/eg715LsUJUkjZaXH5CkBlnuktSgJsp9XC91kOTcJF9IcjDJgSTXjjrTXElWJflqkn8bdZbjkpyV5M4k3+r+37161JmOS/Ke7u/ygSS3J3nuCLPckuRokgdmjZ2dZF+Sh7vbNWOU7UPd3+k3knwmyVnjkGvWY3+VpJKsXe5cz5QtyTu7fjuQ5O8X+norvtzH/FIHx4D3VtVLgUuAa8Yo23HXAgdHHWKOjwGfq6qXAK9gTPIl2QC8C5isqovoHSywfYSRbgW2zhnbCeyvqs3A/m55FG7lxGz7gIuq6uXAt4HrljsU8+ciybnAZcCh5Q40y63MyZbkD+md3f/yqnoZ8OGFvtiKL3fG+FIHVXWkqu7v7v+EXkltGG2qX0uyEXgTcNOosxyX5AXAa4GbAarq51X1w5GGerrVwPOSrAbOYITnb1TVl4AfzBneBuzu7u8GrlzOTMfNl62q7qmqY93il+md/zLyXJ1/AN7HCC/0eZJsfwncWFVPdescXejrtVDuG4DHZi0fZowK9Lgkm4CLgXtHHGW2j9L7B/2rEeeY7XxgBvhkt7nopiRnjjoUQFU9Tm/mdAg4Avyoqu4ZbaoTnFNVR6A3uQDWjTjPybwd+OyoQwAkuQJ4vKq+Puos83gx8AdJ7k3yH0l+b6FPbKHcT3mpg1FL8nzg08C7q+rHo84DkORy4GhV3TfqLHOsBl4JfKKqLgZ+yug2LTxNt/16G3Ae8ELgzCRvHW2qlSfJB+htsrxtDLKcAXwA+NtRZzmJ1cAaept1/xrYkyzsMpotlPtYX+ogybPoFfttVXXXqPPMcilwRZLv0tuU9foknxptJKD393m4qo5/wrmTXtmPgzcAj1bVTFX9ArgLeM2IM831ZJL1AN3tgj/GL4ckU8DlwFtqPE6y+R16v6y/3v0sbATuT/JbI031a4eBu6rnK/Q+ZS9oh28L5T62lzrofsPeDBysqo+MOs9sVXVdVW2sqk30/p99vqpGPgutqu8BjyW5oBvaAjw4wkizHQIuSXJG93e7hTHZ2TvLXmCquz8F3D3CLE+TZCvwfuCKqvrZqPMAVNU3q2pdVW3qfhYOA6/s/h2Og38FXg+Q5MXAs1ng1StXfLl3O2iOX+rgILBnmS51sBCXAm+jNyv+Wvfnj0cdagV4J3Bbkm8Avwv83Wjj9HSfJu4E7ge+Se/nZ2SnrSe5Hfgv4IIkh5NcDdwIXJbkYXpHf9w4Rtn+EfhNYF/3s/DPY5JrLJwk2y3A+d3hkXcAUwv9xOPlBySpQSt+5i5JOpHlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhr0//yGKcRs2GlBAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from matplotlib import pyplot as plt\n",
"X = np.arange(len(histo70k20))\n",
"plt.bar(X + 0.00, histo70k20, color = 'b', width = 0.25)\n",
"plt.bar(X + 0.25, histo80k20, color = 'r', width = 0.25)\n",
"plt.bar(X + 0.50, histo90k20, color = 'g', width = 0.25)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "8a4dd3b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8814593234696813\n",
"0.7869731847259129\n",
"0.4027629892694329\n"
]
}
],
"source": [
"print(aggmaxprobhist(histo70k20, histo80k20))\n",
"print(aggmaxprobhist(histo70k20, histo90k20))\n",
"print(aggmaxprobhist(histo80k20, histo90k20))"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "6c2ec90b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<BarContainer object of 16 artists>"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASPUlEQVR4nO3df6zldX3n8edrQd1Va8Tlwk6B7oAZ2aKxQ3OXdZdorNQ6dQ1os3aHdMnsyu7oBrq6td2CJqW2oSGtP9qkq2aUWdgUQSKwko26TlgjaVK1F0QcHCmgFC7MztxqW01t6A6++8f53ni491zumfNjzpnPPB/JyTnnc77fc15w577u937u90eqCklSW/7BrANIkibPcpekBlnuktQgy12SGmS5S1KDTp51AIBTTz21tm7dOusYknRcueeee/6iqhYGvTYX5b5161aWlpZmHUOSjitJ/nyj15yWkaQGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBs3FEaqSxpOsHyvWDHphnhOKW+6S1CDLXZIaZLlLUoMsd0lq0KblnuSsJF9IciDJA0ne2Y2/JMm+JA9196f0rXN1koeTPJjkDdP8D5AkrTfMlvsR4N1V9ZPAq4ArkpwHXAXcVVXbgLu653Sv7QReDuwAPpzkpGmElyQNtmm5V9XBqrq3e/x94ABwBnAJcGO32I3Am7vHlwC3VNVTVfVt4GHgggnnliQ9i6Oac0+yFTgf+DJwelUdhN4PAOC0brEzgMf7VlvuxiRJx8jQ5Z7khcBtwLuq6nvPtuiAsXVHTyTZnWQpydLKysqwMSRJQxiq3JM8h16x31RVt3fDh5Js6V7fAhzuxpeBs/pWPxN4cu17VtWeqlqsqsWFhYHXd5UkjWiYvWUCXA8cqKoP9r10J7Cre7wL+HTf+M4kz0tyNrAN+MrkIkuSNjPMuWUuBC4Dvp7kvm7sPcB1wK1JLgceA94KUFUPJLkV+Aa9PW2uqKqnJx1ckrSxTcu9qv6YwfPoABdtsM61wLVj5JIkjcEjVCWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUHDnH5Akk5Yed/6A/TrmnUnup07brlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBg1zmb29SQ4n2d839skk93W3R1ev0JRka5K/7Xvto1PMLknawDD7ud8A/CHwP1cHqurfrj5O8gHgr/uWf6Sqtk8onyRpBMNcZu/uJFsHvdZdPPsXgddNOJckaQzjzrm/GjhUVQ/1jZ2d5KtJvpjk1RutmGR3kqUkSysrK2PGkCT1G7fcLwVu7nt+EPiJqjof+BXgE0leNGjFqtpTVYtVtbiwsDBmDElSv5HLPcnJwC8An1wdq6qnquo73eN7gEeAl40bUpJ0dMbZcv9Z4JtVtbw6kGQhyUnd43OAbcC3xosoSTpaw+wKeTPwJ8C5SZaTXN69tJNnTskAvAa4P8nXgE8B76iq704ysCRpc8PsLXPpBuP/fsDYbcBt48eSJI3DI1QlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ0a5kpMe5McTrK/b+w3kzyR5L7u9sa+165O8nCSB5O8YVrBJUkbG2bL/QZgx4DxD1XV9u72GYAk59G7/N7Lu3U+vHpNVUnSsbNpuVfV3cCw10G9BLilqp6qqm8DDwMXjJFPkjSCcebcr0xyfzdtc0o3dgbweN8yy93YOkl2J1lKsrSysjJGDEnSWqOW+0eAlwLbgYPAB7rxDFi2Br1BVe2pqsWqWlxYWBgxhiRpkJHKvaoOVdXTVfVD4GP8aOplGTirb9EzgSfHiyhJOlojlXuSLX1P3wKs7klzJ7AzyfOSnA1sA74yXkRJ0tE6ebMFktwMvBY4NckycA3w2iTb6U25PAq8HaCqHkhyK/AN4AhwRVU9PZXkkqQNbVruVXXpgOHrn2X5a4FrxwklSRqPR6hKUoMsd0lqkOUuSQ2y3CWpQZv+QVWalLxv/TFudc3AY9wkjcktd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDPIjpOJI1xwDVoAtflQcFSXLLXZKaZLlLUoM2Lfcke5McTrK/b+z3knwzyf1J7kjy4m58a5K/TXJfd/voFLNLkjYwzJb7DcCONWP7gFdU1SuBPwOu7nvtkara3t3eMZmYkqSjsWm5V9XdwHfXjH2+qo50T78EnDmFbJKkEU1izv1twGf7np+d5KtJvpjk1RutlGR3kqUkSysrKxOIIUlaNVa5J3kvcAS4qRs6CPxEVZ0P/ArwiSQvGrRuVe2pqsWqWlxYWBgnhiRpjZHLPcku4E3AL1X1dq6uqqeq6jvd43uAR4CXTSKoJGl4I5V7kh3ArwMXV9UP+sYXkpzUPT4H2AZ8axJBJUnD2/QI1SQ3A68FTk2yDFxDb++Y5wH70jts8kvdnjGvAX4ryRHgaeAdVfXdgW8sSZqaTcu9qi4dMHz9BsveBtw2bihJ0ng8QlWSGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yCsxSdIxkvetv3paXTOdq6e55S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ3atNyT7E1yOMn+vrGXJNmX5KHu/pS+165O8nCSB5O8YVrBJUkbG2bL/QZgx5qxq4C7qmobcFf3nCTnATuBl3frfHj1mqqSpGNn03KvqruBtddBvQS4sXt8I/DmvvFbquqpqvo28DBwwWSiSpKGNeqc++lVdRCguz+tGz8DeLxvueVubJ0ku5MsJVlaWVkZMYYkaZBJ/0F1/SnPYOApz6pqT1UtVtXiwsLChGNI0olt1HI/lGQLQHd/uBtfBs7qW+5M4MnR40mSRjFqud8J7Ooe7wI+3Te+M8nzkpwNbAO+Ml5ESdLR2vRiHUluBl4LnJpkGbgGuA64NcnlwGPAWwGq6oEktwLfAI4AV1TV01PKLknawKblXlWXbvDSRRssfy1w7TihJEnj8QhVSWqQ5S5JDbLcJalBlrskjSlZfxs8eOxY7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBm55+QNKJZdAeezXobN418GzemhNuuUtSg9xy13Ep71u/JVnXuCUprXLLXZIaZLlLUoNGnpZJci7wyb6hc4DfAF4M/Cdg9arX76mqz4z6OZKkozdyuVfVg8B2gCQnAU8AdwD/AfhQVb1/EgElSUdvUtMyFwGPVNWfT+j9JEljmFS57wRu7nt+ZZL7k+xNcsqgFZLsTrKUZGllZWXQIpKkEY29K2SS5wIXA1d3Qx8Bfhuo7v4DwNvWrldVe4A9AIuLi+7Dpplxt0q1aBJb7j8P3FtVhwCq6lBVPV1VPwQ+Blwwgc+QJB2FSZT7pfRNySTZ0vfaW4D9E/gMSdJRGGtaJsnzgdcDb+8b/t0k2+lNyzy65jVJ0jEwVrlX1Q+Af7xm7LKxEkknAE/ONT3+DaXHI1QlqUGWuyQ1yHKXdNxI1t/WDwgsd0lqkuUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAaNeyWmR4HvA08DR6pqMclLgE8CW+ldiekXq+ovx4spSToak9hy/5mq2l5Vi93zq4C7qmobcFf3XA0b6jSsnopVOqamMS1zCXBj9/hG4M1T+AxJ0rMYt9wL+HySe5Ls7sZOr6qDAN39aYNWTLI7yVKSpZWVlTFjSNPnbyg6now15w5cWFVPJjkN2Jfkm8OuWFV7gD0Ai4uLJ97VayVpisYq96p6srs/nOQO4ALgUJItVXUwyRbg8ARyakhe+V0SjDEtk+QFSX5s9THwc8B+4E5gV7fYLuDT44aUJB2dcbbcTwfuSG+O8WTgE1X1uSR/Ctya5HLgMeCt48eUJB2Nkcu9qr4F/NSA8e8AF40TSpI0Ho9QlaQGWe6S1CDLXZIaNO5+7mrc2l0r3a1SOj645S5JDbLcJalBlrskNchyl6QGWe6S1CD3lpHmmCeC06gsd0kz5w+xyXNaRpIaZLlLUoMsd0lqkOV+gvJ6oFLbLHc1bagfYlKDxrnM3llJvpDkQJIHkryzG//NJE8kua+7vXFycafPLVpJLRhnV8gjwLur6t7uWqr3JNnXvfahqnr/+PEkSaMY5zJ7B4GD3ePvJzkAnDGpYJKk0U1kzj3JVuB84Mvd0JVJ7k+yN8kpG6yzO8lSkqWVlZVJxJAkdcYu9yQvBG4D3lVV3wM+ArwU2E5vy/4Dg9arqj1VtVhViwsLC+PGkCT1GavckzyHXrHfVFW3A1TVoap6uqp+CHwMuGD8mJKkozHO3jIBrgcOVNUH+8a39C32FmD/6PEkSaMYZ2+ZC4HLgK8nua8bew9waZLtQAGPAm8f4zMkSSMYZ2+ZPwYG7fD9mdHjSJImwVP+zgFPdypp0jz9gKQTUutHo7vlLmkk/sY539xyl6QGWe6S1CCnZaQJc7pC86CJcl/7N48atIdm+c2lE5s/dE4sTstIUoMsd0lqkOUuSQ2y3Keo9YMkJM0vy12SGmS5S5oaf3udHctdkhpkuWuuuKUnTUYTBzHNwtoDQjwYRNI8mVq5J9kB/AFwEvDxqrpuWp81VB6PzpN0ApnKtEySk4D/Dvw8cB69S++dN43PkiStN6059wuAh6vqW1X1d8AtwCVT+ixJ0hqpKZxQK8m/AXZU1X/snl8G/IuqurJvmd3A7u7pucCDE/joU4G/mMD7TMO8ZpvXXGC2UcxrLjDbKDbL9U+ramHQC9Oacx+0O8MzfopU1R5gz0Q/NFmqqsVJvuekzGu2ec0FZhvFvOYCs41inFzTmpZZBs7qe34m8OSUPkuStMa0yv1PgW1Jzk7yXGAncOeUPkuStMZUpmWq6kiSK4H/Q29XyL1V9cA0PmuNiU7zTNi8ZpvXXGC2UcxrLjDbKEbONZU/qEqSZsvTD0hSgyx3SWpQE+WeZEeSB5M8nOSqWedZleSsJF9IciDJA0neOetMayU5KclXk/zvWWdZleTFST6V5Jvd/7t/OetMq5L81+5ruT/JzUn+4Qyz7E1yOMn+vrGXJNmX5KHu/pQ5yvZ73df0/iR3JHnxPOTqe+1Xk1SSU491rmfLluSXu357IMnvDvt+x325z/mpDo4A766qnwReBVwxR9lWvRM4MOsQa/wB8Lmq+mfATzEn+ZKcAfwXYLGqXkFvZ4GdM4x0A7BjzdhVwF1VtQ24q3s+CzewPts+4BVV9Urgz4Crj3UoBuciyVnA64HHjnWgPjewJluSn6F3dP8rq+rlwPuHfbPjvtyZ41MdVNXBqrq3e/x9eiV1xmxT/UiSM4F/DXx81llWJXkR8BrgeoCq+ruq+quZhnqmk4F/lORk4PnM8PiNqrob+O6a4UuAG7vHNwJvPpaZVg3KVlWfr6oj3dMv0Tv+Zea5Oh8C/htrDrY8ljbI9p+B66rqqW6Zw8O+XwvlfgbweN/zZeaoQFcl2QqcD3x5xlH6/T69f9A/nHGOfucAK8D/6KaLPp7kBbMOBVBVT9DbcnoMOAj8dVV9frap1jm9qg5Cb+MCOG3GeTbyNuCzsw4BkORi4Imq+tqsswzwMuDVSb6c5ItJ/vmwK7ZQ7pue6mDWkrwQuA14V1V9b9Z5AJK8CThcVffMOssaJwM/DXykqs4H/obZTS08Qzd/fQlwNvDjwAuS/LvZpjr+JHkvvSnLm+Ygy/OB9wK/MessGzgZOIXetO6vAbcmw12tpoVyn+tTHSR5Dr1iv6mqbp91nj4XAhcneZTeVNbrkvzRbCMBva/nclWt/obzKXplPw9+Fvh2Va1U1f8Hbgf+1YwzrXUoyRaA7n7oX+OPhSS7gDcBv1TzcZDNS+n9sP5a971wJnBvkn8y01Q/sgzcXj1fofdb9lB/8G2h3Of2VAfdT9jrgQNV9cFZ5+lXVVdX1ZlVtZXe/7P/W1Uz3wqtqv8HPJ7k3G7oIuAbM4zU7zHgVUme331tL2JO/tjb505gV/d4F/DpGWZ5hu4CPr8OXFxVP5h1HoCq+npVnVZVW7vvhWXgp7t/h/PgfwGvA0jyMuC5DHn2yuO+3Ls/0Kye6uAAcOsxOtXBMC4ELqO3VXxfd3vjrEMdB34ZuCnJ/cB24HdmG6en+23iU8C9wNfpff/M7LD1JDcDfwKcm2Q5yeXAdcDrkzxEb++PmVwBbYNsfwj8GLCv+1746JzkmgsbZNsLnNPtHnkLsGvY33g8/YAkNei433KXJK1nuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QG/T0eEhX1C1t3fgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.bar(X + 0.00, histo80k3, color = 'b', width = 0.25)\n",
"plt.bar(X + 0.25, histo90k3, color = 'r', width = 0.25)\n",
"plt.bar(X + 0.50, histo100k3, color = 'g', width = 0.25)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "1540d594",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n",
"0.9798736319567721\n",
"0.9798736319567721\n"
]
}
],
"source": [
"print(aggmaxprobhist(histo80k3, histo90k3))\n",
"print(aggmaxprobhist(histo80k3, histo100k3))\n",
"print(aggmaxprobhist(histo90k3, histo100k3))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment