cleanup

c3e61ba4 · Edoardo Sarti · 27a3d69a · c3e61ba4
Commit c3e61ba4 authored Apr 25, 2022 by Edoardo Sarti
Show whitespace changes
Inline Side-by-side

Showing with 34 additions and 38 deletions

mesureHistoramme.ipynb tree_similarity/mesureHistoramme.ipynb +34 -38

No files found.
--- a/tree_similarity/mesureHistoramme.ipynb
+++ b/tree_similarity/mesureHistoramme.ipynb
@@ -212,7 +212,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
@@ -226,37 +226,37 @@
      "noise                                                          0.000000 True\n",
      "Permutable                                                     0.000000 True\n",
      "non-proportional increment                                     0.000000 True\n",
-      "Random big numbers or zero                                     0.025459 False\n",
+      "Random big numbers or zero                                     0.024565 False\n",
      "max unbalance                                                  0.146087 False\n",
      "proportional increment                                         0.000000 True\n",
      "\n",
      "\n",
      "--- Incremental noise on signal ---\n",
      "0                                     0.000000 +/-   0.000000\n",
-      "1                                     0.000185 +/-   0.000440\n",
+      "1                                     0.000219 +/-   0.000476\n",
-      "2                                     0.001894 +/-   0.002282\n",
+      "2                                     0.001619 +/-   0.001852\n",
-      "3                                     0.005138 +/-   0.004323\n",
+      "3                                     0.005863 +/-   0.004867\n",
-      "4                                     0.010559 +/-   0.007702\n",
+      "4                                     0.008322 +/-   0.005766\n",
-      "5                                     0.012664 +/-   0.007561\n",
+      "5                                     0.013591 +/-   0.008541\n",
-      "6                                     0.015651 +/-   0.012095\n",
+      "6                                     0.017515 +/-   0.012170\n",
-      "7                                     0.020212 +/-   0.013827\n",
+      "7                                     0.020925 +/-   0.014721\n",
-      "8                                     0.026308 +/-   0.020254\n",
+      "8                                     0.024099 +/-   0.019659\n",
-      "9                                     0.029255 +/-   0.024522\n",
+      "9                                     0.024999 +/-   0.019011\n",
-      "10                                    0.031770 +/-   0.023523\n",
+      "10                                    0.032244 +/-   0.024834\n",
      "\n",
      "\n",
      "--- Incremental random noise ---\n",
      "0                                     0.000138 +/-   0.000000\n",
-      "1                                     0.025829 +/-   0.018906\n",
+      "1                                     0.025660 +/-   0.019466\n",
-      "2                                     0.033744 +/-   0.025267\n",
+      "2                                     0.027256 +/-   0.022967\n",
-      "3                                     0.028168 +/-   0.022969\n",
+      "3                                     0.030341 +/-   0.024569\n",
-      "4                                     0.028078 +/-   0.024013\n",
+      "4                                     0.027788 +/-   0.019176\n",
-      "5                                     0.029980 +/-   0.025617\n",
+      "5                                     0.023954 +/-   0.018645\n",
-      "6                                     0.028973 +/-   0.021527\n",
+      "6                                     0.025143 +/-   0.021361\n",
-      "7                                     0.028279 +/-   0.019776\n",
+      "7                                     0.030559 +/-   0.023147\n",
-      "8                                     0.028193 +/-   0.024633\n",
+      "8                                     0.025742 +/-   0.017046\n",
-      "9                                     0.029332 +/-   0.024630\n",
+      "9                                     0.026679 +/-   0.021437\n",
-      "10                                    0.025569 +/-   0.022864\n",
+      "10                                    0.026494 +/-   0.023600\n",
      "\n",
      "\n",
      "--- Incremental unbalanced distribution ---\n",
@@ -339,6 +339,7 @@
    "    am = np.argmax([pears(h,h2) for h in ps])\n",
    "    return am, ps[am], pears(ps[am],h2)\n",
    "\n",
+    "\"\"\"\n",
    "def probhist(hh1, hh2):\n",
    "    #pseudocounts\n",
    "    h1 = [x+1 for x in hh1]\n",
@@ -359,6 +360,7 @@
    "        res += rat*math.log((rat/ratsort), 2)\n",
    "    return res\n",
    "\n",
+    "\n",
    "def probhist(hh1, hh2, symm=True):\n",
    "    #Pseudocounts\n",
    "    eps = 0.00001\n",
@@ -390,33 +392,27 @@
    "        return res/2\n",
    "\n",
    "    return res\n",
+    "\"\"\"\n",
    "\n",
-    "\n",
+    "def probhist(hh1, hh2, symm=True):   \n",
-    "def probhist(hh1, hh2, symm=True):\n",
+    "    # Pseudocounts\n",
-    "    def likelihood(doubleh, d):\n",
-    "        diffs = np.zeros(len(doubleh)+1)\n",
-    "        for i in range(len(doubleh)):\n",
-    "            diffs[i] = abs(doubleh[i][0]-doubleh[i][1])\n",
-    "        diffs[-1] = 10\n",
-    "        return sum(diffs >= d)/(len(diffs))\n",
-    "    \n",
-    "    #NO Pseudocounts\n",
    "    eps = 0.00001\n",
    "    h1 = [x+(max(hh1)-min(hh1)+eps)/100 for x in hh1]\n",
    "    h2 = [x+(max(hh2)-min(hh2)+eps)/100 for x in hh2]\n",
    "    \n",
-    "    #Normalize histos\n",
+    "    # Normalize histos\n",
    "    s1, s2 = sum(h1), sum(h2)\n",
    "    nh1, nh2 = [x/s1 for x in h1], [x/s2 for x in h2]\n",
    "    \n",
-    "    #Couple histos and sort second\n",
+    "    # Couple histos and sort second\n",
    "    twonhs = list(zip(nh1,nh2))\n",
    "    twonhs = sorted(twonhs, key= lambda x:x[1])\n",
    "    \n",
-    "    #Sort both histos and then couple\n",
+    "    # Sort both histos and then couple\n",
    "    twosortednhs = list(zip(sorted(nh1), sorted(nh2)))\n",
    "    \n",
-    "    # SUM P1/P2 log2(P1/P1') = SUM P1/P2 log2((P1/P2) / (P1'/P2')) because P2 = P2'\n",
+    "    # Cross-mutual information\n",
+    "    # SUM f(h1,h2) log2 (f(h1,h2) / f(h1',h2'))   with h2 = h2'\n",
    "    res = 0\n",
    "    for i in range(len(h1)):\n",
    "        d = stable_sigmoid(abs(twonhs[i][0]-twonhs[i][1])-0.5)\n",
@@ -426,8 +422,7 @@
    "    return res\n",
    "\n",
    "\n",
-    "\n",
+    "\"\"\"\n",
-    "\n",
    "def probhist2(hh1, hh2):\n",
    "    def likelihood(doubleh, d):\n",
    "        diffs = np.zeros(len(doubleh)+1)\n",
@@ -451,6 +446,7 @@
    "    for i in range(len(h1)):\n",
    "        res += likelihood(twosortednhs, twonhs[i][0]-twonhs[i][1])*math.log(likelihood(twosortednhs, twonhs[i][0]-twonhs[i][1]), 2)\n",
    "    return res\n",
+    "\"\"\"\n",
    "\n",
    "def agg_probhist(h1, h2):\n",
    "    return probhist(agg_vector(h1), agg_vector(h2))\n",