Commit 6f37e209 authored by Kelly Chang

kelly push

parent 332395dc
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 447,
"id": "8868bc30",
"metadata": {},
"outputs": [],
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 448,
"id": "76317b02",
"metadata": {},
"outputs": [],
......@@ -72,14 +72,14 @@
" \"\"\"\n",
" tiff = []\n",
" for im in images:\n",
" if im[-7:-6] == num:\n",
" if im[-7:-5] == num:\n",
" tiff.append(im)\n",
" return tiff"
]
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 449,
"id": "be1ff8a1",
"metadata": {},
"outputs": [],
......@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 440,
"id": "8483903e",
"metadata": {},
"outputs": [],
......@@ -171,237 +171,903 @@
" return nodes[0][0]"
]
},
{
"cell_type": "markdown",
"id": "c7104fbf",
"metadata": {},
"source": [
"### Huffman without dividing into bins"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "64a3a193",
"execution_count": 401,
"id": "a43f3f1c",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.444949904726781\n"
]
}
],
"source": [
"def reconstruct(error, A):\n",
" \"\"\"\n",
" Function that reconstructs the original image\n",
" from the error matrix and using the predictive\n",
" algorithm developed in the encoding.\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"def huffman_nb(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" \n",
" Parameters:\n",
" error (array): matrix of errors computed in encoding. Same \n",
" shape as the original image (512, 640) in this case\n",
" A (array): Matrix used for the system of equations to create predictions\n",
" Returns: \n",
" image (array): The reconstructed image\n",
" \"\"\"\n",
" new_e = error.copy()\n",
" rows, columns = new_e.shape\n",
"\n",
" for r in range(1, rows-1):\n",
" for c in range(1, columns-1):\n",
" z0, z1, z2, z3 = new_e[r-1][c-1], new_e[r-1][c], new_e[r-1][c+1], new_e[r][c-1]\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" \n",
" \n",
" #ab_error = np.abs(new_error)\n",
" #string = [str(i) for i in ab_error]\n",
" string = [str(i) for i in new_error.astype(int)]\n",
" freq = dict(Counter(string))\n",
" \n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encoding = huffman_code_tree(node)\n",
" #encoded = [\"1\"+encoding[str(-i)] if i < 0 else \"0\"+encoding[str(i)] for i in error]\n",
" \n",
" # return the huffman dictionary\n",
" return encoding, new_error, image.reshape(-1)\n",
" \n",
" \n",
"def compress_rate_nb(image, error, encoding):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" c_len += len(encoding[str(int(error[i]))])\n",
"\n",
" '''if r == 345 and c == 421:\n",
" print(new_e[r][c])\n",
" print(np.linalg.solve(A,y)[-1])\n",
" print(new_e[r][c] + np.linalg.solve(A,y)[-1])\n",
" print(np.ceil(new_e[r][c]) + np.floor(np.linalg.solve(A,y)[-1]))\n",
" \n",
" y0 = np.ravel(-z0+z2-z3)\n",
" y1 = np.ravel(z0+z1+z2)\n",
" y2 = np.ravel(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" # use numpy solver to solve the system of equations all at once\n",
" predict = np.floor(np.linalg.solve(A,y)[-1])\n",
" # flatten the neighbor pixlels and stack them together\n",
" z0 = np.ravel(z0)\n",
" z1 = np.ravel(z1)\n",
" z2 = np.ravel(z2)\n",
" z3 = np.ravel(z3)\n",
" neighbor = np.vstack((z0,z1,z2,z3)).T'''\n",
" \n",
" #Real solution that works, DO NOT DELETE\n",
" print(new_e[r][c]+ np.floor(np.linalg.solve(A,y)[-1]))\n",
" new_e[r][c] = new_e[r][c] + np.floor(np.linalg.solve(A,y)[-1])\n",
" print(new_e[r][c])\n",
" #new_e[r][c] = np.ceil(new_e[r][c]) + np.floor(np.linalg.solve(A,y)[-1])\n",
" \n",
" return new_e"
" return c_len/o_len\n",
"\n",
"\n",
"encoding, error, image = huffman_nb(images[0])\n",
"print(compress_rate_nb(image, error, encoding))"
]
},
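{
"cell_type": "markdown",
"id": "ad0c0de1",
"metadata": {},
"source": [
"The cell above uses `make_tree` and `huffman_code_tree` from the earlier cells. For reference, here is a minimal, self-contained sketch of the same idea: build one Huffman table over all error symbols and compare the coded length to a fixed-width baseline. `huffman_code`, `coded_ratio`, and the 16-bit baseline are illustrative assumptions, not part of the pipeline above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad0c0de2",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: heap-based Huffman coding of a symbol sequence.\n",
"# It mirrors the principle of make_tree/huffman_code_tree without depending on them.\n",
"import heapq\n",
"from collections import Counter\n",
"\n",
"def huffman_code(symbols):\n",
"    \"\"\"Return a dict mapping each symbol to its Huffman bit string.\"\"\"\n",
"    freq = Counter(symbols)\n",
"    # heap entries: (frequency, unique tie-breaker, {symbol: code built so far})\n",
"    heap = [(f, i, {s: \"\"}) for i, (s, f) in enumerate(freq.items())]\n",
"    heapq.heapify(heap)\n",
"    if len(heap) == 1:\n",
"        return {s: \"0\" for s in heap[0][2]}  # degenerate single-symbol input\n",
"    tie = len(heap)\n",
"    while len(heap) > 1:\n",
"        f1, _, t1 = heapq.heappop(heap)\n",
"        f2, _, t2 = heapq.heappop(heap)\n",
"        # prepend one bit to every code in the two merged subtrees\n",
"        merged = {s: \"0\" + c for s, c in t1.items()}\n",
"        merged.update({s: \"1\" + c for s, c in t2.items()})\n",
"        heapq.heappush(heap, (f1 + f2, tie, merged))\n",
"        tie += 1\n",
"    return heap[0][2]\n",
"\n",
"def coded_ratio(values, bits_per_symbol=16):\n",
"    \"\"\"Huffman-coded bit count divided by a fixed-width baseline.\"\"\"\n",
"    syms = [str(int(v)) for v in values]\n",
"    table = huffman_code(syms)\n",
"    return sum(len(table[s]) for s in syms) / (bits_per_symbol * len(syms))\n",
"\n",
"# toy usage: a skewed distribution compresses well below 1.0\n",
"print(coded_ratio([0, 0, 0, 0, 1, 1, 2, 3, 0, 1]))"
]
},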
{
"cell_type": "markdown",
"id": "eac2f456",
"metadata": {},
"source": [
"### Huffman with dividing into non-uniform bins"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "91879f19",
"execution_count": 497,
"id": "207b0bd2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"257\n"
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'images'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_20810/3539667741.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mc_len\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mo_len\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0mscenes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfile_extractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0mimages\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimage_extractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscenes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0mencode1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mboundary\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhuffman\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_20810/3921095921.py\u001b[0m in \u001b[0;36mfile_extractor\u001b[0;34m(dirname)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfile_extractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"images\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mscenes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfile\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mscenes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'images'"
]
},
}
],
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 25\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 25\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 40\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" mask = diff > 40\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 70\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 70\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
"\n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" bins = [25,40,70]\n",
" \n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary, bins\n",
"\n",
"def compress_rate(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 25:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
"\n",
" if diff[i] <= 40 and diff[i] > 25:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
" \n",
" if diff[i] <= 70 and diff[i] > 40:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" \n",
" if diff[i] > 70:\n",
" c_len += len(encode5[str(int(error[i]))])\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, image, error, diff, boundary = huffman(images[0])\n",
"compress_rate(image, error, diff, boundary, encode1, encode2, encode3, encode4, encode5)\n"
]
},
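{
"cell_type": "markdown",
"id": "ad0c0de3",
"metadata": {},
"source": [
"The cell above builds one mask per bin (diff <= 25, 25 < diff <= 40, 40 < diff <= 70, diff > 70) plus a table for the boundary pixels. The same bin selection can be expressed once with `np.digitize`; the sketch below does that and reuses the illustrative `huffman_code` helper from the sketch earlier. `binned_tables`, `binned_code_length`, and the synthetic data are assumptions for demonstration only; the boundary handling of `huffman()` is not reproduced here."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad0c0de4",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: one Huffman table per diff-bin, selected via np.digitize.\n",
"import numpy as np\n",
"\n",
"def binned_tables(error, diff, edges=(25, 40, 70)):\n",
"    \"\"\"Return (bin index per pixel, one Huffman table per bin).\"\"\"\n",
"    # right=True makes bin 0 correspond to diff <= edges[0], matching the masks above\n",
"    bin_idx = np.digitize(diff, edges, right=True)\n",
"    tables = []\n",
"    for b in range(len(edges) + 1):\n",
"        vals = error[bin_idx == b].astype(int)\n",
"        syms = [str(v) for v in vals] or [\"0\"]  # guard against an empty bin\n",
"        tables.append(huffman_code(syms))\n",
"    return bin_idx, tables\n",
"\n",
"def binned_code_length(error, bin_idx, tables):\n",
"    \"\"\"Total code bits when every pixel is coded with its bin's table.\"\"\"\n",
"    return sum(len(tables[b][str(int(e))]) for e, b in zip(error, bin_idx))\n",
"\n",
"# toy usage with synthetic data\n",
"rng = np.random.default_rng(0)\n",
"toy_error = rng.integers(-5, 6, size=1000)\n",
"toy_diff = rng.integers(0, 100, size=1000)\n",
"idx, tables = binned_tables(toy_error, toy_diff)\n",
"print(binned_code_length(toy_error, idx, tables) / (16 * toy_error.size))"
]
},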
{
"cell_type": "markdown",
"id": "3a3f06a5",
"metadata": {},
"source": [
"### Huffman with dividing into uniform bins"
]
},
{
"cell_type": "code",
"execution_count": 415,
"id": "14075c94",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQCklEQVR4nO3df6zdd13H8efLwQjhR9bRS23azg6txkHi2JqtiWhQtOu2mI6oZPxhG1yoCVsCiSYW90cJk2SYgLERZ4Y0dAaZi4BrYFhKgxL/6OgdlHY/GL2MLmvTtYXODYMZTt/+cT53+VrO6b29v865vc9HcnK+5/39nu95n2+/97z6/XG+J1WFJGlp+5lhNyBJGj7DQJJkGEiSDANJEoaBJAl4xbAbmKnly5fX2rVrh92GJC0qjzzyyA+qauzc+qINg7Vr1zI+Pj7sNiRpUUnydL+6u4kkSYaBJMkwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQi/gayNBfWbv/Sy8PH7r55iJ1Iw+WWgSTJMJAkGQaSJAwDSRKGgSQJzyaS+vIsIy01bhlIkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwmsTaYnwWkPS+bllIEkyDCRJhoEkCcNAksQ0wiDJmiRfS/J4kseSvL/VL0+yL8nRdr+s1ZNkZ5KJJIeTXNOZ19Y2/dEkWzv1a5Mcac/ZmSTz8WYlSf1NZ8vgJeCPq+oqYANwe5KrgO3A/qpaB+xvjwFuBNa12zbgHuiFB7ADuB64DtgxGSBtmvd2nrdp9m9NkjRdU4ZBVZ2sqm+24R8BTwCrgM3A7jbZbuCWNrwZuK96DgCXJVkJ3ADsq6qzVfUcsA/Y1Ma9vqoOVFUB93XmJUlaABd0zCDJWuCtwMPAiqo62UY9C6xow6uAZzpPO95q56sf71Pv9/rbkownGT9z5syFtC5JOo9ph0GS1wKfAz5QVS90x7X/0dcc9/ZTqureqlpfVevHxsbm++UkacmYVhgkeSW9IPhMVX2+lU+1XTy0+9OtfgJY03n66lY7X311n7okaYFM52yiAJ8Cnqiqj3dG7QEmzwjaCjzYqW9pZxVtAJ5vu5P2AhuTLGsHjjcCe9u4F5JsaK+1pTMvSdICmM61iX4V+APgSJJDrfZnwN3AA0luA54G3tXGPQTcBEwAPwbeA1BVZ5PcBRxs0324qs624fcBnwZeDXy53SRJC2TKMKiqfwcGnff/jj7TF3D7gHntAnb1qY8Db5mqF2nYvOCdLlZ+A1mSZBhIkgwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkMb0ft5EWDX9vQJoZtwwkSYaBJMkwkCRhGEiS8ACylqDuQWZJPYaB1BgSWsrcTSRJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJ+KUzaUp+GU1LgVsGkiTDQJJkGEiSMAwkSXgAWZoT/vayFrsptwyS7EpyOsmjndqHkpxIcqjdbuqM+2CSiSRPJrmhU9/UahNJtnfqVyZ5uNX/Mcmlc/kGJUlTm85uok8Dm/rU/7Kqrm63hwCSXAXcCry5PedvklyS5BLgE8CNwFXAu9u0AB9t8/oF4Dngttm8IUnShZsyDKrq68DZac5vM3B/Vb1YVd8HJoDr2m2iqp6qqp8A9wObkwT4TeCf2vN3A7dc2FuQJM3WbA4g35HkcNuNtKzVVgHPdKY53mqD6m8A/qOqXjqn3leSbUnGk4yfOXNmFq1LkrpmGgb3AD8PXA2cBD42Vw2dT1XdW1Xrq2r92NjYQrykJC0JMzqbqKpOTQ4n+STwxfbwBLCmM+nqVmNA/YfAZUle0bYOutNLkhbIjLYMkqzsPHwnMHmm0R7g1iSvSnIlsA74BnAQWNfOHLqU3kHmPVVVwNeA32vP3wo8OJOeJEkzN+WWQZLPAm8Hlic5DuwA3p7kaqCAY8AfAVTVY0keAB4HXgJur6r/afO5A9gLXALsqqrH2kv8KXB/kj8HvgV8aq7enCRpeqYMg6p6d5/ywA/sqvoI8JE+9YeAh/rUn6J3tpEkaUj8BrIuWl56Wpo+r00kSTIMJEmGgSQJw0CShGEgScIwkCRhGEiSMAwkSfilMy1S/sykNLcMA2mG/IazLibuJpIkuWUgzTV3YWkxcstAkmQYSJIMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkvByFLgJeME6aPbcMJEmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKYRhgk2ZXkdJJHO7XLk+xLcrTdL2v1JNmZZCLJ4STXdJ6ztU1/NMnWTv3aJEfac3YmyVy/SUnS+U1ny+DTwKZzatuB/VW1DtjfHgPcCKxrt23APdALD2AHcD1wHbBjMkDaNO/tPO/c15IkzbMpw6Cqvg6cPae8GdjdhncDt3Tq91XPAeCyJCuBG4B9VXW2qp4D9gGb2rjXV9WBqirgvs68JEkLZKbHDFZU1ck2/Cywog2vAp7pTHe81c5XP96n3leSbUnGk4yfOXNmhq1Lks416wPI7X/0NQe9TOe17q2q9VW1fmxsbCFeUpKWhJmGwam2i4d2f7rVTwBrOtOtbrXz1Vf3qUuSFtBMw2APMHlG0FbgwU59SzuraAPwfNudtBfYmGRZO3C8Edjbxr2QZEM7i2hLZ16SpAUy5W8gJ/ks8HZgeZLj9M4Kuht4IMltwNPAu9rkDwE3ARPAj4H3AFTV2SR3AQfbdB+uqsmD0u+jd8bSq4Evt5skaQFNGQZV9e4Bo97RZ9oCbh8wn13Arj71ceAtU/UhSZo/U4aBNCxrt3/p5eFjd988xE6ki5+Xo5AkuWUgLRS3dDTK3DKQJBkGkiTDQJKEYSBJwjCQJGEYSJLw1FItEt3TMiXNPcNAmkeGmBYLdxNJkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShD97qRHQ/WnIY3ffPMROpKXLLQNJkmEgSXI3kTR07ibTKDAMNFK6H4ySFo67iSRJbhlIw+AWkEbNrLYMkhxLciTJoSTjrXZ5kn1Jjrb7Za2eJDuTTCQ5nOSazny2tumPJtk6u7ckSbpQc7Gb6Deq6uqqWt8ebwf2V9U6YH97DHAjsK7dtgH3QC88gB3A9cB1wI7JAJEkLYz5OGawGdjdhncDt3Tq91XPAeCyJCuBG4B9VXW2qp4D9gGb5qEvSdIAsw2DAr6S5JEk21ptRVWdbMPPAiva8Crgmc5zj7faoLokaYHM9gDy26rqRJI3AvuSfKc7sqoqSc3yNV7WAmcbwBVXXDFXs5WkJW9WWwZVdaLdnwa+QG+f/6m2+4d2f7pNfgJY03n66lYbVO/3evdW1fqqWj82Njab1iVJHTMOgySvSfK6yWFgI/AosAeYPCNoK/BgG94DbGlnFW0Anm+7k/YCG5MsaweON
7aaJGmBzGY30QrgC0km5/MPVfUvSQ4CDyS5DXgaeFeb/iHgJmAC+DHwHoCqOpvkLuBgm+7DVXV2Fn1Jki7QjMOgqp4CfqVP/YfAO/rUC7h9wLx2Abtm2oskaXa8HIUkyTCQJBkGkiQMA0kShoEkCcNAkoS/Z6AF5M87SqPLMNBQ+OMu0mgxDKQRMigk3ZLSfPOYgSTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiS8NpHmgVcnnV8uX80Hw0DzyquTSouDYSAtAoaq5pvHDCRJhoEkyTCQJGEYSJIwDCRJGAaSJDy1VLPg6Y7Dd75/A7+QpgvhloEkyTCQJBkGkiQMA0kSHkDWNHigePHzSqeaimEgXaQMcV0IdxNJkkZnyyDJJuCvgEuAv6uqu4fc0pLj/ySXhkH/zu4+WtpGIgySXAJ8Avht4DhwMMmeqnp8uJ1dnPzQVz+GxNI2EmEAXAdMVNVTAEnuBzYDhsE0+OGu+XSh65fhsTiNShisAp7pPD4OXH/uREm2Advaw/9M8uQcvPZy4AdzMJ+Fspj6tdf5M7L95qM/VRrZXgdYTP3OpNef61cclTCYlqq6F7h3LueZZLyq1s/lPOfTYurXXufPYup3MfUKi6vfuex1VM4mOgGs6Txe3WqSpAUwKmFwEFiX5MoklwK3AnuG3JMkLRkjsZuoql5Kcgewl96ppbuq6rEFevk53e20ABZTv/Y6fxZTv4upV1hc/c5Zr6mquZqXJGmRGpXdRJKkITIMJElLKwyS/H6Sx5L8b5L1nfraJP+V5FC7/W1n3LVJjiSZSLIzSYbZaxv3wdbPk0lu6NQ3tdpEku0L0Wc/ST6U5ERned7UGde392EaleU2SJJjbR08lGS81S5Psi/J0Xa/bIj97UpyOsmjnVrf/tKzsy3rw0muGYFeR3J9TbImydeSPN4+C97f6vOzbKtqydyAXwZ+CfhXYH2nvhZ4dMBzvgFsAAJ8GbhxyL1eBXwbeBVwJfA9egfdL2nDbwIubdNcNaTl/CHgT/rU+/Y+5HViZJbbeXo8Biw/p/YXwPY2vB346BD7+3Xgmu7f0KD+gJva31Ha39XDI9DrSK6vwErgmjb8OuC7rad5WbZLasugqp6oqml/aznJSuD1VXWgekv7PuCW+eqv6zy9bgbur6oXq+r7wAS9y3m8fEmPqvoJMHlJj1EyqPdhWgzLrZ/NwO42vJsFWi/7qaqvA2fPKQ/qbzNwX/UcAC5rf2cLYkCvgwx1fa2qk1X1zTb8I+AJeldrmJdlu6TCYApXJvlWkn9L8muttorepTEmHW+1Yep36Y5V56kPyx1tU3VXZxfGqPUIo9nTuQr4SpJH2iVZAFZU1ck2/CywYjitDTSov1Fd3iO9viZZC7wVeJh5WrYj8T2DuZTkq8DP9hl1Z1U9OOBpJ4ErquqHSa4F/jnJm+etyWaGvY6E8/UO3APcRe9D7C7gY8AfLlx3F523VdWJJG8E9iX5TndkVVWSkT1HfNT7Y8TX1ySvBT4HfKCqXugetpzLZXvRhUFV/dYMnvMi8GIbfiTJ94BfpHdJjNWdSef0Mhkz6ZXzX7pjwS7pMd3ek3wS+GJ7OIqXHRnFnv6fqjrR7k8n+QK9XRWnkqysqpNtV8DpoTb50wb1N3LLu6pOTQ6P2vqa5JX0guAzVfX5Vp6XZetuIiDJWHq/qUCSNwHrgKfaptgLSTa0s4i2AMP+H/se4NYkr0pyJb1ev8EIXdLjnP2U7wQmz9wY1Pswjcxy6yfJa5K8bnIY2Ehvee4BtrbJtjL89fJcg/rbA2xpZ75sAJ7v7PIYilFdX9tnzqeAJ6rq451R87NsF+rI+Cjc6P1DH6e3FXAK2Nvqvws8BhwCvgn8Tuc56+mtHN8D/pr2re1h9drG3dn6eZLO2U30zib4bht35xCX898DR4DDbQVdOVXvQ14vRmK5DejtTfTOaPl2W0fvbPU3APuBo8BXgcuH2ONn6e1q/e+2zt42qD96Z7p8oi3rI3TOlBtiryO5vgJvo7fr6nD7bDrU1tV5WbZejkKS5G4iSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEvB/DrQFsWZUkGEAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
"0.4432273356119792"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
"execution_count": 415,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def huffman_u(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 100\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 100\n",
" #new_error = error[mask]\n",
" #mask2 = diff[mask] <= 200\n",
" #string = [str(i) for i in new_error[mask2].astype(int)]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" '''mask = diff > 200\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 300\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 300\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)'''\n",
"\n",
" \n",
" \n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" # return the huffman dictionary\n",
" #return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" \n",
" return encode1, encode2, encode3, np.ravel(image), error, diff, boundary\n",
"\n",
"#def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
"def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 100:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
" \n",
" if diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
"\n",
" '''if diff[i] <= 200 and diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])'''\n",
" \n",
" '''if diff[i] <= 300 and diff[i] > 200:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" \n",
" if diff[i] > 300:\n",
" c_len += len(encode5[str(int(error[i]))])'''\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, image, error, diff, boundary = huffman_u(images[0])\n",
"compress_rate_u(image, error, diff, boundary, encode1, encode2, encode3)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8b93cc5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 417,
"id": "6abed5da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-154.0\n",
"197.0\n"
"Compression rate of huffman with different bins: 0.44946919759114584\n",
"Compression rate of huffman without bins: 0.4513634314749933\n",
"Compression rate of huffman with uniform bins: 0.44956921895345053\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"origin, predict, diff, error, A = plot_hist(images[0])\n",
"image = Image.open(images[0]) #Open the image and read it as an Image object\n",
"image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
"image = image.astype(int)\n",
"print(len(set(list(error))))\n",
"plt.hist(error,100)\n",
"plt.show()\n",
"print(min(error))\n",
"print(max(error))"
"num_images = im_distribution(images, \"_9\")\n",
"rate = []\n",
"rate_nb = []\n",
"rate_u = []\n",
"for i in range(len(num_images)):\n",
" encode1, encode2, encode3, encode4, encode5, image, error, diff, bound = huffman(num_images[i])\n",
" r = compress_rate(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5)\n",
" rate.append(r)\n",
" encoding, error, image = huffman_nb(num_images[i])\n",
" r = compress_rate_nb(image, error, encoding)\n",
" rate_nb.append(r)\n",
" encode1, encode2, encode3, image, error, diff, bound = huffman_u(num_images[i])\n",
" r = compress_rate_u(image, error, diff, bound, encode1, encode2, encode3)\n",
" rate_u.append(r)\n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"Compression rate of huffman without bins: {np.mean(rate_nb)}\")\n",
"print(f\"Compression rate of huffman with uniform bins: {np.mean(rate_u)}\")"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "207b0bd2",
"execution_count": 430,
"id": "15eecad3",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVyUlEQVR4nO3db4yd5X3m8e9VHAhKC7bD1ItsZ+2obiqSVQhMwVG6VYo3xkAU86JFRKvipSzeDSRKtJW6TvPCu9BIkEpLYiklQuBit2mJQ5PFClB36iTd3RcGhj+BAGE9EBBjAXZjA+2igkh+++LchsNwZuYY22dm7O9HOjr383vu55n75oy5zvPnnElVIUk6vv3STA9AkjTzDANJkmEgSTIMJEkYBpIkYN5MD+CdOu2002rZsmUzPQxJmjPuv//+f6yqoV7r5mwYLFu2jNHR0ZkehiTNGUmemWzdtKeJknwgyUNdj5eTfCHJwiQjSXa35wWtf5JsSjKW5OEkZ3Xta13rvzvJuq762UkeadtsSpLDnbQkqX/ThkFVPVFVZ1bVmcDZwCvAd4ENwM6qWgHsbMsAFwAr2mM9cCNAkoXARuBc4Bxg48EAaX2u7NpuzZGYnCSpP4d6AXkV8GRVPQOsBba0+hbg4tZeC2ytjl3A/CSnA+cDI1W1v6oOACPAmrbulKraVZ2PQ2/t2pckaQAONQwuBf66tRdV1XOt/TywqLUXA892bTPealPVx3vU3ybJ+iSjSUb37dt3iEOXJE2m7zBIciLwKeDbE9e1d/RH/UuOquqmqhququGhoZ4XxCVJ78ChHBlcADxQVS+05RfaKR7a895W3wMs7dpuSatNVV/Soy5JGpBDCYNP8+YpIoDtwME7gtYBd3TVL2t3Fa0EXmqnk3YAq5MsaBeOVwM72rqXk6xsdxFd1rUvSdIA9PU5gyTvAT4B/Keu8nXAtiRXAM8Al7T6XcCFwBidO48uB6iq/UmuBe5r/a6pqv2tfRVwK3AycHd7SJIGJHP17xkMDw+XHzqTpP4lub+qhnutm7OfQJZm2rINd77Rfvq6i2ZwJNLh84vqJEmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiS6DMMksxPcnuSnyR5PMlHkyxMMpJkd3te0PomyaYkY0keTnJW137Wtf67k6zrqp+d5JG2zaYkOfJTlY6eZRvufOMhzUX9Hhl8DfjbqvoN4MPA48AGYGdVrQB2tmWAC4AV7bEeuBEgyUJgI3AucA6w8WCAtD5Xdm235vCmJUk6FNOGQZJTgd8GbgGoqteq6kVgLbClddsCXNzaa4Gt1bELmJ/kdOB8YKSq9lfVAWAEWNPWnVJVu6qqgK1d+5IkDUA/RwbLgX3Anyd5MMnNSd4DLKqq51qf54FFrb0YeLZr+/FWm6o+3qP+NknWJxlNMrpv374+hi5J6kc/YTAPOAu4sao+Avw/3jwlBEB7R19HfnhvVVU3VdVwVQ0PDQ0d7R8nSceNfsJgHBivqnva8u10wuGFdoqH9ry3rd8DLO3afkmrTVVf0qMuSRqQacOgqp4Hnk3ygVZaBTwGbAcO3hG0DrijtbcDl7W7ilYCL7XTSTuA1UkWtAvHq4Edbd3LSVa2u4gu69qXJGkA5vXZ73PAN5OcCDwFXE4nSLYluQJ4Brik9b0LuBAYA15pfamq/UmuBe5r/a6pqv2tfRVwK3AycHd7SJIGpK8wqKqHgOEeq1b16FvA1ZPsZzOwuUd9FPhQP2ORJB15fgJZkmQYSJIMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRJ9/g1kSR3LNtw500OQjgqPDCRJ/YVBkqeTPJLkoSSjrbYwyUiS3e15QasnyaYkY0keTnJW137Wtf67k6zrqp/d9j/Wts2RnqgkaXKHcmTwO1V1ZlUNt+UNwM6qWgHsbMsAFwAr2mM9cCN0wgPYCJwLnANsPBggrc+VXdutecczkiQdssM5TbQW2NLaW4CLu+pbq2MXMD/J6cD5wEhV7a+qA8AIsKatO6WqdlVVAVu79iVJGoB+w6CAv0tyf5L1rbaoqp5r7eeBRa29GHi2a9vxVpuqPt6jLkkakH7vJvqtqtqT5FeBkSQ/6V5ZVZWkjvzw3qoF0XqA973vfUf7x0nScaOvI4Oq2tOe9wLfpXPO/4V2iof2vLd13wMs7dp8SatNVV/So95rHDdV1XBVDQ8NDfUzdElSH6YNgyTvSfIrB9vAauDHwHbg4B1B64A7Wns7cFm7q2gl8FI7nbQDWJ1kQbtwvBrY0da9nGRlu4vosq59SZIGoJ/TRIuA77a7PecBf1VVf5vkPmBbkiuAZ4BLWv+7gAuBMeAV4HKAqtqf5Frgvtbvmqra39pXAbcCJwN3t4ckaUCmDYOqegr4cI/6z4BVPeoFXD3JvjYDm3vUR4EP9TFeSdJR4CeQJUmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQOIQySnJDkwSTfa8vLk9yTZCzJt5Kc2OonteWxtn5Z1z6+2OpPJDm/q76m1caSbDiC85MGbtmGO994SHPFoRwZfB54vGv5euCGqvo14ABwRatfARxo9RtaP5KcAVwKfBBYA/xZC5gTgK8DFwBnAJ9ufSVJA9JXGCRZAlwE3NyWA5wH3N66bAEubu21bZm2flXrvxa4raperaqfAmPAOe0xVlVPVdVrwG2tryRpQPo9Mvgq8EfAL9rye4EXq+r1tjwOLG7txcCzAG39S63/G/UJ20xWf5sk65OMJhndt29fn0OXJE1n2jBI8klgb1XdP4DxTKmqbqqq4aoaHhoamunhSNIxY14ffT4GfCrJhcC7gVOArwHzk8xr7/6XAHta/z3AUmA8yTzgVOBnXfWDureZrC5JGoBpjwyq6otVtaSqltG5APz9qvr3wA+A323d1gF3tPb2tkxb//2qqla/tN1ttBxYAdwL3AesaHcnndh+xvYjMjtJUl/6OTKYzH8FbkvyJ8CDwC2tfgvwF0nGgP10/udOVT2aZBvwGPA6cHVV/RwgyWeBHcAJwOaqevQwxiVJOkSHFAZV9UPgh639FJ07gSb2+Rfg9ybZ/svAl3vU7wLuOpSxSJKOHD+BLEkyDCRJhoEkCcNAksTh3U0kHRf8wjkdDzwykCQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIk+giDJO9Ocm+SHyV5NMl/b/XlSe5JMpbkW0lObPWT2vJYW7+sa19fbPUnkpzfVV/TamNJNhyFeUqSptDPkcGrwHlV9WHgTGBNkpXA9cANVfVrwAHgitb/CuBAq9/Q+pHkD
OBS4IPAGuDPkpyQ5ATg68AFwBnAp1tfSdKATBsG1fHPbfFd7VHAecDtrb4FuLi117Zl2vpVSdLqt1XVq1X1U2AMOKc9xqrqqap6Dbit9ZUkDUhf1wzaO/iHgL3ACPAk8GJVvd66jAOLW3sx8CxAW/8S8N7u+oRtJqv3Gsf6JKNJRvft29fP0CVJfegrDKrq51V1JrCEzjv53ziag5piHDdV1XBVDQ8NDc3EECTpmHRIdxNV1YvAD4CPAvOTzGurlgB7WnsPsBSgrT8V+Fl3fcI2k9UlSQPSz91EQ0nmt/bJwCeAx+mEwu+2buuAO1p7e1umrf9+VVWrX9ruNloOrADuBe4DVrS7k06kc5F5+xGYmySpT/Om78LpwJZ2188vAduq6ntJHgNuS/InwIPALa3/LcBfJBkD9tP5nztV9WiSbcBjwOvA1VX1c4AknwV2ACcAm6vq0SM2Q0nStKYNg6p6GPhIj/pTdK4fTKz/C/B7k+zry8CXe9TvAu7qY7ySpKPATyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgSaKPv4Es6Z1btuHON9pPX3fRDI5EmppHBpIkw0CS1EcYJFma5AdJHkvyaJLPt/rCJCNJdrfnBa2eJJuSjCV5OMlZXfta1/rvTrKuq352kkfaNpuS5GhMVpLUWz9HBq8Df1hVZwArgauTnAFsAHZW1QpgZ1sGuABY0R7rgRuhEx7ARuBc4Bxg48EAaX2u7NpuzeFPTZLUr2nDoKqeq6oHWvufgMeBxcBaYEvrtgW4uLXXAlurYxcwP8npwPnASFXtr6oDwAiwpq07pap2VVUBW7v2JUkagEO6ZpBkGfAR4B5gUVU911Y9Dyxq7cXAs12bjbfaVPXxHvVeP399ktEko/v27TuUoUuSptB3GCT5ZeBvgC9U1cvd69o7+jrCY3ubqrqpqoaranhoaOho/zhJOm70FQZJ3kUnCL5ZVd9p5RfaKR7a895W3wMs7dp8SatNVV/Soy5JGpB+7iYKcAvweFX9j65V24GDdwStA+7oql/W7ipaCbzUTiftAFYnWdAuHK8GdrR1LydZ2X7WZV37kiQNQD+fQP4Y8PvAI0kearU/Bq4DtiW5AngGuKStuwu4EBgDXgEuB6iq/UmuBe5r/a6pqv2tfRVwK3AycHd7SDOm+5PD0vFg2jCoqv8DTHbf/6oe/Qu4epJ9bQY296iPAh+abiySpKPDTyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiS6CMMkmxOsjfJj7tqC5OMJNndnhe0epJsSjKW5OEkZ3Vts671351kXVf97CSPtG02JcmRnqQkaWr9HBncCqyZUNsA7KyqFcDOtgxwAbCiPdYDN0InPICNwLnAOcDGgwHS+lzZtd3EnyVJOsqmDYOq+l/A/gnltcCW1t4CXNxV31odu4D5SU4HzgdGqmp/VR0ARoA1bd0pVbWrqgrY2rUvSdKAvNNrBouq6rnWfh5Y1NqLgWe7+o232lT18R71npKsTzKaZHTfvn3vcOiSpIkO+wJye0dfR2As/fysm6pquKqGh4aGBvEjJem48E7D4IV2iof2vLfV9wBLu/otabWp6kt61CVJA/ROw2A7cPCOoHXAHV31y9pdRSuBl9rppB3A6iQL2oXj1cCOtu7lJCvbXUSXde1LOqYs23DnGw9ptpk3XYckfw18HDgtyTidu4KuA7YluQJ4Brikdb8LuBAYA14BLgeoqv1JrgXua/2uqaqDF6WvonPH0snA3e0hSRqgacOgqj49yapVPfoWcPUk+9kMbO5RHwU+NN04JElHj59AliRNf2QgHS88l6/jmUcGkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkvC7iaQZ0f09SE9fd9EMjkTq8MhAkmQYSJI8TaTjnF9bLXV4ZCBJMgwkSYaBJAmvGUgzzttMNRt4ZCBJmj1HBknWAF8DTgBurqrrZnhIOkZ5B5H0drMiDJKcAHwd+AQwDtyXZHtVPTazI5MGy1NGmimzIgyAc4CxqnoKIMltwFrAMNA7NtePACYbvyGho2G2hMFi4Nmu5XHg3ImdkqwH1rfFf07yxBT7PA34xyM2wpl1LM0FnM9hyfVHdfe+NrPb4c7nX0+2YraEQV+q6ibgpn76JhmtquGjPKSBOJbmAs5nNjuW5gLO51DMlruJ9gBLu5aXtJokaQBmSxjcB6xIsjzJicClwPYZHpMkHTdmxWmiqno9yWeBHXRuLd1cVY8e5m77Op00RxxLcwHnM5sdS3MB59O3VNXR2rckaY6YLaeJJEkzyDCQJB0bYZDkc0l+kuTRJF/pqn8xyViSJ5Kc31Vf02pjSTbMzKinluQPk1SS09pykmxqY344yVldfdcl2d0e62Zu1G+X5E/ba/Nwku8mmd+1bs6+PjB3xtktydIkP0jyWPv38vlWX5hkpP0OjSRZ0OqT/t7NFklOSPJgku+15eVJ7mlj/la7KYUkJ7XlsbZ+2YwOvIck85Pc3v7NPJ7kowN7bapqTj+A3wH+HjipLf9qez4D+BFwErAceJLOxekTWvv9wImtzxkzPY8Jc1pK52L6M8BprXYhcDcQYCVwT6svBJ5qzwtae8FMz6FrLquBea19PXD9XH992vjnxDh7jPt04KzW/hXg/7bX4ivAhlbf0PU69fy9m00P4L8AfwV8ry1vAy5t7W8An2ntq4BvtPalwLdmeuw95rIF+I+tfSIwf1CvzbFwZPAZ4LqqehWgqva2+lrgtqp6tap+CozR+dqLN776oqpeAw5+9cVscgPwR0D31f21wNbq2AXMT3I6cD4wUlX7q+oAMAKsGfiIJ1FVf1dVr7fFXXQ+QwJz+/WBuTPOt6iq56rqgdb+J+BxOt8AsJbO/4hozxe39mS/d7NCkiXARcDNbTnAecDtrcvEuRyc4+3AqtZ/VkhyKvDbwC0AVfVaVb3IgF6bYyEMfh34t+2w7x+S/Gar9/qKi8VT1GeFJGuBPVX1owmr5uR8JvgDOu9kYO7PZ66Mc1LtNMlHgHuARVX1XFv1PLCotWf7PL9K543TL9rye4EXu96AdI/3jbm09S+1/rPFcmAf8OfttNfNSd7DgF6bWfE5g+kk+XvgX/VY9SU6c1hI5zDpN4FtSd4/wOEdsmnm88d0Tq3MGVPNp6ruaH2+BLwOfHOQY1NvSX4Z+BvgC1X1cvcb5KqqJLP+nvMknwT2VtX9ST4+w8M5EuYBZwGfq6p7knyNzmmhNxzN12ZOhEFV/bvJ1iX5DPCd6pxEuzfJL+h8mdNUX3Exo199Mdl8kvwbOu8OftT+cS4BHkhyDpPPZw/w8Qn1Hx7xQU9hqtcHIMl/AD4JrGqvE8zi16dPc/YrVJK8i04QfLOqvtPKLyQ5vaqea6caDp5unc3z/Bjw
qSQXAu8GTqHzN1HmJ5nX3v13j/fgXMaTzANOBX42+GFPahwYr6p72vLtdMJgMK/NTF8wOQIXXP4zcE1r/zqdw6YAH+StFyifonPRb15rL+fNC38fnOl5TDK3p3nzAvJFvPVi0b2tvhD4KZ2Lxwtae+FMj71rDmvofBX50IT6nH595so4e4w7wFbgqxPqf8pbL1J+Zarfu9n2oPOG6OAF5G/z1gvIV7X21bz1AvK2mR53j3n8b+ADrf3f2usykNdmxid/BP7jnQj8JfBj4AHgvK51X6Jzx8cTwAVd9Qvp3EXxJJ1TGTM+j0nm1h0GofMHgJ4EHgGGu/r9AZ0LsGPA5TM97glzGKMT0A+1xzeOoddnToxzwph/i86NCQ93vSYX0jl3vhPYTefuvIXT/d7NpseEMHg/cG/73fs2b95p+O62PNbWv3+mx91jHmcCo+31+Z903uAN5LXx6ygkScfE3USSpMNkGEiSDANJkmEgScIwkCRhGEiSMAwkScD/B/uJlQ2NeHvcAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
"0.4421759033203125"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
"execution_count": 430,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def huffman6(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 5\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 5\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 15\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" mask = diff > 15\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 30\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 30\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 50\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 50\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode6 = huffman_code_tree(node)\n",
"\n",
" \n",
" \n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, encode6, np.ravel(image), error, diff, boundary\n",
"\n",
"def compress_rate6(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5, encode6):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 5:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
"\n",
" if diff[i] <= 15 and diff[i] > 5:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
" \n",
" if diff[i] <= 30 and diff[i] > 15:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" \n",
" if diff[i] <= 50 and diff[i] > 30:\n",
" c_len += len(encode5[str(int(error[i]))])\n",
" \n",
" if diff[i] > 50:\n",
" c_len += len(encode6[str(int(error[i]))])\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, encode6, image, error, diff, boundary = huffman(images[0])\n",
"compress_rate(image, error, diff, boundary, encode1, encode2, encode3, encode4, encode5, encode6)\n"
]
},
{
"cell_type": "code",
"execution_count": 431,
"id": "f8a8c717",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"458\n",
"{'61': '000000000', '87': '000000001000', '140': '000000001001000', '142': '000000001001001', '250': '000000001001010', '179': '000000001001011', '151': '000000001001100', '207': '0000000010011010', '141': '0000000010011011', '101': '00000000100111', '77': '00000000101', '69': '0000000011', '53': '00000001', '44': '0000001', '33': '000001', '20': '00001', '19': '00010', '18': '00011', '43': '0010000', '75': '00100010000', '84': '001000100010', '104': '00100010001100', '122': '001000100011010', '120': '001000100011011', '94': '0010001000111', '83': '001000100100', '153': '001000100101000', '130': '001000100101001', '143': '001000100101010', '127': '001000100101011', '93': '0010001001011', '115': '0010001001100000', '375': '0010001001100001', '366': '0010001001100010', '373': '0010001001100011', '377': '0010001001100100', '314': '0010001001100101', '402': '0010001001100110', '367': '0010001001100111', '133': '0010001001101000', '210': '0010001001101001', '334': '0010001001101010', '298': '0010001001101011', '176': '0010001001101100', '332': '0010001001101101', '335': '0010001001101110', '362': '0010001001101111', '371': '0010001001110000', '333': '0010001001110001', '251': '0010001001110010', '226': '0010001001110011', '247': '0010001001110100', '315': '0010001001110101', '304': '0010001001110110', '258': '0010001001110111', '306': '0010001001111000', '261': '0010001001111001', '289': '0010001001111010', '372': '0010001001111011', '239': '0010001001111100', '252': '0010001001111101', '224': '0010001001111110', '205': '0010001001111111', '60': '001000101', '52': '00100011', '32': '001001', '17': '00101', '51': '00110000', '139': '001100010000000', '149': '001100010000001', '132': '001100010000010', '154': '001100010000011', '163': '001100010000100', '162': '001100010000101', '281': '001100010000110', '346': '001100010000111', '384': '00110001000100000', '331': '00110001000100001', '383': '00110001000100010', '155': '00110001000100011', '354': '00110001000100100', '456': '00110001000100101', '229': '00110001000100110', '388': '00110001000100111', '294': '00110001000101000', '413': '00110001000101001', '211': '00110001000101010', '292': '00110001000101011', '376': '00110001000101100', '382': '00110001000101101', '387': '00110001000101110', '309': '00110001000101111', '158': '001100010001100', '159': '001100010001101', '313': '00110001000111000', '126': '00110001000111001', '327': '00110001000111010', '319': '00110001000111011', '302': '00110001000111100', '330': '00110001000111101', '385': '00110001000111110', '573': '00110001000111111', '76': '00110001001', '214': '0011000101000000', '259': '0011000101000001', '243': '0011000101000010', '255': '0011000101000011', '410': '0011000101000100', '399': '0011000101000101', '134': '0011000101000110', '270': '0011000101000111', '216': '0011000101001000', '236': '0011000101001001', '213': '0011000101001010', '196': '0011000101001011', '290': '0011000101001100', '231': '0011000101001101', '128': '0011000101001110', '193': '0011000101001111', '169': '0011000101010000', '178': '0011000101010001', '328': '0011000101010010', '160': '0011000101010011', '394': '0011000101010100', '336': '0011000101010101', '204': '0011000101010110', '227': '0011000101010111', '200': '0011000101011000', '212': '0011000101011001', '352': '0011000101011010', '147': '0011000101011011', '342': '0011000101011100', '308': '0011000101011101', '329': '0011000101011110', '379': '0011000101011111', '221': '00110001011000000', '267': '00110001011000001', '269': '001100010110000100', 
'177': '001100010110000101', '220': '001100010110000110', '202': '001100010110000111', '225': '00110001011000100', '185': '00110001011000101', '170': '00110001011000110', '198': '00110001011000111', '429': '001100010110010000', '206': '001100010110010001', '426': '001100010110010010', '438': '001100010110010011', '403': '001100010110010100', '424': '001100010110010101', '299': '001100010110010110', '325': '001100010110010111', '237': '001100010110011000', '152': '001100010110011001', '145': '001100010110011010', '230': '001100010110011011', '411': '001100010110011100', '286': '001100010110011101', '374': '001100010110011110', '469': '001100010110011111', '293': '00110001011010000', '406': '00110001011010001', '407': '00110001011010010', '421': '00110001011010011', '301': '00110001011010100', '275': '00110001011010101', '423': '00110001011010110', '395': '00110001011010111', '244': '00110001011011000', '337': '00110001011011001', '300': '00110001011011010', '233': '00110001011011011', '322': '00110001011011100', '400': '00110001011011101', '253': '00110001011011110', '361': '00110001011011111', '297': '001100010111000000', '390': '001100010111000001', '444': '001100010111000010', '242': '001100010111000011', '606': '001100010111000100', '498': '001100010111000101', '397': '001100010111000110', '532': '001100010111000111', '209': '001100010111001000', '283': '001100010111001001', '430': '001100010111001010', '351': '001100010111001011', '539': '001100010111001100', '530': '001100010111001101', '256': '001100010111001110', '491': '001100010111001111', '511': '001100010111010000', '570': '001100010111010001', '559': '001100010111010010', '478': '001100010111010011', '359': '001100010111010100', '22554': '001100010111010101', '512': '001100010111010110', '503': '001100010111010111', '474': '001100010111011000', '489': '001100010111011001', '404': '001100010111011010', '380': '001100010111011011', '519': '001100010111011100', '568': '001100010111011101', '515': '001100010111011110', '543': '001100010111011111', '439': '001100010111100000', '418': '001100010111100001', '419': '001100010111100010', '425': '001100010111100011', '454': '001100010111100100', '228': '001100010111100101', '447': '001100010111100110', '452': '001100010111100111', '414': '001100010111101000', '435': '001100010111101001', '416': '001100010111101010', '345': '001100010111101011', '401': '001100010111101100', '440': '001100010111101101', '409': '001100010111101110', '350': '001100010111101111', '494': '001100010111110000', '482': '001100010111110001', '502': '001100010111110010', '393': '001100010111110011', '422': '001100010111110100', '370': '001100010111110101', '461': '001100010111110110', '369': '001100010111110111', '398': '001100010111111000', '445': '001100010111111001', '203': '001100010111111010', '249': '001100010111111011', '405': '001100010111111100', '486': '001100010111111101', '467': '001100010111111110', '470': '001100010111111111', '68': '0011000110', '74': '00110001110', '91': '0011000111100', '114': '001100011110100', '307': '001100011110101', '199': '001100011110110', '110': '001100011110111', '89': '0011000111110', '109': '00110001111110', '105': '001100011111110', '150': '0011000111111110', '129': '0011000111111111', '42': '0011001', '31': '001101', '16': '00111', '15': '01000', '67': '0100100000', '66': '0100100001', '59': '010010001', '50': '01001001', '41': '0100101', '30': '010011', '14': '01010', '13': '01011', '12': '01100', '29': '011010', '49': '01101100', '73': '01101101000', '82': 
'011011010010', '88': '0110110100110', '90': '0110110100111', '264': '0110110101000000', '272': '0110110101000001', '181': '0110110101000010', '218': '0110110101000011', '368': '0110110101000100', '347': '0110110101000101', '186': '0110110101000110', '248': '0110110101000111', '365': '0110110101001000', '187': '0110110101001001', '165': '0110110101001010', '164': '0110110101001011', '305': '0110110101001100', '161': '0110110101001101', '271': '0110110101001110', '340': '0110110101001111', '386': '0110110101010000', '280': '0110110101010001', '112': '0110110101010010', '137': '0110110101010011', '100': '01101101010101', '326': '0110110101011000', '277': '0110110101011001', '288': '0110110101011010', '316': '0110110101011011', '148': '0110110101011100', '287': '0110110101011101', '131': '0110110101011110', '183': '0110110101011111', '81': '011011010110', '188': '011011010111000', '125': '011011010111001', '171': '0110110101110100', '166': '0110110101110101', '167': '011011010111011', '116': '011011010111100', '174': '011011010111101', '108': '011011010111110', '107': '011011010111111', '58': '011011011', '40': '0110111', '11': '01110', '10': '01111', '28': '100000', '39': '1000010', '57': '100001100', '65': '1000011010', '72': '10000110110', '70': '10000110111', '48': '10000111', '9': '10001', '7': '10010', '8': '10011', '27': '101000', '38': '1010010', '47': '10100110', '63': '1010011100', '71': '10100111010', '79': '101001110110', '97': '10100111011100', '103': '10100111011101', '96': '10100111011110', '138': '10100111011111000', '146': '10100111011111001', '282': '10100111011111010', '222': '10100111011111011', '118': '101001110111111', '56': '101001111', '6': '10101', '5': '10110', '0': '101110', '26': '101111', '3': '11000', '2': '11001', '1': '11010', '4': '11011', '25': '111000', '37': '1110010', '64': '1110011000', '119': '11100110010000000', '349': '11100110010000001', '364': '11100110010000010', '392': '11100110010000011', '358': '11100110010000100', '378': '11100110010000101', '197': '11100110010000110', '279': '11100110010000111', '323': '11100110010001000', '318': '11100110010001001', '360': '11100110010001010', '436': '11100110010001011', '217': '11100110010001100', '124': '11100110010001101', '311': '11100110010001110', '324': '11100110010001111', '274': '11100110010010000', '278': '11100110010010001', '310': '11100110010010010', '296': '11100110010010011', '353': '11100110010010100', '357': '11100110010010101', '262': '11100110010010110', '223': '11100110010010111', '303': '11100110010011000', '284': '11100110010011001', '396': '11100110010011010', '338': '11100110010011011', '135': '11100110010011100', '355': '11100110010011101', '234': '11100110010011110', '441': '11100110010011111', '195': '1110011001010000', '320': '1110011001010001', '257': '11100110010100100', '215': '11100110010100101', '254': '11100110010100110', '263': '11100110010100111', '157': '1110011001010100', '285': '1110011001010101', '172': '1110011001010110', '192': '1110011001010111', '381': '11100110010110000', '389': '11100110010110001', '321': '11100110010110010', '189': '11100110010110011', '415': '11100110010110100', '190': '11100110010110101', '232': '11100110010110110', '245': '11100110010110111', '180': '11100110010111000', '235': '11100110010111001', '273': '11100110010111010', '317': '11100110010111011', '240': '11100110010111100', '219': '11100110010111101', '182': '11100110010111110', '246': '11100110010111111', '113': '1110011001100000', '276': '1110011001100001', '265': '1110011001100010', 
'175': '1110011001100011', '121': '1110011001100100', '123': '1110011001100101', '144': '1110011001100110', '156': '1110011001100111', '184': '1110011001101000', '344': '1110011001101001', '266': '1110011001101010', '173': '1110011001101011', '348': '1110011001101100', '268': '1110011001101101', '363': '1110011001101110', '312': '1110011001101111', '80': '111001100111', '55': '111001101', '46': '11100111', '24': '111010', '36': '1110110', '54': '111011100', '62': '1110111010', '92': '11101110110000', '98': '11101110110001', '86': '1110111011001', '95': '11101110110100', '99': '11101110110101', '106': '111011101101100', '117': '111011101101101', '136': '1110111011011100', '241': '1110111011011101', '102': '111011101101111', '85': '1110111011100', '201': '1110111011101000', '238': '1110111011101001', '191': '1110111011101010', '208': '1110111011101011', '343': '1110111011101100', '111': '1110111011101101', '168': '1110111011101110', '339': '1110111011101111', '78': '111011101111', '45': '11101111', '23': '111100', '35': '1111010', '34': '1111011', '22': '111110', '21': '111111'}\n"
"Compression rate of huffman with different bins: 0.448723882039388\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"start = time.time()\n",
"origin, predict, diff, error, A = plot_hist(images[0])\n",
"image = Image.open(images[0]) #Open the image and read it as an Image object\n",
"image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
"image = image.astype(int)\n",
"new_error = np.copy(image)\n",
"new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
"keep = new_error[0,0]\n",
"new_error[0,:] = new_error[0,:] - keep\n",
"new_error[-1,:] = new_error[-1,:] - keep\n",
"new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
"new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
"new_error[0,0] = keep\n",
"new_error = np.ravel(new_error)\n",
"plt.hist(new_error[1:],bins=100)\n",
"plt.show()\n",
"ab_error = np.abs(new_error)\n",
"string = [str(i) for i in ab_error]\n",
"#string = [str(i) for i in new_error]\n",
"#string = [str(i) for i in np.arange(0,5)] + [str(i) for i in np.arange(0,5)] + [str(i) for i in np.arange(0,2)]*2\n",
"freq = dict(Counter(string))\n",
"#print(freq)\n",
"freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
"node = make_tree(freq)\n",
"encoding = huffman_code_tree(node)\n",
"#encoded = [\"1\"+encoding[str(-i)] if i < 0 else \"0\"+encoding[str(i)] for i in error]\n",
"#print(time.time()-start)\n",
"print(len(encoding))\n",
"print(encoding)"
"num_images = im_distribution(images, \"_9\")\n",
"rate = []\n",
"\n",
"for i in range(len(num_images)):\n",
" encode1, encode2, encode3, encode4, encode5, encode6, image, error, diff, bound = huffman6(num_images[i])\n",
" r = compress_rate6(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5, encode6)\n",
" rate.append(r)\n",
" \n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 154,
"id": "14075c94",
"execution_count": 238,
"id": "992dd8bb",
"metadata": {},
"outputs": [],
"source": [
"def compress_rate(original, error, encoding):\n",
" original = original.reshape(-1)\n",
" error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" c_len += len(encoding[str(abs(error[i]))])\n",
" c_len += 1\n",
" \n",
" return c_len/o_len"
"origin, predict, diff, error, A = plot_hist(images[0])"
]
},
{
"cell_type": "code",
"execution_count": 461,
"id": "d3b29278",
"metadata": {},
"outputs": [],
"source": [
"def enc_experiment(image, plot=True):\n",
" \n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" \n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" \n",
" '''origin, predict, diff, error, A = plot_hist(images)\n",
" image = Image.open(images) #Open the image and read it as an Image object\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error[1:-1,1:-1],(510, 638))\n",
" #new_error[1:-1, 1:-1] = error[1:-1, 1:-1]\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" if plot:\n",
" plt.hist(new_error[1:],bins=100)\n",
" plt.show()'''\n",
" \n",
" #ab_error = np.abs(new_error)\n",
" #string = [str(i) for i in ab_error]\n",
" string = [str(i) for i in new_error]\n",
" #string = [str(i) for i in np.arange(0,5)] + [str(i) for i in np.arange(0,5)] + [str(i) for i in np.arange(0,2)]*2\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" \n",
" node = make_tree(freq)\n",
" encoding_dict = huffman_code_tree(node)\n",
" #encoded = [\"1\"+encoding[str(-i)] if i < 0 else \"0\"+encoding[str(i)] for i in error]\n",
" #print(time.time()-start)\n",
" encoded = new_error.reshape((512,640)).copy().astype(str).astype(object)\n",
"\n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 and j == 0:\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" else:\n",
" #print(encoding_dict[encoded[i][j]])\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" #print(encoded[i][j])\n",
" \n",
" return encoding_dict, encoded, new_error.reshape((512,640)), image\n",
" #print(encoding)"
]
},
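{
"cell_type": "markdown",
"id": "ad0c0de5",
"metadata": {},
"source": [
"`enc_experiment` stops at a matrix of per-pixel Huffman code strings. To make the size of an actual payload concrete, the sketch below concatenates those strings and packs them into bytes. `pack_bits` and its zero-padding convention are illustrative assumptions, not part of the pipeline above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad0c0de6",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: pack the per-pixel code strings into a byte stream.\n",
"def pack_bits(encoded_matrix):\n",
"    \"\"\"Concatenate all code strings, zero-pad to a byte boundary, return (bytes, pad).\"\"\"\n",
"    bits = \"\".join(encoded_matrix.ravel())\n",
"    pad = (-len(bits)) % 8\n",
"    bits += \"0\" * pad\n",
"    payload = int(bits, 2).to_bytes(len(bits) // 8, \"big\") if bits else b\"\"\n",
"    return payload, pad\n",
"\n",
"# usage, after enc_experiment has produced encoded_matrix:\n",
"# payload, pad = pack_bits(encoded_matrix)\n",
"# print(len(payload), \"bytes for\", encoded_matrix.size, \"pixels\")"
]
},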
{
"cell_type": "code",
"execution_count": 155,
"id": "b93c068b",
"execution_count": 496,
"id": "f4665493",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.4473590087890625"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
"ename": "IndentationError",
"evalue": "expected an indented block (591304675.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_20810/591304675.py\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m def encoder():\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n"
]
}
],
"source": [
"compress_rate(image,new_error,encoding)"
"def encoder(list_dic, diff):\n",
" encoded = new_error.reshape((512,640)).copy().astype(str).astype(object)\n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 and j == 0:\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" else:\n",
" #print(encoding_dict[encoded[i][j]])\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" #print(encoded[i][j])\n",
" \n",
" return encoding_dict, encoded, new_error.reshape((512,640)), image"
]
},
{
"cell_type": "code",
"execution_count": 471,
"id": "1ec5c5e3",
"metadata": {},
"outputs": [],
"source": [
"encoding_dict, encoded_matrix, error, orig_image = enc_experiment(images[0], plot=False)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"id": "a8dc8674",
"execution_count": 472,
"id": "4927a12f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['111001110100111000' '110001' '11111000' ... '001010' '1110000'\n",
" '0010010']\n",
" ['101000' '100101' '110001' ... '0010111' '110111' '00111111']\n",
" ['110011' '010011' '010000' ... '110110' '110001' '1110000']\n",
" ...\n",
" ['1111111101011101' '010010' '101110' ... '0010111' '110111' '01111110']\n",
" ['1111111101011110' '100110' '110110' ... '11111000' '1110111' '000101']\n",
" ['011111110000000' '111111110110010' '01011010111001' ...\n",
" '01011010001001' '11111001001' '0110100110']]\n"
]
}
],
"source": [
"print(encoding)\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])"
]
},
{
"cell_type": "code",
"execution_count": 478,
"id": "f145c221",
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_matrix, encoding_dict):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
" the_keys = list(encode_dict.keys())\n",
" the_values = list(encode_dict.values())\n",
" error_matrix = encoded_matrix.copy()\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" if j == 1 and i == 1:\n",
" z0, z1, z2, z3 = error_matrix[i-1][j-1], error_matrix[i-1][j], \\\n",
" error_matrix[i-1][j+1], error_matrix[i][j-1]\n",
"\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\n",
" #Real solution that works, DO NOT DELETE\n",
" #new_e[r][c] = int(np.ceil(new_e[r][c] + np.linalg.solve(A,y)[-1]))\n",
"\n",
" print(int(the_keys[the_values.index(error_matrix[i,j])]))\n",
" print(np.linalg.solve(A,y)[-1])\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])]) + \\\n",
" np.floor(np.linalg.solve(A,y)[-1][0])\n",
" #error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" break\n",
" \n",
" return error_matrix"
]
},
{
"cell_type": "code",
"execution_count": 481,
"id": "23b4c68b",
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_matrix, encoding_dict):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
" the_keys = list(encode_dict.keys())\n",
" the_values = list(encode_dict.values())\n",
" error_matrix = encoded_matrix.copy()\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" \"\"\"z0, z1, z2, z3 = error_matrix[i-1][j-1], error_matrix[i-1][j], \\\n",
" error_matrix[i-1][j+1], error_matrix[i][j-1]\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\"\"\"\n",
" \n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 487,
"id": "f33ed5ae",
"metadata": {},
"outputs": [],
"source": [
"image_error = decoder(A, encoded_matrix, encoding_dict)"
]
},
{
"cell_type": "code",
"execution_count": 488,
"id": "74eafda3",
"metadata": {},
"outputs": [],
"source": [
"def reconstruct(error, A):\n",
" \"\"\"\n",
" Function that reconstructs the original image\n",
" from the error matrix and using the predictive\n",
" algorithm developed in the encoding.\n",
" \n",
" Parameters:\n",
" error (array): matrix of errors computed in encoding. Same \n",
" shape as the original image (512, 640) in this case\n",
" A (array): Matrix used for the system of equations to create predictions\n",
" Returns: cd cdcd\n",
" image (array): The reconstructed image\n",
" \"\"\"\n",
" new_e = error.copy()\n",
" rows, columns = new_e.shape\n",
"\n",
" for r in range(1, rows-1):\n",
" for c in range(1, columns-1):\n",
" z0, z1, z2, z3 = new_e[r-1][c-1], new_e[r-1][c], new_e[r-1][c+1], new_e[r][c-1]\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\n",
" \n",
" if r == 1 and c == 1:\n",
" print(np.linalg.solve(A,y)[-1])\n",
" \n",
" #Real solution that works, DO NOT DELETE\n",
" #new_e[r][c] = int(np.ceil(new_e[r][c] + np.linalg.solve(A,y)[-1]))\n",
" \n",
" new_e[r][c] = np.round(new_e[r][c] + np.linalg.solve(A,y)[-1], 1)\n",
" \n",
" return new_e.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 489,
"id": "0c40bfe1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[22543.5]\n"
]
}
],
"source": [
"new_image = reconstruct(image_error, A)"
]
},
{
"cell_type": "code",
"execution_count": 494,
"id": "5495cb59",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[22554 22552 22519 ... 22537 22529 22523]\n",
" [22561 22552 22543 ... 22544 22533 22513]\n",
" [22559 22565 22548 ... 22526 22508 22529]\n",
" ...\n",
" [22674 22661 22654 ... 22670 22617 22594]\n",
" [22656 22652 22644 ... 22640 22625 22573]\n",
" [22659 22653 22642 ... 22649 22615 22613]]\n",
"[[22554 22552 22519 ... 22537 22529 22523]\n",
" [22561 22552 22543 ... 22544 22533 22513]\n",
" [22559 22565 22548 ... 22526 22508 22529]\n",
" ...\n",
" [22674 22661 22654 ... 22670 22617 22594]\n",
" [22656 22652 22644 ... 22640 22625 22573]\n",
" [22659 22653 22642 ... 22649 22615 22613]]\n"
]
}
],
"source": [
"print(new_image)\n",
"print(orig_image)"
]
},
{
"cell_type": "code",
"execution_count": 495,
"id": "c1f26059",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
"True"
]
},
"execution_count": 140,
"execution_count": 495,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"int('0100',2)"
"np.all(new_image - orig_image == 0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "992dd8bb",
"id": "9200fa53",
"metadata": {},
"outputs": [],
"source": []
......
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 447,
"execution_count": 1,
"id": "8868bc30",
"metadata": {},
"outputs": [],
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 448,
"execution_count": 2,
"id": "76317b02",
"metadata": {},
"outputs": [],
......@@ -79,7 +79,7 @@
},
{
"cell_type": "code",
"execution_count": 449,
"execution_count": 3,
"id": "be1ff8a1",
"metadata": {},
"outputs": [],
......@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 440,
"execution_count": 4,
"id": "8483903e",
"metadata": {},
"outputs": [],
......@@ -181,15 +181,20 @@
},
{
"cell_type": "code",
"execution_count": 401,
"execution_count": 6,
"id": "a43f3f1c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.444949904726781\n"
"ename": "NameError",
"evalue": "name 'original' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_15308/409627503.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhuffman_nb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompress_rate_nb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_15308/409627503.py\u001b[0m in \u001b[0;36mcompress_rate_nb\u001b[0;34m(image, error, encoding)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mo_len\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mc_len\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moriginal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 41\u001b[0m \u001b[0mo_len\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moriginal\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0mc_len\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'original' is not defined"
]
}
],
......@@ -255,17 +260,24 @@
},
{
"cell_type": "code",
"execution_count": 407,
"execution_count": 55,
"id": "207b0bd2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"325380\n"
]
},
{
"data": {
"text/plain": [
"0.44205322265625"
]
},
"execution_count": 407,
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
......@@ -273,10 +285,10 @@
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" \n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
......@@ -324,8 +336,6 @@
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
"\n",
" \n",
" \n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
......@@ -335,14 +345,19 @@
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" \n",
" #new_error = np.ravel(new_error)\n",
" \n",
" bins = [25,40,70]\n",
" \n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary, bins\n",
"\n",
"def compress_rate(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
"\n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
......@@ -370,7 +385,7 @@
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, image, error, diff, boundary = huffman(images[0])\n",
"encode1, encode2, encode3, encode4, encode5, image, error, diff, boundary, bins = huffman(images[0])\n",
"compress_rate(image, error, diff, boundary, encode1, encode2, encode3, encode4, encode5)\n"
]
},
......@@ -384,17 +399,25 @@
},
{
"cell_type": "code",
"execution_count": 415,
"execution_count": 57,
"id": "14075c94",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"325380\n",
"325380\n"
]
},
{
"data": {
"text/plain": [
"0.4432273356119792"
]
},
"execution_count": 415,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
......@@ -405,7 +428,7 @@
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" print(len(diff))\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
......@@ -469,6 +492,7 @@
" \n",
" # return the huffman dictionary\n",
" #return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" print(len(diff))\n",
" return encode1, encode2, encode3, np.ravel(image), error, diff, boundary\n",
"\n",
"#def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
......@@ -800,42 +824,141 @@
},
{
"cell_type": "code",
"execution_count": 496,
"execution_count": 53,
"id": "f4665493",
"metadata": {},
"outputs": [
{
"ename": "IndentationError",
"evalue": "expected an indented block (591304675.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_20810/591304675.py\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m def encoder():\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n"
]
}
],
"outputs": [],
"source": [
"def encoder(list_dic,diff):\n",
" encoded = new_error.reshape((512,640)).copy().astype(str).astype(object)\n",
"def encoder(error, list_dic, diff, bound, bins):\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 and j == 0:\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" else:\n",
" #print(encoding_dict[encoded[i][j]])\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]\n",
" #print(encoded[i][j])\n",
" '''if i == 0 and j == 0:\n",
" encoded[i][j] = encoding_dict[encoded[i][j]]'''\n",
" if i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" print(i,j)\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" elif diff[i+1][j+1] <= bins[0]:\n",
" encoded[i][j] = list_dic[1][encoded[i][j]]\n",
" elif diff[i+1][j+1] <= bins[1] and diff[i+1][j+1] > bins[0]:\n",
" encoded[i][j] = list_dic[2][encoded[i][j]]\n",
" elif diff[i+1][j+1] <= bins[2] and diff[i+1][j+1] > bins[1]:\n",
" encoded[i][j] = list_dic[3][encoded[i][j]]\n",
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoding_dict, encoded, new_error.reshape((512,640)), image"
" return encoded"
]
},
{
"cell_type": "code",
"execution_count": 471,
"execution_count": 54,
"id": "1ec5c5e3",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"510 702\n",
"0 0\n",
"0 1\n",
"0 2\n",
"0 3\n",
"0 4\n",
"0 5\n",
"0 6\n",
"0 7\n",
"0 8\n",
"0 9\n",
"0 10\n",
"0 11\n",
"0 12\n",
"0 13\n",
"0 14\n",
"0 15\n",
"0 16\n",
"0 17\n",
"0 18\n",
"0 19\n",
"0 20\n",
"0 21\n",
"0 22\n",
"0 23\n",
"0 24\n",
"0 25\n",
"0 26\n",
"0 27\n",
"0 28\n",
"0 29\n",
"0 30\n",
"0 31\n",
"0 32\n",
"0 33\n",
"0 34\n",
"0 35\n",
"0 36\n",
"0 37\n",
"0 38\n",
"0 39\n",
"0 40\n",
"0 41\n",
"0 42\n",
"0 43\n",
"0 44\n",
"0 45\n",
"0 46\n",
"0 47\n",
"0 48\n",
"0 49\n",
"0 50\n",
"0 51\n",
"0 52\n",
"0 53\n",
"0 54\n",
"0 55\n",
"0 56\n",
"0 57\n",
"0 58\n",
"0 59\n",
"0 60\n",
"0 61\n",
"0 62\n",
"0 63\n",
"0 64\n",
"0 65\n",
"0 66\n",
"0 67\n",
"0 68\n",
"0 69\n",
"0 70\n",
"0 71\n",
"0 72\n",
"0 73\n",
"0 74\n",
"0 75\n"
]
},
{
"ename": "KeyError",
"evalue": "'-36'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_29322/362411884.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mencode1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhuffman\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mencoded_matrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mencoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m510\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m638\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mencode1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_29322/153815811.py\u001b[0m in \u001b[0;36mencoder\u001b[0;34m(error, list_dic, diff, bound, bins)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0merror_matrix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0merror_matrix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mencoded\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist_dic\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mencoded\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mencoded\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist_dic\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mencoded\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: '-36'"
]
}
],
"source": [
"encoding_dict, encoded_matrix, error, orig_image = enc_experiment(images[0], plot=False)"
"encode1, encode2, encode3, encode4, encode5, image, error, diff, bound, bins = huffman(images[0])\n",
"encoded_matrix = encoder(np.reshape(error,(510,638)), [encode1, encode2, encode3, encode4, encode5], diff, bound, bins)\n",
"\n"
]
},
{
......
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "14f74f21",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"from itertools import product\n",
"import os\n",
"import sys\n",
"from PIL import Image\n",
"from scipy.optimize import minimize,linprog\n",
"import time\n",
"import seaborn as sns\n",
"from sklearn.neighbors import KernelDensity\n",
"import pandas as pd\n",
"from collections import Counter\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c16af61f",
"metadata": {},
"outputs": [],
"source": [
"def file_extractor(dirname=\"images\"):\n",
" files = os.listdir(dirname)\n",
" scenes = []\n",
" for file in files:\n",
" scenes.append(os.path.join(dirname, file))\n",
" return scenes\n",
"\n",
"def image_extractor(scenes):\n",
" image_folder = []\n",
" for scene in scenes:\n",
" files = os.listdir(scene)\n",
" for file in files:\n",
" image_folder.append(os.path.join(scene, file))\n",
" images = []\n",
" for folder in image_folder:\n",
" ims = os.listdir(folder)\n",
" for im in ims:\n",
" if im[-4:] == \".jp4\" or im[-7:] == \"_6.tiff\":\n",
" continue\n",
" else:\n",
" images.append(os.path.join(folder, im))\n",
" return images #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
"\n",
"def im_distribution(images, num):\n",
" \"\"\"\n",
" Function that extracts tiff files from specific cameras and returns a list of all\n",
" the tiff files corresponding to that camera. i.e. all pictures labeled \"_7.tiff\" or otherwise\n",
" specified camera numbers.\n",
" \n",
" Parameters:\n",
" images (list): list of all tiff files, regardless of classification. This is NOT a list of directories but\n",
" of specific tiff files that can be opened right away. This is the list that we iterate through and \n",
" divide.\n",
" \n",
" num (str): a string designation for the camera number that we want to extract i.e. \"14\" for double digits\n",
" of \"_1\" for single digits.\n",
" \n",
" Returns:\n",
" tiff (list): A list of tiff files that have the specified designation from num. They are the files extracted\n",
" from the 'images' list that correspond to the given num.\n",
" \"\"\"\n",
" tiff = []\n",
" for im in images:\n",
" if im[-7:-5] == num:\n",
" tiff.append(im)\n",
" return tiff"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "aceba613",
"metadata": {},
"outputs": [],
"source": [
"def plot_hist(tiff_list):\n",
" \"\"\"\n",
" This function is the leftovers from the first attempt to plot histograms.\n",
" As it stands it needs some work in order to function again. We will\n",
" fix this later. 1/25/22\n",
" \"\"\"\n",
" \n",
" image = tiff_list\n",
" image = Image.open(image) #Open the image and read it as an Image object\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation\n",
" z0 = image[0:-2,0:-2] # get all the first pixel for the entire image\n",
" z1 = image[0:-2,1:-1] # get all the second pixel for the entire image\n",
" z2 = image[0:-2,2::] # get all the third pixel for the entire image\n",
" z3 = image[1:-1,0:-2] # get all the forth pixel for the entire image\n",
" # calculate the out put of the system of equation\n",
" y0 = np.ravel(-z0+z2-z3)\n",
" y1 = np.ravel(z0+z1+z2)\n",
" y2 = np.ravel(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" # use numpy solver to solve the system of equations all at once\n",
" predict = np.floor(np.linalg.solve(A,y)[-1])\n",
" # flatten the neighbor pixlels and stack them together\n",
" z0 = np.ravel(z0)\n",
" z1 = np.ravel(z1)\n",
" z2 = np.ravel(z2)\n",
" z3 = np.ravel(z3)\n",
" neighbor = np.vstack((z0,z1,z2,z3)).T\n",
" # calculate the difference\n",
" diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" \n",
" # flatten the image to a vector\n",
" image = np.ravel(image[1:-1,1:-1])\n",
" error = image-predict\n",
" \n",
" return image, predict, diff, error, A"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6b965751",
"metadata": {},
"outputs": [],
"source": [
"class NodeTree(object):\n",
" def __init__(self, left=None, right=None):\n",
" self.left = left\n",
" self.right = right\n",
"\n",
" def children(self):\n",
" return self.left, self.right\n",
"\n",
" def __str__(self):\n",
" return self.left, self.right\n",
"\n",
"\n",
"def huffman_code_tree(node, binString=''):\n",
" '''\n",
" Function to find Huffman Code\n",
" '''\n",
" if type(node) is str:\n",
" return {node: binString}\n",
" (l, r) = node.children()\n",
" d = dict()\n",
" d.update(huffman_code_tree(l, binString + '0'))\n",
" d.update(huffman_code_tree(r, binString + '1'))\n",
" return d\n",
"\n",
"\n",
"def make_tree(nodes):\n",
" '''\n",
" Function to make tree\n",
" :param nodes: Nodes\n",
" :return: Root of the tree\n",
" '''\n",
" while len(nodes) > 1:\n",
" (key1, c1) = nodes[-1]\n",
" (key2, c2) = nodes[-2]\n",
" nodes = nodes[:-2]\n",
" node = NodeTree(key1, key2)\n",
" nodes.append((node, c1 + c2))\n",
" nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
" return nodes[0][0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b7561883",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" \n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 25\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 25\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 40\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" mask = diff > 40\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 70\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 70\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
"\n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" \n",
" #new_error = np.ravel(new_error)\n",
" \n",
" bins = [25,40,70]\n",
" \n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, new_error, diff, boundary, bins\n",
" \n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, image, error, new_error, diff, boundary, bins = huffman(images[0])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2eb774d2",
"metadata": {},
"outputs": [],
"source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[0]:\n",
" encoded[i][j] = list_dic[1][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:\n",
" encoded[i][j] = list_dic[2][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:\n",
" encoded[i][j] = list_dic[3][encoded[i][j]]\n",
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8eeb40d0",
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_matrix, encoding_dict):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
" the_keys = list(encode_dict.keys())\n",
" the_values = list(encode_dict.values())\n",
" error_matrix = encoded_matrix.copy()\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" \"\"\"z0, z1, z2, z3 = error_matrix[i-1][j-1], error_matrix[i-1][j], \\\n",
" error_matrix[i-1][j+1], error_matrix[i][j-1]\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\"\"\"\n",
" \n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3e0e9742",
"metadata": {},
"outputs": [],
"source": [
"encode1, encode2, encode3, encode4, encode5, image, error, new_error, diff, bound, bins = huffman(images[0])\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), [encode1, encode2, encode3, encode4, encode5], diff, bound, bins)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e6ea4f99",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['01100010001' '11000110' '100101010' ... '101110011' '00010100'\n",
" '1111000100']\n",
" ['10011100' '100001' '111000' ... '10111011' '00111' '1111001101']\n",
" ['10101111' '100100' '100000' ... '111100' '111000' '00010100']\n",
" ...\n",
" ['110001000' '100001' '111011' ... '1010010' '100000' '10011000']\n",
" ['0100011101' '111010' '00110' ... '1000101' '1100100' '10011010']\n",
" ['00100010' '110111101' '110110100' ... '00010010' '10100000'\n",
" '110110101']]\n"
]
}
],
"source": [
"print(encoded_matrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c07a23e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}