Commit cd29faff authored by Kelly Chang

kelly changes

parent 3a858f29
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 47,
"id": "8868bc30",
"metadata": {},
"outputs": [],
......@@ -24,8 +24,8 @@
},
{
"cell_type": "code",
"execution_count": 2,
"id": "76317b02",
"execution_count": 48,
"id": "0f944705",
"metadata": {},
"outputs": [],
"source": [
......@@ -33,7 +33,10 @@
" files = os.listdir(dirname)\n",
" scenes = []\n",
" for file in files:\n",
" scenes.append(os.path.join(dirname, file))\n",
" if file == '.DS_Store':\n",
" continue\n",
" else:\n",
" scenes.append(os.path.join(dirname, file))\n",
" return scenes\n",
"\n",
"def image_extractor(scenes):\n",
......@@ -41,7 +44,12 @@
" for scene in scenes:\n",
" files = os.listdir(scene)\n",
" for file in files:\n",
" image_folder.append(os.path.join(scene, file))\n",
" #if file[-4:] == \".jp4\" or file[-7:] == \"_6.tiff\":\n",
" if file[-5:] != \".tiff\" or file[-7:] == \"_6.tiff\":\n",
" continue\n",
" else:\n",
" image_folder.append(os.path.join(scene, file))\n",
" '''print(image_folder)\n",
" images = []\n",
" for folder in image_folder:\n",
" ims = os.listdir(folder)\n",
......@@ -49,8 +57,8 @@
" if im[-4:] == \".jp4\" or im[-7:] == \"_6.tiff\":\n",
" continue\n",
" else:\n",
" images.append(os.path.join(folder, im))\n",
" return images #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
" images.append(os.path.join(folder, im))'''\n",
" return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
"\n",
"def im_distribution(images, num):\n",
" \"\"\"\n",
......@@ -79,8 +87,8 @@
},
{
"cell_type": "code",
"execution_count": 3,
"id": "be1ff8a1",
"execution_count": 49,
"id": "b18d5e38",
"metadata": {},
"outputs": [],
"source": [
......@@ -126,8 +134,8 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8483903e",
"execution_count": 50,
"id": "35d4f6a0",
"metadata": {},
"outputs": [],
"source": [
......@@ -182,13 +190,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a43f3f1c",
"execution_count": 51,
"id": "c50169ed",
"metadata": {},
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"def huffman_nb(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
......@@ -222,195 +228,32 @@
" \n",
" \n",
"def compress_rate_nb(image, error, encoding):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" original = image.reshape(-1)\n",
" error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" c_len += len(encoding[str(int(error[i]))])\n",
"\n",
" \n",
" return c_len/o_len\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "eac2f456",
"metadata": {},
"source": [
"### Huffman with dividing into non-uniform bins"
]
},
{
"cell_type": "markdown",
"id": "3a3f06a5",
"id": "e34201fd",
"metadata": {},
"source": [
"### Huffman with dividing into uniform bins"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "14075c94",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"325380\n",
"325380\n"
]
},
{
"data": {
"text/plain": [
"0.4432273356119792"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def huffman_u(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" print(len(diff))\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 100\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 100\n",
" #new_error = error[mask]\n",
" #mask2 = diff[mask] <= 200\n",
" #string = [str(i) for i in new_error[mask2].astype(int)]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" '''mask = diff > 200\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 300\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 300\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)'''\n",
"\n",
" \n",
" \n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" new_error = np.ravel(new_error)\n",
" \n",
" # return the huffman dictionary\n",
" #return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" print(len(diff))\n",
" return encode1, encode2, encode3, np.ravel(image), error, diff, boundary\n",
"\n",
"#def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
"def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 100:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
" \n",
" if diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
"\n",
" '''if diff[i] <= 200 and diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])'''\n",
" \n",
" '''if diff[i] <= 300 and diff[i] > 200:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" \n",
" if diff[i] > 300:\n",
" c_len += len(encode5[str(int(error[i]))])'''\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, image, error, diff, boundary = huffman_u(images[0])\n",
"compress_rate_u(image, error, diff, boundary, encode1, encode2, encode3)\n"
"### Huffman dividing into bins"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "207b0bd2",
"execution_count": 52,
"id": "205c4731",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"325380\n"
]
},
{
"data": {
"text/plain": [
"0.44205322265625"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
......@@ -479,13 +322,11 @@
" #new_error = np.ravel(new_error)\n",
" \n",
" bins = [25,40,70]\n",
" \n",
" list_dic = [encode1, encode2, encode3, encode4, encode5]\n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, new_error, diff, boundary, bins, predict\n",
" \n",
"\n",
" return list_dic, np.ravel(image), error, new_error, diff, boundary, bins, predict\n",
"\n",
"def compress_rate(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
"def compress_rate(image, error, diff, bound, list_dic, bins):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
"\n",
......@@ -494,64 +335,53 @@
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
"\n",
" diff = diff.reshape(-1)\n",
" \n",
" # calculate the bit for boundary\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" \n",
" for i in range(0,len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 25:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
"\n",
" if diff[i] <= 40 and diff[i] > 25:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
" \n",
" if diff[i] <= 70 and diff[i] > 40:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
" if diff[i] > 70:\n",
" c_len += len(encode5[str(int(error[i]))])\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, image, error, diff, boundary, bins = huffman(images[0])\n",
"compress_rate(image, error, diff, boundary, encode1, encode2, encode3, encode4, encode5)\n"
" elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" else: \n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
"\n",
"\n",
" return c_len/o_len\n"
]
},
{
"cell_type": "markdown",
"id": "816764c9",
"id": "2e84c206",
"metadata": {},
"source": [
"## Huffman Divide into 6 bins"
"### Huffman dividing into uniform bins"
]
},
{
"cell_type": "code",
"execution_count": 430,
"id": "15eecad3",
"execution_count": 64,
"id": "18e44483",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.4421759033203125"
]
},
"execution_count": 430,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def huffman6(image):\n",
"def huffman_u(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" bins = np.linspace(min(diff),max(diff),5)[1:-1]\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
......@@ -564,7 +394,7 @@
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 5\n",
" mask = diff <= bins[0]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
......@@ -572,19 +402,20 @@
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 5\n",
" mask = diff > bins[0]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 15\n",
" mask2 = diff[mask] <= bins[1]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" #string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" mask = diff > 15\n",
" mask = diff > bins[1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 30\n",
" mask2 = diff[mask] <= bins[2]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
......@@ -592,22 +423,13 @@
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 30\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 50\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" mask = diff > bins[2]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 50\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode6 = huffman_code_tree(node)\n",
"\n",
" \n",
" \n",
......@@ -623,9 +445,11 @@
" new_error = np.ravel(new_error)\n",
" \n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, encode6, np.ravel(image), error, diff, boundary\n",
" #return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" return [encode1, encode2, encode3, encode4, encode5], np.ravel(image), error, diff, boundary, bins\n",
"\n",
"def compress_rate6(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5, encode6):\n",
"#def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
"def compress_rate_u(image, error, diff, bound, list_dic, bins):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
......@@ -636,118 +460,174 @@
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 5:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
"\n",
" if diff[i] <= 15 and diff[i] > 5:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
" \n",
" if diff[i] <= 30 and diff[i] > 15:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
" if diff[i] <= 50 and diff[i] > 30:\n",
" c_len += len(encode5[str(int(error[i]))])\n",
" if diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
" \n",
" if diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" \n",
" if diff[i] > 50:\n",
" c_len += len(encode6[str(int(error[i]))])\n",
" if diff[i] > bins[2]:\n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, encode6, image, error, diff, boundary = huffman(images[0])\n",
"compress_rate(image, error, diff, boundary, encode1, encode2, encode3, encode4, encode5, encode6)\n"
" return c_len/o_len\n"
]
},
{
"cell_type": "code",
"execution_count": 431,
"id": "f8a8c717",
"execution_count": null,
"id": "e1ce9912",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compression rate of huffman with different bins: 0.448723882039388\n"
]
}
],
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"num_images = im_distribution(images, \"_9\")\n",
"rate = []\n",
"\n",
"for i in range(len(num_images)):\n",
" encode1, encode2, encode3, encode4, encode5, encode6, image, error, diff, bound = huffman6(num_images[i])\n",
" r = compress_rate6(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5, encode6)\n",
"rate = []\n",
"rate_nb = []\n",
"rate_u = []\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, bins, predict = huffman(images[i])\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins)\n",
" rate.append(r)\n",
" \n",
" encoding, error, image = huffman_nb(images[i])\n",
" r = compress_rate_nb(image, error, encoding)\n",
" rate_nb.append(r)\n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n"
" list_dic, image, error, diff, bound, bins = huffman_u(images[i])\n",
" r = compress_rate_u(image, error, diff, bound, list_dic, bins)\n",
" rate_u.append(r)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6abed5da",
"execution_count": 63,
"id": "c71591f2",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'file_extractor' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_3263/2742763429.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnum_images\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfile_extractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'im'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mrate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mrate_nb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mrate_u\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'file_extractor' is not defined"
"name": "stdout",
"output_type": "stream",
"text": [
"Compression rate of huffman with different bins: 0.40459069242931545\n",
"Compression rate of huffman with uniform bins: 0.40775704713851685\n",
"Compression rate of huffman without bins: 0.410545890687004\n"
]
}
],
"source": [
"num_images = file_extractor('im')\n",
"\n",
"rate = []\n",
"rate_nb = []\n",
"rate_u = []\n",
"for i in range(len(num_images)):\n",
" encode1, encode2, encode3, encode4, encode5, image, error, diff, bound = huffman(num_images[i])\n",
" r = compress_rate(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5)\n",
" rate.append(r)\n",
" encoding, error, image = huffman_nb(num_images[i])\n",
" r = compress_rate_nb(image, error, encoding)\n",
" rate_nb.append(r)\n",
" encode1, encode2, encode3, image, error, diff, bound = huffman_u(num_images[i])\n",
" r = compress_rate_u(image, error, diff, bound, encode1, encode2, encode3)\n",
" rate_u.append(r)\n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"Compression rate of huffman without bins: {np.mean(rate_nb)}\")\n",
"print(f\"Compression rate of huffman with uniform bins: {np.mean(rate_u)}\")"
"print(f\"Compression rate of huffman with uniform bins: {np.mean(rate_u)}\")\n",
"print(f\"Compression rate of huffman without bins: {np.mean(rate_nb)}\")"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "992dd8bb",
"execution_count": null,
"id": "9200fa53",
"metadata": {},
"outputs": [],
"source": [
"origin, predict, diff, error, A = plot_hist(images[0])"
"def encoder(error, list_dic, diff, bound, bins):\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[0]:\n",
" encoded[i][j] = list_dic[1][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:\n",
" encoded[i][j] = list_dic[2][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:\n",
" encoded[i][j] = list_dic[3][encoded[i][j]]\n",
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded\n",
"\n",
"def decoder(A, encoded_matrix, list_dic, bins):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
"\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" error_matrix = np.zeros((512,640))\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
" \n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" z0 = error_matrix[i-1][j-1]\n",
" z1 = error_matrix[i-1][j]\n",
" z2 = error_matrix[i-1][j+1]\n",
" z3 = error_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
"\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" error_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" error_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
" else:\n",
" error_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
" \n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9200fa53",
"id": "96a73eba",
"metadata": {},
"outputs": [],
"source": []
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"bins = [25,40,70]\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"list_dic, image, error, new_error, diff, bound, predict = huffman(images[0], bins)\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
"print(np.allclose(image.reshape(512,640), reconstruct_image))"
]
}
],
"metadata": {
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "8868bc30",
"metadata": {},
"outputs": [],
......@@ -19,12 +19,13 @@
"from sklearn.neighbors import KernelDensity\n",
"import pandas as pd\n",
"from collections import Counter\n",
"import time"
"import time\n",
"import numpy.linalg as la"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "0f944705",
"metadata": {},
"outputs": [],
......@@ -87,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "b18d5e38",
"metadata": {},
"outputs": [],
......@@ -126,15 +127,75 @@
" diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" \n",
" # flatten the image to a vector\n",
" image = np.ravel(image[1:-1,1:-1])\n",
" #image = np.ravel(image[1:-1,1:-1])\n",
" image = np.ravel(image)\n",
" error = image-predict\n",
" \n",
" return image, predict, diff, error, A"
" return image, predict, diff, error, A\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b0c3eaa",
"metadata": {},
"outputs": [],
"source": [
"def plot_hist_lstsq(tiff_list):\n",
"\n",
" image = tiff_list\n",
" image = Image.open(image) #Open the image and read it as an Image object\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation\n",
" z0 = image[0:-2,0:-2] # get all the first pixel for the entire image\n",
" z1 = image[0:-2,1:-1] # get all the second pixel for the entire image\n",
" z2 = image[0:-2,2::] # get all the third pixel for the entire image\n",
" z3 = image[1:-1,0:-2] # get all the forth pixel for the entire image\n",
" # calculate the out put of the system of equation\n",
" y0 = np.ravel(-z0+z2-z3)\n",
" y1 = np.ravel(z0+z1+z2)\n",
" y2 = np.ravel(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" # use numpy solver to solve the system of equations all at once\n",
" #predict = np.floor(np.linalg.solve(A,y)[-1])\n",
" predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))\n",
" \n",
" points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])\n",
" #fit = la.solve(A,y)\n",
" \n",
" #mse_start = (points@fit).T\n",
" \n",
" \n",
" # flatten the neighbor pixels and stack them together\n",
" z0 = np.ravel(z0)\n",
" z1 = np.ravel(z1)\n",
" z2 = np.ravel(z2)\n",
" z3 = np.ravel(z3)\n",
" neighbor = np.vstack((z0,z1,z2,z3)).T\n",
" \n",
" f, res, rank, s = la.lstsq(points, neighbor.T, rcond=None) \n",
" \n",
" #mse_finish = (neighbor-mse_start)**2\n",
" #lstsqur = np.sum(mse_finish, axis=1) / 4\n",
" \n",
" # calculate the difference\n",
" diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" \n",
" # flatten the image to a vector\n",
" #image = np.ravel(image[1:-1,1:-1])\n",
" error = np.ravel(image[1:-1,1:-1])-predict\n",
" \n",
" return image, predict, res, error, A, diff\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"i, p, l, e, A, d = plot_hist_lstsq(images[0])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "35d4f6a0",
"metadata": {},
"outputs": [],
......@@ -190,7 +251,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "c50169ed",
"metadata": {},
"outputs": [],
......@@ -250,13 +311,25 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "205c4731",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
"def huffman(image, res = False):\n",
" if res:\n",
" image, predict, res, error, A, diff = plot_hist_lstsq(images[0])\n",
" il = l.astype(int)\n",
" bins = []\n",
" num_bins = 5\n",
" data_points_per_bin = len(il) // num_bins #l is the list of data that you want to create bins for\n",
"\n",
" sorted_l = il.copy()\n",
" sorted_l.sort()\n",
"\n",
" bins = [sorted_l[_ * data_points_per_bin: (_+1)*data_points_per_bin] for _ in range(num_bins)]\n",
" else:\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
" \n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
......@@ -365,7 +438,7 @@
"id": "2e84c206",
"metadata": {},
"source": [
"### Huffman dividing into bins"
"### Huffman dividing into uniform bins"
]
},
{
......@@ -380,7 +453,8 @@
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
"\n",
" \n",
" bins = np.linspace(min(diff),max(diff),5)[1:-1]\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
......@@ -393,7 +467,7 @@
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= 100\n",
" mask = diff <= bins[0]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
......@@ -401,20 +475,20 @@
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > 100\n",
" #new_error = error[mask]\n",
" #mask2 = diff[mask] <= 200\n",
" #string = [str(i) for i in new_error[mask2].astype(int)]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" mask = diff > bins[0]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[1]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" #string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" '''mask = diff > 200\n",
" mask = diff > bins[1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 300\n",
" mask2 = diff[mask] <= bins[2]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
......@@ -422,12 +496,13 @@
" encode4 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff > 300\n",
" mask = diff > bins[2]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)'''\n",
" encode5 = huffman_code_tree(node)\n",
" \n",
"\n",
" \n",
" \n",
......@@ -444,10 +519,10 @@
" \n",
" # return the huffman dictionary\n",
" #return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, diff, boundary\n",
" return encode1, encode2, encode3, np.ravel(image), error, diff, boundary\n",
" return [encode1, encode2, encode3, encode4, encode5], np.ravel(image), error, diff, boundary, bins\n",
"\n",
"#def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3, encode4, encode5):\n",
"def compress_rate_u(image, error, diff, bound, encode1, encode2, encode3):\n",
"def compress_rate_u(image, error, diff, bound, list_dic, bins):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
" o_len = 0\n",
......@@ -458,47 +533,31 @@
"\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(encode1[str(bound[i])])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" \n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= 100:\n",
" c_len += len(encode2[str(int(error[i]))])\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
" if diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])\n",
"\n",
" '''if diff[i] <= 200 and diff[i] > 100:\n",
" c_len += len(encode3[str(int(error[i]))])'''\n",
" if diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
" \n",
" '''if diff[i] <= 300 and diff[i] > 200:\n",
" c_len += len(encode4[str(int(error[i]))])\n",
" if diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" \n",
" if diff[i] > 300:\n",
" c_len += len(encode5[str(int(error[i]))])'''\n",
" if diff[i] > bins[2]:\n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
" \n",
" return c_len/o_len\n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, image, error, diff, boundary = huffman_u(images[0])\n",
"compress_rate_u(image, error, diff, boundary, encode1, encode2, encode3)\n"
" return c_len/o_len\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "e1ce9912",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compression rate of huffman with different bins: 0.40459069242931545\n",
"Compression rate of huffman without bins: 0.410545890687004\n"
]
}
],
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
......@@ -512,29 +571,40 @@
" rate.append(r)\n",
" \n",
" encoding, error, image = huffman_nb(images[i])\n",
" \n",
" r = compress_rate_nb(image, error, encoding)\n",
" rate_nb.append(r)\n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"Compression rate of huffman without bins: {np.mean(rate_nb)}\")\n"
" list_dic, image, error, diff, bound, bins = huffman_u(images[i])\n",
" r = compress_rate_u(image, error, diff, bound, list_dic, bins)\n",
" rate_u.append(r)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a9a31f6",
"id": "c71591f2",
"metadata": {},
"outputs": [],
"source": []
"source": [
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"Compression rate of huffman with uniform bins: {np.mean(rate_u)}\")\n",
"print(f\"Compression rate of huffman without bins: {np.mean(rate_nb)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c71591f2",
"id": "d3e6d819",
"metadata": {},
"outputs": [],
"source": []
"source": [
"mask = np.array(rate) < np.array(rate_u)\n",
"print(sum(mask)/len(mask))\n",
"\n",
"mask = np.array(rate) < np.array(rate_nb)\n",
"print(sum(mask)/len(mask))"
]
},
{
"cell_type": "code",
......@@ -542,6 +612,416 @@
"id": "9200fa53",
"metadata": {},
"outputs": [],
"source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[0]:\n",
" encoded[i][j] = list_dic[1][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:\n",
" encoded[i][j] = list_dic[2][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:\n",
" encoded[i][j] = list_dic[3][encoded[i][j]]\n",
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded\n",
"\n",
"def decoder(A, encoded_matrix, list_dic, bins):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
"\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" error_matrix = np.zeros((512,640))\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
" \n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" z0 = error_matrix[i-1][j-1]\n",
" z1 = error_matrix[i-1][j]\n",
" z2 = error_matrix[i-1][j+1]\n",
" z3 = error_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
"\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" error_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" error_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
" else:\n",
" error_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
" \n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23a39f8b",
"metadata": {},
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"bins = [25,40,70]\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"list_dic, image, error, new_error, diff, bound, predict = huffman(images[0], bins)\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
"print(np.allclose(image.reshape(512,640), reconstruct_image))"
]
},
{
"cell_type": "markdown",
"id": "d0e4be69",
"metadata": {},
"source": [
"## use res"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "bd7e39d7",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image, num_bins, use_res = False):\n",
" if use_res:\n",
" image, predict, res, error, A, diff = plot_hist_lstsq(image)\n",
" il = res.astype(int)\n",
" data_points_per_bin = len(il) // num_bins #l is the list of data that you want to create bins for\n",
"\n",
" sorted_l = il.copy()\n",
" sorted_l.sort()\n",
" bins = [sorted_l[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
"\n",
" list_dic = [encode]\n",
" n = len(bins)\n",
" \n",
" for i in range (0,n):\n",
" if i == 0 :\n",
" mask = res <= bins[i]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" else:\n",
" mask = res > bins[i-1]\n",
" new_error = error[mask]\n",
" mask2 = res[mask] <= bins[i]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" mask = res > bins[-1]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" else:\n",
" image, predict, res, error, A, diff = plot_hist_lstsq(image)\n",
" data_points_per_bin = len(diff) // num_bins #l is the list of data that you want to create bins for\n",
"\n",
" sorted_l = diff.copy()\n",
" sorted_l.sort()\n",
" bins = [sorted_l[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" \n",
" list_dic = [encode]\n",
" n = len(bins)\n",
" for i in range (0,n):\n",
" if i == 0 :\n",
" mask = diff <= bins[i]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" else:\n",
" mask = diff > bins[i-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[i]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" \n",
" mask = diff > bins[-1]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" # return the huffman dictionary\n",
" return list_dic, np.ravel(image), error, new_error, diff, boundary, predict, bins, res\n",
" \n",
"\n",
"\n",
"def compress_rate(image, error, diff, bound, list_dic, bins, res, use_res = False):\n",
" \n",
" o_len = 0\n",
" c_len = 0\n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
" diff = diff.reshape(-1)\n",
" \n",
" # calculate the bit for boundary\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" if use_res:\n",
" for i in range(0,len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if res[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
"\n",
" elif res[i] <= bins[1] and res[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
"\n",
" elif res[i] <= bins[2] and res[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
"\n",
" elif res[i] <= bins[3] and res[i] > bins[2]:\n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
" \n",
" elif res[i] <= bins[4] and res[i] > bins[3]:\n",
" c_len += len(list_dic[5][str(int(error[i]))])\n",
" \n",
" else: \n",
" c_len += len(list_dic[6][str(int(error[i]))])\n",
" \n",
" else:\n",
" for i in range(0, len(original)):\n",
" o_len += len(bin(original[i])[2:])\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
"\n",
" elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" else: \n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
" ''' \n",
" elif diff[i] <= bins[3] and diff[i] > bins[2]:\n",
" c_len += len(list_dic[4][str(int(error[i]))])'''\n",
" \n",
" \n",
" '''elif diff[i] <= bins[4] and diff[i] > bins[3]:\n",
" c_len += len(list_dic[5][str(int(error[i]))])\n",
" \n",
" else: \n",
" c_len += len(list_dic[6][str(int(error[i]))])'''\n",
" \n",
"\n",
"\n",
" return c_len/o_len\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b2e63e2",
"metadata": {},
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"rate = []\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[i], 6, True)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins, res, use_res = True)\n",
" rate.append(r)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2eaf807",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Compression rate of huffman with different bins in res: {np.mean(rate)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "faece884",
"metadata": {},
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"rate = []\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"start = time.time()\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[i], 6, False)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins, res, False)\n",
" rate.append(r)\n",
"end = time.time()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7ca02b7",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"time: {end-start}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6cf44d7c",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[i], 5, False)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins, res, False)\n",
" rate.append(r)\n",
"end = time.time()\n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"time: {end-start}\")"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "d4051c85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compression rate of huffman with different bins: 0.4045967177402825\n",
"time: 109.86948299407959\n"
]
}
],
"source": [
"start = time.time()\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[i], 4, False)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins, res, False)\n",
" rate.append(r)\n",
"end = time.time()\n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n",
"print(f\"time: {end-start}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46f62de1",
"metadata": {},
"outputs": [],
"source": [
"# [15,25,45,75] 0.40429\n",
"# [15,25,35,45,55,65,75] 0.404036\n",
"# [25,40,70] 0.40459\n",
"# [50, 100, 150] 0.40698\n",
"# [30, 50, 100] 0.40497\n",
"# [5,15,25,35,45,55,65,75] 0.4040099826388888\n",
"# [5,15,30,45,75] 0.4043133622426835\n",
"# res with four dividing 0.4080680784195189\n",
"# diff with four dividing 0.40421\n",
"# diff with 3 bins 0.40459"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3e3d266",
"metadata": {},
"outputs": [],
"source": []
}
],
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 54,
"id": "14f74f21",
"metadata": {},
"outputs": [],
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 55,
"id": "c16af61f",
"metadata": {},
"outputs": [],
......@@ -49,15 +49,6 @@
" continue\n",
" else:\n",
" image_folder.append(os.path.join(scene, file))\n",
" '''print(image_folder)\n",
" images = []\n",
" for folder in image_folder:\n",
" ims = os.listdir(folder)\n",
" for im in ims:\n",
" if im[-4:] == \".jp4\" or im[-7:] == \"_6.tiff\":\n",
" continue\n",
" else:\n",
" images.append(os.path.join(folder, im))'''\n",
" return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
"\n",
"def im_distribution(images, num):\n",
......@@ -87,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 56,
"id": "aceba613",
"metadata": {},
"outputs": [],
......@@ -124,17 +115,15 @@
" neighbor = np.vstack((z0,z1,z2,z3)).T\n",
" # calculate the difference\n",
" diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" \n",
" # flatten the image to a vector\n",
" image = np.ravel(image[1:-1,1:-1])\n",
" error = image-predict\n",
" # calculate the error\n",
" error = np.ravel(image[1:-1,1:-1])-predict\n",
" \n",
" return image, predict, diff, error, A"
]
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 57,
"id": "6b965751",
"metadata": {},
"outputs": [],
......@@ -182,65 +171,78 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 58,
"id": "b7561883",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image, bins):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
"def huffman(image, num_bins):\n",
" # get the prediction error and difference\n",
" image, predict, diff, error, A = plot_hist(image)\n",
" \n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" # get the number of points in each bins\n",
" data_points_per_bin = len(diff) // num_bins\n",
" \n",
" # sort the difference and create the bins\n",
" sorted_diff = diff.copy()\n",
" sorted_diff.sort()\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" \n",
" # get the boundary \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" \n",
" # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" \n",
" \n",
" mask = diff <= bins[0]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" \n",
" mask = diff > bins[0]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[1]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" \n",
"\n",
" mask = diff > bins[1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[2]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" # huffman encode the boundary\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" encode = huffman_code_tree(node)\n",
" \n",
" # create a list of huffman table\n",
" list_dic = [encode]\n",
" n = len(bins)\n",
" \n",
" mask = diff > bins[2]\n",
" # loop through different bins\n",
" for i in range (0,n):\n",
" # the fisrt bin\n",
" if i == 0 :\n",
" # get the point within the bin and huffman encode\n",
" mask = diff <= bins[i]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" \n",
" # the middle bins\n",
" else:\n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[i-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[i]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" \n",
" # the last bin \n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[-1]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
"\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
......@@ -250,28 +252,23 @@
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" \n",
" #new_error = np.ravel(new_error)\n",
" \n",
" \n",
" list_dic = [encode1, encode2, encode3, encode4, encode5]\n",
" # return the huffman dictionary\n",
" return list_dic, np.ravel(image), error, new_error, diff, boundary, predict\n",
" return list_dic, np.ravel(image), error, new_error, diff, boundary, predict, bins\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": 59,
"id": "2eb774d2",
"metadata": {},
"outputs": [],
"source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
" # copy the error matrix (including the boundary)\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" # loop through all the pixel to encode\n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
......@@ -285,13 +282,12 @@
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded"
]
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": 60,
"id": "8eeb40d0",
"metadata": {},
"outputs": [],
......@@ -301,7 +297,7 @@
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
"\n",
" # change the dictionary back to list\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
......@@ -318,15 +314,18 @@
" the_values4 = list(list_dic[4].values())\n",
" \n",
" error_matrix = np.zeros((512,640))\n",
" \n",
" # loop through all the element in the matrix\n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" # if it's the very first pixel on the image\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
" \n",
" # if it's on the boundary\n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + error_matrix[0][0]\n",
" # if not the boundary\n",
" else:\n",
" # predict the image with the known pixel value\n",
" z0 = error_matrix[i-1][j-1]\n",
" z1 = error_matrix[i-1][j]\n",
" z2 = error_matrix[i-1][j+1]\n",
......@@ -337,7 +336,8 @@
" y = np.vstack((y0,y1,y2))\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
"\n",
" \n",
" # add on the difference by searching the dictionary\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
......@@ -353,17 +353,17 @@
},
{
"cell_type": "code",
"execution_count": 114,
"execution_count": 61,
"id": "f959fe93",
"metadata": {},
"outputs": [],
"source": [
"def compress_rate(image, error, diff, bound, list_dic, bins):\n",
" #original = original.reshape(-1)\n",
" #error = error.reshape(-1)\n",
"\n",
" # the bits for the original image\n",
" o_len = 0\n",
" # the bits for the compressed image\n",
" c_len = 0\n",
" # initializing the varible \n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
......@@ -374,9 +374,12 @@
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" \n",
" \n",
" # calculate the bit for the pixels inside the boundary\n",
" for i in range(0,len(original)):\n",
" # for the original image\n",
" o_len += len(bin(original[i])[2:])\n",
" \n",
" # check the difference and find the coresponding huffman table\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
......@@ -385,247 +388,68 @@
" \n",
" elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" else: \n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
"\n",
" else: \n",
" c_len += len(list_dic[5][str(int(error[i]))])\n",
"\n",
" return c_len/o_len"
]
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": null,
"id": "3e0e9742",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_3109/243266254.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mlist_dic\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_error\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhuffman\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mencoded_matrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mencoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_error\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m512\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m640\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist_dic\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdiff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mreconstruct_image\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdecoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoded_matrix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist_dic\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mallclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m512\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m640\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreconstruct_image\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/var/folders/z2/plvrsqjs023g1cmx7k19mhzr0000gn/T/ipykernel_3109/3881165701.py\u001b[0m in \u001b[0;36mdecoder\u001b[0;34m(A, encoded_matrix, list_dic, bins)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mdifference\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mz0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz3\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mz0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mpredict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mround\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mround\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msolve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdifference\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mround_\u001b[0;34m(*args, **kwargs)\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mround_\u001b[0;34m(a, decimals, out)\u001b[0m\n\u001b[1;32m 3635\u001b[0m \u001b[0maround\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mequivalent\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0msee\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdetails\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3636\u001b[0m \"\"\"\n\u001b[0;32m-> 3637\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maround\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecimals\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecimals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3638\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3639\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36maround\u001b[0;34m(*args, **kwargs)\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36maround\u001b[0;34m(a, decimals, out)\u001b[0m\n\u001b[1;32m 3260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3261\u001b[0m \"\"\"\n\u001b[0;32m-> 3262\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_wrapfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'round'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecimals\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecimals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3263\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36m_wrapfunc\u001b[0;34m(obj, method, *args, **kwds)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;31m# A TypeError occurs if the object does have such a method in its\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"bins = [25,40,70]\n",
"#bins = [25,40,70]\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"for image in images:\n",
" list_dic, image, error, new_error, diff, bound, predict = huffman(image, bins)\n",
" encoded_matrix = encoder(np.reshape(new_error,(512,640)), list_dic, diff, bound, bins)\n",
" reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
" print(np.allclose(image.reshape(512,640), reconstruct_image))\n"
"list_dic, image, error, new_error, diff, bound, predict, bins = huffman(images[0], 4)\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
"print(np.allclose(image.reshape(512,640), reconstruct_image))\n",
"print(len(list_dic))"
]
},
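For reuse, the cell above can be wrapped as a predicate. A sketch only, assuming `huffman()`, `encoder()`, and `decoder()` as defined earlier, the eight-value `huffman(path, num_bins)` signature called in this cell, and 512×640 images; `round_trip_ok` is a hypothetical name:

```python
import numpy as np

# Sketch: lossless round-trip check for one image, under the assumptions
# stated above (notebook's huffman/encoder/decoder, 512x640 frames).
def round_trip_ok(path, num_bins=4):
    A = np.array([[3, 0, -1], [0, 3, 3], [1, -3, -4]])
    list_dic, image, error, new_error, diff, bound, predict, bins = huffman(path, num_bins)
    encoded = encoder(np.reshape(new_error, (512, 640)), list_dic, diff, bound, bins)
    decoded = decoder(A, encoded, list_dic, bins)
    return np.allclose(image.reshape(512, 640), decoded)

# e.g. assert all(round_trip_ok(p) for p in images)
```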
{
"cell_type": "code",
"execution_count": 119,
"id": "ceb0b957",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 120,
"id": "60297ad0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": null,
"id": "f0948ab2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1626032871_679543_6.tiff\n",
"1640843156_813147\n",
"1640843156_729797\n",
"1640843156_846487\n",
"1640843156_829817\n",
"1640843156_763137\n",
"1640843156_779807\n",
"1640843156_796477\n",
"1640843156_746467\n",
"1626032832_004943_6.tiff\n",
"1626032832_004943_18.jp4\n",
"1626032832_004943_19.jp4\n",
"1626032832_004943_17.jp4\n",
"1626032832_004943_16.jp4\n",
"1626033289_579773_6.tiff\n",
"1626033496_437803_16.jp4\n",
"1626033496_437803_17.jp4\n",
"1626033496_437803_19.jp4\n",
"1626033496_437803_18.jp4\n",
"1626033496_437803_6.tiff\n",
"1626032610_393963_6.tiff\n",
"1626033067_752083_19.jp4\n",
"1626033067_752083_18.jp4\n",
"1626033067_752083_6.tiff\n",
"1626033067_752083_16.jp4\n",
"1626033067_752083_17.jp4\n",
"1626032738_736293_6.tiff\n",
"Compression rate of huffman with different bins: 0.40459069242931545\n"
]
}
],
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"rate = []\n",
"\n",
"\n",
"rate1 = []\n",
"rate2 = []\n",
"rate3 = []\n",
"bins1 = [25,40,70]\n",
"bins2 = [50,100,150]\n",
"bins3 = [30,50,100]\n",
"B = [bins1, bins2, bins3]\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, bins, predict = huffman(images[i])\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins)\n",
" rate.append(r)\n",
" \n",
" \n",
"print(f\"Compression rate of huffman with different bins: {np.mean(rate)}\")\n"
" for j, bins in enumerate(B):\n",
" list_dic, image, error, new_error, diff, bound, predict = huffman(images[i], bins)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins)\n",
" if j == 0:\n",
" rate1.append(r)\n",
" elif j == 1:\n",
" rate2.append(r)\n",
" else:\n",
" rate3.append(r)\n",
" "
]
},
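The same sweep reads a little more directly with the rates keyed by bin choice instead of the rate1/rate2/rate3 branches. A sketch assuming the seven-value `huffman(image, bins)` signature called in this cell, plus `compress_rate()`, `images`, and `B = [bins1, bins2, bins3]` from above:

```python
import numpy as np

# Sketch: accumulate compression rates per bin choice in one dict,
# under the assumptions stated above.
rates = {tuple(b): [] for b in B}
for path in images:
    for b in B:
        list_dic, image, error, new_error, diff, bound, predict = huffman(path, b)
        rates[tuple(b)].append(compress_rate(image, error, diff, bound, list_dic, b))

for b, rs in rates.items():
    print(f"bins {list(b)}: mean compression rate {np.mean(rs):.4f}")
```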
{
"cell_type": "code",
"execution_count": 27,
"id": "792b7936",
"execution_count": null,
"id": "7d615dcd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.41151123046875, 0.4466471354166667, 0.38627176920572914, 0.41287577311197915, 0.3977305094401042, 0.41076253255208334]\n"
]
}
],
"outputs": [],
"source": [
"print(rate)"
"print(f\"Compression rate of huffman with bins {bins1}: {np.mean(rate1)}\")\n",
"print(f\"Compression rate of huffman with bins {bins2}: {np.mean(rate2)}\")\n",
"print(f\"Compression rate of huffman with bins {bins3}: {np.mean(rate3)}\")\n"
]
},
{
......