Commit 1e1a4ced authored by Kelly Chang

kelly changes

parent 829e9198
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 12,
"id": "14f74f21",
"metadata": {},
"outputs": [],
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 13,
"id": "c16af61f",
"metadata": {},
"outputs": [],
......@@ -33,7 +33,10 @@
" files = os.listdir(dirname)\n",
" scenes = []\n",
" for file in files:\n",
" scenes.append(os.path.join(dirname, file))\n",
" if file == '.DS_Store':\n",
" continue\n",
" else:\n",
" scenes.append(os.path.join(dirname, file))\n",
" return scenes\n",
"\n",
"def image_extractor(scenes):\n",
......@@ -41,16 +44,12 @@
" for scene in scenes:\n",
" files = os.listdir(scene)\n",
" for file in files:\n",
" image_folder.append(os.path.join(scene, file))\n",
" images = []\n",
" for folder in image_folder:\n",
" ims = os.listdir(folder)\n",
" for im in ims:\n",
" if im[-4:] == \".jp4\" or im[-7:] == \"_6.tiff\":\n",
" #if file[-4:] == \".jp4\" or file[-7:] == \"_6.tiff\":\n",
" if file[-5:] != \".tiff\" or file[-7:] == \"_6.tiff\":\n",
" continue\n",
" else:\n",
" images.append(os.path.join(folder, im))\n",
" return images #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
" image_folder.append(os.path.join(scene, file))\n",
" return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n",
"\n",
"def im_distribution(images, num):\n",
" \"\"\"\n",
......@@ -79,23 +78,39 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 14,
"id": "aceba613",
"metadata": {},
"outputs": [],
"source": [
"def plot_hist(tiff_list):\n",
" \"\"\"\n",
" This function is the leftovers from the first attempt to plot histograms.\n",
" As it stands it needs some work in order to function again. We will\n",
" fix this later. 1/25/22\n",
"def predict_pix(tiff_image):\n",
" \"\"\"\n",
" This function predict the pixel values excluding the boundary.\n",
" Using the 4 neighbor pixel values and MSE to predict the next pixel value\n",
" (-1,1) (0,1) (1,1) => relative position of the 4 other given values\n",
" (-1,0) (0,0) => (0,0) is the one we want to predict\n",
" take the derivative of mean square error to solve for the system of equation \n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
" A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n",
" and the predicted pixel value is c.\n",
" \n",
" Input:\n",
" tiff_image (string): path to the tiff file\n",
" \n",
" image = tiff_list\n",
" image = Image.open(image) #Open the image and read it as an Image object\n",
" Return:\n",
" image (512 X 640): original image \n",
" predict (325380,): predicted image exclude the boundary\n",
" diff. (325380,): difference between the min and max of four neighbors exclude the boundary\n",
" error (325380,): difference between the original image and predicted image\n",
" A (3 X 3): system of equation\n",
" \"\"\"\n",
" image = Image.open(tiff_image) #Open the image and read it as an Image object\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" print(image.shape)\n",
" # use \n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation\n",
" # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n",
" z0 = image[0:-2,0:-2] # get all the first pixel for the entire image\n",
" z1 = image[0:-2,1:-1] # get all the second pixel for the entire image\n",
" z2 = image[0:-2,2::] # get all the third pixel for the entire image\n",
......@@ -106,7 +121,8 @@
" y2 = np.ravel(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" # use numpy solver to solve the system of equations all at once\n",
" predict = np.floor(np.linalg.solve(A,y)[-1])\n",
" #predict = np.floor(np.linalg.solve(A,y)[-1])\n",
" predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))\n",
" # flatten the neighbor pixlels and stack them together\n",
" z0 = np.ravel(z0)\n",
" z1 = np.ravel(z1)\n",
......@@ -115,21 +131,24 @@
" neighbor = np.vstack((z0,z1,z2,z3)).T\n",
" # calculate the difference\n",
" diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" # calculate the error\n",
" error = np.ravel(image[1:-1,1:-1])-predict\n",
" \n",
" # flatten the image to a vector\n",
" image = np.ravel(image[1:-1,1:-1])\n",
" error = image-predict\n",
" \n",
" return image, predict, diff, error, A"
" return image, predict, diff, error, A\n"
]
},
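Note: the 3 x 3 system hard-coded in `predict_pix` is the set of normal equations for a least-squares plane fit f(x, y) = a*x + b*y + c through the four known neighbors, and the prediction is the constant term c at (0, 0). A minimal sanity-check sketch (illustrative only, not part of this commit) comparing the hard-coded system against an explicit `np.linalg.lstsq` fit:

```python
import numpy as np

# neighbor offsets (x, y) as in the docstring: z0=(-1,1), z1=(0,1), z2=(1,1), z3=(-1,0)
X = np.array([[-1, 1], [0, 1], [1, 1], [-1, 0]])
z = np.array([10.0, 12.0, 11.0, 9.0])  # example neighbor values z0..z3

# explicit least-squares fit of f(x, y) = a*x + b*y + c
M = np.column_stack((X, np.ones(len(X))))
a, b, c = np.linalg.lstsq(M, z, rcond=None)[0]

# hard-coded normal equations used in predict_pix (third row and RHS negated, same solution)
A = np.array([[3, 0, -1], [0, 3, 3], [1, -3, -4]])
y = np.array([-z[0] + z[2] - z[3], z[0] + z[1] + z[2], -z[0] - z[1] - z[2] - z[3]])
c_fast = np.linalg.solve(A, y)[-1]

print(np.isclose(c, c_fast))  # True: both give the same predicted value c
```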
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 15,
"id": "6b965751",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"this huffman coding code is found online\n",
"https://favtutor.com/blogs/huffman-coding\n",
"\"\"\"\n",
"\n",
"class NodeTree(object):\n",
" def __init__(self, left=None, right=None):\n",
" self.left = left\n",
......@@ -173,65 +192,97 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 24,
"id": "b7561883",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image):\n",
" origin, predict, diff, error, A = plot_hist(image)\n",
"def huffman(image, num_bins=4):\n",
" \"\"\"\n",
" This function is used to encode the error based on the difference\n",
" and split the difference into different bins\n",
" \n",
" image = Image.open(image)\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" Input:\n",
" image (string): path to the tiff file\n",
" num_bins (int): number of bins\n",
" \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
"\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode1 = huffman_code_tree(node)\n",
" Return:\n",
" list_dic (num_bins + 1): a list of dictionary\n",
" image (512, 640): original image\n",
" new_error (512, 640): error that includes the boundary\n",
" diff (510, 638): difference of min and max of the 4 neighbors\n",
" boundary (2300,): the boundary values after subtracting the very first pixel value\n",
" predict (325380,): the list of predicted values\n",
" bins (num_bins)\n",
" A (3 X 3): system of equation\n",
" \n",
" \"\"\"\n",
" # get the prediction error and difference\n",
" image, predict, diff, error, A = predict_pix(image)\n",
" \n",
" mask = diff <= 25\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode2 = huffman_code_tree(node)\n",
"\n",
" # get the number of points in each bins\n",
" data_points_per_bin = len(diff) // num_bins\n",
" \n",
" mask = diff > 25\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 40\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode3 = huffman_code_tree(node)\n",
" # sort the difference and create the bins\n",
" sorted_diff = diff.copy()\n",
" sorted_diff.sort()\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" \n",
"\n",
" mask = diff > 40\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= 70\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" # get the boundary \n",
" boundary = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" \n",
" # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image[0,0]\n",
" boundary[0] = image[0,0]\n",
" \n",
" # huffman encode the boundary\n",
" string = [str(i) for i in boundary]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode4 = huffman_code_tree(node)\n",
" encode = huffman_code_tree(node)\n",
" \n",
" # create a list of huffman table\n",
" list_dic = [encode]\n",
" n = len(bins)\n",
" \n",
" mask = diff > 70\n",
" # loop through different bins\n",
" for i in range (0,n):\n",
" # the fisrt bin\n",
" if i == 0 :\n",
" # get the point within the bin and huffman encode\n",
" mask = diff <= bins[i]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" \n",
" # the middle bins\n",
" else:\n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[i-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[i]\n",
" string = [str(i) for i in new_error[mask2].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" \n",
" # the last bin \n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[-1]\n",
" string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode5 = huffman_code_tree(node)\n",
"\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
"\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
......@@ -241,31 +292,24 @@
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" \n",
" #new_error = np.ravel(new_error)\n",
" \n",
" bins = [25,40,70]\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
" return encode1, encode2, encode3, encode4, encode5, np.ravel(image), error, new_error, diff, boundary, bins\n",
" \n",
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"encode1, encode2, encode3, encode4, encode5, image, error, new_error, diff, boundary, bins = huffman(images[0])"
" return list_dic, image, new_error, diff, boundary, predict, bins, A\n",
" \n"
]
},
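Note: unlike the previously hard-coded `bins = [25,40,70]`, the edges returned by `huffman` are now equal-count boundaries taken from the sorted differences, so each bin covers roughly `len(diff) // num_bins` pixels; that is why a later cell prints data-dependent edges such as `[26, 40, 62]`. A small sketch of the same idea on toy data (illustrative names, not part of this commit):

```python
import numpy as np

rng = np.random.default_rng(0)
diff = rng.integers(0, 100, size=1000)  # stand-in for the neighbor differences
num_bins = 4

# equal-count bin edges, mirroring sorted_diff[i * data_points_per_bin]
points_per_bin = len(diff) // num_bins
edges = [np.sort(diff)[i * points_per_bin] for i in range(1, num_bins)]

# bin index per value: 0 while diff <= edges[0], ..., num_bins - 1 once diff > edges[-1]
bin_index = np.searchsorted(edges, diff, side="left")
print(edges, np.bincount(bin_index))  # counts per bin are roughly equal
```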
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 17,
"id": "2eb774d2",
"metadata": {},
"outputs": [],
"source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
" # copy the error matrix (including the boundary)\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" #diff = np.reshape(diff,(510,638))\n",
" # loop through all the pixel to encode\n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
......@@ -279,85 +323,171 @@
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 23,
"id": "8eeb40d0",
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_matrix, encoding_dict):\n",
"def decoder(A, encoded_matrix, list_dic, bins):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
" the_keys = list(encode_dict.keys())\n",
" the_values = list(encode_dict.values())\n",
" error_matrix = encoded_matrix.copy()\n",
" # change the dictionary back to list\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" error_matrix = np.zeros((512,640))\n",
" # loop through all the element in the matrix\n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" # if it's the very first pixel on the image\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
" # if it's on the boundary\n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])]) + error_matrix[0][0]\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + error_matrix[0][0]\n",
" # if not the boundary\n",
" else:\n",
" \"\"\"z0, z1, z2, z3 = error_matrix[i-1][j-1], error_matrix[i-1][j], \\\n",
" error_matrix[i-1][j+1], error_matrix[i][j-1]\n",
" y = np.vstack((-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3))\"\"\"\n",
" # predict the image with the known pixel value\n",
" z0 = error_matrix[i-1][j-1]\n",
" z1 = error_matrix[i-1][j]\n",
" z2 = error_matrix[i-1][j+1]\n",
" z3 = error_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
" \n",
" # add on the difference by searching the dictionary\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" error_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" error_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
" else:\n",
" error_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
" \n",
" error_matrix[i][j] = int(the_keys[the_values.index(error_matrix[i,j])])\n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3e0e9742",
"execution_count": 19,
"id": "f959fe93",
"metadata": {},
"outputs": [],
"source": [
"encode1, encode2, encode3, encode4, encode5, image, error, new_error, diff, bound, bins = huffman(images[0])\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), [encode1, encode2, encode3, encode4, encode5], diff, bound, bins)\n",
"\n"
"def compress_rate(image, error, diff, bound, list_dic, bins):\n",
" # the bits for the original image\n",
" o_len = 0\n",
" # the bits for the compressed image\n",
" c_len = 0\n",
" # initializing the varible \n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((im[0,:],im[-1,:],im[1:-1,0],im[1:-1,-1]))\n",
" original = im[1:-1,1:-1].reshape(-1)\n",
" diff = diff.reshape(-1)\n",
" \n",
" # calculate the bit for boundary\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" \n",
" # calculate the bit for the pixels inside the boundary\n",
" for i in range(0,len(original)):\n",
" # for the original image\n",
" o_len += len(bin(original[i])[2:])\n",
" \n",
" # check the difference and find the coresponding huffman table\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
"\n",
" else: \n",
" c_len += len(list_dic[5][str(int(error[i]))])\n",
"\n",
" return c_len/o_len"
]
},
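Note: the `!!!!!WARNING!!!!` comments above flag that the if/elif chains in `decoder` and `compress_rate` are hard-coded for the default four bins. A hedged sketch (not the committed code) of a lookup that works for any bin count, assuming `list_dic` and `bins` are exactly what `huffman` returns:

```python
import numpy as np

def pick_table(list_dic, bins, difference):
    """Return the Huffman table matching a neighbor difference.

    list_dic[0] is the boundary table; list_dic[1:] holds one table per bin,
    so the table index is 1 + the bin index of the difference.
    """
    idx = int(np.searchsorted(bins, difference, side="left"))  # 0 .. len(bins)
    return list_dic[1 + idx]
```

With such a helper, a branch like `elif diff[i] <= bins[1] and diff[i] > bins[0]: c_len += len(list_dic[2][...])` collapses to `c_len += len(pick_table(list_dic, bins, diff[i])[str(int(error[i]))])`, and the same lookup can replace the hard-coded branches in `decoder`.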
{
"cell_type": "code",
"execution_count": 25,
"id": "3e0e9742",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(512, 640)\n",
"True\n",
"5\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[0], 4)\n",
"encoded_matrix = encoder(new_error, list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
"print(np.allclose(image, reconstruct_image))\n",
"print(len(list_dic))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e6ea4f99",
"execution_count": 32,
"id": "004e8ba8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['01100010001' '11000110' '100101010' ... '101110011' '00010100'\n",
" '1111000100']\n",
" ['10011100' '100001' '111000' ... '10111011' '00111' '1111001101']\n",
" ['10101111' '100100' '100000' ... '111100' '111000' '00010100']\n",
" ...\n",
" ['110001000' '100001' '111011' ... '1010010' '100000' '10011000']\n",
" ['0100011101' '111010' '00110' ... '1000101' '1100100' '10011010']\n",
" ['00100010' '110111101' '110110100' ... '00010010' '10100000'\n",
" '110110101']]\n"
"[26, 40, 62]\n"
]
}
],
"source": [
"print(encoded_matrix)"
"\n",
"print(bins)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c07a23e",
"id": "a282f9e6",
"metadata": {},
"outputs": [],
"source": []
......@@ -379,7 +509,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.11"
"version": "3.9.1"
}
},
"nbformat": 4,
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "8868bc30",
"metadata": {},
"outputs": [],
......@@ -20,12 +20,13 @@
"import pandas as pd\n",
"from collections import Counter\n",
"import time\n",
"import numpy.linalg as la"
"import numpy.linalg as la\n",
"from scipy.stats import entropy"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "0f944705",
"metadata": {},
"outputs": [],
......@@ -88,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "b18d5e38",
"metadata": {},
"outputs": [],
......@@ -137,7 +138,7 @@
},
{
"cell_type": "code",
"execution_count": 200,
"execution_count": 4,
"id": "3b0c3eaa",
"metadata": {},
"outputs": [],
......@@ -182,7 +183,7 @@
" \n",
" # calculate the difference\n",
" #diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
" diff = (np.max(neighbor,axis = 1)*5 - np.min(neighbor, axis=1))\n",
" diff = (np.max(neighbor,axis = 1) - np.min(neighbor, axis=1))\n",
" \n",
" # flatten the image to a vector\n",
" #image = np.ravel(image[1:-1,1:-1])\n",
......@@ -200,7 +201,7 @@
},
{
"cell_type": "code",
"execution_count": 152,
"execution_count": 5,
"id": "35d4f6a0",
"metadata": {},
"outputs": [],
......@@ -246,9 +247,92 @@
" return nodes[0][0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9200fa53",
"metadata": {},
"outputs": [],
"source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" \n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
" if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
" encoded[i][j] = list_dic[0][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[0]:\n",
" encoded[i][j] = list_dic[1][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:\n",
" encoded[i][j] = list_dic[2][encoded[i][j]]\n",
" elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:\n",
" encoded[i][j] = list_dic[3][encoded[i][j]]\n",
" else: \n",
" encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
" \n",
" return encoded\n",
"\n",
"def decoder(A, encoded_matrix, list_dic, bins):\n",
" \"\"\"\n",
" Function that accecpts the prediction matrix A for the linear system,\n",
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
"\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" error_matrix = np.zeros((512,640))\n",
" \n",
" for i in range(error_matrix.shape[0]):\n",
" for j in range(error_matrix.shape[1]):\n",
" if i == 0 and j == 0:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
" \n",
" elif i == 0 or i == error_matrix.shape[0]-1 or j == 0 or j == error_matrix.shape[1]-1:\n",
" error_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + error_matrix[0][0]\n",
" else:\n",
" z0 = error_matrix[i-1][j-1]\n",
" z1 = error_matrix[i-1][j]\n",
" z2 = error_matrix[i-1][j+1]\n",
" z3 = error_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
"\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" error_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" error_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
" else:\n",
" error_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
" \n",
" \n",
" return error_matrix.astype(int)"
]
},
{
"cell_type": "markdown",
"id": "25abd477",
"id": "ca9c0b4a",
"metadata": {},
"source": [
"## use res"
......@@ -256,7 +340,7 @@
},
{
"cell_type": "code",
"execution_count": 154,
"execution_count": 7,
"id": "bd7e39d7",
"metadata": {},
"outputs": [],
......@@ -436,29 +520,206 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b2e63e2",
"execution_count": 10,
"id": "d0a6fac2",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[8, 32, 88, 181, 383]\n",
"7\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"rate = []\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"for i in range(len(images)):\n",
" list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[i], 6, True)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins, res, use_res = True)\n",
" rate.append(r)\n"
"list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[0], 6, True)\n",
"print(bins)\n",
"print(len(list_dic))"
]
},
{
"cell_type": "code",
"execution_count": 232,
"id": "2723b062",
"metadata": {},
"outputs": [],
"source": [
"def rel_freq(x):\n",
" freqs = [x.count(value) / len(x) for value in set(x)] \n",
" return freqs"
]
},
{
"cell_type": "code",
"execution_count": 234,
"id": "e9799985",
"metadata": {},
"outputs": [],
"source": [
"list_diff = list(set(diff))\n",
"en = []\n",
"for i in list_diff:\n",
" mask = diff == i\n",
" list_error = list(error[mask])\n",
" fre = rel_freq(list_error)\n",
" \n",
" en.append(entropy(fre))"
]
},
{
"cell_type": "code",
"execution_count": 249,
"id": "78a370f7",
"metadata": {},
"outputs": [],
"source": [
"list_error = list(error)\n",
"ob = rel_freq(list_error)"
]
},
{
"cell_type": "code",
"execution_count": 271,
"id": "21ba29a6",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"n = len(bins)\n",
"for i in range (0,n):\n",
" if i == 0 :\n",
" mask = diff <= bins[i]\n",
" list_error = list(error[mask])\n",
" fre = rel_freq(list_error)\n",
" en.append(entropy(fre))\n",
" else:\n",
" mask = diff > bins[i-1]\n",
" error2 = error[mask]\n",
" mask2 = diff[mask] <= bins[i]\n",
" list_error = list(error2[mask2])\n",
" fre = rel_freq(list_error)\n",
" en.append(entropy(fre))\n",
"\n",
"mask = diff > bins[i]\n",
"list_error = list(error[mask])\n",
"fre = rel_freq(list_error)\n",
"en.append(entropy(fre))"
]
},
{
"cell_type": "code",
"execution_count": 272,
"id": "32a94ee7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.2688193009259807\n",
"0.21792128672839872\n"
]
}
],
"source": [
"print(np.mean(en))\n",
"print(np.mean(en)/15)"
]
},
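Note: the mean entropy computed above is divided by 15, the bit length printed for a raw pixel in a later cell, apparently to estimate an achievable compression ratio. One caveat worth flagging: `scipy.stats.entropy` uses the natural logarithm by default, so a bits-per-pixel figure needs the base passed explicitly, e.g.:

```python
from scipy.stats import entropy

freqs = [0.5, 0.25, 0.25]      # example relative frequencies
print(entropy(freqs, base=2))  # 1.5 bits, vs. about 1.04 nats with the default base
```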
{
"cell_type": "code",
"execution_count": 258,
"id": "51268478",
"metadata": {},
"outputs": [],
"source": [
"uni = np.unique(diff)\n",
"uni_n = len(np.unique(diff))\n",
"en = []\n",
"for i in range(uni_n):\n",
" mask = diff == uni[i]\n",
" mask_error = error[mask]\n",
" en.append(entropy(rel_freq(list(mask_error))))"
]
},
{
"cell_type": "code",
"execution_count": 266,
"id": "2d3238cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.2532721868611105\n",
"6.715361332207255\n",
"15\n"
]
}
],
"source": [
"imm = np.ravel(image.reshape(512,640)[1:-1,1:-1])\n",
"print(np.mean(en))\n",
"print(entropy(rel_freq(list(imm))))\n",
"print(len(bin(imm[0])[2:]))"
]
},
{
"cell_type": "code",
"execution_count": 269,
"id": "811f8139",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.44766666666666666"
]
},
"execution_count": 269,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"6.715/15"
]
},
{
"cell_type": "code",
"execution_count": 270,
"id": "6470dfff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.21666666666666667"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"3.25/15"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2eaf807",
"id": "7a05f000",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Compression rate of huffman with different bins in res: {np.mean(rate)}\")"
"## test compression rate and time"
]
},
{
......@@ -566,7 +827,7 @@
{
"cell_type": "code",
"execution_count": 94,
"id": "7d1985a4",
"id": "9d5f9f50",
"metadata": {},
"outputs": [],
"source": [
......@@ -591,7 +852,7 @@
{
"cell_type": "code",
"execution_count": 97,
"id": "b898324e",
"id": "bceeee73",
"metadata": {},
"outputs": [
{
......@@ -615,7 +876,7 @@
{
"cell_type": "code",
"execution_count": 102,
"id": "45b75db7",
"id": "5b02b4a3",
"metadata": {},
"outputs": [
{
......@@ -637,10 +898,29 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 155,
"id": "2253908b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[46 26 8 ... 13 16 34]\n"
]
}
],
"source": [
"list_dic, image, error, new_error, diff, bound, predict, bins, res = huffman(images[22], 6, False)\n",
"r = compress_rate(image, error, diff, bound, list_dic, bins, res, False)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "55251c97",
"id": "88ed2a4e",
"metadata": {},
"outputs": [
{
......@@ -673,7 +953,7 @@
{
"cell_type": "code",
"execution_count": 109,
"id": "92d3ae31",
"id": "b0a58386",
"metadata": {},
"outputs": [],
"source": [
......@@ -684,7 +964,7 @@
{
"cell_type": "code",
"execution_count": 110,
"id": "4abd8b82",
"id": "1a913350",
"metadata": {},
"outputs": [
{
......@@ -719,7 +999,7 @@
{
"cell_type": "code",
"execution_count": 111,
"id": "07bb76a7",
"id": "6e9bc2e7",
"metadata": {},
"outputs": [
{
......@@ -734,10 +1014,31 @@
"print(slope,intercept)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "8ccd4336",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([315196, 495220, 360208, ..., 748984, 340447, 885631])"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "fca0420b",
"id": "689f98c0",
"metadata": {},
"outputs": [
{
......@@ -770,7 +1071,7 @@
{
"cell_type": "code",
"execution_count": 197,
"id": "70285459",
"id": "b08d924b",
"metadata": {},
"outputs": [
{
......@@ -794,7 +1095,7 @@
{
"cell_type": "code",
"execution_count": 195,
"id": "21b94965",
"id": "98c47af2",
"metadata": {},
"outputs": [
{
......@@ -815,7 +1116,7 @@
{
"cell_type": "code",
"execution_count": 196,
"id": "68eb383d",
"id": "3c37a08f",
"metadata": {},
"outputs": [
{
......@@ -833,7 +1134,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "3fdebf92",
"id": "3ca79c1f",
"metadata": {},
"outputs": [],
"source": []
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 12,
"id": "14f74f21",
"metadata": {},
"outputs": [],
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 13,
"id": "c16af61f",
"metadata": {},
"outputs": [],
......@@ -78,23 +78,39 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 14,
"id": "aceba613",
"metadata": {},
"outputs": [],
"source": [
"def plot_hist(tiff_list):\n",
" \"\"\"\n",
" This function is the leftovers from the first attempt to plot histograms.\n",
" As it stands it needs some work in order to function again. We will\n",
" fix this later. 1/25/22\n",
"def predict_pix(tiff_image):\n",
" \"\"\"\n",
" This function predict the pixel values excluding the boundary.\n",
" Using the 4 neighbor pixel values and MSE to predict the next pixel value\n",
" (-1,1) (0,1) (1,1) => relative position of the 4 other given values\n",
" (-1,0) (0,0) => (0,0) is the one we want to predict\n",
" take the derivative of mean square error to solve for the system of equation \n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
" A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n",
" and the predicted pixel value is c.\n",
" \n",
" Input:\n",
" tiff_image (string): path to the tiff file\n",
" \n",
" image = tiff_list\n",
" image = Image.open(image) #Open the image and read it as an Image object\n",
" Return:\n",
" image (512 X 640): original image \n",
" predict (325380,): predicted image exclude the boundary\n",
" diff. (325380,): difference between the min and max of four neighbors exclude the boundary\n",
" error (325380,): difference between the original image and predicted image\n",
" A (3 X 3): system of equation\n",
" \"\"\"\n",
" image = Image.open(tiff_image) #Open the image and read it as an Image object\n",
" image = np.array(image)[1:,:] #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
" image = image.astype(int)\n",
" print(image.shape)\n",
" # use \n",
" A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation\n",
" # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n",
" z0 = image[0:-2,0:-2] # get all the first pixel for the entire image\n",
" z1 = image[0:-2,1:-1] # get all the second pixel for the entire image\n",
" z2 = image[0:-2,2::] # get all the third pixel for the entire image\n",
......@@ -118,16 +134,21 @@
" # calculate the error\n",
" error = np.ravel(image[1:-1,1:-1])-predict\n",
" \n",
" return image, predict, diff, error, A"
" return image, predict, diff, error, A\n"
]
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 15,
"id": "6b965751",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"this huffman coding code is found online\n",
"https://favtutor.com/blogs/huffman-coding\n",
"\"\"\"\n",
"\n",
"class NodeTree(object):\n",
" def __init__(self, left=None, right=None):\n",
" self.left = left\n",
......@@ -171,14 +192,33 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 24,
"id": "b7561883",
"metadata": {},
"outputs": [],
"source": [
"def huffman(image, num_bins):\n",
"def huffman(image, num_bins=4):\n",
" \"\"\"\n",
" This function is used to encode the error based on the difference\n",
" and split the difference into different bins\n",
" \n",
" Input:\n",
" image (string): path to the tiff file\n",
" num_bins (int): number of bins\n",
" \n",
" Return:\n",
" list_dic (num_bins + 1): a list of dictionary\n",
" image (512, 640): original image\n",
" new_error (512, 640): error that includes the boundary\n",
" diff (510, 638): difference of min and max of the 4 neighbors\n",
" boundary (2300,): the boundary values after subtracting the very first pixel value\n",
" predict (325380,): the list of predicted values\n",
" bins (num_bins)\n",
" A (3 X 3): system of equation\n",
" \n",
" \"\"\"\n",
" # get the prediction error and difference\n",
" image, predict, diff, error, A = plot_hist(image)\n",
" image, predict, diff, error, A = predict_pix(image)\n",
" \n",
" # get the number of points in each bins\n",
" data_points_per_bin = len(diff) // num_bins\n",
......@@ -252,14 +292,15 @@
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
" return list_dic, np.ravel(image), error, new_error, diff, boundary, predict, bins\n",
" return list_dic, image, new_error, diff, boundary, predict, bins, A\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 17,
"id": "2eb774d2",
"metadata": {},
"outputs": [],
......@@ -267,7 +308,7 @@
"def encoder(error, list_dic, diff, bound, bins):\n",
" # copy the error matrix (including the boundary)\n",
" encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
" diff = np.reshape(diff,(510,638))\n",
" #diff = np.reshape(diff,(510,638))\n",
" # loop through all the pixel to encode\n",
" for i in range(encoded.shape[0]):\n",
" for j in range(encoded.shape[1]):\n",
......@@ -287,7 +328,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 23,
"id": "8eeb40d0",
"metadata": {},
"outputs": [],
......@@ -298,6 +339,7 @@
" the encoded matrix of error values, and the encoding dicitonary.\n",
" \"\"\"\n",
" # change the dictionary back to list\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
......@@ -338,6 +380,7 @@
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
" \n",
" # add on the difference by searching the dictionary\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if difference <= bins[0]:\n",
" error_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
......@@ -353,7 +396,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 19,
"id": "f959fe93",
"metadata": {},
"outputs": [],
......@@ -380,6 +423,7 @@
" o_len += len(bin(original[i])[2:])\n",
" \n",
" # check the difference and find the coresponding huffman table\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" \n",
......@@ -397,65 +441,53 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"id": "3e0e9742",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(512, 640)\n",
"True\n",
"5\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"#bins = [25,40,70]\n",
"A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n",
"list_dic, image, error, new_error, diff, bound, predict, bins = huffman(images[0], 4)\n",
"encoded_matrix = encoder(np.reshape(new_error,(512,640)), list_dic, diff, bound, bins)\n",
"list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[0], 4)\n",
"encoded_matrix = encoder(new_error, list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins)\n",
"print(np.allclose(image.reshape(512,640), reconstruct_image))\n",
"print(np.allclose(image, reconstruct_image))\n",
"print(len(list_dic))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0948ab2",
"metadata": {},
"outputs": [],
"source": [
"rate1 = []\n",
"rate2 = []\n",
"rate3 = []\n",
"bins1 = [25,40,70]\n",
"bins2 = [50,100,150]\n",
"bins3 = [30,50,100]\n",
"B = [bins1, bins2, bins3]\n",
"for i in range(len(images)):\n",
" for j, bins in enumerate(B):\n",
" list_dic, image, error, new_error, diff, bound, predict = huffman(images[i], bins)\n",
" r = compress_rate(image, error, diff, bound, list_dic, bins)\n",
" if j == 0:\n",
" rate1.append(r)\n",
" elif j == 1:\n",
" rate2.append(r)\n",
" else:\n",
" rate3.append(r)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d615dcd",
"execution_count": 32,
"id": "004e8ba8",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[26, 40, 62]\n"
]
}
],
"source": [
"print(f\"Compression rate of huffman with bins {bins1}: {np.mean(rate1)}\")\n",
"print(f\"Compression rate of huffman with bins {bins2}: {np.mean(rate2)}\")\n",
"print(f\"Compression rate of huffman with bins {bins3}: {np.mean(rate3)}\")\n"
"\n",
"print(bins)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67cb360d",
"id": "a282f9e6",
"metadata": {},
"outputs": [],
"source": []
......