Commit 59865f02 authored by Bryce Hepner

Made function to create same dictionary

parent bd332d6b
......@@ -208,7 +208,110 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"def make_dictionary(tiff_image_path_list, num_bins=4, difference = True):\n",
" \"\"\"\n",
" This function is used to encode the error based on the difference\n",
" and split the difference into different bins\n",
" \n",
" Input:\n",
" tiff_image_path (string): path to the tiff file\n",
" num_bins (int): number of bins\n",
" \n",
" Return:\n",
" huffman_encoding_list list (num_bins + 1): a list of dictionary\n",
" image_array ndarray (512, 640): original image\n",
" new_error ndarray (512, 640): error that includes the boundary\n",
" diff ndarray (510, 638): difference of min and max of the 4 neighbors\n",
" boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
" predict ndarray (325380,): the list of predicted values\n",
" bins list (num_bins - 1,): a list of threshold to cut the bins\n",
" A ndarray (3 X 3): system of equation\n",
" \n",
" \"\"\"\n",
" list_of_all_vals = []\n",
" huffman_encoding_list = []\n",
" for i in range(num_bins+1):\n",
" list_of_all_vals.append([])\n",
" for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
" # get the image_array, etc\n",
" image_array, predict, diff, error, A = predict_pix(tiff_image_path, difference)\n",
" \n",
" # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n",
"\n",
" # sort the difference and create the bins\n",
" sorted_diff = np.sort(diff.copy())\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" \n",
" # get the boundary \n",
" boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
" \n",
" # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image_array[0,0]\n",
" \n",
" #boundary is 1dim, so boundary[0] is just the first element\n",
" boundary[0] = image_array[0,0]\n",
" \n",
" # huffman encode the boundary\n",
" for j in boundary:\n",
" list_of_all_vals[0].append(str(j))\n",
"\n",
" \n",
" # create a list of huffman table\n",
" n = len(bins)\n",
" \n",
" # loop through different bins\n",
" for k in range (0,n):\n",
" # the first bin\n",
" if k == 0 :\n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff <= bins[k]\n",
" for j in error[mask].astype(int):\n",
" list_of_all_vals[k+1].append(str(j))\n",
"\n",
" \n",
" # the middle bins\n",
" else:\n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff > bins[k-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[k]\n",
" for j in new_error[mask2].astype(int):\n",
" list_of_all_vals[k+1].append(str(j))\n",
"\n",
" \n",
" # the last bin \n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff > bins[-1]\n",
" for j in error[mask].astype(int):\n",
" list_of_all_vals[-1].append(str(j))\n",
" for item in list_of_all_vals:\n",
" freq = dict(Counter(item))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" huffman_encoding_list.append(huffman_code_tree(node))\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image_array)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" # huffman_encoding_list = list(set(huffman_encoding_list))\n",
" diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
" return huffman_encoding_list,bins\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
......@@ -317,7 +420,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
......@@ -356,7 +459,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
......@@ -371,87 +474,137 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_string, list_dic, bins, use_diff):\n",
" \"\"\"\n",
" This function decodes the encoded_matrix.\n",
" Input:\n",
" A (3 X 3): system of equation\n",
" list_dic (num_dic + 1,): a list of huffman coding table \n",
" encoded_matrix (512, 640): encoded matrix\n",
" bins (num_bins - 1,): a list of threshold to cut the bins\n",
" \n",
" Return:\n",
" decode_matrix (512, 640): decoded matrix\n",
" \"\"\"\n",
" # change the dictionary back to list\n",
" # !!!!!WARNING!!!! has to change this part, everytime you change the number of bins\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" #Matrix system of points that will be used to solve the least squares fitting hyperplane\n",
" points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])\n",
" \n",
" decode_matrix = np.zeros((512,640))\n",
" # loop through all the element in the matrix\n",
" for i in range(decode_matrix.shape[0]):\n",
" for j in range(decode_matrix.shape[1]):\n",
" # if it's the very first pixel on the image\n",
" if i == 0 and j == 0:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
" decode_matrix[i][j] = colorvalue\n",
" \n",
" # if it's on the boundary (any of the 4 edges)\n",
" elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
" decode_matrix[i][j] = colorvalue + decode_matrix[0][0]\n",
" # if not the boundary\n",
" else:\n",
" # predict the image with the known pixel value\n",
" z0 = decode_matrix[i-1][j-1]\n",
" z1 = decode_matrix[i-1][j]\n",
" z2 = decode_matrix[i-1][j+1]\n",
" z3 = decode_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" if use_diff:\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" else:\n",
" \n",
" f, difference, rank, s = la.lstsq(points, [z0,z1,z2,z3], rcond=None) \n",
" difference = difference.astype(int)\n",
" \n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
" \n",
" # add on the difference by searching the dictionary\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if difference <= bins[0]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys1, the_values=the_values1)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys2, the_values=the_values2)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys3, the_values=the_values3)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" else:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys4, the_values=the_values4)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
"\n",
" return decode_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 8'\u001b[0m in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=6'>7</a>\u001b[0m encoded_string \u001b[39m=\u001b[39m encoder(new_error, oglist_dic, diff, bound, ogbins)\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=7'>8</a>\u001b[0m \u001b[39m# reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=8'>9</a>\u001b[0m \u001b[39m# print(np.allclose(image, reconstruct_image))\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=9'>10</a>\u001b[0m inletters \u001b[39m=\u001b[39m bitstring_to_bytes(encoded_string)\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=10'>11</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mMatrixNowString.txt\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mw\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=11'>12</a>\u001b[0m f\u001b[39m.\u001b[39mwrite(inletters\u001b[39m.\u001b[39mdecode(\u001b[39m\"\u001b[39m\u001b[39mISO-8859-1\u001b[39m\u001b[39m\"\u001b[39m))\n",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 7'\u001b[0m in \u001b[0;36mbitstring_to_bytes\u001b[0;34m(s)\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=3'>4</a>\u001b[0m \u001b[39mwhile\u001b[39;00m v:\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=4'>5</a>\u001b[0m b\u001b[39m.\u001b[39mappend(v \u001b[39m&\u001b[39m \u001b[39m0xff\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=5'>6</a>\u001b[0m v \u001b[39m>>\u001b[39m\u001b[39m=\u001b[39m \u001b[39m8\u001b[39m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=6'>7</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mbytes\u001b[39m(b[::\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m])\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
"0\n",
"True\n",
"5\n",
"1\n",
"True\n",
"5\n",
"2\n",
"True\n",
"5\n",
"3\n",
"True\n",
"5\n",
"4\n",
"True\n",
"5\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"oglist_dic, ogimage, ognew_error, ogdiff, ogbound, ogpredict, ogbins, ogA = huffman(images[0], 4, False)\n",
"oglist_dic, ogbins = make_dictionary(images[:10], 4, False)\n",
"file_size_ratios = []\n",
"for i in range(25,27):\n",
"for i in range(5):\n",
" print(i)\n",
" list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[i], 4, False)\n",
" encoded_string = encoder(new_error, oglist_dic, diff, bound, ogbins)\n",
" encoded_string = encoder(new_error, oglist_dic, diff, bound, bins)\n",
" # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
" # print(np.allclose(image, reconstruct_image))\n",
" inletters = bitstring_to_bytes(encoded_string)\n",
"\n",
" reconstruct_image = decoder(A, encoded_string, oglist_dic, bins, False)\n",
" print(np.allclose(image, reconstruct_image))\n",
" print(len(list_dic))\n",
"\n",
" with open(\"MatrixNowString.txt\", 'w') as f:\n",
" f.write(inletters.decode(\"ISO-8859-1\"))\n",
" file_size_ratios.append((os.path.getsize('MatrixNowString.txt'))/os.path.getsize('images/1626032610_393963/1626032610_393963_0.tiff'))"
......@@ -459,14 +612,14 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 81,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6058229115470704\n"
"0.6032547318383986\n"
]
}
],
......@@ -501,8 +654,33 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 37 216 73 ... 42 88 53]\n",
" [ 16 140 368 ... 112 54 468]\n",
" [ 0 42 20 ... 28 13 13]\n",
" ...\n",
" [ 37 6 32 ... 6 13 192]\n",
" [ 32 2 130 ... 88 150 23]\n",
" [ 48 6 20 ... 20 73 322]]\n"
]
}
],
"source": [
"print(diff)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
......