bin work

4286c54a · Bryce Hepner · 6cb62a30 · 4286c54a · 4286c54a
Commit 4286c54a authored Jun 08, 2022 by Bryce Hepner
Hide whitespace changes
Inline Side-by-side

Showing with 64 additions and 61 deletions

SameTableEncoder.ipynb SameTableEncoder.ipynb +64 -61

first_dic.npy first_dic.npy +0 -0

No files found.
--- a/SameTableEncoder.ipynb
+++ b/SameTableEncoder.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -72,7 +72,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 117,
+   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -142,12 +142,12 @@
    "    # calculate the error\n",
    "    error = np.ravel(image_array[1:-1,1:-1])-predict\n",
    "    \n",
-    "    return image_array, predict, diff, error"
+    "    return image_array, diff, error"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -206,7 +206,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 138,
+   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -232,19 +232,13 @@
    "    \"\"\"\n",
    "    list_of_all_vals = []\n",
    "    huffman_encoding_list = []\n",
-    "    for i in range(num_bins+1):\n",
+    "    for _ in range(num_bins+1):\n",
    "        list_of_all_vals.append([])\n",
-    "    for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
+    "    for _, tiff_image_path in enumerate(tiff_image_path_list):\n",
    "        # get the image_array, etc\n",
-    "        image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n",
+    "        image_array, diff, error= predict_pix(tiff_image_path, difference)\n",
-    "        \n",
-    "        # calculate the number of points that will go in each bin\n",
-    "        data_points_per_bin = diff.size // num_bins\n",
    "\n",
-    "        # sort the difference and create the bins\n",
+    "        bins = [21,32,48]\n",
-    "        sorted_diff = np.sort(diff.copy())\n",
-    "        # bins = [12,60,180]\n",
-    "        bins = [21,31,48]\n",
    "        # get the boundary \n",
    "        boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
    "        \n",
@@ -308,7 +302,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 139,
+   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -323,7 +317,7 @@
    "    \n",
    "    Return:\n",
    "    huffman_encoding_list  list    (num_bins + 1): a list of dictionary\n",
-    "    image_array            ndarray (512, 640): original image\n",
+    "    image_as_array         ndarray (512, 640): original image\n",
    "    new_error              ndarray (512, 640): error that includes the boundary\n",
    "    diff                   ndarray (510, 638): difference of min and max of the 4 neighbors\n",
    "    boundary               ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
@@ -331,25 +325,22 @@
    "    bins                   list    (num_bins - 1,): a list of threshold to cut the bins\n",
    "    A                      ndarray (3 X 3): system of equation\n",
    "    \"\"\"\n",
-    "    # get the image_array, etc\n",
+    "    # get the image_as_array, etc\n",
-    "    image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n",
+    "    image_as_array, diff, error= predict_pix(tiff_image_path, difference)\n",
    "    \n",
    "    # calculate the number of points that will go in each bin\n",
-    "    data_points_per_bin = diff.size // num_bins\n",
+    "\n",
    "\n",
    "    # sort the difference and create the bins\n",
-    "    sorted_diff = np.sort(diff.copy())\n",
+    "    bins = [21,32,48]\n",
-    "    # bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
-    "    # bins = [12,60,180]\n",
-    "    bins = [21,31,48]\n",
    "    # get the boundary \n",
-    "    boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
+    "    boundary = np.hstack((image_as_array[0,:],image_as_array[-1,:],image_as_array[1:-1,0],image_as_array[1:-1,-1]))\n",
    "    \n",
    "    # take the difference of the boundary with the very first pixel\n",
-    "    boundary = boundary - image_array[0,0]\n",
+    "    boundary = boundary - image_as_array[0,0]\n",
    "    \n",
    "    #boundary is 1dim, so boundary[0] is just the first element\n",
-    "    boundary[0] = image_array[0,0]\n",
+    "    boundary[0] = image_as_array[0,0]\n",
    "    \n",
    "    # huffman encode the boundary\n",
    "    bound_vals_as_string = [str(i) for i in boundary]\n",
@@ -399,7 +390,7 @@
    "    huffman_encoding_list.append(huffman_encoding_dict)\n",
    "\n",
    "    # create a error matrix that includes the boundary (used in encoding matrix)\n",
-    "    new_error = np.copy(image_array)\n",
+    "    new_error = np.copy(image_as_array)\n",
    "    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
    "    keep = new_error[0,0]\n",
    "    new_error[0,:] = new_error[0,:] - keep\n",
@@ -409,26 +400,23 @@
    "    new_error[0,0] = keep\n",
    "    # huffman_encoding_list = list(set(huffman_encoding_list))\n",
    "    diff = np.reshape(diff,(510,638))\n",
-    "    # return the huffman dictionary\n",
    "\n",
-    "    return huffman_encoding_list, image_array, new_error, diff, boundary, predict, bins\n",
+    "    return image_as_array, new_error, diff\n"
-    " \n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 140,
+   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
-    "def encoder(error, list_dic, diff, bound, bins):\n",
+    "def encoder(error, list_dic, diff, bins):\n",
    "    \"\"\"\n",
    "    This function encode the matrix with huffman coding tables\n",
    "    \n",
    "    Input:\n",
    "    error     (512, 640): a matrix with all the errors\n",
    "    list_dic  (num_dic + 1,): a list of huffman coding table \n",
-    "    bound     (2300,): the boundary values after subtracting the very first pixel value\n",
    "    bins       (num_bins - 1,): a list of threshold to cut the bins\n",
    "    \n",
    "    Return:\n",
@@ -456,7 +444,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 141,
+   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -479,14 +467,12 @@
    "        for _ in range(8-length_of_string):\n",
    "            zerobuffer += \"0\"\n",
    "        int_array.append(int(input_string+zerobuffer,2))\n",
-    "    # print(int_array[0:20])\n",
-    "    # print(int_array[-12:])\n",
    "    return bytes(int_array)\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 142,
+   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -555,7 +541,6 @@
    "                    difference = difference.astype(int)\n",
    "                    \n",
    "                predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
-    "                \n",
    "                # add on the difference by searching the dictionary\n",
    "                # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
    "                if difference <= bins[0]:\n",
@@ -576,7 +561,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 143,
+   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -587,19 +572,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 146,
+   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "scenes = file_extractor()\n",
    "newnamesforlater = []\n",
    "images = image_extractor(scenes)\n",
-    "oglist_dic, ogbins = make_dictionary(images[:10], 4, False)\n",
+    "list_dic, ogbins = make_dictionary(images[0:10], 4, False)\n",
    "file_size_ratios = []\n",
-    "np.save(\"first_dic.npy\", oglist_dic)\n",
+    "np.save(\"first_dic.npy\", list_dic)\n",
-    "for i in range(10,30):\n",
+    "for i in range(10):\n",
-    "    list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n",
+    "    image, new_error, diff = huffman(images[i], 4, False)\n",
-    "    encoded_string1 = encoder(new_error, oglist_dic, diff, bound, ogbins)\n",
+    "    encoded_string1 = encoder(new_error, list_dic, diff, ogbins)\n",
    "    # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
    "    # print(np.allclose(image, reconstruct_image))\n",
    "    inletters = bitstring_to_bytes(encoded_string1)\n",
@@ -615,14 +600,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 145,
+   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.3955133696593259\n"
+      "0.3955120002069395\n"
     ]
    }
   ],
@@ -632,9 +617,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.39535481750525336"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "0.39535481750525336"
   ]
@@ -665,13 +661,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "True\n",
+      "True\n",
+      "True\n",
+      "True\n",
+      "True\n",
+      "True\n",
+      "True\n",
      "True\n",
      "True\n",
      "True\n"
@@ -685,21 +688,21 @@
    "    for i, item in enumerate(int_array):\n",
    "        end_string += (bin(item)[2:].zfill(8))\n",
    "    return end_string\n",
-    "oglist_dic = np.load(\"first_dic.npy\", allow_pickle=\"TRUE\")\n",
+    "list_dic = np.load(\"first_dic.npy\", allow_pickle=\"TRUE\")\n",
    "\n",
    "\n",
    "# ogbins = [12,60,180]\n",
-    "ogbins = [21,31,48]\n",
+    "ogbins = [21,32,48]\n",
-    "for i,item in enumerate(newnamesforlater[0:3]):\n",
+    "for i,item in enumerate(newnamesforlater[0:10]):\n",
-    "    list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n",
+    "    image, new_error, diff = huffman(images[i], 4, False)\n",
    "    encoded_string2 = bytes_to_bitstring(read_from_file(item))\n",
-    "    reconstruct_image = decoder(encoded_string2, oglist_dic, ogbins, False)\n",
+    "    reconstruct_image = decoder(encoded_string2, list_dic, ogbins, False)\n",
    "    print(np.allclose(image, reconstruct_image))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -726,7 +729,7 @@
    "    all_bins = []\n",
    "    for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
    "        # get the image_array, etc\n",
-    "        image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n",
+    "        image_array, predict, diff= predict_pix(tiff_image_path, difference)\n",
    "        \n",
    "        # calculate the number of points that will go in each bin\n",
    "        data_points_per_bin = diff.size // num_bins\n",
@@ -740,14 +743,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "[21.00404858 31.92712551 48.06477733]\n"
+      "[-13.07692308   0.          13.08097166]\n"
     ]
    }
   ],

--- a/first_dic.npy
+++ b/first_dic.npy