Modified in running, tests successful

a9bf04b8 · Bryce Hepner · 9c9419fe · a9bf04b8 · a9bf04b8 · a9bf04b8
Commit a9bf04b8 authored May 31, 2022 by Bryce Hepner
Showing with 293 additions and 81 deletions

Encoding_decoding.ipynb Encoding_decoding.ipynb +15 -9

SaveableEncoderDecoder.ipynb SaveableEncoderDecoder.ipynb +233 -27

compression_benchmark.ipynb compression_benchmark.ipynb +45 -45

No files found.
--- a/Encoding_decoding.ipynb
+++ b/Encoding_decoding.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "id": "14f74f21",
   "metadata": {},
   "outputs": [],
@@ -24,7 +24,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "id": "c16af61f",
   "metadata": {},
   "outputs": [],
@@ -77,7 +77,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "id": "53786325",
   "metadata": {},
   "outputs": [],
@@ -153,7 +153,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "id": "6b965751",
   "metadata": {},
   "outputs": [],
@@ -209,7 +209,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "id": "b7561883",
   "metadata": {},
   "outputs": [],
@@ -318,7 +318,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
   "id": "2eb774d2",
   "metadata": {},
   "outputs": [],
@@ -358,7 +358,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "id": "8eeb40d0",
   "metadata": {},
   "outputs": [],
@@ -402,6 +402,9 @@
    "            # if it's the very first pixel on the image\n",
    "            if i == 0 and j == 0:\n",
    "                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
+    "                print(encoded_matrix[i][j])\n",
+    "                print(the_values0.index(encoded_matrix[i,j]))\n",
+    "                print(int(the_keys0[the_values0.index(encoded_matrix[i,j])]))\n",
    "            # if it's on the boundary (any of the 4 edges)\n",
    "            elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:\n",
    "                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + decode_matrix[0][0]\n",
@@ -442,7 +445,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
   "id": "f959fe93",
   "metadata": {},
   "outputs": [],
@@ -506,7 +509,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 10,
   "id": "3e0e9742",
   "metadata": {},
   "outputs": [
@@ -514,6 +517,9 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "11100100000\n",
+      "499\n",
+      "22275\n",
      "True\n",
      "5\n"
     ]

--- a/SaveableEncoderDecoder.ipynb
+++ b/SaveableEncoderDecoder.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -72,7 +72,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -147,7 +147,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -197,12 +197,19 @@
    "\n",
    "        #There is a huge memory leak here, no idea how or why\n",
    "        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
-    "    return nodes[0][0]"
+    "    return nodes[0][0]\n",
+    "def decode_string(huffman_string, the_values, the_keys):\n",
+    "    for i in range(len(huffman_string)):\n",
+    "        try:\n",
+    "            return (int(the_keys[the_values.index(huffman_string[:i+1])]),huffman_string[i+1:])\n",
+    "        except:\n",
+    "            pass\n",
+    "    "
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -310,7 +317,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -350,11 +357,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
-    "def decoder(A, encoded_matrix, list_dic, bins, use_diff):\n",
+    "def decoder(A, encoded_string, list_dic, bins, use_diff):\n",
    "    \"\"\"\n",
    "    This function decodes the encoded_matrix.\n",
    "    Input:\n",
@@ -392,10 +399,13 @@
    "        for j in range(decode_matrix.shape[1]):\n",
    "            # if it's the very first pixel on the image\n",
    "            if i == 0 and j == 0:\n",
-    "                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
+    "                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
+    "                decode_matrix[i][j] = colorvalue\n",
+    "                \n",
    "            # if it's on the boundary (any of the 4 edges)\n",
    "            elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:\n",
-    "                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + decode_matrix[0][0]\n",
+    "                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
+    "                decode_matrix[i][j] = colorvalue + decode_matrix[0][0]\n",
    "            # if not the boundary\n",
    "            else:\n",
    "                # predict the image with the known pixel value\n",
@@ -419,13 +429,17 @@
    "                # add on the difference by searching the dictionary\n",
    "                # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
    "                if difference <= bins[0]:\n",
-    "                    decode_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
+    "                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys1, the_values=the_values1)\n",
+    "                    decode_matrix[i][j] = colorvalue + int(predict)\n",
    "                elif difference <= bins[1] and difference > bins[0]:\n",
-    "                    decode_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
+    "                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys2, the_values=the_values2)\n",
+    "                    decode_matrix[i][j] = colorvalue + int(predict)\n",
    "                elif difference <= bins[2] and difference > bins[1]:\n",
-    "                    decode_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
+    "                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys3, the_values=the_values3)\n",
+    "                    decode_matrix[i][j] = colorvalue + int(predict)\n",
    "                else:\n",
-    "                    decode_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
+    "                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys4, the_values=the_values4)\n",
+    "                    decode_matrix[i][j] = colorvalue + int(predict)\n",
    "                \n",
    "                \n",
    "    return decode_matrix.astype(int)"
@@ -433,35 +447,227 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 134,
   "metadata": {},
   "outputs": [],
+   "source": [
+    "def compress_rate(image_array, new_error, diff, bound, huffman_encoding_list, bins):\n",
+    "    '''\n",
+    "    This function is used to calculate the compression rate.\n",
+    "    Input:\n",
+    "    image_array      (512, 640): original_core image\n",
+    "    new_error  (512, 640): error that includes the boundary\n",
+    "    diff       (510, 638): difference of min and max of the 4 neighbors\n",
+    "    bound      (2300,): the boundary values after subtracting the very first pixel value\n",
+    "    huffman_encoding_list   (num_dic + 1,): a list of huffman coding table \n",
+    "    bins       (num_bins - 1,): a list of thresholds used to cut the bins\n",
+    "    \n",
+    "    Return:\n",
+    "    compression rate\n",
+    "    '''\n",
+    "    # the bits for the original image\n",
+    "    o_len = 0\n",
+    "    # the bits for the compressed image\n",
+    "    c_len = 0\n",
+    "    # initializing the variable \n",
+    "    \n",
+    "    #this was unused\n",
+    "    # im = np.reshape(image,(512, 640))\n",
+    "    \n",
+    "    real_boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
+    "    #Bryce's notes: Why are they all reshaped?\n",
+    "    original_core = image_array[1:-1,1:-1].reshape(-1)\n",
+    "    diff = diff.reshape(-1)\n",
+    "    error = new_error[1:-1,1:-1].reshape(-1)\n",
+    "    \n",
+    "    # calculate the bit for boundary\n",
+    "    for i in range(0,len(bound)):\n",
+    "        o_len += len(bin(real_boundary[i])[2:])\n",
+    "        c_len += len(huffman_encoding_list[0][str(bound[i])])\n",
+    "    \n",
+    "    # calculate the bit for the pixels inside the boundary\n",
+    "    for i in range(0,len(original_core)):\n",
+    "\n",
+    "        # for the original image\n",
+    "        o_len += len(bin(original_core[i])[2:])\n",
+    "        \n",
+    "        # check the difference and find the corresponding huffman table\n",
+    "        # !!!!!WARNING!!!! this part has to change every time you change the number of bins\n",
+    "        if diff[i] <= bins[0]:\n",
+    "            c_len += len(huffman_encoding_list[1][str(int(error[i]))])\n",
+    "            \n",
+    "        elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
+    "            c_len += len(huffman_encoding_list[2][str(int(error[i]))])\n",
+    "            \n",
+    "        elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
+    "            c_len += len(huffman_encoding_list[3][str(int(error[i]))])\n",
+    "\n",
+    "        else: \n",
+    "            c_len += len(huffman_encoding_list[4][str(int(error[i]))])\n",
+    "            \n",
+    "    return c_len/o_len"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n",
+      "5\n"
+     ]
+    }
+   ],
   "source": [
    "scenes = file_extractor()\n",
    "images = image_extractor(scenes)\n",
    "list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[0], 4, False)\n",
-    "encoded_string = encoder(new_error, list_dic, diff, bound, bins)"
+    "encoded_string = encoder(new_error, list_dic, diff, bound, bins)\n",
+    "reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
+    "print(np.allclose(image, reconstruct_image))\n",
+    "print(len(list_dic))\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
-     "ename": "NameError",
+     "data": {
-     "evalue": "name 'encoded_string' is not defined",
+      "text/plain": [
-     "output_type": "error",
+       "0.4232928466796875"
-     "traceback": [
+      ]
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+     },
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+     "execution_count": 131,
-      "\u001b[1;32m/home/bryce/git/master/SaveableEncoderDecoder.ipynb Cell 8'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/bryce/git/master/SaveableEncoderDecoder.ipynb#ch0000007?line=0'>1</a>\u001b[0m \u001b[39mprint\u001b[39m(encoded_string)\n",
+     "metadata": {},
-      "\u001b[0;31mNameError\u001b[0m: name 'encoded_string' is not defined"
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compress_rate(image, new_error, diff, bound, list_dic, bins)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2080618\n"
     ]
    }
   ],
   "source": [
-    "print(encoded_string)"
+    "print(sys.getsizeof(encoded_string))\n",
+    "with open(\"MatrixNowString.txt\", 'wb') as f:\n",
+    "    f.write(bytearray(encoded_string, encoding = 'utf8'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "657197\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(os.path.getsize('images/1626032610_393963/1626032610_393963_0.tiff'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 169,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "398145\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(os.path.getsize('MatrixNowString.txt'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 167,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def bitstring_to_bytes(s):\n",
+    "    v = int(s, 2)\n",
+    "    b = bytearray()\n",
+    "    while v:\n",
+    "        b.append(v & 0xff)\n",
+    "        v >>= 8\n",
+    "    return bytes(b[::-1])\n",
+    "\n",
+    "s = \"0110100001101001\"\n",
+    "inletters = bitstring_to_bytes(encoded_string)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 168,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2080618\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(sys.getsizeof(encoded_string))\n",
+    "with open(\"MatrixNowString.txt\", 'w') as f:\n",
+    "    f.write(inletters.decode(\"ISO-8859-1\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 172,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6058229115470704"
+      ]
+     },
+     "execution_count": 172,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(os.path.getsize('MatrixNowString.txt'))/os.path.getsize('images/1626032610_393963/1626032610_393963_0.tiff')"
   ]
  },
  {

--- a/compression_benchmark.ipynb
+++ b/compression_benchmark.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -33,7 +33,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -61,7 +61,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -71,7 +71,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@@ -90,7 +90,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -99,7 +99,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -108,7 +108,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -117,7 +117,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -134,7 +134,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@@ -152,7 +152,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -161,7 +161,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@@ -178,7 +178,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@@ -196,7 +196,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
@@ -213,35 +213,46 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from PIL import TiffTags\n",
-    "TiffTags.LIBTIFF_CORE.add(317)\n",
+    "TiffTags.LIBTIFF_CORE.add(318)\n",
    "picture.save('Compressed_Round_2.tiff', compression='tiff_lzw', tiffinfo={317: 2})"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "404176\n"
+      "404176\n",
+      "True\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/bryce/.local/lib/python3.8/site-packages/PIL/TiffImagePlugin.py:845: UserWarning: Truncated File Read\n",
+      "  warnings.warn(str(msg))\n"
     ]
    }
   ],
   "source": [
-    "print(os.path.getsize('Compressed_Round_2.tiff'))"
+    "print(os.path.getsize('Compressed_Round_2.tiff'))\n",
+    "potentially_compressed = Image.open('Compressed_Round_2.tiff')\n",
+    "print(np.allclose(picture,potentially_compressed))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@@ -274,14 +285,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "<PIL.TiffImagePlugin.TiffImageFile image mode=I;16B size=640x513 at 0x7F3520192160>\n"
+      "(640, 513)\n"
     ]
    }
   ],
@@ -295,7 +306,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -304,7 +315,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
@@ -337,7 +348,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -346,7 +357,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
@@ -373,7 +384,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -383,7 +394,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -392,24 +403,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.31142564558267916\n"
+      "0.6885743544173208\n"
     ]
    }
   ],
   "source": [
-    "print((oldsize-thirdsize)/oldsize)"
+    "print((thirdsize)/oldsize)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
@@ -418,7 +429,7 @@
       "452578"
      ]
     },
-     "execution_count": 115,
+     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -429,20 +440,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
+   "outputs": [],
-    {
-     "data": {
-      "text/plain": [
-       "112"
-      ]
-     },
-     "execution_count": 116,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
   "source": []
  },
  {