Commit 7ffcd37b authored by Bryce Hepner's avatar Bryce Hepner

big problem with the copy and pasted code

parent e4fa4e0f
......@@ -154,7 +154,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 69,
"id": "6b965751",
"metadata": {},
"outputs": [],
......@@ -173,7 +173,7 @@
" return self.left, self.right\n",
"\n",
" def __str__(self):\n",
" return str(self.left) + str(self.right)\n",
" return self.left, self.right\n",
"\n",
"\n",
"def huffman_code_tree(node, binString=''):\n",
......@@ -202,13 +202,15 @@
" node = NodeTree(key1, key2)\n",
" nodes.append((node, c1 + c2))\n",
" #reverse True, descending order\n",
" sorted_nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
" return sorted_nodes[0][0]"
"\n",
" #There is a huge memory leak here, no idea how or why\n",
" nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
" return nodes[0][0]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 81,
"id": "b7561883",
"metadata": {},
"outputs": [],
......@@ -223,22 +225,22 @@
" num_bins (int): number of bins\n",
" \n",
" Return:\n",
" list_dic (num_bins + 1): a list of dictionary\n",
" image (512, 640): original image\n",
" new_error (512, 640): error that includes the boundary\n",
" diff (510, 638): difference of min and max of the 4 neighbors\n",
" boundary (2300,): the boundary values after subtracting the very first pixel value\n",
" predict (325380,): the list of predicted values\n",
" bins (num_bins - 1,): a list of threshold to cut the bins\n",
" A (3 X 3): system of equation\n",
" huffman_encoding_list list (num_bins + 1): a list of dictionary\n",
" image_array ndarray (512, 640): original image\n",
" new_error ndarray (512, 640): error that includes the boundary\n",
" diff ndarray (510, 638): difference of min and max of the 4 neighbors\n",
" boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
" predict ndarray (325380,): the list of predicted values\n",
" bins list (num_bins - 1,): a list of threshold to cut the bins\n",
" A ndarray (3 X 3): system of equation\n",
" \n",
" \"\"\"\n",
" # get the image_array, etc\n",
" image_array, predict, diff, error, A = predict_pix(tiff_image_path, difference)\n",
" \n",
" # calculate the number of points that will go in each bin\n",
" data_points_per_bin = len(diff) // num_bins\n",
" \n",
" data_points_per_bin = diff.size // num_bins\n",
"\n",
" # sort the difference and create the bins\n",
" sorted_diff = np.sort(diff.copy())\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
......@@ -248,6 +250,8 @@
" \n",
" # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image_array[0,0]\n",
" \n",
" #boundary is 1dim, so boundary[0] is just the first element\n",
" boundary[0] = image_array[0,0]\n",
" \n",
" # huffman encode the boundary\n",
......@@ -255,28 +259,28 @@
" freq = dict(Counter(bound_vals_as_string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" huffman_encoding_dict = huffman_code_tree(node)\n",
" \n",
" # create a list of huffman table\n",
" list_dic = [encode]\n",
" huffman_encoding_list = [huffman_encoding_dict]\n",
" n = len(bins)\n",
" \n",
" # loop through different bins\n",
" for i in range (0,n):\n",
" # the fisrt bin\n",
" # the first bin\n",
" if i == 0 :\n",
" # get the point within the bin and huffman encode\n",
" # get the point within the bin and huffman encode\n",
" mask = diff <= bins[i]\n",
" line_as_string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(line_as_string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" huffman_encoding_dict = huffman_code_tree(node)\n",
" huffman_encoding_list.append(huffman_encoding_dict)\n",
" \n",
" # the middle bins\n",
" else:\n",
" # get the point within the bin and huffman encode\n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[i-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[i]\n",
......@@ -284,18 +288,18 @@
" freq = dict(Counter(line_as_string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" huffman_encoding_dict = huffman_code_tree(node)\n",
" huffman_encoding_list.append(huffman_encoding_dict)\n",
" \n",
" # the last bin \n",
" # get the point within the bin and huffman encode\n",
" # get the point within the bin and huffman encode\n",
" mask = diff > bins[-1]\n",
" line_as_string = [str(i) for i in error[mask].astype(int)]\n",
" freq = dict(Counter(line_as_string))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" encode = huffman_code_tree(node)\n",
" list_dic.append(encode)\n",
" huffman_encoding_dict = huffman_code_tree(node)\n",
" huffman_encoding_list.append(huffman_encoding_dict)\n",
"\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image_array)\n",
......@@ -306,16 +310,16 @@
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" \n",
" # huffman_encoding_list = list(set(huffman_encoding_list))\n",
" diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
" return list_dic, image_array, new_error, diff, boundary, predict, bins, A\n",
" return huffman_encoding_list, image_array, new_error, diff, boundary, predict, bins, A\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 82,
"id": "2eb774d2",
"metadata": {},
"outputs": [],
......@@ -355,7 +359,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 83,
"id": "8eeb40d0",
"metadata": {},
"outputs": [],
......@@ -439,20 +443,20 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 91,
"id": "f959fe93",
"metadata": {},
"outputs": [],
"source": [
"def compress_rate(image, new_error, diff, bound, list_dic, bins):\n",
"def compress_rate(image_array, new_error, diff, bound, huffman_encoding_list, bins):\n",
" '''\n",
" This function is used to calculate the compression rate.\n",
" Input:\n",
" image (512, 640): original image\n",
" image_array (512, 640): original image\n",
" new_error (512, 640): error that includes the boundary\n",
" diff (510, 638): difference of min and max of the 4 neighbors\n",
" bound (2300,): the boundary values after subtracting the very first pixel value\n",
" list_dic (num_dic + 1,): a list of huffman coding table \n",
" huffman_encoding_list (num_dic + 1,): a list of huffman coding table \n",
" bins (num_bins - 1,): a list of threshold to cut the bins\n",
" \n",
" Return:\n",
......@@ -463,43 +467,47 @@
" # the bits for the compressed image\n",
" c_len = 0\n",
" # initializing the variable \n",
" im = np.reshape(image,(512, 640))\n",
" real_b = np.hstack((image[0,:],image[-1,:],image[1:-1,0],image[1:-1,-1]))\n",
" original = image[1:-1,1:-1].reshape(-1)\n",
" \n",
" #this was unused\n",
" # im = np.reshape(image,(512, 640))\n",
" \n",
" real_boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
" #Bryce's notes: Why are they all reshaped?\n",
" original_core = image_array[1:-1,1:-1].reshape(-1)\n",
" diff = diff.reshape(-1)\n",
" error = new_error[1:-1,1:-1].reshape(-1)\n",
" \n",
" # calculate the bit for boundary\n",
" for i in range(0,len(bound)):\n",
" o_len += len(bin(real_b[i])[2:])\n",
" c_len += len(list_dic[0][str(bound[i])])\n",
" o_len += len(bin(real_boundary[i])[2:])\n",
" c_len += len(huffman_encoding_list[0][str(bound[i])])\n",
" \n",
" # calculate the bit for the pixels inside the boundary\n",
" for i in range(0,len(original)):\n",
" for i in range(0,len(original_core)):\n",
"\n",
" # for the original image\n",
" o_len += len(bin(original[i])[2:])\n",
" o_len += len(bin(original_core[i])[2:])\n",
" \n",
" # check the difference and find the corresponding huffman table\n",
" # !!!!!WARNING!!!! has to change this part, every time you change the number of bins\n",
" if diff[i] <= bins[0]:\n",
" c_len += len(list_dic[1][str(int(error[i]))])\n",
" c_len += len(huffman_encoding_list[1][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
" c_len += len(list_dic[2][str(int(error[i]))])\n",
" c_len += len(huffman_encoding_list[2][str(int(error[i]))])\n",
" \n",
" elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
" c_len += len(list_dic[3][str(int(error[i]))])\n",
" c_len += len(huffman_encoding_list[3][str(int(error[i]))])\n",
"\n",
" else: \n",
" c_len += len(list_dic[4][str(int(error[i]))])\n",
" c_len += len(huffman_encoding_list[4][str(int(error[i]))])\n",
"\n",
" return c_len/o_len"
]
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 92,
"id": "3e0e9742",
"metadata": {},
"outputs": [
......@@ -507,7 +515,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL-411 R-321 R-315 R-286 R-281 R-289 R-256 R-229 R-194 R-142 R-238 R-234 R-219 R-255 R-214 R-151 R-259 R-252 R-114 R-147 R-117 R-191 R-163 R-38 R-469 R-426 R-537 R-541 R-521 R-530 R-526 R-497 R-483 R-491 R-466 R-442 R-455 R-487 R-451 R-480 R-489 R-508 R-493 R-495 R-512 R-506 R-482 R-474 R-505 R-488 R-523 R-445 R-397 R-399 R-387 R-369 R-348 R-360 R-361 R-401 R-416 R-65 R148 R172 R189 R190 R209 R114 R111 R-241 R-213 R-143 R-107 R-44 R-334 R-371 R-317 R-352 R-240 R-66 R-92 R108 R120 R129 R144 R171 R166 R163 R175 R216 R196 R146 R140 R138 R150 R149 R145 R121 R112 R-27 R-77 R-75 R-251 R-294 R-356 R-279 R-290 R-193 R-167 R-274 R-354 R-389 R-454 R-380 R-370 R-305 R-326 R-407 R-398 R-372 R-362 R-414 R-424 R-452 R-383 R-324 R-331 R-381 R-346 R-420 R-409 R-453 R-450 R-460 R-449 R-458 R-42 R118 R104 R103 R22275 R-275 R-216 R-260 R-177 R-168 R-172 R-150 R-253 R-123 R-112 R-139 R-118 R-115 R-468 R-494 R-476 R-496 R-514 R-529 R-486 R-440 R-296 R-276 R-267 R-178 R-124 R-119 R-71 R-415 R-418 R-384 R-344 R-332 R-306 R-242 R-269 R-302 R-330 R-309 R-231 R-248 R-64 R-80 R-250 R-95 R126 R127 R-88 R-162 R-109 R-78 R-83 R-233 R-209 R-204 R-170 R-93 R-122 R-179 R-293 R-295 R-261 R-291 R-277 R-257 R-220 R-226 R-208 R-235 R-243 R-338 R-311 R-301 R-135 R-244 R-323 R-349 R-385 R-421 R-425 R-378 R-390 R-417 R-405 R-461 R-403 R-358 R-340 R-413 R-393 R-400 R-428 R-447 R-463 R-473 R-419 R-434 R-457 R-436 R-446 R-439 
R-79 R-60 R-49 R-8 R-48 R-10 R-41 R102 R132 R96 R117 R113 R110 R115 R116 R105 R94 R-37 R-200 R-99 R-478 R-392 R-101 R-24 R-34 R-287 R-136 R-76 R-343 R-310 R-271 R-237 R-145 R-62 R-106 R182 R101 R106 R-84 R-81 R-152 R-222 R-120 R-264 R-206 R-131 R-113 R-96 R-108 R-134 R-258 R-266 R-299 R-239 R-307 R-304 R-284 R-297 R-249 R-196 R-199 R-186 R-265 R-230 R-138 R-192 R-236 R-245 R-273 R-254 R-298 R-337 R-373 R-365 R-345 R-319 R-313 R-335 R-320 R-410 R-443 R-432 R-477 R-444 R-437 R-435 R-57 R-128 R-102 R-61 R-94 R-73 R-68 R-70 R-30 R-51 R-74 R24 R91 R109 R107 R93 R-25 R-1 R-40 R-45 R-182 R-153 R-188 R-197 R-133 R-22 R-329 R-347 R-333 R-189 R-184 R-175 R-171 R-169 R-54 R-121 R-72 R-159 R-82 R-98 R-58 R-246 R-125 R-89 R-90 R-97 R-223 R-282 R-272 R-210 R-164 R-180 R-232 R-225 R-205 R-116 R-154 R-263 R-280 R-314 R-429 R-359 R-422 R-412 R-430 R-53 R-46 R-110 R-59 R-87 R-105 R47 R37 R-18 R-26 R21 R-23 R40 R124 R-47 R-33 R-14 R-56 R-39 R-28 R-16 R-31 R-100 R-85 R-140 R-165 R-155 R-104 R-176 R-312 R-268 R-198 R-195 R-174 R-221 R-146 R-217 R-247 R-308 R-318 R-341 R-325 R-408 R-406 R-55 R-36 R3 R-50 R18 R35 R14 R-32 R1 R89 R95 R100 R-21 R-3 R-43 R-149 R-351 R-173 R-91 R-207 R-157 R-127 R-160 R-218 R-212 R-148 R-203 R-224 R-215 R-126 R-202 R-339 R-355 R-7 R-67 R-69 R-63 R-86 R48 R-5 R59 R86 R76 R81 R99 R49 R-13 R-20 R-9 R-19 R13 R39 R-185 R-166 R-158 R-156 R-144 R-130 R-111 R-201 R-227 R-161 R-132 R-187 R-29 R-52 R5 R7 R15 R57 R45 R51 R26 R97 R64 R67 R98 R87 R46 R4 R-17 R-11 R-15 R38 R17 R-211 R-190 R-137 R8 R55 R62 R72 R74 R19 R52 R-35 R-141 R-6 R27 R2 R42 R-4 R30 R28 R41 R78 R43 R22 R0 R32 R85 R88 R66 R79 R80 R92 R77 R58 R34 R12 R11 R53 R-12 R-2 R6 R33 R10 R-183 R16 R29 R36 R90 R84 R75 R68 R56 R44 R50 R61 R60 R63 R20 R73 R23 R-181 R69 R82 R9 R54 R70 R65 R83 R25 R71 R31\n",
"True\n",
"5\n"
]
......@@ -525,17 +532,17 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 93,
"id": "004e8ba8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.090535888671875"
"0.4232928466796875"
]
},
"execution_count": 47,
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
......@@ -546,7 +553,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 94,
"id": "a282f9e6",
"metadata": {},
"outputs": [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment