Commit 4286c54a authored by Bryce Hepner

bin work

parent 6cb62a30
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 115, "execution_count": 28,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 116, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -72,7 +72,7 @@ ...@@ -72,7 +72,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 117, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -142,12 +142,12 @@ ...@@ -142,12 +142,12 @@
" # calculate the error\n", " # calculate the error\n",
" error = np.ravel(image_array[1:-1,1:-1])-predict\n", " error = np.ravel(image_array[1:-1,1:-1])-predict\n",
" \n", " \n",
" return image_array, predict, diff, error" " return image_array, diff, error"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 118, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -206,7 +206,7 @@ ...@@ -206,7 +206,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 138, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -232,19 +232,13 @@ ...@@ -232,19 +232,13 @@
" \"\"\"\n", " \"\"\"\n",
" list_of_all_vals = []\n", " list_of_all_vals = []\n",
" huffman_encoding_list = []\n", " huffman_encoding_list = []\n",
" for i in range(num_bins+1):\n", " for _ in range(num_bins+1):\n",
" list_of_all_vals.append([])\n", " list_of_all_vals.append([])\n",
" for i, tiff_image_path in enumerate(tiff_image_path_list):\n", " for _, tiff_image_path in enumerate(tiff_image_path_list):\n",
" # get the image_array, etc\n", " # get the image_array, etc\n",
" image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n", " image_array, diff, error= predict_pix(tiff_image_path, difference)\n",
" \n",
" # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n",
"\n", "\n",
" # sort the difference and create the bins\n", " bins = [21,32,48]\n",
" sorted_diff = np.sort(diff.copy())\n",
" # bins = [12,60,180]\n",
" bins = [21,31,48]\n",
" # get the boundary \n", " # get the boundary \n",
" boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n", " boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
" \n", " \n",
...@@ -308,7 +302,7 @@ ...@@ -308,7 +302,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 139, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -323,7 +317,7 @@ ...@@ -323,7 +317,7 @@
" \n", " \n",
" Return:\n", " Return:\n",
" huffman_encoding_list list (num_bins + 1): a list of dictionary\n", " huffman_encoding_list list (num_bins + 1): a list of dictionary\n",
" image_array ndarray (512, 640): original image\n", " image_as_array ndarray (512, 640): original image\n",
" new_error ndarray (512, 640): error that includes the boundary\n", " new_error ndarray (512, 640): error that includes the boundary\n",
" diff ndarray (510, 638): difference of min and max of the 4 neighbors\n", " diff ndarray (510, 638): difference of min and max of the 4 neighbors\n",
" boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n", " boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
...@@ -331,25 +325,22 @@ ...@@ -331,25 +325,22 @@
" bins list (num_bins - 1,): a list of threshold to cut the bins\n", " bins list (num_bins - 1,): a list of threshold to cut the bins\n",
" A ndarray (3 X 3): system of equation\n", " A ndarray (3 X 3): system of equation\n",
" \"\"\"\n", " \"\"\"\n",
" # get the image_array, etc\n", " # get the image_as_array, etc\n",
" image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n", " image_as_array, diff, error= predict_pix(tiff_image_path, difference)\n",
" \n", " \n",
" # calculate the number of points that will go in each bin\n", " # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n", "\n",
"\n", "\n",
" # sort the difference and create the bins\n", " # sort the difference and create the bins\n",
" sorted_diff = np.sort(diff.copy())\n", " bins = [21,32,48]\n",
" # bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" # bins = [12,60,180]\n",
" bins = [21,31,48]\n",
" # get the boundary \n", " # get the boundary \n",
" boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n", " boundary = np.hstack((image_as_array[0,:],image_as_array[-1,:],image_as_array[1:-1,0],image_as_array[1:-1,-1]))\n",
" \n", " \n",
" # take the difference of the boundary with the very first pixel\n", " # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image_array[0,0]\n", " boundary = boundary - image_as_array[0,0]\n",
" \n", " \n",
" #boundary is 1dim, so boundary[0] is just the first element\n", " #boundary is 1dim, so boundary[0] is just the first element\n",
" boundary[0] = image_array[0,0]\n", " boundary[0] = image_as_array[0,0]\n",
" \n", " \n",
" # huffman encode the boundary\n", " # huffman encode the boundary\n",
" bound_vals_as_string = [str(i) for i in boundary]\n", " bound_vals_as_string = [str(i) for i in boundary]\n",
...@@ -399,7 +390,7 @@ ...@@ -399,7 +390,7 @@
" huffman_encoding_list.append(huffman_encoding_dict)\n", " huffman_encoding_list.append(huffman_encoding_dict)\n",
"\n", "\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n", " # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image_array)\n", " new_error = np.copy(image_as_array)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n", " new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n", " keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n", " new_error[0,:] = new_error[0,:] - keep\n",
...@@ -409,26 +400,23 @@ ...@@ -409,26 +400,23 @@
" new_error[0,0] = keep\n", " new_error[0,0] = keep\n",
" # huffman_encoding_list = list(set(huffman_encoding_list))\n", " # huffman_encoding_list = list(set(huffman_encoding_list))\n",
" diff = np.reshape(diff,(510,638))\n", " diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
"\n", "\n",
" return huffman_encoding_list, image_array, new_error, diff, boundary, predict, bins\n", " return image_as_array, new_error, diff\n"
" \n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 140, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def encoder(error, list_dic, diff, bound, bins):\n", "def encoder(error, list_dic, diff, bins):\n",
" \"\"\"\n", " \"\"\"\n",
" This function encode the matrix with huffman coding tables\n", " This function encode the matrix with huffman coding tables\n",
" \n", " \n",
" Input:\n", " Input:\n",
" error (512, 640): a matrix with all the errors\n", " error (512, 640): a matrix with all the errors\n",
" list_dic (num_dic + 1,): a list of huffman coding table \n", " list_dic (num_dic + 1,): a list of huffman coding table \n",
" bound (2300,): the boundary values after subtracting the very first pixel value\n",
" bins (num_bins - 1,): a list of threshold to cut the bins\n", " bins (num_bins - 1,): a list of threshold to cut the bins\n",
" \n", " \n",
" Return:\n", " Return:\n",
...@@ -456,7 +444,7 @@ ...@@ -456,7 +444,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 141, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -479,14 +467,12 @@ ...@@ -479,14 +467,12 @@
" for _ in range(8-length_of_string):\n", " for _ in range(8-length_of_string):\n",
" zerobuffer += \"0\"\n", " zerobuffer += \"0\"\n",
" int_array.append(int(input_string+zerobuffer,2))\n", " int_array.append(int(input_string+zerobuffer,2))\n",
" # print(int_array[0:20])\n",
" # print(int_array[-12:])\n",
" return bytes(int_array)\n" " return bytes(int_array)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 142, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -555,7 +541,6 @@ ...@@ -555,7 +541,6 @@
" difference = difference.astype(int)\n", " difference = difference.astype(int)\n",
" \n", " \n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n", " predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
" \n",
" # add on the difference by searching the dictionary\n", " # add on the difference by searching the dictionary\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n", " # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if difference <= bins[0]:\n", " if difference <= bins[0]:\n",
...@@ -576,7 +561,7 @@ ...@@ -576,7 +561,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 143, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -587,19 +572,19 @@ ...@@ -587,19 +572,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 146, "execution_count": 47,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"scenes = file_extractor()\n", "scenes = file_extractor()\n",
"newnamesforlater = []\n", "newnamesforlater = []\n",
"images = image_extractor(scenes)\n", "images = image_extractor(scenes)\n",
"oglist_dic, ogbins = make_dictionary(images[:10], 4, False)\n", "list_dic, ogbins = make_dictionary(images[0:10], 4, False)\n",
"file_size_ratios = []\n", "file_size_ratios = []\n",
"np.save(\"first_dic.npy\", oglist_dic)\n", "np.save(\"first_dic.npy\", list_dic)\n",
"for i in range(10,30):\n", "for i in range(10):\n",
" list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n", " image, new_error, diff = huffman(images[i], 4, False)\n",
" encoded_string1 = encoder(new_error, oglist_dic, diff, bound, ogbins)\n", " encoded_string1 = encoder(new_error, list_dic, diff, ogbins)\n",
" # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n", " # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
" # print(np.allclose(image, reconstruct_image))\n", " # print(np.allclose(image, reconstruct_image))\n",
" inletters = bitstring_to_bytes(encoded_string1)\n", " inletters = bitstring_to_bytes(encoded_string1)\n",
...@@ -615,14 +600,14 @@ ...@@ -615,14 +600,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 145, "execution_count": 48,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"0.3955133696593259\n" "0.3955120002069395\n"
] ]
} }
], ],
...@@ -632,9 +617,20 @@ ...@@ -632,9 +617,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 49,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"0.39535481750525336"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"0.39535481750525336" "0.39535481750525336"
] ]
...@@ -665,13 +661,20 @@ ...@@ -665,13 +661,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 127, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n", "True\n",
"True\n", "True\n",
"True\n" "True\n"
...@@ -685,21 +688,21 @@ ...@@ -685,21 +688,21 @@
" for i, item in enumerate(int_array):\n", " for i, item in enumerate(int_array):\n",
" end_string += (bin(item)[2:].zfill(8))\n", " end_string += (bin(item)[2:].zfill(8))\n",
" return end_string\n", " return end_string\n",
"oglist_dic = np.load(\"first_dic.npy\", allow_pickle=\"TRUE\")\n", "list_dic = np.load(\"first_dic.npy\", allow_pickle=\"TRUE\")\n",
"\n", "\n",
"\n", "\n",
"# ogbins = [12,60,180]\n", "# ogbins = [12,60,180]\n",
"ogbins = [21,31,48]\n", "ogbins = [21,32,48]\n",
"for i,item in enumerate(newnamesforlater[0:3]):\n", "for i,item in enumerate(newnamesforlater[0:10]):\n",
" list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n", " image, new_error, diff = huffman(images[i], 4, False)\n",
" encoded_string2 = bytes_to_bitstring(read_from_file(item))\n", " encoded_string2 = bytes_to_bitstring(read_from_file(item))\n",
" reconstruct_image = decoder(encoded_string2, oglist_dic, ogbins, False)\n", " reconstruct_image = decoder(encoded_string2, list_dic, ogbins, False)\n",
" print(np.allclose(image, reconstruct_image))" " print(np.allclose(image, reconstruct_image))"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 113, "execution_count": 44,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -726,7 +729,7 @@ ...@@ -726,7 +729,7 @@
" all_bins = []\n", " all_bins = []\n",
" for i, tiff_image_path in enumerate(tiff_image_path_list):\n", " for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
" # get the image_array, etc\n", " # get the image_array, etc\n",
" image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n", " image_array, predict, diff= predict_pix(tiff_image_path, difference)\n",
" \n", " \n",
" # calculate the number of points that will go in each bin\n", " # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n", " data_points_per_bin = diff.size // num_bins\n",
...@@ -740,14 +743,14 @@ ...@@ -740,14 +743,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 114, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"[21.00404858 31.92712551 48.06477733]\n" "[-13.07692308 0. 13.08097166]\n"
] ]
} }
], ],
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment