Commit 59865f02 authored by Bryce Hepner's avatar Bryce Hepner

Made function to create same dictionary

parent bd332d6b
@@ -208,7 +208,110 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"def make_dictionary(tiff_image_path_list, num_bins=4, difference = True):\n",
" \"\"\"\n",
" This function is used to encode the error based on the difference\n",
" and split the difference into different bins\n",
" \n",
" Input:\n",
" tiff_image_path (string): path to the tiff file\n",
" num_bins (int): number of bins\n",
" \n",
" Return:\n",
" huffman_encoding_list list (num_bins + 1): a list of dictionary\n",
" image_array ndarray (512, 640): original image\n",
" new_error ndarray (512, 640): error that includes the boundary\n",
" diff ndarray (510, 638): difference of min and max of the 4 neighbors\n",
" boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
" predict ndarray (325380,): the list of predicted values\n",
" bins list (num_bins - 1,): a list of threshold to cut the bins\n",
" A ndarray (3 X 3): system of equation\n",
" \n",
" \"\"\"\n",
" list_of_all_vals = []\n",
" huffman_encoding_list = []\n",
" for i in range(num_bins+1):\n",
" list_of_all_vals.append([])\n",
" for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
" # get the image_array, etc\n",
" image_array, predict, diff, error, A = predict_pix(tiff_image_path, difference)\n",
" \n",
" # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n",
"\n",
" # sort the difference and create the bins\n",
" sorted_diff = np.sort(diff.copy())\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" \n",
" # get the boundary \n",
" boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
" \n",
" # take the difference of the boundary with the very first pixel\n",
" boundary = boundary - image_array[0,0]\n",
" \n",
" #boundary is 1dim, so boundary[0] is just the first element\n",
" boundary[0] = image_array[0,0]\n",
" \n",
" # huffman encode the boundary\n",
" for j in boundary:\n",
" list_of_all_vals[0].append(str(j))\n",
"\n",
" \n",
" # create a list of huffman table\n",
" n = len(bins)\n",
" \n",
" # loop through different bins\n",
" for k in range (0,n):\n",
" # the first bin\n",
" if k == 0 :\n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff <= bins[k]\n",
" for j in error[mask].astype(int):\n",
" list_of_all_vals[k+1].append(str(j))\n",
"\n",
" \n",
" # the middle bins\n",
" else:\n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff > bins[k-1]\n",
" new_error = error[mask]\n",
" mask2 = diff[mask] <= bins[k]\n",
" for j in new_error[mask2].astype(int):\n",
" list_of_all_vals[k+1].append(str(j))\n",
"\n",
" \n",
" # the last bin \n",
" # get the point within the bin and huffman huffman_encoding_dict\n",
" mask = diff > bins[-1]\n",
" for j in error[mask].astype(int):\n",
" list_of_all_vals[-1].append(str(j))\n",
" for item in list_of_all_vals:\n",
" freq = dict(Counter(item))\n",
" freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
" node = make_tree(freq)\n",
" huffman_encoding_list.append(huffman_code_tree(node))\n",
" # create a error matrix that includes the boundary (used in encoding matrix)\n",
" new_error = np.copy(image_array)\n",
" new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
" keep = new_error[0,0]\n",
" new_error[0,:] = new_error[0,:] - keep\n",
" new_error[-1,:] = new_error[-1,:] - keep\n",
" new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
" new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
" new_error[0,0] = keep\n",
" # huffman_encoding_list = list(set(huffman_encoding_list))\n",
" diff = np.reshape(diff,(510,638))\n",
" # return the huffman dictionary\n",
" return huffman_encoding_list,bins\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -317,7 +420,7 @@ ...@@ -317,7 +420,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 71,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -356,7 +459,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@@ -371,87 +474,137 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"def decoder(A, encoded_string, list_dic, bins, use_diff):\n",
" \"\"\"\n",
" This function decodes the encoded_matrix.\n",
" Input:\n",
" A (3 X 3): system of equation\n",
" list_dic (num_dic + 1,): a list of huffman coding table \n",
" encoded_matrix (512, 640): encoded matrix\n",
" bins (num_bins - 1,): a list of threshold to cut the bins\n",
" \n",
" Return:\n",
" decode_matrix (512, 640): decoded matrix\n",
" \"\"\"\n",
" # change the dictionary back to list\n",
" # !!!!!WARNING!!!! has to change this part, everytime you change the number of bins\n",
" the_keys0 = list(list_dic[0].keys())\n",
" the_values0 = list(list_dic[0].values())\n",
" \n",
" the_keys1 = list(list_dic[1].keys())\n",
" the_values1 = list(list_dic[1].values())\n",
" \n",
" the_keys2 = list(list_dic[2].keys())\n",
" the_values2 = list(list_dic[2].values())\n",
" \n",
" the_keys3 = list(list_dic[3].keys())\n",
" the_values3 = list(list_dic[3].values())\n",
" \n",
" the_keys4 = list(list_dic[4].keys())\n",
" the_values4 = list(list_dic[4].values())\n",
" \n",
" #Matrix system of points that will be used to solve the least squares fitting hyperplane\n",
" points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])\n",
" \n",
" decode_matrix = np.zeros((512,640))\n",
" # loop through all the element in the matrix\n",
" for i in range(decode_matrix.shape[0]):\n",
" for j in range(decode_matrix.shape[1]):\n",
" # if it's the very first pixel on the image\n",
" if i == 0 and j == 0:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
" decode_matrix[i][j] = colorvalue\n",
" \n",
" # if it's on the boundary (any of the 4 edges)\n",
" elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)\n",
" decode_matrix[i][j] = colorvalue + decode_matrix[0][0]\n",
" # if not the boundary\n",
" else:\n",
" # predict the image with the known pixel value\n",
" z0 = decode_matrix[i-1][j-1]\n",
" z1 = decode_matrix[i-1][j]\n",
" z2 = decode_matrix[i-1][j+1]\n",
" z3 = decode_matrix[i][j-1]\n",
" y0 = int(-z0+z2-z3)\n",
" y1 = int(z0+z1+z2)\n",
" y2 = int(-z0-z1-z2-z3)\n",
" y = np.vstack((y0,y1,y2))\n",
" if use_diff:\n",
" difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
" else:\n",
" \n",
" f, difference, rank, s = la.lstsq(points, [z0,z1,z2,z3], rcond=None) \n",
" difference = difference.astype(int)\n",
" \n",
" predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
" \n",
" # add on the difference by searching the dictionary\n",
" # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins\n",
" if difference <= bins[0]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys1, the_values=the_values1)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" elif difference <= bins[1] and difference > bins[0]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys2, the_values=the_values2)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" elif difference <= bins[2] and difference > bins[1]:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys3, the_values=the_values3)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
" else:\n",
" colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys4, the_values=the_values4)\n",
" decode_matrix[i][j] = colorvalue + int(predict)\n",
"\n",
" return decode_matrix.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Not here\n", "0\n",
"Not here\n", "True\n",
"Not here\n", "5\n",
"Not here\n", "1\n",
"Not here\n", "True\n",
"Not here\n", "5\n",
"Not here\n", "2\n",
"Not here\n", "True\n",
"Not here\n", "5\n",
"Not here\n", "3\n",
"Not here\n", "True\n",
"Not here\n", "5\n",
"Not here\n", "4\n",
"Not here\n", "True\n",
"Not here\n", "5\n"
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n",
"Not here\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 8'\u001b[0m in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=6'>7</a>\u001b[0m encoded_string \u001b[39m=\u001b[39m encoder(new_error, oglist_dic, diff, bound, ogbins)\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=7'>8</a>\u001b[0m \u001b[39m# reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=8'>9</a>\u001b[0m \u001b[39m# print(np.allclose(image, reconstruct_image))\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=9'>10</a>\u001b[0m inletters \u001b[39m=\u001b[39m bitstring_to_bytes(encoded_string)\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=10'>11</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mMatrixNowString.txt\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mw\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000006?line=11'>12</a>\u001b[0m f\u001b[39m.\u001b[39mwrite(inletters\u001b[39m.\u001b[39mdecode(\u001b[39m\"\u001b[39m\u001b[39mISO-8859-1\u001b[39m\u001b[39m\"\u001b[39m))\n",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 7'\u001b[0m in \u001b[0;36mbitstring_to_bytes\u001b[0;34m(s)\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=3'>4</a>\u001b[0m \u001b[39mwhile\u001b[39;00m v:\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=4'>5</a>\u001b[0m b\u001b[39m.\u001b[39mappend(v \u001b[39m&\u001b[39m \u001b[39m0xff\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=5'>6</a>\u001b[0m v \u001b[39m>>\u001b[39m\u001b[39m=\u001b[39m \u001b[39m8\u001b[39m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000007?line=6'>7</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mbytes\u001b[39m(b[::\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m])\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"scenes = file_extractor()\n", "scenes = file_extractor()\n",
"images = image_extractor(scenes)\n", "images = image_extractor(scenes)\n",
"oglist_dic, ogimage, ognew_error, ogdiff, ogbound, ogpredict, ogbins, ogA = huffman(images[0], 4, False)\n", "oglist_dic, ogbins = make_dictionary(images[:10], 4, False)\n",
"file_size_ratios = []\n", "file_size_ratios = []\n",
"for i in range(25,27):\n", "for i in range(5):\n",
" print(i)\n",
" list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[i], 4, False)\n", " list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[i], 4, False)\n",
" encoded_string = encoder(new_error, oglist_dic, diff, bound, ogbins)\n", " encoded_string = encoder(new_error, oglist_dic, diff, bound, bins)\n",
" # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n", " # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
" # print(np.allclose(image, reconstruct_image))\n", " # print(np.allclose(image, reconstruct_image))\n",
" inletters = bitstring_to_bytes(encoded_string)\n", " inletters = bitstring_to_bytes(encoded_string)\n",
"\n",
" reconstruct_image = decoder(A, encoded_string, oglist_dic, bins, False)\n",
" print(np.allclose(image, reconstruct_image))\n",
" print(len(list_dic))\n",
"\n",
" with open(\"MatrixNowString.txt\", 'w') as f:\n", " with open(\"MatrixNowString.txt\", 'w') as f:\n",
" f.write(inletters.decode(\"ISO-8859-1\"))\n", " f.write(inletters.decode(\"ISO-8859-1\"))\n",
" file_size_ratios.append((os.path.getsize('MatrixNowString.txt'))/os.path.getsize('images/1626032610_393963/1626032610_393963_0.tiff'))" " file_size_ratios.append((os.path.getsize('MatrixNowString.txt'))/os.path.getsize('images/1626032610_393963/1626032610_393963_0.tiff'))"
@@ -459,14 +612,14 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 81,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "0.6058229115470704\n"
+ "0.6032547318383986\n"
]
}
],
@@ -501,8 +654,33 @@
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 37 216 73 ... 42 88 53]\n",
" [ 16 140 368 ... 112 54 468]\n",
" [ 0 42 20 ... 28 13 13]\n",
" ...\n",
" [ 37 6 32 ... 6 13 192]\n",
" [ 32 2 130 ... 88 150 23]\n",
" [ 48 6 20 ... 20 73 322]]\n"
]
}
],
"source": [
"print(diff)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [] "source": []
} }
], ],
......