{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "14f74f21", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from matplotlib import pyplot as plt\n", "from itertools import product\n", "import os\n", "import sys\n", "from PIL import Image\n", "from scipy.optimize import minimize,linprog\n", "# import time\n", "# import seaborn as sns\n", "from sklearn.neighbors import KernelDensity\n", "# import pandas as pd\n", "from collections import Counter\n", "import numpy.linalg as la" ] }, { "cell_type": "code", "execution_count": 2, "id": "c16af61f", "metadata": {}, "outputs": [], "source": [ "def file_extractor(dirname=\"images\"):\n", " files = os.listdir(dirname)\n", " scenes = []\n", " for file in files:\n", " if file == '.DS_Store':\n", " continue\n", " else:\n", " scenes.append(os.path.join(dirname, file))\n", " return scenes\n", "\n", "def image_extractor(scenes):\n", " image_folder = []\n", " for scene in scenes:\n", " files = os.listdir(scene)\n", " for file in files:\n", " if file[-5:] != \".tiff\" or file[-7:] == \"_6.tiff\":\n", " continue\n", " else:\n", " image_folder.append(os.path.join(scene, file))\n", " return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor\n", "\n", "def im_distribution(images, num):\n", " \"\"\"\n", " Function that extracts tiff files from specific cameras and returns a list of all\n", " the tiff files corresponding to that camera. i.e. all pictures labeled \"_7.tiff\" or otherwise\n", " specified camera numbers.\n", " \n", " Parameters:\n", " images (list): list of all tiff files, regardless of classification. This is NOT a list of directories but\n", " of specific tiff files that can be opened right away. This is the list that we iterate through and \n", " divide.\n", " \n", " num (str): a string designation for the camera number that we want to extract i.e. \"14\" for double digits\n", " of \"_1\" for single digits.\n", " \n", " Returns:\n", " tiff (list): A list of tiff files that have the specified designation from num. They are the files extracted\n", " from the 'images' list that correspond to the given num.\n", " \"\"\"\n", " tiff = []\n", " for im in images:\n", " if im[-7:-5] == num:\n", " tiff.append(im)\n", " return tiff" ] }, { "cell_type": "code", "execution_count": 3, "id": "53786325", "metadata": {}, "outputs": [], "source": [ "def predict_pix(tiff_image_path, difference = True):\n", " \"\"\"\n", " This function predict the pixel values excluding the boundary.\n", " Using the 4 neighbor pixel values and MSE to predict the next pixel value\n", " (-1,1) (0,1) (1,1) => relative position of the 4 other given values\n", " (-1,0) (0,0) => (0,0) is the one we want to predict\n", " take the derivative of mean square error to solve for the system of equation \n", " A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])\n", " A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n", " and the predicted pixel value is c.\n", " \n", " Input:\n", " tiff_image_path (string): path to the tiff file\n", " \n", " Return:\n", " image ndarray(512 X 640): original image \n", " predict ndarray(325380,): predicted image excluding the boundary\n", " diff. 
"    diff ndarray(325380,): if difference = True, the difference between the min and max of the four neighbors (boundary excluded);\n",
"                           otherwise the least-squares residual of the four neighbors to their fitted hyperplane\n",
"    error ndarray(325380,): difference between the original image and the predicted image\n",
"    A ndarray(3 X 3): matrix of the system of equations\n",
"    \"\"\"\n",
"    image_obj = Image.open(tiff_image_path) #Open the image and read it as an Image object\n",
"    image_array = np.array(image_obj)[1:,:].astype(int) #Convert to an array, leaving out the first row because the first row is just housekeeping data\n",
"    # image_array = image_array.astype(int)\n",
"    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for the system of equations\n",
"    # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)\n",
"    z0 = image_array[0:-2,0:-2] # top-left neighbor of every interior pixel\n",
"    z1 = image_array[0:-2,1:-1] # top neighbor of every interior pixel\n",
"    z2 = image_array[0:-2,2::] # top-right neighbor of every interior pixel\n",
"    z3 = image_array[1:-1,0:-2] # left neighbor of every interior pixel\n",
"    \n",
"    # calculate the right-hand side of the system of equations\n",
"    y0 = np.ravel(-z0+z2-z3)\n",
"    y1 = np.ravel(z0+z1+z2)\n",
"    y2 = np.ravel(-z0-z1-z2-z3)\n",
"    y = np.vstack((y0,y1,y2))\n",
"    \n",
"    # use the numpy solver to solve all the systems of equations at once\n",
"    #predict = np.floor(np.linalg.solve(A,y)[-1])\n",
"    predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))\n",
"    \n",
"    #Matrix of points that will be used to solve for the least squares fitting hyperplane\n",
"    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])\n",
"    \n",
"    # flatten the neighbor pixels and stack them together\n",
"    z0 = np.ravel(z0)\n",
"    z1 = np.ravel(z1)\n",
"    z2 = np.ravel(z2)\n",
"    z3 = np.ravel(z3)\n",
"    neighbor = np.vstack((z0,z1,z2,z3)).T\n",
"    \n",
"    if difference:\n",
"        # difference between the largest and smallest neighbor\n",
"        diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)\n",
"    \n",
"    else:\n",
"        #Compute the best fitting hyperplane using least squares\n",
"        #The residual is the summed squared distance of the four neighbors to the hyperplane,\n",
"        #and serves as a measure of the local gradient\n",
"        f, diff, rank, s = la.lstsq(points, neighbor.T, rcond=None)\n",
"        diff = diff.astype(int)\n",
"    \n",
"    # calculate the prediction error\n",
"    error = np.ravel(image_array[1:-1,1:-1])-predict\n",
"    \n",
"    return image_array, predict, diff, error, A" ] }, { "cell_type": "code", "execution_count": 4, "id": "6b965751", "metadata": {}, "outputs": [], "source": [
"\"\"\"\n",
"this huffman encoding code is found online\n",
"https://favtutor.com/blogs/huffman-coding\n",
"\"\"\"\n",
"\n",
"class NodeTree(object):\n",
"    def __init__(self, left=None, right=None):\n",
"        self.left = left\n",
"        self.right = right\n",
"\n",
"    def children(self):\n",
"        return self.left, self.right\n",
"\n",
"    def __str__(self):\n",
"        return '%s_%s' % (self.left, self.right)\n",
"\n",
"\n",
"def huffman_code_tree(node, binString=''):\n",
"    '''\n",
"    Function to find the Huffman code for every symbol in the tree\n",
"    '''\n",
"    if type(node) is str:\n",
"        return {node: binString}\n",
"    (l, r) = node.children()\n",
"    d = dict()\n",
"    d.update(huffman_code_tree(l, binString + '0'))\n",
"    d.update(huffman_code_tree(r, binString + '1'))\n",
"    return d\n",
"\n",
"\n",
"def make_tree(nodes):\n",
"    '''\n",
"    Function to make the Huffman tree\n",
"    :param nodes: list of (symbol, count) pairs sorted by count in descending order\n",
"    :return: Root of the tree\n",
"    '''\n",
"    while len(nodes) > 1:\n",
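"        # nodes is kept sorted by count in descending order, so the two rarest symbols sit at the end\n",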
"        (key1, c1) = nodes[-1]\n",
"        (key2, c2) = nodes[-2]\n",
"        nodes = nodes[:-2]\n",
"        node = NodeTree(key1, key2)\n",
"        nodes.append((node, c1 + c2))\n",
"        # re-sort with reverse=True to keep descending order\n",
"\n",
"        #There is a huge memory leak here, no idea how or why\n",
"        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
"    return nodes[0][0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "b7561883", "metadata": {}, "outputs": [], "source": [
"def huffman(tiff_image_path, num_bins=4, difference = True):\n",
"    \"\"\"\n",
"    This function Huffman-encodes the prediction error, splitting the pixels into bins\n",
"    based on the neighbor difference and building one Huffman table per bin\n",
"    (plus one table for the boundary).\n",
"    \n",
"    Input:\n",
"    tiff_image_path (string): path to the tiff file\n",
"    num_bins (int): number of bins\n",
"    \n",
"    Return:\n",
"    huffman_encoding_list list (num_bins + 1,): a list of Huffman dictionaries\n",
"    image_array ndarray (512, 640): original image\n",
"    new_error ndarray (512, 640): error that includes the boundary\n",
"    diff ndarray (510, 638): difference of the min and max of the 4 neighbors\n",
"    boundary ndarray (2300,): the boundary values after subtracting the very first pixel value\n",
"    predict ndarray (325380,): the list of predicted values\n",
"    bins list (num_bins - 1,): a list of thresholds that cut the bins\n",
"    A ndarray (3 X 3): matrix of the system of equations\n",
"    \n",
"    \"\"\"\n",
"    # get the image_array, etc\n",
"    image_array, predict, diff, error, A = predict_pix(tiff_image_path, difference)\n",
"    \n",
"    # calculate the number of points that will go in each bin\n",
"    data_points_per_bin = diff.size // num_bins\n",
"\n",
"    # sort the difference and create the bins\n",
"    sorted_diff = np.sort(diff.copy())\n",
"    bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
"    \n",
"    # get the boundary\n",
"    boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
"    \n",
"    # take the difference of the boundary with the very first pixel\n",
"    boundary = boundary - image_array[0,0]\n",
"    \n",
"    #boundary is 1-dimensional, so boundary[0] is just the first element\n",
"    boundary[0] = image_array[0,0]\n",
"    \n",
"    # huffman encode the boundary\n",
"    bound_vals_as_string = [str(i) for i in boundary]\n",
"    freq = dict(Counter(bound_vals_as_string))\n",
"    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
"    node = make_tree(freq)\n",
"    huffman_encoding_dict = huffman_code_tree(node)\n",
"    \n",
"    # create a list of huffman tables\n",
"    huffman_encoding_list = [huffman_encoding_dict]\n",
"    n = len(bins)\n",
"    \n",
"    # loop through the different bins\n",
"    for i in range (0,n):\n",
"        # the first bin\n",
"        if i == 0 :\n",
"            # get the points within the bin and build their huffman_encoding_dict\n",
"            mask = diff <= bins[i]\n",
"            line_as_string = [str(i) for i in error[mask].astype(int)]\n",
"            freq = dict(Counter(line_as_string))\n",
"            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
"            node = make_tree(freq)\n",
"            huffman_encoding_dict = huffman_code_tree(node)\n",
"            huffman_encoding_list.append(huffman_encoding_dict)\n",
"        \n",
"        # the middle bins\n",
"        else:\n",
"            # get the points within the bin and build their huffman_encoding_dict\n",
"            mask = diff > bins[i-1]\n",
"            new_error = error[mask]\n",
"            mask2 = diff[mask] <= bins[i]\n",
"            line_as_string = [str(i) for i in new_error[mask2].astype(int)]\n",
"            freq = dict(Counter(line_as_string))\n",
"            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
"            node = make_tree(freq)\n",
"            huffman_encoding_dict = huffman_code_tree(node)\n",
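"            # each bin contributes its own Huffman table, appended in bin order\n",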
"            huffman_encoding_list.append(huffman_encoding_dict)\n",
"    \n",
"    # the last bin\n",
"    # get the points within the bin and build their huffman_encoding_dict\n",
"    mask = diff > bins[-1]\n",
"    line_as_string = [str(i) for i in error[mask].astype(int)]\n",
"    freq = dict(Counter(line_as_string))\n",
"    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)\n",
"    node = make_tree(freq)\n",
"    huffman_encoding_dict = huffman_code_tree(node)\n",
"    huffman_encoding_list.append(huffman_encoding_dict)\n",
"\n",
"    # create an error matrix that includes the boundary (used by the encoder)\n",
"    new_error = np.copy(image_array)\n",
"    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))\n",
"    keep = new_error[0,0]\n",
"    new_error[0,:] = new_error[0,:] - keep\n",
"    new_error[-1,:] = new_error[-1,:] - keep\n",
"    new_error[1:-1,0] = new_error[1:-1,0] - keep\n",
"    new_error[1:-1,-1] = new_error[1:-1,-1] - keep\n",
"    new_error[0,0] = keep\n",
"    # huffman_encoding_list = list(set(huffman_encoding_list))\n",
"    diff = np.reshape(diff,(510,638))\n",
"    # return the huffman dictionaries\n",
"    return huffman_encoding_list, image_array, new_error, diff, boundary, predict, bins, A\n",
"    \n" ] }, { "cell_type": "code", "execution_count": 6, "id": "2eb774d2", "metadata": {}, "outputs": [], "source": [
"def encoder(error, list_dic, diff, bound, bins):\n",
"    \"\"\"\n",
"    This function encodes the error matrix using the Huffman coding tables.\n",
"    \n",
"    Input:\n",
"    error (512, 640): a matrix with all the errors\n",
"    list_dic (num_dic + 1,): a list of Huffman coding tables\n",
"    diff (510, 638): difference of the min and max of the 4 neighbors\n",
"    bound (2300,): the boundary values after subtracting the very first pixel value\n",
"    bins (num_bins - 1,): a list of thresholds that cut the bins\n",
"    \n",
"    Return:\n",
"    encoded (512, 640): encoded matrix\n",
"    \"\"\"\n",
"    # copy the error matrix (including the boundary)\n",
"    encoded = np.copy(error).astype(int).astype(str).astype(object)\n",
"    #diff = np.reshape(diff,(510,638))\n",
"    # loop through all the pixels to encode\n",
"    for i in range(encoded.shape[0]):\n",
"        for j in range(encoded.shape[1]):\n",
"            if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:\n",
"                encoded[i][j] = list_dic[0][encoded[i][j]]\n",
"            elif diff[i-1][j-1] <= bins[0]:\n",
"                encoded[i][j] = list_dic[1][encoded[i][j]]\n",
"            elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:\n",
"                encoded[i][j] = list_dic[2][encoded[i][j]]\n",
"            elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:\n",
"                encoded[i][j] = list_dic[3][encoded[i][j]]\n",
"            else:\n",
"                encoded[i][j] = list_dic[4][encoded[i][j]]\n",
"\n",
"    return encoded" ] }, { "cell_type": "code", "execution_count": 7, "id": "8eeb40d0", "metadata": {}, "outputs": [], "source": [
"def decoder(A, encoded_matrix, list_dic, bins, use_diff):\n",
"    \"\"\"\n",
"    This function decodes the encoded_matrix.\n",
"    Input:\n",
"    A (3 X 3): matrix of the system of equations\n",
"    list_dic (num_dic + 1,): a list of Huffman coding tables\n",
"    encoded_matrix (512, 640): encoded matrix\n",
"    bins (num_bins - 1,): a list of thresholds that cut the bins\n",
"    \n",
"    Return:\n",
"    decode_matrix (512, 640): decoded matrix\n",
"    \"\"\"\n",
"    # change the dictionaries back to key/value lists\n",
"    # !!!!! WARNING !!!!! this block is hard-coded for num_bins = 4 and has to be changed every time the number of bins changes\n",
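"    # (building lists of keys and values indexed by bin number would avoid unrolling them by hand)\n",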
"    the_keys0 = list(list_dic[0].keys())\n",
"    the_values0 = list(list_dic[0].values())\n",
"    \n",
"    the_keys1 = list(list_dic[1].keys())\n",
"    the_values1 = list(list_dic[1].values())\n",
"    \n",
"    the_keys2 = list(list_dic[2].keys())\n",
"    the_values2 = list(list_dic[2].values())\n",
"    \n",
"    the_keys3 = list(list_dic[3].keys())\n",
"    the_values3 = list(list_dic[3].values())\n",
"    \n",
"    the_keys4 = list(list_dic[4].keys())\n",
"    the_values4 = list(list_dic[4].values())\n",
"    \n",
"    #Matrix of points that will be used to solve for the least squares fitting hyperplane\n",
"    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])\n",
"    \n",
"    decode_matrix = np.zeros((512,640))\n",
"    # loop through all the elements in the matrix\n",
"    for i in range(decode_matrix.shape[0]):\n",
"        for j in range(decode_matrix.shape[1]):\n",
"            # if it's the very first pixel of the image\n",
"            if i == 0 and j == 0:\n",
"                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])])\n",
"                # debug output for the first decoded pixel\n",
"                print(encoded_matrix[i][j])\n",
"                print(the_values0.index(encoded_matrix[i,j]))\n",
"                print(int(the_keys0[the_values0.index(encoded_matrix[i,j])]))\n",
"            # if it's on the boundary (any of the 4 edges)\n",
"            elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:\n",
"                decode_matrix[i][j] = int(the_keys0[the_values0.index(encoded_matrix[i,j])]) + decode_matrix[0][0]\n",
"            # if not on the boundary\n",
"            else:\n",
"                # predict the pixel from the already-decoded neighbor values\n",
"                z0 = decode_matrix[i-1][j-1]\n",
"                z1 = decode_matrix[i-1][j]\n",
"                z2 = decode_matrix[i-1][j+1]\n",
"                z3 = decode_matrix[i][j-1]\n",
"                y0 = int(-z0+z2-z3)\n",
"                y1 = int(z0+z1+z2)\n",
"                y2 = int(-z0-z1-z2-z3)\n",
"                y = np.vstack((y0,y1,y2))\n",
"                if use_diff:\n",
"                    difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)\n",
"                else:\n",
"                    f, difference, rank, s = la.lstsq(points, [z0,z1,z2,z3], rcond=None)\n",
"                    difference = difference.astype(int)\n",
"                \n",
"                predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))\n",
"                \n",
"                # add on the error by searching the dictionary\n",
"                # !!!!! WARNING !!!!! this lookup is hard-coded for num_bins = 4 and has to be changed every time the number of bins changes\n",
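"                # the same bin thresholds the encoder used decide which Huffman table applies to this pixel\n",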
"                if difference <= bins[0]:\n",
"                    decode_matrix[i][j] = int(the_keys1[the_values1.index(encoded_matrix[i,j])]) + int(predict)\n",
"                elif difference <= bins[1] and difference > bins[0]:\n",
"                    decode_matrix[i][j] = int(the_keys2[the_values2.index(encoded_matrix[i,j])]) + int(predict)\n",
"                elif difference <= bins[2] and difference > bins[1]:\n",
"                    decode_matrix[i][j] = int(the_keys3[the_values3.index(encoded_matrix[i,j])]) + int(predict)\n",
"                else:\n",
"                    decode_matrix[i][j] = int(the_keys4[the_values4.index(encoded_matrix[i,j])]) + int(predict)\n",
"    \n",
"    \n",
"    return decode_matrix.astype(int)" ] }, { "cell_type": "code", "execution_count": 8, "id": "f959fe93", "metadata": {}, "outputs": [], "source": [
"def compress_rate(image_array, new_error, diff, bound, huffman_encoding_list, bins):\n",
"    '''\n",
"    This function is used to calculate the compression rate.\n",
"    Input:\n",
"    image_array (512, 640): original image\n",
"    new_error (512, 640): error that includes the boundary\n",
"    diff (510, 638): difference of the min and max of the 4 neighbors\n",
"    bound (2300,): the boundary values after subtracting the very first pixel value\n",
"    huffman_encoding_list (num_dic + 1,): a list of Huffman coding tables\n",
"    bins (num_bins - 1,): a list of thresholds that cut the bins\n",
"    \n",
"    Return:\n",
"    compression rate (compressed bits / original bits)\n",
"    '''\n",
"    # the bits for the original image\n",
"    o_len = 0\n",
"    # the bits for the compressed image\n",
"    c_len = 0\n",
"    # initialize the variables\n",
"    \n",
"    #this was unused\n",
"    # im = np.reshape(image,(512, 640))\n",
"    \n",
"    real_boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
"    #Bryce's notes: Why are they all reshaped?\n",
"    original_core = image_array[1:-1,1:-1].reshape(-1)\n",
"    diff = diff.reshape(-1)\n",
"    error = new_error[1:-1,1:-1].reshape(-1)\n",
"    \n",
"    # count the bits for the boundary\n",
"    for i in range(0,len(bound)):\n",
"        o_len += len(bin(real_boundary[i])[2:])\n",
"        c_len += len(huffman_encoding_list[0][str(bound[i])])\n",
"    \n",
"    # count the bits for the pixels inside the boundary\n",
"    for i in range(0,len(original_core)):\n",
"\n",
"        # for the original image\n",
"        o_len += len(bin(original_core[i])[2:])\n",
"        \n",
"        # check the difference and find the corresponding huffman table\n",
"        # !!!!! WARNING !!!!! this lookup is hard-coded for num_bins = 4 and has to be changed every time the number of bins changes\n",
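"        # these bins must be the ones returned by huffman(), otherwise the bit counts will not match the actual encoding\n",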
"        if diff[i] <= bins[0]:\n",
"            c_len += len(huffman_encoding_list[1][str(int(error[i]))])\n",
"        \n",
"        elif diff[i] <= bins[1] and diff[i] > bins[0]:\n",
"            c_len += len(huffman_encoding_list[2][str(int(error[i]))])\n",
"        \n",
"        elif diff[i] <= bins[2] and diff[i] > bins[1]:\n",
"            c_len += len(huffman_encoding_list[3][str(int(error[i]))])\n",
"\n",
"        else:\n",
"            c_len += len(huffman_encoding_list[4][str(int(error[i]))])\n",
"    \n",
"    return c_len/o_len" ] }, { "cell_type": "code", "execution_count": 9, "id": "3e0e9742", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "11100100000\n", "499\n", "22275\n", "True\n", "5\n" ] } ], "source": [
"scenes = file_extractor()\n",
"images = image_extractor(scenes)\n",
"list_dic, image, new_error, diff, bound, predict, bins, A = huffman(images[0], 4, False)\n",
"encoded_matrix = encoder(new_error, list_dic, diff, bound, bins)\n",
"reconstruct_image = decoder(A, encoded_matrix, list_dic, bins, False)\n",
"print(np.allclose(image, reconstruct_image))\n",
"print(len(list_dic))" ] }, { "cell_type": "code", "execution_count": 10, "id": "004e8ba8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4232928466796875" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
"compress_rate(image, new_error, diff, bound, list_dic, bins)" ] }, { "cell_type": "code", "execution_count": 11, "id": "a282f9e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2621552\n", "2621552\n" ] } ], "source": [
"print(sys.getsizeof(encoded_matrix))\n",
"print(sys.getsizeof(reconstruct_image))" ] }, { "cell_type": "code", "execution_count": null, "id": "7efe26b9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "b46343b2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.10 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "vscode": { "interpreter": { "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" } } }, "nbformat": 4, "nbformat_minor": 5 }