midday_check

96e7a95d · Bryce Hepner · c6dfe9f2 · 96e7a95d · 96e7a95d · 96e7a95d
Commit 96e7a95d authored Jun 08, 2022 by Bryce Hepner
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 894 additions and 23 deletions

NoBinsDemo.ipynb NoBinsDemo.ipynb +625 -0

SameTableEncoder.ipynb SameTableEncoder.ipynb +94 -23

WorkingPyDemo.py WorkingPyDemo.py +175 -0

No files found.
--- a/NoBinsDemo.ipynb
+++ b/NoBinsDemo.ipynb
--- a/SameTableEncoder.ipynb
+++ b/SameTableEncoder.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -20,7 +20,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -72,7 +72,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -147,7 +147,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -194,8 +194,6 @@
    "        node = NodeTree(key1, key2)\n",
    "        nodes.append((node, c1 + c2))\n",
    "        #reverse True, decending order\n",
-    "\n",
-    "        #There is a huge memory leak here, no idea how or why\n",
    "        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)\n",
    "    return nodes[0][0]\n",
    "def decode_string(huffman_string, the_keys, the_values):\n",
@@ -208,7 +206,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -245,7 +243,8 @@
    "\n",
    "        # sort the difference and create the bins\n",
    "        sorted_diff = np.sort(diff.copy())\n",
-    "        bins = [12,60,180]\n",
+    "        # bins = [12,60,180]\n",
+    "        bins = [21,31,48]\n",
    "        # get the boundary \n",
    "        boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
    "        \n",
@@ -309,7 +308,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -341,7 +340,8 @@
    "    # sort the difference and create the bins\n",
    "    sorted_diff = np.sort(diff.copy())\n",
    "    # bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
-    "    bins = [12,60,180]\n",
+    "    # bins = [12,60,180]\n",
+    "    bins = [21,31,48]\n",
    "    # get the boundary \n",
    "    boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
    "    \n",
@@ -417,7 +417,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -456,7 +456,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -486,7 +486,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -576,7 +576,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -587,7 +587,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 106,
+   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -597,7 +597,7 @@
    "oglist_dic, ogbins = make_dictionary(images[:10], 4, False)\n",
    "file_size_ratios = []\n",
    "np.save(\"first_dic.npy\", oglist_dic)\n",
-    "for i in range(10):\n",
+    "for i in range(10,30):\n",
    "    list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n",
    "    encoded_string1 = encoder(new_error, oglist_dic, diff, bound, ogbins)\n",
    "    # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
@@ -615,14 +615,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 107,
+   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.3953878365239038\n"
+      "0.3955133696593259\n"
     ]
    }
   ],
@@ -635,7 +635,9 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
+    "0.39535481750525336"
+   ]
  },
  {
   "cell_type": "markdown",
@@ -663,9 +665,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 108,
+   "execution_count": 127,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n",
+      "True\n",
+      "True\n"
+     ]
+    }
+   ],
   "source": [
    "def bytes_to_bitstring(input_bytearray):\n",
    "    end_string = \"\"\n",
@@ -676,14 +688,73 @@
    "oglist_dic = np.load(\"first_dic.npy\", allow_pickle=\"TRUE\")\n",
    "\n",
    "\n",
-    "ogbins = [12,60,180]\n",
+    "# ogbins = [12,60,180]\n",
-    "for i,item in enumerate(newnamesforlater[0:10]):\n",
+    "ogbins = [21,31,48]\n",
+    "for i,item in enumerate(newnamesforlater[0:3]):\n",
    "    list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 4, False)\n",
    "    encoded_string2 = bytes_to_bitstring(read_from_file(item))\n",
    "    reconstruct_image = decoder(encoded_string2, oglist_dic, ogbins, False)\n",
    "    print(np.allclose(image, reconstruct_image))"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def check_bin_size(tiff_image_path_list, num_bins=4, difference = True):\n",
+    "    \"\"\"\n",
+    "    Estimate a common set of bin thresholds by averaging, over a list of\n",
+    "    images, the per-image cut points of the sorted diff values.\n",
+    "    \n",
+    "    Input:\n",
+    "    tiff_image_path_list (list of string): paths to the tiff files\n",
+    "    num_bins             (int): number of bins\n",
+    "    difference           (bool): passed through to predict_pix\n",
+    "    \n",
+    "    Return:\n",
+    "    ndarray (num_bins - 1,): element-wise mean, across all images, of the\n",
+    "    thresholds that split each image's sorted diff into num_bins equal-sized bins\n",
+    "    \n",
+    "    \"\"\"\n",
+    "    all_bins = []\n",
+    "    for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
+    "        # get the image_array, etc\n",
+    "        image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n",
+    "        \n",
+    "        # calculate the number of points that will go in each bin\n",
+    "        data_points_per_bin = diff.size // num_bins\n",
+    "\n",
+    "        # sort the difference and create the bins\n",
+    "        sorted_diff = np.sort(diff.copy())\n",
+    "        bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
+    "        all_bins.append(bins)\n",
+    "    return np.mean(all_bins,axis = 0)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[21.00404858 31.92712551 48.06477733]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(check_bin_size(images))"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,

--- a/WorkingPyDemo.py
+++ b/WorkingPyDemo.py
+import numpy as np
+from matplotlib import pyplot as plt
+from itertools import product
+import os
+import sys
+from PIL import Image
+from scipy.optimize import minimize,linprog
+from sklearn.neighbors import KernelDensity
+from collections import Counter
+import numpy.linalg as la
+def file_extractor(dirname="images"):
+    """
+    List every entry of a directory as a joined path, skipping '.DS_Store'.
+    Parameters:
+        dirname (str): directory to list. Defaults to "images".
+    Returns:
+        scenes (list): os.path.join(dirname, entry) for each entry reported by os.listdir,
+        in listing order, with the '.DS_Store' entry left out.
+    """
+    return [os.path.join(dirname, entry) for entry in os.listdir(dirname) if entry != '.DS_Store']
+def image_extractor(scenes):
+    """
+    Collect the paths of the .tiff files found directly inside each scene directory.
+    Parameters:
+        scenes (list): directory paths, e.g. the list returned by file_extractor.
+    Returns:
+        image_folder (list): file paths to the .tiff files in the given directories.
+        Files whose name ends in "_6.tiff" are skipped.
+    """
+    tiff_paths = []
+    for scene in scenes:
+        for name in os.listdir(scene):
+            # keep only tiff files, and drop the ones ending in "_6.tiff"
+            if name.endswith(".tiff") and not name.endswith("_6.tiff"):
+                tiff_paths.append(os.path.join(scene, name))
+    return tiff_paths
+def im_distribution(images, num):
+    """
+    Function that picks out the tiff files belonging to one camera, i.e. all pictures
+    labeled "_7.tiff" or another specified camera number.
+    Parameters:
+        images (list): list of all tiff files, regardless of classification. This is NOT a list of
+        directories but of specific tiff file paths. This is the list that we filter.
+        num (str): a two-character string designation for the camera number that we want to extract,
+        i.e. "14" for double digits or "_1" for single digits.
+    Returns:
+        tiff (list): the entries of 'images' whose two characters just before the final five
+        characters (the ".tiff" extension) equal num, in their original order.
+    """
+    return [path for path in images if path[-7:-5] == num]
+def predict_pix(tiff_image_path, difference = True):
+    """
+    This function predicts the pixel values excluding the boundary.
+    Using the 4 neighbor pixel values and MSE to predict the next pixel value
+    (-1,1) (0,1) (1,1)  => relative position of the 4 other given values
+    (-1,0) (0,0)        => (0,0) is the one we want to predict
+    take the derivative of mean square error to solve for the system of equation 
+    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])
+    A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
+    and the predicted pixel value is c.
+    Input:
+    tiff_image_path (string): path to the tiff file
+    difference      (bool): selects how diff is computed (see below)
+    Return:
+    image   ndarray(512 X 640): original image with its first row dropped
+    predict ndarray(325380,): predicted image excluding the boundary
+    diff.   ndarray(325380,): IF difference = TRUE, difference between the min and max of four neighbors exclude the boundary
+                            ELSE: the residuals of the four nearest pixels to a fitted hyperplane
+    error   ndarray(325380,): difference between the original image and predicted image
+    (the matrix A is used internally and is NOT returned; the sizes above are those of the
+    data set this was written for and follow from the input image size)
+    """
+    # Open the image in a context manager so the file handle is released as soon as the pixels are copied out
+    with Image.open(tiff_image_path) as image_obj:
+        # Convert to an array, leaving out the first row because the first row is just housekeeping data
+        image_array = np.array(image_obj)[1:,:].astype(int)
+    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation
+    # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
+    z0 = image_array[0:-2,0:-2]   # get all the first pixel for the entire image
+    z1 = image_array[0:-2,1:-1]   # get all the second pixel for the entire image
+    z2 = image_array[0:-2,2::]    # get all the third pixel for the entire image
+    z3 = image_array[1:-1,0:-2]   # get all the forth pixel for the entire image
+    # calculate the out put of the system of equation
+    y0 = np.ravel(-z0+z2-z3)
+    y1 = np.ravel(z0+z1+z2)
+    y2 = np.ravel(-z0-z1-z2-z3)
+    y = np.vstack((y0,y1,y2))
+    # use numpy solver to solve the system of equations all at once
+    # the last row of the solution is c, the predicted pixel; it is rounded to one decimal
+    # and then to the nearest integer
+    predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))
+    #Matrix system of points that will be used to solve the least squares fitting hyperplane
+    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])
+    # flatten the neighbor pixels and stack them together
+    z0 = np.ravel(z0)
+    z1 = np.ravel(z1)
+    z2 = np.ravel(z2)
+    z3 = np.ravel(z3)
+    neighbor = np.vstack((z0,z1,z2,z3)).T
+    if difference:
+        # calculate the difference
+        diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)
+    else:
+        #Compute the best fitting hyperplane using least squares
+        #The res is the residuals of the four points used to fit the hyperplane (summed distance of each of the 
+        #points to the hyperplane), it is a measure of gradient
+        f, diff, rank, s = la.lstsq(points, neighbor.T, rcond=None)
+        diff = diff.astype(int)
+    # calculate the error
+    error = np.ravel(image_array[1:-1,1:-1])-predict
+    return image_array, predict, diff, error
+"""
+The Huffman encoding code below was found online:
+https://favtutor.com/blogs/huffman-coding
+"""
+class NodeTree(object):
+    """
+    Internal node of a Huffman tree holding a left and a right child.
+    A child is either another NodeTree or a leaf symbol.
+    """
+    def __init__(self, left=None, right=None):
+        self.left = left
+        self.right = right
+    def children(self):
+        """Return the (left, right) children as a tuple."""
+        return self.left, self.right
+    def __str__(self):
+        # __str__ must return a str; returning the tuple made str(node)/print(node) raise TypeError
+        return '%s_%s' % (self.left, self.right)
+def huffman_code_tree(node, binString=''):
+    '''
+    Function to find Huffman Code
+    :param node: root of the (sub)tree; a leaf is a str symbol, anything else must
+                 provide children() returning (left, right)
+    :param binString: code accumulated on the path from the root to this node
+    :return: dict mapping each leaf symbol to its code, '0' for a left step and '1' for a right step
+    '''
+    # isinstance also accepts str subclasses as leaves (e.g. numpy.str_), which the
+    # exact type check rejected and then failed on when calling .children()
+    if isinstance(node, str):
+        return {node: binString}
+    (l, r) = node.children()
+    d = dict()
+    d.update(huffman_code_tree(l, binString + '0'))
+    d.update(huffman_code_tree(r, binString + '1'))
+    return d
+def make_tree(nodes):
+    '''
+    Function to make tree
+    :param nodes: list of (symbol_or_node, count) pairs; the last two entries are merged at each
+                  step, so the list is expected in descending count order
+    :return: Root of the tree
+    '''
+    # work on a copy so the caller's list is left untouched
+    remaining = nodes[:]
+    while len(remaining) > 1:
+        last_key, last_count = remaining.pop()
+        prev_key, prev_count = remaining.pop()
+        # the last entry becomes the left child, the one before it the right child
+        remaining.append((NodeTree(last_key, prev_key), last_count + prev_count))
+        # stable sort into descending order of count
+        remaining.sort(key=lambda pair: pair[1], reverse=True)
+    return remaining[0][0]
+def decode_string(huffman_string, the_keys, the_values):
+    """
+    Decode the first symbol of a Huffman bit string.
+    Parameters:
+        huffman_string (str): the bits still to be decoded.
+        the_keys (list): symbols, index-aligned with the_values; each must be convertible with int().
+        the_values (list): the code strings, the_values[i] being the code of the_keys[i].
+    Returns:
+        (int, str): the decoded symbol and the remainder of huffman_string after its code,
+        using the shortest prefix of huffman_string that appears in the_values.
+        None if no prefix of huffman_string is a known code (including an empty string).
+    """
+    for end in range(1, len(huffman_string) + 1):
+        try:
+            position = the_values.index(huffman_string[:end])
+        except ValueError:
+            # this prefix is not a code yet; try one more bit
+            continue
+        return (int(the_keys[position]), huffman_string[end:])
+    return None