WorkingPyDemo.py 23.3 KB
Newer Older
Bryce Hepner's avatar
Bryce Hepner committed
1 2
import numpy as np
from matplotlib import pyplot as plt
3
# from itertools import product
Bryce Hepner's avatar
Bryce Hepner committed
4
import os
5
# import sys
Bryce Hepner's avatar
Bryce Hepner committed
6 7
from PIL import Image
from scipy.optimize import minimize,linprog
8
# from sklearn.neighbors import KernelDensity
Bryce Hepner's avatar
Bryce Hepner committed
9 10
from collections import Counter
import numpy.linalg as la
Bryce Hepner's avatar
Bryce Hepner committed
11
from time import time
12
from time import sleep
13
# import tifffile as tiff
14

Bryce Hepner's avatar
Bryce Hepner committed
15 16 17
folder_name = "images"
outputlocation = ""

Bryce Hepner's avatar
Bryce Hepner committed
18
def file_extractor(dirname="images"):
19 20 21 22 23 24 25 26
    """Find all the files in the directory
    
    Parameters:
        dirname (str): the directory name
        
    Returns:
        files (list): a list of all the files in the directory
    """
Bryce Hepner's avatar
Bryce Hepner committed
27 28 29 30 31 32 33 34 35 36
    files = os.listdir(dirname)
    scenes = []
    for file in files:
        if file == '.DS_Store':
            continue
        else:
            scenes.append(os.path.join(dirname, file))
    return scenes

def image_extractor(scenes):
37 38 39 40 41 42 43
    """
    This function gives the list of all of the valid images
    Parameters:
        scenes (list): a list of all the files in the directory
    Returns:
        images (list): a list of all the images in the directory
    """
Bryce Hepner's avatar
Bryce Hepner committed
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
    image_folder = []
    for scene in scenes:
        files = os.listdir(scene)
        for file in files:
            if file[-5:] != ".tiff" or file[-7:] == "_6.tiff":
                continue
            else:
                image_folder.append(os.path.join(scene, file))
    return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor


def predict_pix(tiff_image_path, difference = True):
    """
    This function predict the pixel values excluding the boundary.
    Using the 4 neighbor pixel values and MSE to predict the next pixel value
    (-1,1) (0,1) (1,1)  => relative position of the 4 other given values
    (-1,0) (0,0)        => (0,0) is the one we want to predict
    take the derivative of mean square error to solve for the system of equation 
    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])
    A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
    and the predicted pixel value is c.
    
    Input:
    tiff_image_path (string): path to the tiff file
68
    difference (bool): whether or not to use the alternate differencing system
Bryce Hepner's avatar
Bryce Hepner committed
69 70
    Return:
    image   ndarray(512 X 640): original image 
71

Bryce Hepner's avatar
Bryce Hepner committed
72 73 74 75 76
    diff.   ndarray(325380,): IF difference = TRUE, difference between the min and max of four neighbors exclude the boundary
                            ELSE: the residuals of the four nearest pixels to a fitted hyperplane
    error   ndarray(325380,): difference between the original image and predicted image
    """
    image_obj = Image.open(tiff_image_path)    #Open the image and read it as an Image object
Bryce Hepner's avatar
Bryce Hepner committed
77
    image_array = np.array(image_obj)[1:].astype(int)    #Convert to an array, leaving out the first row because the first row is just housekeeping data
Bryce Hepner's avatar
Bryce Hepner committed
78
    # image_array = image_array.astype(int) 
79 80
    # A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation
    Ainv = np.array([[0.5,-0.5,-0.5],[-0.5,1.83333333,1.5],[0.5,-1.5,-1.5]])
Bryce Hepner's avatar
Bryce Hepner committed
81 82 83 84 85 86 87 88 89 90 91 92 93 94
    # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
    z0 = image_array[0:-2,0:-2]   # get all the first pixel for the entire image
    z1 = image_array[0:-2,1:-1]   # get all the second pixel for the entire image
    z2 = image_array[0:-2,2::]    # get all the third pixel for the entire image
    z3 = image_array[1:-1,0:-2]   # get all the forth pixel for the entire image
    
    # calculate the out put of the system of equation
    y0 = np.ravel(-z0+z2-z3)
    y1 = np.ravel(z0+z1+z2)
    y2 = np.ravel(-z0-z1-z2-z3)
    y = np.vstack((y0,y1,y2))
    
    # use numpy solver to solve the system of equations all at once
    #predict = np.floor(np.linalg.solve(A,y)[-1])
95 96
    # predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))
    predict = np.round(np.round((Ainv[-1]@y)),1)
Bryce Hepner's avatar
Bryce Hepner committed
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
    #Matrix system of points that will be used to solve the least squares fitting hyperplane
    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])
    
    # flatten the neighbor pixlels and stack them together
    z0 = np.ravel(z0)
    z1 = np.ravel(z1)
    z2 = np.ravel(z2)
    z3 = np.ravel(z3)
    neighbor = np.vstack((z0,z1,z2,z3)).T
    
    if difference:
        # calculate the difference
        diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)
    
    else:
        #Compute the best fitting hyperplane using least squares
        #The res is the residuals of the four points used to fit the hyperplane (summed distance of each of the 
        #points to the hyperplane), it is a measure of gradient
        f, diff, rank, s = la.lstsq(points, neighbor.T, rcond=None)
Bryce Hepner's avatar
Bryce Hepner committed
116
        # diff = diff.astype(int)
117 118 119 120 121 122
        # Pinv = np.linalg.pinv(points)
        # b = [z0,z1,z2,z3]
        # x = Pinv@np.array(b)

        # diff = np.linalg.norm(b - points@x,ord=2)
        # diff = diff.astype(int)
Bryce Hepner's avatar
Bryce Hepner committed
123 124 125
    # calculate the error
    error = np.ravel(image_array[1:-1,1:-1])-predict
    
Bryce Hepner's avatar
Bryce Hepner committed
126
    return image_array, diff, error
Bryce Hepner's avatar
Bryce Hepner committed
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141

"""
this huffman encoding code is found online
https://favtutor.com/blogs/huffman-coding
"""

class NodeTree(object):
    def __init__(self, left=None, right=None):
        self.left = left
        self.right = right

    def children(self):
        return self.left, self.right

    def __str__(self):
142
        "Technically does not return a string, cannot be used with print"
Bryce Hepner's avatar
Bryce Hepner committed
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
        return self.left, self.right


def huffman_code_tree(node, binString=''):
    '''
    Function to find Huffman Code
    '''
    if type(node) is str:
        return {node: binString}
    (l, r) = node.children()
    d = dict()
    d.update(huffman_code_tree(l, binString + '0'))
    d.update(huffman_code_tree(r, binString + '1'))
    return d


def make_tree(nodes):
    '''
    Function to make tree
    :param nodes: Nodes
    :return: Root of the tree
    '''
    while len(nodes) > 1:
        (key1, c1) = nodes[-1]
        (key2, c2) = nodes[-2]
        nodes = nodes[:-2]
        node = NodeTree(key1, key2)
        nodes.append((node, c1 + c2))
        #reverse True, decending order
        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)
    return nodes[0][0]
def decode_string(huffman_string, the_keys, the_values):
    for i in range(len(huffman_string)):
        try:
            return (int(the_keys[the_values.index(huffman_string[:i+1])]),huffman_string[i+1:])
        except:
            pass
Bryce Hepner's avatar
Bryce Hepner committed
180 181 182 183 184 185 186 187 188

def make_dictionary(tiff_image_path_list, num_bins=4, difference = True):
    """
    This function is used to encode the error based on the difference
    and split the difference into different bins
    
    Input:
    tiff_image_path     (string): path to the tiff file
    num_bins            (int): number of bins
189 190
    difference (bool): whether or not to use the alternate differencing system

Bryce Hepner's avatar
Bryce Hepner committed
191
    Return:
192
    huffman_encoding_list  list    (num_bins + 1): a list of dictionary, each dictionary is a huffman encoding
Bryce Hepner's avatar
Bryce Hepner committed
193 194 195 196 197 198 199 200 201
    bins                   list    (num_bins - 1,): a list of threshold to cut the bins
    """
    list_of_all_vals = []
    huffman_encoding_list = []
    for _ in range(num_bins+1):
        list_of_all_vals.append([])
    for _, tiff_image_path in enumerate(tiff_image_path_list):
        # get the image_array, etc
        image_array, diff, error= predict_pix(tiff_image_path, difference)
202 203 204 205 206
        # plt.hist(np.ravel(image_array), bins=30)
        # plt.xlabel('Pixel Value')
        # plt.ylabel('Frequency')
        # plt.title('Histogram of Pixel Values')
        # plt.show()
207
        
208
        bins = [21,32,48]
Bryce Hepner's avatar
Bryce Hepner committed
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
        # get the boundary 
        boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))
        
        # take the difference of the boundary with the very first pixel
        boundary = boundary - image_array[0,0]
        
        #boundary is 1dim, so boundary[0] is just the first element
        boundary[0] = image_array[0,0]
        
        # huffman encode the boundary
        for j in boundary:
            list_of_all_vals[0].append(str(j))
        
        # create a list of huffman table
        n = len(bins)
        
        # loop through different bins
        for k in range (0,n):
            # the first bin
            if k == 0 :
                # get the point within the bin and huffman huffman_encoding_dict
                mask = diff <= bins[k]
                for j in error[mask].astype(int):
                    list_of_all_vals[k+1].append(str(j))

                
            # the middle bins
            else:
                # get the point within the bin and huffman huffman_encoding_dict
                mask = diff > bins[k-1]
                new_error = error[mask]
                mask2 = diff[mask] <= bins[k]
                for j in new_error[mask2].astype(int):
                    list_of_all_vals[k+1].append(str(j))

                
        # the last bin       
        # get the point within the bin and huffman huffman_encoding_dict
        mask = diff > bins[-1]
        for j in error[mask].astype(int):
            list_of_all_vals[-1].append(str(j))
250 251 252 253 254
    # plt.hist([int(i) for i in list_of_all_vals[2]], bins=30)
    # plt.xlabel("Error")
    # plt.ylabel("Frequency")
    # plt.title("Histogram of Errors")
    # plt.show()
Bryce Hepner's avatar
Bryce Hepner committed
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
    for item in list_of_all_vals:
        freq = dict(Counter(item))
        freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
        node = make_tree(freq)
        huffman_encoding_list.append(huffman_code_tree(node))
        # create a error matrix that includes the boundary (used in encoding matrix)
    new_error = np.copy(image_array)
    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))
    keep = new_error[0,0]
    new_error[0,:] = new_error[0,:] - keep
    new_error[-1,:] = new_error[-1,:] - keep
    new_error[1:-1,0] = new_error[1:-1,0] - keep
    new_error[1:-1,-1] = new_error[1:-1,-1] - keep
    new_error[0,0] = keep
        # huffman_encoding_list = list(set(huffman_encoding_list))
    diff = np.reshape(diff,(510,638))
        # return the huffman dictionary
    return huffman_encoding_list,bins

def huffman(tiff_image_path, num_bins=4, difference = True):
    """
    This function is used to encode the error based on the difference
    and split the difference into different bins
    
    Input:
    tiff_image_path     (string): path to the tiff file
    num_bins            (int): number of bins
282 283
    difference (bool): whether or not to use the alternate differencing system

Bryce Hepner's avatar
Bryce Hepner committed
284 285 286 287
    Return:
    image_as_array         ndarray (512, 640): original image
    new_error              ndarray (512, 640): error that includes the boundary
    diff                   ndarray (510, 638): difference of min and max of the 4 neighbors
288

Bryce Hepner's avatar
Bryce Hepner committed
289 290 291 292
    """
    # get the image_as_array, etc
    image_as_array, diff, error= predict_pix(tiff_image_path, difference)
    
293
    bins = [21,32,48]
Bryce Hepner's avatar
Bryce Hepner committed
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
    # get the boundary 
    boundary = np.hstack((image_as_array[0,:],image_as_array[-1,:],image_as_array[1:-1,0],image_as_array[1:-1,-1]))
    
    # take the difference of the boundary with the very first pixel
    boundary = boundary - image_as_array[0,0]
    
    #boundary is 1dim, so boundary[0] is just the first element
    boundary[0] = image_as_array[0,0]
    
    # huffman encode the boundary
    bound_vals_as_string = [str(i) for i in boundary]
    freq = dict(Counter(bound_vals_as_string))
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    node = make_tree(freq)
    huffman_encoding_dict = huffman_code_tree(node)
    
    # create a list of huffman table
    huffman_encoding_list = [huffman_encoding_dict]
    n = len(bins)
    
    # loop through different bins
    for i in range (0,n):
        # the first bin
        if i == 0 :
            # get the point within the bin and huffman huffman_encoding_dict
            mask = diff <= bins[i]
            line_as_string = [str(i) for i in error[mask].astype(int)]
            freq = dict(Counter(line_as_string))
            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
            node = make_tree(freq)
            huffman_encoding_dict = huffman_code_tree(node)
            huffman_encoding_list.append(huffman_encoding_dict)
            
        # the middle bins
        else:
            # get the point within the bin and huffman huffman_encoding_dict
            mask = diff > bins[i-1]
            new_error = error[mask]
            mask2 = diff[mask] <= bins[i]
            line_as_string = [str(i) for i in new_error[mask2].astype(int)]
            freq = dict(Counter(line_as_string))
            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
            node = make_tree(freq)
            huffman_encoding_dict = huffman_code_tree(node)
            huffman_encoding_list.append(huffman_encoding_dict)
            
    # the last bin       
    # get the point within the bin and huffman huffman_encoding_dict
    mask = diff > bins[-1]
    line_as_string = [str(i) for i in error[mask].astype(int)]
    freq = dict(Counter(line_as_string))
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    node = make_tree(freq)
    huffman_encoding_dict = huffman_code_tree(node)
    huffman_encoding_list.append(huffman_encoding_dict)

    # create a error matrix that includes the boundary (used in encoding matrix)
    new_error = np.copy(image_as_array)
    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))
    keep = new_error[0,0]
    new_error[0,:] = new_error[0,:] - keep
    new_error[-1,:] = new_error[-1,:] - keep
    new_error[1:-1,0] = new_error[1:-1,0] - keep
    new_error[1:-1,-1] = new_error[1:-1,-1] - keep
    new_error[0,0] = keep
    # huffman_encoding_list = list(set(huffman_encoding_list))
    diff = np.reshape(diff,(510,638))
    # return the huffman dictionary

    return image_as_array, new_error, diff
 
def encoder(error, list_dic, diff, bins):
    """
    This function encode the matrix with huffman coding tables
    
    Input:
    error     (512, 640): a matrix with all the errors
    list_dic  (num_dic + 1,): a list of huffman coding table 
Bryce Hepner's avatar
Bryce Hepner committed
372
    diff     (510, 638): a matrix with all the differences
Bryce Hepner's avatar
Bryce Hepner committed
373 374 375
    bins       (num_bins - 1,): a list of threshold to cut the bins
    
    Return:
376
    returnable_encode   (512, 640): encoded matrix
Bryce Hepner's avatar
Bryce Hepner committed
377
    """
378

Bryce Hepner's avatar
Bryce Hepner committed
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    returnable_encode = ""
    # copy the error matrix (including the boundary)
    encoded = np.copy(error).astype(int).astype(str).astype(object)
    #diff = np.reshape(diff,(510,638))
    # loop through all the pixel to encode
    for i in range(encoded.shape[0]):
        for j in range(encoded.shape[1]):
            if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:
                returnable_encode += list_dic[0][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[0]:
                returnable_encode += list_dic[1][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:
                returnable_encode +=list_dic[2][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:
                returnable_encode +=list_dic[3][encoded[i][j]]
            else:
                returnable_encode += list_dic[4][encoded[i][j]]
    return returnable_encode


def decoder(encoded_string, list_dic, bins, use_diff):
    """
    This function decodes the encoded_matrix.
    Input:
403
    encoded_string  (string): This is the input, the available information in 1s, 0s
Bryce Hepner's avatar
Bryce Hepner committed
404 405
    list_dic        (num_dic + 1,): a list of huffman coding table 
    bins            (num_bins - 1,): a list of threshold to cut the bins
406
    use_diff (bool): whether or not to use the alternate differencing system
Bryce Hepner's avatar
Bryce Hepner committed
407 408 409 410 411
    
    Return:
    decode_matrix   (512, 640): decoded matrix
    """
    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation
412
    Ainv = np.array([[0.5,-0.5,-0.5],[-0.5,1.83333333,1.5],[0.5,-1.5,-1.5]])
Bryce Hepner's avatar
Bryce Hepner committed
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
    # change the dictionary back to list
    # !!!!!WARNING!!!! has to change this part, everytime you change the number of bins
    the_keys0 = list(list_dic[0].keys())
    the_values0 = list(list_dic[0].values())
    
    the_keys1 = list(list_dic[1].keys())
    the_values1 = list(list_dic[1].values())
    
    the_keys2 = list(list_dic[2].keys())
    the_values2 = list(list_dic[2].values())
    
    the_keys3 = list(list_dic[3].keys())
    the_values3 = list(list_dic[3].values())
    
    the_keys4 = list(list_dic[4].keys())
    the_values4 = list(list_dic[4].values())
    
    #Matrix system of points that will be used to solve the least squares fitting hyperplane
    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])
432
    # Pinv = np.linalg.pinv(points)
Bryce Hepner's avatar
Bryce Hepner committed
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
    decode_matrix = np.zeros((512,640))
    # loop through all the element in the matrix
    for i in range(decode_matrix.shape[0]):
        for j in range(decode_matrix.shape[1]):
            # if it's the very first pixel on the image
            if i == 0 and j == 0:
                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)
                decode_matrix[i][j] = colorvalue
                
            # if it's on the boundary (any of the 4 edges)
            elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:
                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)
                decode_matrix[i][j] = colorvalue + decode_matrix[0][0]
            # if not the boundary
            else:
                # predict the image with the known pixel value
                z0 = decode_matrix[i-1][j-1]
                z1 = decode_matrix[i-1][j]
                z2 = decode_matrix[i-1][j+1]
                z3 = decode_matrix[i][j-1]
                y0 = int(-z0+z2-z3)
                y1 = int(z0+z1+z2)
                y2 = int(-z0-z1-z2-z3)
                y = np.vstack((y0,y1,y2))
                if use_diff:
                    difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)
                else:
460 461 462
                    # b = [z0,z1,z2,z3]
                    # x = Pinv@np.array(b)
                    # difference = np.linalg.norm(b - points@x,ord=2)
Bryce Hepner's avatar
Bryce Hepner committed
463
                    f, difference, rank, s = la.lstsq(points, [z0,z1,z2,z3], rcond=None) 
Bryce Hepner's avatar
Bryce Hepner committed
464
                    # difference = difference.astype(int)
Bryce Hepner's avatar
Bryce Hepner committed
465
                    
466 467
                # predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))
                predict = np.round(np.round((Ainv[-1]@y)[0],1))
Bryce Hepner's avatar
Bryce Hepner committed
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
                # add on the difference by searching the dictionary
                # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins
                if difference <= bins[0]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys1, the_values=the_values1)
                    decode_matrix[i][j] = colorvalue + int(predict)
                elif difference <= bins[1] and difference > bins[0]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys2, the_values=the_values2)
                    decode_matrix[i][j] = colorvalue + int(predict)
                elif difference <= bins[2] and difference > bins[1]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys3, the_values=the_values3)
                    decode_matrix[i][j] = colorvalue + int(predict)
                else:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys4, the_values=the_values4)
                    decode_matrix[i][j] = colorvalue + int(predict)

    return decode_matrix.astype(int)

def read_from_file(filename):
486 487 488 489 490 491 492
    '''
    This function reads the file and return the content in a list
    Input:
        filename (string): the name of the file
    Return:
        content (list): the content of the file
    '''
Bryce Hepner's avatar
Bryce Hepner committed
493 494 495 496
    with open(filename, 'rb') as file:
        return file.read()

def bitstring_to_bytes(input_string):
497 498 499 500 501 502 503
    '''
    This function converts the bitstring to bytes
    Input:
        input_string (string): the bitstring
    Return:
        output_string (string): the bytes
    '''
Bryce Hepner's avatar
Bryce Hepner committed
504 505 506 507 508 509 510 511 512 513 514 515 516 517
    int_array = []
    length_of_string = len(input_string)
    while length_of_string >= 8:
        int_array.append(int(input_string[:8],2))
        input_string = input_string[8:]
        length_of_string = len(input_string)
    if length_of_string > 0:
        zerobuffer = ""
        for _ in range(8-length_of_string):
            zerobuffer += "0"
        int_array.append(int(input_string+zerobuffer,2))
    return bytes(int_array)

def bytes_to_bitstring(input_bytearray):
518 519 520 521 522 523 524
    '''
    This function converts the bytes to bitstring
    Input:
        input_bytearray (bytearray): the bytes
    Return:
        output_string (string): the bitstring
    '''
Bryce Hepner's avatar
Bryce Hepner committed
525 526 527 528 529
    end_string = ""
    int_array = [i for i in input_bytearray]
    for i, item in enumerate(int_array):
        end_string += (bin(item)[2:].zfill(8))
    return end_string
Bryce Hepner's avatar
Bryce Hepner committed
530
def text_to_tiff(filename, list_dic, bins):
531 532 533 534 535 536 537 538 539
    '''
    This function converts and saves the text to tiff
    Input:
        filename (string): the name of the file
        list_dic (list): the list of dictionary
        bins (list): the list of bins
    Return:
        None
    '''
Bryce Hepner's avatar
Bryce Hepner committed
540 541 542 543 544
    encoded_string = bytes_to_bitstring(read_from_file(filename))
    reconstruct_image = decoder(encoded_string, list_dic, bins, False)
    reconstruct_image = reconstruct_image.astype(np.uint16)
    reconstruct_image = Image.fromarray(reconstruct_image)
    reconstruct_image.save(filename[:-16]+"_reconstructed.tiff", "TIFF")
545

Bryce Hepner's avatar
Bryce Hepner committed
546 547
if __name__ == "__main__":
    
548
    scenes = file_extractor("images")
Bryce Hepner's avatar
Bryce Hepner committed
549 550
    images = image_extractor(scenes)
    newnamesforlater = []
551
    list_dic, bins = make_dictionary(images, 4, False)
552 553
    file_sizes_new = []
    file_sizes_old = []
554
    # list_dic = np.load("first_dict.npy", allow_pickle="True")
555
    bins = [21,32,48]
556
    #otherbins = [21,32,48]
Bryce Hepner's avatar
Bryce Hepner committed
557
    np.save("first_dict.npy", list_dic)
558
    for i in range(3):
559

560
        image, new_error, diff = huffman(images[i], 4, False)
561 562
        encoded_string = encoder(new_error, list_dic, diff, bins)
        inletters = bitstring_to_bytes(encoded_string)
Bryce Hepner's avatar
Bryce Hepner committed
563

564 565 566 567
        if images[i][-5:] == ".tiff":
            newname = images[i][:-5]
        else:
            newname = images[i][:-4]
568

569
        newnamesforlater.append(newname + "_Compressed.txt")
570 571
        with open(newname + "_Compressed.txt", 'wb') as f:
            f.write(inletters)
Bryce Hepner's avatar
Bryce Hepner committed
572 573
        file_sizes_new.append((os.path.getsize(newname + "_Compressed.txt")))
        file_sizes_old.append((os.path.getsize(images[i])))
574

575 576
    print(file_sizes_new)
    print(file_sizes_old)
Bryce Hepner's avatar
Bryce Hepner committed
577
    print(np.sum(file_sizes_new)/np.sum(file_sizes_old))
578
    file_sizes_new.append(os.path.getsize("first_dict.npy"))
Bryce Hepner's avatar
Bryce Hepner committed
579
    print(np.sum(file_sizes_new)/np.sum(file_sizes_old))
580
    # list_dic = np.load("first_dict.npy", allow_pickle="TRUE")
581 582 583 584 585 586 587 588 589 590
    bins = [21,32,48]

    for i,item in enumerate(newnamesforlater):
        # print(item)
        image, new_error, diff = huffman(images[i], 4, False)
        encoded_string2 = bytes_to_bitstring(read_from_file(item))
        starttime = time()
        reconstruct_image = decoder(encoded_string2, list_dic, bins, False)
        print(np.allclose(image, reconstruct_image))
        # print(time() - starttime)
Bryce Hepner's avatar
Bryce Hepner committed
591 592 593 594 595
    # text_to_tiff("images/1626033496_437803/1626033496_437803_3._Compressed.txt", list_dic, bins)
    # original_image = Image.open("images/1626033496_437803/1626033496_437803_3.tiff")
    # original_image = np.array(original_image)[1:]
    # secondimage = Image.open("images/1626033496_437803/1626033496_437803_3_reconstructed.tiff")
    # secondimage = np.array(secondimage)
596 597
    # print(np.allclose(original_image, secondimage))