WorkingPyDemo.py 23.3 KB
Newer Older
Bryce Hepner's avatar
Bryce Hepner committed
1 2
import numpy as np
from matplotlib import pyplot as plt
3
# from itertools import product
Bryce Hepner's avatar
Bryce Hepner committed
4
import os
5
# import sys
Bryce Hepner's avatar
Bryce Hepner committed
6 7
from PIL import Image
from scipy.optimize import minimize,linprog
8
# from sklearn.neighbors import KernelDensity
Bryce Hepner's avatar
Bryce Hepner committed
9 10
from collections import Counter
import numpy.linalg as la
Bryce Hepner's avatar
Bryce Hepner committed
11
from time import time
12
from time import sleep
13
# import tifffile as tiff
14

Bryce Hepner's avatar
Bryce Hepner committed
15 16 17
folder_name = "images"
outputlocation = ""

Bryce Hepner's avatar
Bryce Hepner committed
18
def file_extractor(dirname="images"):
19 20 21 22 23 24 25 26
    """Find all the files in the directory
    
    Parameters:
        dirname (str): the directory name
        
    Returns:
        files (list): a list of all the files in the directory
    """
Bryce Hepner's avatar
Bryce Hepner committed
27 28 29 30 31 32 33 34 35 36
    files = os.listdir(dirname)
    scenes = []
    for file in files:
        if file == '.DS_Store':
            continue
        else:
            scenes.append(os.path.join(dirname, file))
    return scenes

def image_extractor(scenes):
37 38 39 40 41 42 43
    """
    This function gives the list of all of the valid images
    Parameters:
        scenes (list): a list of all the files in the directory
    Returns:
        images (list): a list of all the images in the directory
    """
Bryce Hepner's avatar
Bryce Hepner committed
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
    image_folder = []
    for scene in scenes:
        files = os.listdir(scene)
        for file in files:
            if file[-5:] != ".tiff" or file[-7:] == "_6.tiff":
                continue
            else:
                image_folder.append(os.path.join(scene, file))
    return image_folder #returns a list of file paths to .tiff files in the specified directory given in file_extractor


def predict_pix(tiff_image_path, difference = True):
    """
    This function predict the pixel values excluding the boundary.
    Using the 4 neighbor pixel values and MSE to predict the next pixel value
    (-1,1) (0,1) (1,1)  => relative position of the 4 other given values
    (-1,0) (0,0)        => (0,0) is the one we want to predict
    take the derivative of mean square error to solve for the system of equation 
    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]])
    A @ [a, b, c] = [-z0+z2-z3, z0+z1+z2, -z0-z1-z2-z3] where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
    and the predicted pixel value is c.
    
    Input:
    tiff_image_path (string): path to the tiff file
68
    difference (bool): whether or not to use the alternate differencing system
Bryce Hepner's avatar
Bryce Hepner committed
69 70
    Return:
    image   ndarray(512 X 640): original image 
71

Bryce Hepner's avatar
Bryce Hepner committed
72 73 74 75 76
    diff.   ndarray(325380,): IF difference = TRUE, difference between the min and max of four neighbors exclude the boundary
                            ELSE: the residuals of the four nearest pixels to a fitted hyperplane
    error   ndarray(325380,): difference between the original image and predicted image
    """
    image_obj = Image.open(tiff_image_path)    #Open the image and read it as an Image object
Bryce Hepner's avatar
Bryce Hepner committed
77
    image_array = np.array(image_obj)[1:].astype(int)    #Convert to an array, leaving out the first row because the first row is just housekeeping data
Bryce Hepner's avatar
Bryce Hepner committed
78
    # image_array = image_array.astype(int) 
79 80
    # A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation
    Ainv = np.array([[0.5,-0.5,-0.5],[-0.5,1.83333333,1.5],[0.5,-1.5,-1.5]])
Bryce Hepner's avatar
Bryce Hepner committed
81 82 83 84 85 86 87 88 89 90 91 92 93 94
    # where z0 = (-1,1), z1 = (0,1), z2 = (1,1), z3 = (-1,0)
    z0 = image_array[0:-2,0:-2]   # get all the first pixel for the entire image
    z1 = image_array[0:-2,1:-1]   # get all the second pixel for the entire image
    z2 = image_array[0:-2,2::]    # get all the third pixel for the entire image
    z3 = image_array[1:-1,0:-2]   # get all the forth pixel for the entire image
    
    # calculate the out put of the system of equation
    y0 = np.ravel(-z0+z2-z3)
    y1 = np.ravel(z0+z1+z2)
    y2 = np.ravel(-z0-z1-z2-z3)
    y = np.vstack((y0,y1,y2))
    
    # use numpy solver to solve the system of equations all at once
    #predict = np.floor(np.linalg.solve(A,y)[-1])
95 96
    # predict = np.round(np.round((np.linalg.solve(A,y)[-1]),1))
    predict = np.round(np.round((Ainv[-1]@y)),1)
Bryce Hepner's avatar
Bryce Hepner committed
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
    #Matrix system of points that will be used to solve the least squares fitting hyperplane
    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])
    
    # flatten the neighbor pixlels and stack them together
    z0 = np.ravel(z0)
    z1 = np.ravel(z1)
    z2 = np.ravel(z2)
    z3 = np.ravel(z3)
    neighbor = np.vstack((z0,z1,z2,z3)).T
    
    if difference:
        # calculate the difference
        diff = np.max(neighbor,axis = 1) - np.min(neighbor, axis=1)
    
    else:
        #Compute the best fitting hyperplane using least squares
        #The res is the residuals of the four points used to fit the hyperplane (summed distance of each of the 
        #points to the hyperplane), it is a measure of gradient
        f, diff, rank, s = la.lstsq(points, neighbor.T, rcond=None)
        diff = diff.astype(int)
117 118 119 120 121 122
        # Pinv = np.linalg.pinv(points)
        # b = [z0,z1,z2,z3]
        # x = Pinv@np.array(b)

        # diff = np.linalg.norm(b - points@x,ord=2)
        # diff = diff.astype(int)
Bryce Hepner's avatar
Bryce Hepner committed
123 124 125
    # calculate the error
    error = np.ravel(image_array[1:-1,1:-1])-predict
    
Bryce Hepner's avatar
Bryce Hepner committed
126
    return image_array, diff, error
Bryce Hepner's avatar
Bryce Hepner committed
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141

"""
this huffman encoding code is found online
https://favtutor.com/blogs/huffman-coding
"""

class NodeTree(object):
    def __init__(self, left=None, right=None):
        self.left = left
        self.right = right

    def children(self):
        return self.left, self.right

    def __str__(self):
142
        "Technically does not return a string, cannot be used with print"
Bryce Hepner's avatar
Bryce Hepner committed
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
        return self.left, self.right


def huffman_code_tree(node, binString=''):
    '''
    Function to find Huffman Code
    '''
    if type(node) is str:
        return {node: binString}
    (l, r) = node.children()
    d = dict()
    d.update(huffman_code_tree(l, binString + '0'))
    d.update(huffman_code_tree(r, binString + '1'))
    return d


def make_tree(nodes):
    '''
    Function to make tree
    :param nodes: Nodes
    :return: Root of the tree
    '''
    while len(nodes) > 1:
        (key1, c1) = nodes[-1]
        (key2, c2) = nodes[-2]
        nodes = nodes[:-2]
        node = NodeTree(key1, key2)
        nodes.append((node, c1 + c2))
        #reverse True, decending order
        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)
    return nodes[0][0]
def decode_string(huffman_string, the_keys, the_values):
    for i in range(len(huffman_string)):
        try:
            return (int(the_keys[the_values.index(huffman_string[:i+1])]),huffman_string[i+1:])
        except:
            pass
Bryce Hepner's avatar
Bryce Hepner committed
180 181 182 183 184 185 186 187 188

def make_dictionary(tiff_image_path_list, num_bins=4, difference = True):
    """
    This function is used to encode the error based on the difference
    and split the difference into different bins
    
    Input:
    tiff_image_path     (string): path to the tiff file
    num_bins            (int): number of bins
189 190
    difference (bool): whether or not to use the alternate differencing system

Bryce Hepner's avatar
Bryce Hepner committed
191
    Return:
192
    huffman_encoding_list  list    (num_bins + 1): a list of dictionary, each dictionary is a huffman encoding
Bryce Hepner's avatar
Bryce Hepner committed
193 194 195 196 197 198 199 200 201
    bins                   list    (num_bins - 1,): a list of threshold to cut the bins
    """
    list_of_all_vals = []
    huffman_encoding_list = []
    for _ in range(num_bins+1):
        list_of_all_vals.append([])
    for _, tiff_image_path in enumerate(tiff_image_path_list):
        # get the image_array, etc
        image_array, diff, error= predict_pix(tiff_image_path, difference)
202 203 204 205 206
        # plt.hist(np.ravel(image_array), bins=30)
        # plt.xlabel('Pixel Value')
        # plt.ylabel('Frequency')
        # plt.title('Histogram of Pixel Values')
        # plt.show()
Bryce Hepner's avatar
Bryce Hepner committed
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
        bins = [21,32,48]
        # get the boundary 
        boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))
        
        # take the difference of the boundary with the very first pixel
        boundary = boundary - image_array[0,0]
        
        #boundary is 1dim, so boundary[0] is just the first element
        boundary[0] = image_array[0,0]
        
        # huffman encode the boundary
        for j in boundary:
            list_of_all_vals[0].append(str(j))
        
        # create a list of huffman table
        n = len(bins)
        
        # loop through different bins
        for k in range (0,n):
            # the first bin
            if k == 0 :
                # get the point within the bin and huffman huffman_encoding_dict
                mask = diff <= bins[k]
                for j in error[mask].astype(int):
                    list_of_all_vals[k+1].append(str(j))

                
            # the middle bins
            else:
                # get the point within the bin and huffman huffman_encoding_dict
                mask = diff > bins[k-1]
                new_error = error[mask]
                mask2 = diff[mask] <= bins[k]
                for j in new_error[mask2].astype(int):
                    list_of_all_vals[k+1].append(str(j))

                
        # the last bin       
        # get the point within the bin and huffman huffman_encoding_dict
        mask = diff > bins[-1]
        for j in error[mask].astype(int):
            list_of_all_vals[-1].append(str(j))
249 250 251 252 253
    # plt.hist([int(i) for i in list_of_all_vals[2]], bins=30)
    # plt.xlabel("Error")
    # plt.ylabel("Frequency")
    # plt.title("Histogram of Errors")
    # plt.show()
Bryce Hepner's avatar
Bryce Hepner committed
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
    for item in list_of_all_vals:
        freq = dict(Counter(item))
        freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
        node = make_tree(freq)
        huffman_encoding_list.append(huffman_code_tree(node))
        # create a error matrix that includes the boundary (used in encoding matrix)
    new_error = np.copy(image_array)
    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))
    keep = new_error[0,0]
    new_error[0,:] = new_error[0,:] - keep
    new_error[-1,:] = new_error[-1,:] - keep
    new_error[1:-1,0] = new_error[1:-1,0] - keep
    new_error[1:-1,-1] = new_error[1:-1,-1] - keep
    new_error[0,0] = keep
        # huffman_encoding_list = list(set(huffman_encoding_list))
    diff = np.reshape(diff,(510,638))
        # return the huffman dictionary
    return huffman_encoding_list,bins

def huffman(tiff_image_path, num_bins=4, difference = True):
    """
    This function is used to encode the error based on the difference
    and split the difference into different bins
    
    Input:
    tiff_image_path     (string): path to the tiff file
    num_bins            (int): number of bins
281 282
    difference (bool): whether or not to use the alternate differencing system

Bryce Hepner's avatar
Bryce Hepner committed
283 284 285 286
    Return:
    image_as_array         ndarray (512, 640): original image
    new_error              ndarray (512, 640): error that includes the boundary
    diff                   ndarray (510, 638): difference of min and max of the 4 neighbors
287

Bryce Hepner's avatar
Bryce Hepner committed
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
    """
    # get the image_as_array, etc
    image_as_array, diff, error= predict_pix(tiff_image_path, difference)
    
    bins = [21,32,48]
    # get the boundary 
    boundary = np.hstack((image_as_array[0,:],image_as_array[-1,:],image_as_array[1:-1,0],image_as_array[1:-1,-1]))
    
    # take the difference of the boundary with the very first pixel
    boundary = boundary - image_as_array[0,0]
    
    #boundary is 1dim, so boundary[0] is just the first element
    boundary[0] = image_as_array[0,0]
    
    # huffman encode the boundary
    bound_vals_as_string = [str(i) for i in boundary]
    freq = dict(Counter(bound_vals_as_string))
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    node = make_tree(freq)
    huffman_encoding_dict = huffman_code_tree(node)
    
    # create a list of huffman table
    huffman_encoding_list = [huffman_encoding_dict]
    n = len(bins)
    
    # loop through different bins
    for i in range (0,n):
        # the first bin
        if i == 0 :
            # get the point within the bin and huffman huffman_encoding_dict
            mask = diff <= bins[i]
            line_as_string = [str(i) for i in error[mask].astype(int)]
            freq = dict(Counter(line_as_string))
            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
            node = make_tree(freq)
            huffman_encoding_dict = huffman_code_tree(node)
            huffman_encoding_list.append(huffman_encoding_dict)
            
        # the middle bins
        else:
            # get the point within the bin and huffman huffman_encoding_dict
            mask = diff > bins[i-1]
            new_error = error[mask]
            mask2 = diff[mask] <= bins[i]
            line_as_string = [str(i) for i in new_error[mask2].astype(int)]
            freq = dict(Counter(line_as_string))
            freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
            node = make_tree(freq)
            huffman_encoding_dict = huffman_code_tree(node)
            huffman_encoding_list.append(huffman_encoding_dict)
            
    # the last bin       
    # get the point within the bin and huffman huffman_encoding_dict
    mask = diff > bins[-1]
    line_as_string = [str(i) for i in error[mask].astype(int)]
    freq = dict(Counter(line_as_string))
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    node = make_tree(freq)
    huffman_encoding_dict = huffman_code_tree(node)
    huffman_encoding_list.append(huffman_encoding_dict)

    # create a error matrix that includes the boundary (used in encoding matrix)
    new_error = np.copy(image_as_array)
    new_error[1:-1,1:-1] = np.reshape(error,(510, 638))
    keep = new_error[0,0]
    new_error[0,:] = new_error[0,:] - keep
    new_error[-1,:] = new_error[-1,:] - keep
    new_error[1:-1,0] = new_error[1:-1,0] - keep
    new_error[1:-1,-1] = new_error[1:-1,-1] - keep
    new_error[0,0] = keep
    # huffman_encoding_list = list(set(huffman_encoding_list))
    diff = np.reshape(diff,(510,638))
    # return the huffman dictionary

    return image_as_array, new_error, diff
 
def encoder(error, list_dic, diff, bins):
    """
    This function encode the matrix with huffman coding tables
    
    Input:
    error     (512, 640): a matrix with all the errors
    list_dic  (num_dic + 1,): a list of huffman coding table 
Bryce Hepner's avatar
Bryce Hepner committed
371
    diff     (510, 638): a matrix with all the differences
Bryce Hepner's avatar
Bryce Hepner committed
372 373 374
    bins       (num_bins - 1,): a list of threshold to cut the bins
    
    Return:
375
    returnable_encode   (512, 640): encoded matrix
Bryce Hepner's avatar
Bryce Hepner committed
376
    """
377

Bryce Hepner's avatar
Bryce Hepner committed
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
    returnable_encode = ""
    # copy the error matrix (including the boundary)
    encoded = np.copy(error).astype(int).astype(str).astype(object)
    #diff = np.reshape(diff,(510,638))
    # loop through all the pixel to encode
    for i in range(encoded.shape[0]):
        for j in range(encoded.shape[1]):
            if i == 0 or i == encoded.shape[0]-1 or j == 0 or j == encoded.shape[1]-1:
                returnable_encode += list_dic[0][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[0]:
                returnable_encode += list_dic[1][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[1] and diff[i-1][j-1] > bins[0]:
                returnable_encode +=list_dic[2][encoded[i][j]]
            elif diff[i-1][j-1] <= bins[2] and diff[i-1][j-1] > bins[1]:
                returnable_encode +=list_dic[3][encoded[i][j]]
            else:
                returnable_encode += list_dic[4][encoded[i][j]]
    return returnable_encode


def decoder(encoded_string, list_dic, bins, use_diff):
    """
    This function decodes the encoded_matrix.
    Input:
402
    encoded_string  (string): This is the input, the available information in 1s, 0s
Bryce Hepner's avatar
Bryce Hepner committed
403 404
    list_dic        (num_dic + 1,): a list of huffman coding table 
    bins            (num_bins - 1,): a list of threshold to cut the bins
405
    use_diff (bool): whether or not to use the alternate differencing system
Bryce Hepner's avatar
Bryce Hepner committed
406 407 408 409 410
    
    Return:
    decode_matrix   (512, 640): decoded matrix
    """
    A = np.array([[3,0,-1],[0,3,3],[1,-3,-4]]) # the matrix for system of equation
411
    Ainv = np.array([[0.5,-0.5,-0.5],[-0.5,1.83333333,1.5],[0.5,-1.5,-1.5]])
Bryce Hepner's avatar
Bryce Hepner committed
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
    # change the dictionary back to list
    # !!!!!WARNING!!!! has to change this part, everytime you change the number of bins
    the_keys0 = list(list_dic[0].keys())
    the_values0 = list(list_dic[0].values())
    
    the_keys1 = list(list_dic[1].keys())
    the_values1 = list(list_dic[1].values())
    
    the_keys2 = list(list_dic[2].keys())
    the_values2 = list(list_dic[2].values())
    
    the_keys3 = list(list_dic[3].keys())
    the_values3 = list(list_dic[3].values())
    
    the_keys4 = list(list_dic[4].keys())
    the_values4 = list(list_dic[4].values())
    
    #Matrix system of points that will be used to solve the least squares fitting hyperplane
    points = np.array([[-1,-1,1], [-1,0,1], [-1,1,1], [0,-1,1]])
431
    # Pinv = np.linalg.pinv(points)
Bryce Hepner's avatar
Bryce Hepner committed
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
    decode_matrix = np.zeros((512,640))
    # loop through all the element in the matrix
    for i in range(decode_matrix.shape[0]):
        for j in range(decode_matrix.shape[1]):
            # if it's the very first pixel on the image
            if i == 0 and j == 0:
                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)
                decode_matrix[i][j] = colorvalue
                
            # if it's on the boundary (any of the 4 edges)
            elif i == 0 or i == decode_matrix.shape[0]-1 or j == 0 or j == decode_matrix.shape[1]-1:
                colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys0, the_values=the_values0)
                decode_matrix[i][j] = colorvalue + decode_matrix[0][0]
            # if not the boundary
            else:
                # predict the image with the known pixel value
                z0 = decode_matrix[i-1][j-1]
                z1 = decode_matrix[i-1][j]
                z2 = decode_matrix[i-1][j+1]
                z3 = decode_matrix[i][j-1]
                y0 = int(-z0+z2-z3)
                y1 = int(z0+z1+z2)
                y2 = int(-z0-z1-z2-z3)
                y = np.vstack((y0,y1,y2))
                if use_diff:
                    difference = max(z0,z1,z2,z3) - min(z0,z1,z2,z3)
                else:
459 460 461
                    # b = [z0,z1,z2,z3]
                    # x = Pinv@np.array(b)
                    # difference = np.linalg.norm(b - points@x,ord=2)
Bryce Hepner's avatar
Bryce Hepner committed
462 463 464
                    f, difference, rank, s = la.lstsq(points, [z0,z1,z2,z3], rcond=None) 
                    difference = difference.astype(int)
                    
465 466
                # predict = np.round(np.round(np.linalg.solve(A,y)[-1][0],1))
                predict = np.round(np.round((Ainv[-1]@y)[0],1))
Bryce Hepner's avatar
Bryce Hepner committed
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
                # add on the difference by searching the dictionary
                # !!!!!WARNING!!!! has to change this part, eveytime you change the number of bins
                if difference <= bins[0]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys1, the_values=the_values1)
                    decode_matrix[i][j] = colorvalue + int(predict)
                elif difference <= bins[1] and difference > bins[0]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys2, the_values=the_values2)
                    decode_matrix[i][j] = colorvalue + int(predict)
                elif difference <= bins[2] and difference > bins[1]:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys3, the_values=the_values3)
                    decode_matrix[i][j] = colorvalue + int(predict)
                else:
                    colorvalue, encoded_string = decode_string(encoded_string,the_keys=the_keys4, the_values=the_values4)
                    decode_matrix[i][j] = colorvalue + int(predict)

    return decode_matrix.astype(int)

def read_from_file(filename):
485 486 487 488 489 490 491
    '''
    This function reads the file and return the content in a list
    Input:
        filename (string): the name of the file
    Return:
        content (list): the content of the file
    '''
Bryce Hepner's avatar
Bryce Hepner committed
492 493 494 495
    with open(filename, 'rb') as file:
        return file.read()

def bitstring_to_bytes(input_string):
496 497 498 499 500 501 502
    '''
    This function converts the bitstring to bytes
    Input:
        input_string (string): the bitstring
    Return:
        output_string (string): the bytes
    '''
Bryce Hepner's avatar
Bryce Hepner committed
503 504 505 506 507 508 509 510 511 512 513 514 515 516
    int_array = []
    length_of_string = len(input_string)
    while length_of_string >= 8:
        int_array.append(int(input_string[:8],2))
        input_string = input_string[8:]
        length_of_string = len(input_string)
    if length_of_string > 0:
        zerobuffer = ""
        for _ in range(8-length_of_string):
            zerobuffer += "0"
        int_array.append(int(input_string+zerobuffer,2))
    return bytes(int_array)

def bytes_to_bitstring(input_bytearray):
517 518 519 520 521 522 523
    '''
    This function converts the bytes to bitstring
    Input:
        input_bytearray (bytearray): the bytes
    Return:
        output_string (string): the bitstring
    '''
Bryce Hepner's avatar
Bryce Hepner committed
524 525 526 527 528
    end_string = ""
    int_array = [i for i in input_bytearray]
    for i, item in enumerate(int_array):
        end_string += (bin(item)[2:].zfill(8))
    return end_string
Bryce Hepner's avatar
Bryce Hepner committed
529
def text_to_tiff(filename, list_dic, bins):
530 531 532 533 534 535 536 537 538
    '''
    This function converts and saves the text to tiff
    Input:
        filename (string): the name of the file
        list_dic (list): the list of dictionary
        bins (list): the list of bins
    Return:
        None
    '''
Bryce Hepner's avatar
Bryce Hepner committed
539 540 541 542 543
    encoded_string = bytes_to_bitstring(read_from_file(filename))
    reconstruct_image = decoder(encoded_string, list_dic, bins, False)
    reconstruct_image = reconstruct_image.astype(np.uint16)
    reconstruct_image = Image.fromarray(reconstruct_image)
    reconstruct_image.save(filename[:-16]+"_reconstructed.tiff", "TIFF")
544

Bryce Hepner's avatar
Bryce Hepner committed
545 546
if __name__ == "__main__":
    
547
    scenes = file_extractor("images")
Bryce Hepner's avatar
Bryce Hepner committed
548 549
    images = image_extractor(scenes)
    newnamesforlater = []
Bryce Hepner's avatar
Bryce Hepner committed
550
    list_dic, bins = make_dictionary(images, 4, False)
551 552
    file_sizes_new = []
    file_sizes_old = []
553
    # list_dic = np.load("first_dict.npy", allow_pickle="TRUE")
554
    bins = [21,32,48]
Bryce Hepner's avatar
Bryce Hepner committed
555
    np.save("first_dict.npy", list_dic)
Bryce Hepner's avatar
Bryce Hepner committed
556
    for i in range(len(images)):
557 558 559 560

        image, new_error, diff = huffman(images[i], 4, False)
        encoded_string = encoder(new_error, list_dic, diff, bins)
        inletters = bitstring_to_bytes(encoded_string)
Bryce Hepner's avatar
Bryce Hepner committed
561

562 563 564 565
        if images[i][-5:] == ".tiff":
            newname = images[i][:-5]
        else:
            newname = images[i][:-4]
566

567
        newnamesforlater.append(newname + "_Compressed.txt")
568 569
        with open(newname + "_Compressed.txt", 'wb') as f:
            f.write(inletters)
Bryce Hepner's avatar
Bryce Hepner committed
570 571
        file_sizes_new.append((os.path.getsize(newname + "_Compressed.txt")))
        file_sizes_old.append((os.path.getsize(images[i])))
572

573 574
    print(file_sizes_new)
    print(file_sizes_old)
Bryce Hepner's avatar
Bryce Hepner committed
575
    print(np.sum(file_sizes_new)/np.sum(file_sizes_old))
576
    file_sizes_new.append(os.path.getsize("first_dict.npy"))
Bryce Hepner's avatar
Bryce Hepner committed
577
    print(np.sum(file_sizes_new)/np.sum(file_sizes_old))
578
    # list_dic = np.load("first_dict.npy", allow_pickle="TRUE")
579
    bins = [21,32,48]
580

581 582 583 584 585 586 587 588
    # for i,item in enumerate(newnamesforlater):
    #     print(item)
    #     image, new_error, diff = huffman(images[i], 4, False)
    #     encoded_string2 = bytes_to_bitstring(read_from_file(item))
    #     starttime = time()
    #     reconstruct_image = decoder(encoded_string2, list_dic, bins, False)
    #     print(np.allclose(image, reconstruct_image))
    #     print(time() - starttime)
Bryce Hepner's avatar
Bryce Hepner committed
589 590 591 592 593
    # text_to_tiff("images/1626033496_437803/1626033496_437803_3._Compressed.txt", list_dic, bins)
    # original_image = Image.open("images/1626033496_437803/1626033496_437803_3.tiff")
    # original_image = np.array(original_image)[1:]
    # secondimage = Image.open("images/1626033496_437803/1626033496_437803_3_reconstructed.tiff")
    # secondimage = np.array(secondimage)
594 595
    # print(np.allclose(original_image, secondimage))