Commit 89fbb7f5 authored by Bryce Hepner's avatar Bryce Hepner

various tests

parent 4286c54a
......@@ -4,4 +4,5 @@ __pycache__
attic
*.log
/compress_start.pyc
/compress_experiment.ipynb
\ No newline at end of file
/compress_experiment.ipynb
*.txt
\ No newline at end of file
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
......@@ -72,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -147,7 +147,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
......@@ -206,7 +206,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
......@@ -308,7 +308,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -417,7 +417,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
......@@ -456,7 +456,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
......@@ -486,7 +486,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
......@@ -576,7 +576,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
......@@ -585,6 +585,95 @@
" return file.read()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build one shared Huffman table from the first 10 images, encode each of\n",
"# those images with it, write the packed bytes to disk and record how large\n",
"# each compressed file is relative to the image it was produced from.\n",
"scenes = file_extractor()\n",
"newnamesforlater = []\n",
"images = image_extractor(scenes)\n",
"oglist_dic, ogbins = make_dictionary(images[:10], 1, False)\n",
"file_size_ratios = []\n",
"np.save(\"first_dic.npy\", oglist_dic)\n",
"for i in range(10):\n",
"    list_dic, image, new_error, diff, bound, predict, bins = huffman(images[i], 1, False)\n",
"    encoded_string1 = encoder(new_error, oglist_dic, diff, bound, ogbins)\n",
"    # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
"    # print(np.allclose(image, reconstruct_image))\n",
"    inletters = bitstring_to_bytes(encoded_string1)\n",
"    # Test the END of the path for the extension. The previous check compared\n",
"    # images[i][:-5] (the path without its last five characters) to \".tiff\",\n",
"    # which never matched, so \".tiff\" files were cut with [:-4] and kept a\n",
"    # stray trailing dot in the output name.\n",
"    if images[i][-5:] == \".tiff\":\n",
"        newname = images[i][:-5]\n",
"    else:\n",
"        newname = images[i][:-4]\n",
"    compressed_name = newname + \"_Compressed.txt\"\n",
"    newnamesforlater.append(compressed_name)\n",
"    with open(compressed_name, 'wb') as f:\n",
"        f.write(inletters)\n",
"    # Compare against the image that was actually compressed rather than one\n",
"    # fixed reference file, so the ratio stays right if image sizes differ.\n",
"    file_size_ratios.append(os.path.getsize(compressed_name) / os.path.getsize(images[i]))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def check_bin_size(tiff_image_path_list, num_bins=4, difference = True):\n",
"    \"\"\"\n",
"    Survey the bin thresholds that equal-population binning would choose\n",
"    for each image and summarise them across the whole list of images.\n",
"    \n",
"    For every image, the diff values returned by predict_pix are sorted and\n",
"    the values found at each multiple of (diff.size // num_bins) are taken\n",
"    as that image's thresholds.\n",
"    \n",
"    Input:\n",
"    tiff_image_path_list (iterable of string): paths to the tiff files\n",
"    num_bins (int): number of bins\n",
"    difference (bool): passed through unchanged to predict_pix\n",
"    \n",
"    Return:\n",
"    mean ndarray (num_bins - 1,): per-threshold mean over all images\n",
"    min ndarray (num_bins - 1,): per-threshold minimum over all images\n",
"    max ndarray (num_bins - 1,): per-threshold maximum over all images\n",
"    \n",
"    Raise:\n",
"    ValueError: if tiff_image_path_list contains no paths\n",
"    \n",
"    \"\"\"\n",
"    all_bins = []\n",
"    for tiff_image_path in tiff_image_path_list:\n",
"        # only diff is needed here; the other returned values are ignored\n",
"        _, _, diff, _ = predict_pix(tiff_image_path, difference)\n",
"        \n",
"        # calculate the number of points that will go in each bin\n",
"        data_points_per_bin = diff.size // num_bins\n",
"\n",
"        # np.sort returns a sorted copy, so diff itself is left untouched\n",
"        # NOTE(review): the indexing below assumes diff is 1-D - confirm against predict_pix\n",
"        sorted_diff = np.sort(diff)\n",
"        bins = [sorted_diff[k*data_points_per_bin] for k in range(1,num_bins)]\n",
"        all_bins.append(bins)\n",
"    if not all_bins:\n",
"        # fail with a clear message instead of an opaque numpy reduction error\n",
"        raise ValueError(\"tiff_image_path_list must contain at least one image path\")\n",
"    return np.mean(all_bins,axis = 0), np.min(all_bins,axis = 0), np.max(all_bins,axis=0)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(array([21.00404858, 31.92712551, 48.06477733]), array([11, 16, 22]), array([ 30, 70, 141]))\n"
]
}
],
"source": [
"scenes = file_extractor()\n",
"newnamesforlater = []\n",
"images = image_extractor(scenes)\n",
"print(check_bin_size(images))"
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -615,8 +704,25 @@
}
],
"metadata": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
},
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
......
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
......@@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
......@@ -72,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
......@@ -147,7 +147,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
......@@ -206,7 +206,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
......@@ -238,7 +238,7 @@
" # get the image_array, etc\n",
" image_array, diff, error= predict_pix(tiff_image_path, difference)\n",
"\n",
" bins = [21,32,48]\n",
" bins = [30,70,141]\n",
" # get the boundary \n",
" boundary = np.hstack((image_array[0,:],image_array[-1,:],image_array[1:-1,0],image_array[1:-1,-1]))\n",
" \n",
......@@ -302,7 +302,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
......@@ -332,7 +332,7 @@
"\n",
"\n",
" # sort the difference and create the bins\n",
" bins = [21,32,48]\n",
" bins = [30,70,141]\n",
" # get the boundary \n",
" boundary = np.hstack((image_as_array[0,:],image_as_array[-1,:],image_as_array[1:-1,0],image_as_array[1:-1,-1]))\n",
" \n",
......@@ -406,7 +406,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
......@@ -444,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
......@@ -472,7 +472,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
......@@ -561,7 +561,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
......@@ -572,17 +572,17 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"scenes = file_extractor()\n",
"newnamesforlater = []\n",
"images = image_extractor(scenes)\n",
"list_dic, ogbins = make_dictionary(images[0:10], 4, False)\n",
"list_dic, ogbins = make_dictionary(images, 4, False)\n",
"file_size_ratios = []\n",
"np.save(\"first_dic.npy\", list_dic)\n",
"for i in range(10):\n",
"for i in range(125,126):\n",
" image, new_error, diff = huffman(images[i], 4, False)\n",
" encoded_string1 = encoder(new_error, list_dic, diff, ogbins)\n",
" # reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)\n",
......@@ -600,14 +600,14 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.3955120002069395\n"
"0.4946477235897303\n"
]
}
],
......@@ -617,7 +617,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 63,
"metadata": {},
"outputs": [
{
......@@ -626,7 +626,7 @@
"0.39535481750525336"
]
},
"execution_count": 49,
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
......@@ -661,22 +661,13 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
}
......@@ -692,9 +683,9 @@
"\n",
"\n",
"# ogbins = [12,60,180]\n",
"ogbins = [21,32,48]\n",
"for i,item in enumerate(newnamesforlater[0:10]):\n",
" image, new_error, diff = huffman(images[i], 4, False)\n",
"ogbins = [30,70,141]\n",
"for i,item in enumerate(newnamesforlater):\n",
" image, new_error, diff = huffman(images[125+i], 4, False)\n",
" encoded_string2 = bytes_to_bitstring(read_from_file(item))\n",
" reconstruct_image = decoder(encoded_string2, list_dic, ogbins, False)\n",
" print(np.allclose(image, reconstruct_image))"
......@@ -702,7 +693,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
......@@ -729,7 +720,7 @@
" all_bins = []\n",
" for i, tiff_image_path in enumerate(tiff_image_path_list):\n",
" # get the image_array, etc\n",
" image_array, predict, diff= predict_pix(tiff_image_path, difference)\n",
" image_array, predict, diff, error= predict_pix(tiff_image_path, difference)\n",
" \n",
" # calculate the number of points that will go in each bin\n",
" data_points_per_bin = diff.size // num_bins\n",
......@@ -738,19 +729,24 @@
" sorted_diff = np.sort(diff.copy())\n",
" bins = [sorted_diff[i*data_points_per_bin] for i in range(1,num_bins)]\n",
" all_bins.append(bins)\n",
" return np.mean(all_bins,axis = 0)\n"
" return np.mean(all_bins,axis = 0), np.min(all_bins,axis = 0), np.max(all_bins,axis=0)\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-13.07692308 0. 13.08097166]\n"
"ename": "ValueError",
"evalue": "not enough values to unpack (expected 4, got 3)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 19'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000019?line=0'>1</a>\u001b[0m \u001b[39mprint\u001b[39m(check_bin_size(images))\n",
"\u001b[1;32m/home/bryce/git/master/SameTableEncoder.ipynb Cell 18'\u001b[0m in \u001b[0;36mcheck_bin_size\u001b[0;34m(tiff_image_path_list, num_bins, difference)\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=20'>21</a>\u001b[0m all_bins \u001b[39m=\u001b[39m []\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=21'>22</a>\u001b[0m \u001b[39mfor\u001b[39;00m i, tiff_image_path \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(tiff_image_path_list):\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=22'>23</a>\u001b[0m \u001b[39m# get the image_array, etc\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=23'>24</a>\u001b[0m image_array, predict, diff, error\u001b[39m=\u001b[39m predict_pix(tiff_image_path, difference)\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=25'>26</a>\u001b[0m \u001b[39m# calculate the number of points that will go in each bin\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/bryce/git/master/SameTableEncoder.ipynb#ch0000017?line=26'>27</a>\u001b[0m data_points_per_bin \u001b[39m=\u001b[39m diff\u001b[39m.\u001b[39msize \u001b[39m/\u001b[39m\u001b[39m/\u001b[39m num_bins\n",
"\u001b[0;31mValueError\u001b[0m: not enough values to unpack (expected 4, got 3)"
]
}
],
......
......@@ -10,6 +10,9 @@ from collections import Counter
import numpy.linalg as la
from time import time
folder_name = "images"
outputlocation = ""
def file_extractor(dirname="images"):
files = os.listdir(dirname)
scenes = []
......@@ -480,7 +483,6 @@ def read_from_file(filename):
with open(filename, 'rb') as file:
return file.read()
def bitstring_to_bytes(input_string):
int_array = []
length_of_string = len(input_string)
......@@ -495,7 +497,6 @@ def bitstring_to_bytes(input_string):
int_array.append(int(input_string+zerobuffer,2))
return bytes(int_array)
def bytes_to_bitstring(input_bytearray):
end_string = ""
int_array = [i for i in input_bytearray]
......@@ -503,33 +504,33 @@ def bytes_to_bitstring(input_bytearray):
end_string += (bin(item)[2:].zfill(8))
return end_string
starttime = time()
scenes = file_extractor()
# starttime = time()
scenes = file_extractor(folder_name)
newnamesforlater = []
images = image_extractor(scenes)
list_dic, ogbins = make_dictionary(images[0:10], 4, False)
# list_dic, bins = make_dictionary(images, 4, False)
file_size_ratios = []
np.save("first_dic.npy", list_dic)
for i in range(10):
image, new_error, diff = huffman(images[i], 4, False)
encoded_string1 = encoder(new_error, list_dic, diff, ogbins)
# reconstruct_image = decoder(A, encoded_string, list_dic, bins, False)
# print(np.allclose(image, reconstruct_image))
inletters = bitstring_to_bytes(encoded_string1)
# np.save("first_dic.npy", list_dic)
for i in range(len(images)):
# image, new_error, diff = huffman(images[i], 4, False)
# encoded_string = encoder(new_error, list_dic, diff, bins)
# inletters = bitstring_to_bytes(encoded_string)
if images[i][:-5] == ".tiff":
newname = images[i][:-5]
else:
newname = images[i][:-4]
newnamesforlater.append(newname + "_Compressed.txt")
with open(newname + "_Compressed.txt", 'wb') as f:
f.write(inletters)
list_dic = np.load("first_dic.npy", allow_pickle="TRUE")
ogbins = [21,32,48]
for i,item in enumerate(newnamesforlater[0:10]):
image, new_error, diff = huffman(images[i], 4, False)
encoded_string2 = bytes_to_bitstring(read_from_file(item))
reconstruct_image = decoder(encoded_string2, list_dic, ogbins, False)
print(np.allclose(image, reconstruct_image))
print(time() - starttime)
\ No newline at end of file
# with open(newname + "_Compressed.txt", 'wb') as f:
# f.write(inletters)
file_size_ratios.append((os.path.getsize(newname + "_Compressed.txt"))/os.path.getsize(images[i]))
# list_dic = np.load("first_dic.npy", allow_pickle="TRUE")
# for i,item in enumerate(newnamesforlater[0:10]):
# image, new_error, diff = huffman(images[i], 4, False)
# encoded_string2 = bytes_to_bitstring(read_from_file(item))
# reconstruct_image = decoder(encoded_string2, list_dic, bins, False)
# print(np.allclose(image, reconstruct_image))
print(np.mean(file_size_ratios))
print(np.max(file_size_ratios))
print(np.min(file_size_ratios))
print(np.argmax(file_size_ratios))
\ No newline at end of file
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment