Commit 4fb341cd authored by Bryce Hepner's avatar Bryce Hepner

after abstract changes

parent f54c590e
Pipeline #2634 passed with stage
in 7 seconds
......@@ -18,9 +18,6 @@
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{ISO/IEC14495-1}
\citation{544819}
\citation{PNGoverview}
\citation{PNGdetails}
\citation{PNGdetails}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Overview}{1}{subsection.1.1}\protected@file@percent }
\@writefile{brf}{\backcite{ISO/IEC14495-1}{{1}{1.1}{subsection.1.1}}}
......@@ -29,6 +26,9 @@
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:pixels}{{1}{1}{The other 4 pixels are used to find the value of the 5th.\relax }{figure.caption.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Background}{1}{subsection.1.2}\protected@file@percent }
\citation{PNGoverview}
\citation{PNGdetails}
\citation{PNGdetails}
\citation{LZW}
\citation{PNGdetails}
\citation{ABRARDO1997321}
......@@ -45,33 +45,33 @@
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Similar Methods}{2}{subsection.2.3}\protected@file@percent }
\@writefile{brf}{\backcite{ABRARDO1997321}{{2}{2.3}{subsection.2.3}}}
\@writefile{brf}{\backcite{Dahlen1993}{{2}{2.3}{subsection.2.3}}}
\@writefile{brf}{\backcite{AIAZZI20021619}{{2}{2.3}{subsection.2.3}}}
\citation{Numpy}
\citation{Huffman}
\citation{Numpy}
\@writefile{brf}{\backcite{AIAZZI20021619}{{3}{2.3}{subsection.2.3}}}
\@writefile{toc}{\contentsline {section}{\numberline {3}The Approach}{3}{section.3}\protected@file@percent }
\@writefile{brf}{\backcite{Numpy}{{3}{3}{section.3}}}
\@writefile{brf}{\backcite{Huffman}{{3}{3}{section.3}}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Encoding the Pixel Values\relax }}{3}{figure.caption.2}\protected@file@percent }
\newlabel{fig:Uniform}{{2}{3}{Encoding the Pixel Values\relax }{figure.caption.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Encoding the Error Values\relax }}{3}{figure.caption.3}\protected@file@percent }
\newlabel{fig:Normal}{{3}{3}{Encoding the Error Values\relax }{figure.caption.3}{}}
\@writefile{brf}{\backcite{Numpy}{{3}{3}{figure.caption.3}}}
\citation{LAPACKAlgorithms}
\citation{LeastSquaredProblem}
\bibstyle{ieee}
\bibdata{main}
\bibcite{ABRARDO1997321}{1}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Encoding the Error Values\relax }}{4}{figure.caption.3}\protected@file@percent }
\newlabel{fig:Normal}{{3}{4}{Encoding the Error Values\relax }{figure.caption.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{4}{section.5}\protected@file@percent }
\@writefile{brf}{\backcite{LAPACKAlgorithms}{{4}{5}{section.5}}}
\@writefile{brf}{\backcite{LeastSquaredProblem}{{4}{5}{section.5}}}
\bibcite{AIAZZI20021619}{2}
\bibcite{LeastSquaredProblem}{3}
\bibcite{LAPACKAlgorithms}{4}
\bibcite{Dahlen1993}{5}
\bibcite{PNGdetails}{6}
\bibcite{Numpy}{7}
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{4}{section.5}\protected@file@percent }
\@writefile{brf}{\backcite{LAPACKAlgorithms}{{4}{5}{section.5}}}
\@writefile{brf}{\backcite{LeastSquaredProblem}{{4}{5}{section.5}}}
\bibcite{Huffman}{8}
\bibcite{ISO/IEC14495-1}{9}
\bibcite{PNGoverview}{10}
......
......@@ -7,7 +7,7 @@
\backcite {PNGdetails}{{2}{2.2}{subsection.2.2}}
\backcite {ABRARDO1997321}{{2}{2.3}{subsection.2.3}}
\backcite {Dahlen1993}{{2}{2.3}{subsection.2.3}}
\backcite {AIAZZI20021619}{{2}{2.3}{subsection.2.3}}
\backcite {AIAZZI20021619}{{3}{2.3}{subsection.2.3}}
\backcite {Numpy}{{3}{3}{section.3}}
\backcite {Huffman}{{3}{3}{section.3}}
\backcite {Numpy}{{3}{3}{figure.caption.3}}
......
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2020.7.20) 18 JUL 2022 16:10
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2020.7.20) 26 JUL 2022 16:15
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
......@@ -405,61 +405,66 @@ LaTeX Font Warning: Font shape `OMS/cmtt/m/n' undefined
(Font) for symbol `textbraceleft' on input line 91.
Underfull \hbox (badness 1019) in paragraph at lines 116--119
Underfull \hbox (badness 1019) in paragraph at lines 118--121
\OT1/cmr/m/n/10 com-pres-sion is not new, as ev-i-denced by its use
[]
<PixelArrangement.png, id=47, 130.55226pt x 86.724pt>
File: PixelArrangement.png Graphic file (type png)
<use PixelArrangement.png>
Package pdftex.def Info: PixelArrangement.png used on input line 122.
Package pdftex.def Info: PixelArrangement.png used on input line 124.
(pdftex.def) Requested size: 99.36972pt x 66.01147pt.
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}
<./PixelArrangement.png (PNG copy)>]
Underfull \vbox (badness 4156) has occurred while \output is active []
[2]
<Uniform_No_Title.png, id=87, 462.528pt x 346.896pt>
<./PixelArrangement.png (PNG copy)>] [2]
<Uniform_No_Title.png, id=86, 462.528pt x 346.896pt>
File: Uniform_No_Title.png Graphic file (type png)
<use Uniform_No_Title.png>
Package pdftex.def Info: Uniform_No_Title.png used on input line 250.
Package pdftex.def Info: Uniform_No_Title.png used on input line 259.
(pdftex.def) Requested size: 237.13594pt x 177.8515pt.
<Normal_No_Title.png, id=89, 462.528pt x 346.896pt>
<Normal_No_Title.png, id=88, 462.528pt x 346.896pt>
File: Normal_No_Title.png Graphic file (type png)
<use Normal_No_Title.png>
Package pdftex.def Info: Normal_No_Title.png used on input line 256.
Package pdftex.def Info: Normal_No_Title.png used on input line 265.
(pdftex.def) Requested size: 237.13594pt x 177.8515pt.
[3 <./Uniform_No_Title.png> <./Normal_No_Title.png>] (./main.bbl (./main.brf)
LaTeX Warning: `h' float specifier changed to `ht'.
[3 <./Uniform_No_Title.png>] (./main.bbl (./main.brf)
\tf@brf=\write4
\openout4 = `main.brf'.
[4 <./Normal_No_Title.png>]
Underfull \hbox (badness 7362) in paragraph at lines 26--26
\OT1/cmtt/m/n/9 netlib . org / lapack / lug / node71 . html$[][]\OT1/cmr/m/n/9
, Oct. 1999.
[]
[4])
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 328.
)
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 337.
[5
]
Package atveryend Info: Empty hook `AfterLastShipout' on input line 328.
Package atveryend Info: Empty hook `AfterLastShipout' on input line 337.
(./main.aux)
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 328.
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 337.
\snap@out=\write5
\openout5 = `main.dep'.
Dependency list written on main.dep.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 328.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 337.
Package rerunfilecheck Info: File `main.out' has not changed.
(rerunfilecheck) Checksum: 32E97EDE93C04899CE7128EA0CB0D790;513.
Package rerunfilecheck Info: File `main.brf' has not changed.
(rerunfilecheck) Checksum: 6459ACEAE59D2F518EBB98684716CB6C;711.
Package rerunfilecheck Warning: File `main.brf' has changed.
(rerunfilecheck) Rerun to get bibliographical references right.
Package rerunfilecheck Info: Checksums for `main.brf':
(rerunfilecheck) Before: 433719AC429FD6C7B6A5F8432FFC0561;711
(rerunfilecheck) After: CE4087E59E4E6D34D4C7CEA41D264500;711.
LaTeX Font Warning: Some font shapes were not available, defaults substituted.
......@@ -471,7 +476,7 @@ Here is how much of TeX's memory you used:
23525 multiletter control sequences out of 15000+600000
541812 words of font info for 57 fonts, out of 8000000 for 9000
1142 hyphenation exceptions out of 8191
47i,9n,42p,782b,466s stack positions out of 5000i,500n,10000p,200000b,80000s
47i,9n,42p,782b,468s stack positions out of 5000i,500n,10000p,200000b,80000s
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb></us
r/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb></usr/shar
e/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texl
......@@ -486,10 +491,10 @@ y9.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb
></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti9.pfb></usr/
share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb></usr/share/
texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt9.pfb>
Output written on main.pdf (5 pages, 254345 bytes).
Output written on main.pdf (5 pages, 255693 bytes).
PDF statistics:
193 PDF objects out of 1000 (max. 8388607)
164 compressed objects within 2 object streams
194 PDF objects out of 1000 (max. 8388607)
165 compressed objects within 2 object streams
31 named destinations out of 1000 (max. 500000)
96 words of extra memory for PDF output out of 10000 (max. 10000000)
No preview for this file type
No preview for this file type
......@@ -96,7 +96,7 @@ Elphel, Inc.\\
%The resulting files were approximately 34\% smaller than their equivalent PNGs, and 35\% smaller than TIFF files compressed with LZW.
The specific properties of thermal images compared to photographic ones are higher dynamic range (16 bits) and dependence of pixels only on the temperature variations of self-radiating objects. The ambient temperature variations add to the pixel values, not multiply them as in the case of the illuminated scenes.
We base our algorithm on the 4-neighbor method and use local context to switch between encoding tables as the expected prediction error depends only on the differences between the known pixels invariant of their average value.
This approach allows for building a 2D histogram for the prediction error and the "smoothness" of the known pixels and using it to construct the encoding tables.
This approach allows for building a 2D histogram for the prediction error and the ``smoothness'' of the known pixels and using it to construct the encoding tables.
Table selection only depends on the four-pixel values (so available to the decoder) and does not increase the compressed stream.
As a result, we could losslessly compress thermal images to be less than 41\% of their original size.
The resulting files were approximately 34\% smaller than their equivalent PNGs, and 35\% smaller than TIFF files compressed with LZW.
......@@ -104,6 +104,8 @@ The resulting files were approximately 34\% smaller than their equivalent PNGs,
\section{Introduction}
\subsection{Overview}
The base system is not new, but it will be explained here in order to keep consistent definitions and in case any reader is not familiar with the method.
The idea is based on how images are scanned in originally.
Like a cathode-ray tube in a television, the algorithm goes line by line, reading/writing each pixel individually in a raster pattern.
......@@ -115,7 +117,7 @@ Even though a possibly larger integer may need to be stored, it is more likely t
The approach of using the neighboring pixels for compression is not new, as evidenced by its use in ISO/IEC 14495-1:1999 \cite{ISO/IEC14495-1} and ``CALIC-a context based adaptive lossless image codec''\cite{544819}, which were both written more than 20 years before the publication of this paper.
%This ``neighbor'' system is not as common as it should be, as it provides a base for simple implementation with high rates of compression.
Our final implementation differs from these methods, and others, in ways that we found beneficial, and in ways others may find to be beneficial as well.
Our final implementation differs from these methods, and others, in ways that we found beneficial for thermal images, and in ways others may find to be beneficial as well.
\begin{figure}[h]
\centering
......@@ -130,6 +132,13 @@ Most images had ranges of at most 4,096 between the smallest and the largest pix
The camera being used has 16 forward-facing thermal sensors creating 16 similar thermal images every frame.
Everything detailed here can still apply to standard grayscale or RGB images, but only 16-bit thermal images were used in testing.
Thermal images are unique in that pixel values will not depend on lighting but solely on the temperature values of the objects they represent.
Direct lighting can change these values due to the heat exchange, but the general case is that due to heat conduction, objects will have near uniform temperature across the surface.
This creates a need for a different type of compression system, one that is better suited for this different type of data used in the IR spectrum.
Thermal images also have large offsets since when the environment heats up, the pixel values increase while the relationship between objects remains almost constant.
For example, grass will always be cooler than a similar colored surface due to the different thermal properties, but when the day gets hotter, both surfaces will get hotter.
The images are 16-bit because they have to save these larger temperature values, even if they will be shown on a screen in 8-bit format.
Normal compression systems work on thermal images, but since they are not optimized for these, we found it necessary to use a different system.
\section{Related Work}
\subsection{PNG}
......
Abstract
The specific properties of thermal images compared to photographic ones
are higher dynamic range (16 bits) and dependence of pixels only on the
temperature variations of self-radiating objects. The ambient
temperature variations add to the pixel values, not multiply them as in
the case of the illuminated scenes. We base our algorithm on the
4-neighbor method and use local context to switch between encoding
tables as the expected prediction error depends only on the differences
between the known pixels invariant of their average value. This approach
allows for building a 2D histogram for the prediction error and the
“smoothness" of the known pixels and using it to construct the encoding
tables. Table selection only depends on the four-pixel values (so
available to the decoder) and does not increase the compressed stream.
As a result, we could losslessly compress thermal images to be less than
41% of their original size. The resulting files were approximately 34%
smaller than their equivalent PNGs, and 35% smaller than TIFF files
compressed with LZW.
Introduction
Overview
The base system is not new, but it will be explained here in order to
keep consistent definitions and in case any reader is not familiar with
the method.
The idea is based on how images are scanned in originally. Like a
cathode-ray tube in a television, the algorithm goes line by line,
reading/writing each pixel individually in a raster pattern.
Each pixel, as long as it is not on the top or side boundaries, will
have 4 neighbors that have already been read into the machine. Those
points can be analyzed and interpolated to find the next pixel’s value.
A visual demonstration of this pattern is given in Figure 1. The goal is
to encode the error between that value and the original value, save
that, and use that to compress and decompress the image. Even though a
possibly larger integer may need to be stored, it is more likely that
the guess will be correct or off by a small margin, making the
distribution better for compression.
The approach of using the neighboring pixels for compression is not new,
as evidenced by its use in ISO/IEC 14495-1:1999 and “CALIC-a context
based adaptive lossless image codec”, which were both written more than
20 years before the publication of this paper. Our final implementation
differs from these methods, and others, in ways that we found beneficial
for thermal images, and in ways others may find to be beneficial as
well.
[[fig:pixels]The other 4 pixels are used to find the value of the 5th.]
Background
The images that were used in the development of this paper were all
thermal images, with values ranging from 19,197 to 25,935. In the
system, total possible values can range from 0 to 32,768. Most images
had ranges of at most 4,096 between the smallest and the largest pixel
values. The camera being used has 16 forward-facing thermal sensors
creating 16 similar thermal images every frame. Everything detailed here
can still apply to standard grayscale or RGB images, but only 16-bit
thermal images were used in testing.
Thermal images are unique in that pixel values will not depend on
lighting, but instead solely on the temperature values of the objects
they represent. Direct lighting can change these values due to the heat
exchange, but the general case is that due to heat conduction, objects
will have near uniform temperature across the surface. This creates a
need for a different type of compression system, one that is better
suited for this different type of data used in the IR spectrum. Thermal
images also have large offsets, since when the environment heats up, the
pixel values increase while the relationship between objects remains
almost constant. For example, grass will always be cooler than a similar
colored surface due to the different thermal properties, but when the
day gets hotter, both surfaces will get hotter. The images are 16-bit
because they have to save these larger temperature values, even if they
will be shown on a screen in 8-bit format. Normal compression systems
work on thermal images, but since they are not optimized for these, we
found it necessary to use a different system.
Related Work
PNG
PNG is a lossless compression algorithm that also operates using a
single pass system. The image is separated into several blocks of
arbitrary size, which are then compressed using a combination of LZ77
and Huffman encoding. LZ77 operates by finding patterns in the data and
creating pointers to the original instance of that pattern. For example,
if there are two identical blocks of just the color blue, the second one
only has to make reference to the first. Instead of saving two full
blocks, the second one is saved as a pointer to the first, telling the
decoder to use that block. Huffman encoding is then used to save these
numbers, optimizing how the location data is stored. If one pattern is
more frequent, the algorithm should optimize over this, producing an
even smaller file. The Huffman encoding in conjunction with LZ77 helps
form “deflate”, the algorithm summarized here, and the one used in PNG.
Our algorithm uses Huffman encoding similarly, but a completely
different algorithm than LZ77. LZ77 seeks patterns between blocks, while
ours has no block structure and no explicit pattern functionality. Ours
uses the equivalent block size of 1, and instead of encoding the data,
it encodes alternate data which is used to compress.
LZW
LZW operates differently by creating a separate code table that maps
every sequence to a code. Although this is used for an image, the
original paper by Welch explains it through text examples, which will be
done here as well. Instead of looking at each character individually, it
looks at variable-length string chains and compresses those. Passing
through the items to be compressed, if a phrase has already been
encountered, it saves the reference to the original phrase along with
the next character in the sequence. In this way, the longer repeated
phrases are automatically found and can be compressed to be smaller.
This system also uses blocks like PNG in order to save patterns in the
data, but it does so by looking at the whole data set as it moves along,
whereas PNG only operates on a short portion of the text.
Ours, similarly to PNG, only looks at a short portion of the data, which
may have an advantage over LZW for images. Images generally do not have
the same patterns that text does, so it may be advantageous not to use
the entire corpus in compressing an image and instead only evaluate it
based on nearby objects. The blue parts of the sky will be next to other
blue parts of the sky, and in the realm of thermal images, temperatures
will probably be most similar to nearby ones due to how heat flows.
Similar Methods
Our prior searches did not find any very similar approaches, especially
with 16-bit thermal images. There are many papers however that may have
influenced ours indirectly or are similar to ours and need to be
mentioned for both their similarities and differences. One paper that is
close is “Encoding-interleaved hierarchical interpolation for lossless
image compression”. This method seems to operate with a similar end
goal, to save the interpolation, but operates using a different system,
including how it interpolates. Instead of using neighboring pixels in a
raster format, it uses vertical and horizontal ribbons, and a different
way of interpolating. The ribbons alternate, going between a row that is
directly saved and one that is not saved but is later interpolated. By
doing this, it is filling in the gaps of an already robust image and
saving the finer details. This other method could possibly show an
increase in speed but not likely in overall compression. This will not
have the same benefit as ours since ours uses interpolation on almost
the entire image, instead of just parts, helping it optimize over a
larger amount of data. This paper is also similar to “Iterative
polynomial interpolation and data compression”, where the researchers
did a similar approach but with different shapes. The error numbers were
still saved, but they specifically used polynomial interpretation which
we did not see fit to use in ours.
The closest method is “Near-lossless image compression by
relaxation-labelled prediction”, which is similar in the general
principles of the interpolation and encoding. The algorithm detailed in
the paper uses a clustering algorithm of the nearby points to create the
interpolation, saving the errors to be used later in the reconstruction
of the original image. This method is much more complex, not using a
direct interpolation method but instead using a clustering algorithm to
find the next point.
This could potentially have an advantage over what we did by using more
points in the process, but in proper implementation it may become too
complicated and lose value. The goal for us was to have a simple and
efficient encoding operation, and this would have too many errors to
process. It also has a binning system like ours, with theirs based off
of the mean square prediction error. The problem is that which bin it
goes into can shift over the classification process adding to the
complexity of the algorithm.
The Approach
To begin, the border values are encoded into the system, starting with
the first value. The values after that are just modifications from the
first value. There are not many values here and the algorithm needs a
place to start. Alternate things could have been done, but they would
have raised temporal complexity with marginal gain. Once the middle
points are reached, the pixel to the left, top left, directly above, and
top right have already been read into the system. Each of these values
is given a point in the x-y plane, with the top left at (-1,1), top
pixel at (0,1), top right pixel at (1,1), and the middle left pixel at
(-1,0), giving the target the coordinates (0,0). Using the formula for a
plane in 3D (ax + by + c = z) we have the system of equations
$$\begin{aligned}
-a + b + c &= z_0\\
b + c &= z_1\\
a + b + c &= z_2\\
-a + c &= z_3
\end{aligned}$$
Which complete the form Ax = b as
$$A =
\begin{bmatrix}
-1 & 1 & 1\\
0 & 1 & 1 \\
1 & 1 & 1 \\
-1 & 0 & 1
\end{bmatrix}$$
$$b =
\begin{bmatrix}
z_0\\
z_1 \\
z_2 \\
z_3
\end{bmatrix}$$
Due to there being 4 equations but only 3 unknowns (a, b, and c), the
system is overdetermined and generally has no exact solution.
This can be corrected by making
$$A =
\begin{bmatrix}
3 & 0 & -1\\
0 & 3 & 3 \\
1 & -3 & -4
\end{bmatrix}$$
and
$$b =
\begin{bmatrix}
-z_0 + z_2 - z_3\\
z_0 + z_1 + z_2 \\
-z_0 - z_1 - z_2 - z_3
\end{bmatrix}$$
.
The new matrix is full rank and can therefore be solved using
numpy.linalg.solve. The x that results corresponds to two values
followed by the original c from the ax + by + c = z form, which is the
predicted pixel value.
Huffman encoding performs well on data with varying frequency, making
it a good candidate for saving the error numbers. Figures 2 and 3 give a
representation of why saving the error numbers is better than saving the
actual values. Most pixels will be off the predicted values by low
numbers since many objects have close to uniform surface temperature or
have an almost uniform temperature gradient.
[Encoding the Pixel Values]
[Encoding the Error Values]
In order to adjust for objects in images that are known to have an
unpredictable temperature (fail the cases before), a bin system is used.
The residuals from numpy.linalg.lstsq are used to determine the
difference across the 4 known points, which the difference is then used
to place it in a category. This number is the difference between trying
to fit a plane between 4 different points. If a plane is able to be
drawn that contains all 4 points, it makes sense that the error will be
much smaller than if the best-fitted plane was not very close to any of
the points. Something more certain is more likely to be correctly
estimated. 5 bins were used with splits chosen by evenly distributing
the difference numbers into evenly sized bins. Many of the images had
several different bin sizes ranging from 11 in the first category to a
difference of 30 as the size of the first category. An average number
between all of them was chosen since using the average for bin sizes
versus specific bin sizes had an effect on compression of less than half
a percent.
Results
We attained an average compression ratio of 0.4057 on a set of 262
images, with compression ratios on individual images ranging from 0.3685
to 0.4979. Because the system runs off of a saved dictionary, it is
better to think of the system as a cross between an individual
compression system and a larger archival tool. This means that there are
significant changes in compression ratios depending on how many files
are compressed at a time, despite the ability to decompress files
individually and independently.
When the size of the saved dictionary was included, the compression
ratio on the entire set only changed from 0.4043 to 0.4057. However,
when tested on a random image in the set, it went from 0.3981 to 0.7508.
This is not a permanent issue, as changes to the method can be made to
fix this. These are outlined in the discussion section below.
This was tested on a set of at least 16 images, so this does not affect
us as much. When tested on a random set of 16 images, the ratio only
changed from 0.3973 to 0.4193.
Compression Rates
------------------- -------- -------- --------
Original LZW PNG Ours
100% 61.94% 61.21% 40.57%
Our method created files that are on average 33.7% smaller than PNG and
34.5% smaller than LZW compression on TIFF.
Discussion
The files produced through this method are much smaller than the ones
produced by the others, but this comes at great computational costs in
its current implementation. PNG compression was several orders of
magnitude faster on the local machine than the method that was used in
this project. Using a compiled language or integrated system instead of
python will increase the speed, but there are other improvements that
can be made.
The issue with numpy.linalg.solve was later addressed to fix the
potential slowdown. Calculating the inverse beforehand and using that in
the system had marginal temporal benefit. numpy.linalg.solve runs in
O(N³) for an N × N matrix, while the multiplication runs in a similar
time. The least squares method mentioned in this project also has a
shortcoming, but this one cannot be solved as easily. The pseudoinverse
can be calculated beforehand, but the largest problem is that it is
solving the system for every pixel individually and calculating the
norm. numpy.linalg.lstsq itself runs in O(N³) for an N × N matrix ,
while the pseudoinverse, when implemented, uses more python runtime,
adding to temporal complexity.
This compression suffers when it is only used on individual images,
which is not a problem for the use cases of this project. The test
images came from a camera that has 16 image sensors that work
simultaneously. The camera works in multiple image increments and
therefore creates large packets that can be saved together, while still
having the functionality of decompressing individually. This saves
greatly on the memory that is required to view an image. It was
therefore not seen necessary to create a different system to compress
individual files as individual images are not created.
A potential workaround for this problem would be to code extraneous
values into the image directly instead of adding them to the full
dictionary. This has the downside of not being able to integrate
perfectly with Huffman encoding. A leaf of the tree could be a trigger
to switch from Huffman encoding, and instead use an alternate system to
read in the bits. We did not do this, but it would be a simple change
for someone with a different use case.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment