Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
X
x393
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Elphel
x393
Commits
530030f6
Commit
530030f6
authored
Jun 13, 2016
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Switched to new implementation of 8x8 DCT, generated documentation
parent
0e866d77
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
453 additions
and
849 deletions
+453
-849
.gitignore
.gitignore
+1
-1
cmprs_macroblock_buf_iface.v
compressor_jp/cmprs_macroblock_buf_iface.v
+22
-4
jp_channel.v
compressor_jp/jp_channel.v
+15
-48
xdct393r.v
compressor_jp/xdct393r.v
+0
-660
dct1d_chen.v
dsp/dct1d_chen.v
+75
-20
dct1d_chen_reorder_in.v
dsp/dct1d_chen_reorder_in.v
+5
-5
dct1d_chen_reorder_out.v
dsp/dct1d_chen_reorder_out.v
+27
-12
dct2d8x8_chen.v
dsp/dct2d8x8_chen.v
+56
-26
dct_chen_transpose.v
dsp/dct_chen_transpose.v
+12
-6
dsp_addsub_simd.v
dsp/dsp_addsub_simd.v
+10
-10
dsp_ma.v
dsp/dsp_ma.v
+5
-5
dsp_ma_preadd.v
dsp/dsp_ma_preadd.v
+5
-5
fpga_version.vh
fpga_version.vh
+17
-15
x393_jpeg.py
py393/x393_jpeg.py
+5
-1
system_defines.vh
system_defines.vh
+2
-0
x393_1_7_0.Doxyfile
x393_1_7_0.Doxyfile
+2
-2
x393_testbench04.sav
x393_testbench04.sav
+194
-29
No files found.
.gitignore
View file @
530030f6
...
@@ -23,7 +23,7 @@ py393/dbg*
...
@@ -23,7 +23,7 @@ py393/dbg*
debug/*
debug/*
html/*
html/*
man/*
man/*
x393_docs/*
includes/x393_cur_params_sim.vh
includes/x393_cur_params_sim.vh
includes/x393_cur_params_target_*.vh
includes/x393_cur_params_target_*.vh
py393/exp_gpio.py
py393/exp_gpio.py
...
...
compressor_jp/cmprs_macroblock_buf_iface.v
View file @
530030f6
...
@@ -40,7 +40,14 @@
...
@@ -40,7 +40,14 @@
*/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
cmprs_macroblock_buf_iface
(
module
cmprs_macroblock_buf_iface
#(
`ifdef
USE_OLD_DCT
parameter
DCT_PIPELINE_PAUSE
=
0
// No need to delay
`else
parameter
DCT_PIPELINE_PAUSE
=
48
// TODO: find really required value (minimal), adjust counter bits (now 6)
// 48 seems to be OK (may be less)
`endif
)(
// input rst,
// input rst,
input
xclk
,
// global clock input, compressor single clock rate
input
xclk
,
// global clock input, compressor single clock rate
...
@@ -98,6 +105,7 @@ module cmprs_macroblock_buf_iface (
...
@@ -98,6 +105,7 @@ module cmprs_macroblock_buf_iface (
wire
frame_pre_start_w
;
// start sequence for a new frame
wire
frame_pre_start_w
;
// start sequence for a new frame
reg
frame_pre_start_r
;
reg
frame_pre_start_r
;
reg
[
8
:
0
]
mb_pre_start
;
// 1-hot macroblock pre start calculations - TODO: adjust width
reg
[
8
:
0
]
mb_pre_start
;
// 1-hot macroblock pre start calculations - TODO: adjust width
reg
mb_pre_start4_first
;
// first cycle after mb_pre_start[3]
wire
[
2
:
0
]
buf_diff
;
// difference between page needed and next valid - should be negative to have it ready
wire
[
2
:
0
]
buf_diff
;
// difference between page needed and next valid - should be negative to have it ready
wire
buf_ready_w
;
// External memory buffer has all the pages needed
wire
buf_ready_w
;
// External memory buffer has all the pages needed
...
@@ -117,6 +125,8 @@ module cmprs_macroblock_buf_iface (
...
@@ -117,6 +125,8 @@ module cmprs_macroblock_buf_iface (
reg
frame_pre_run
;
reg
frame_pre_run
;
reg
[
1
:
0
]
frame_may_start
;
reg
[
1
:
0
]
frame_may_start
;
reg
[
5
:
0
]
dct_pipeline_delay_cntr
;
`ifdef
DEBUG_RING
`ifdef
DEBUG_RING
assign
dbg_add_invalid
=
add_invalid
;
assign
dbg_add_invalid
=
add_invalid
;
assign
dbg_mb_release_buf
=
mb_release_buf
;
assign
dbg_mb_release_buf
=
mb_release_buf
;
...
@@ -180,9 +190,17 @@ module cmprs_macroblock_buf_iface (
...
@@ -180,9 +190,17 @@ module cmprs_macroblock_buf_iface (
// calculate before starting each macroblock (will wait if buffer is not ready) (TODO: align mb_pre_start[0] to mb_pre_end[2] - same)
// calculate before starting each macroblock (will wait if buffer is not ready) (TODO: align mb_pre_start[0] to mb_pre_end[2] - same)
//mb_pre_start_w
//mb_pre_start_w
if
(
!
frame_en_r
)
mb_pre_start
<=
0
;
// TODO: Here enforce minimal pause (if not zero) for the DCT pipeline to recover
if
(
mb_pre_start_w
)
mb_pre_start
<=
1
;
// will wait for buf_ready_w, but not less than DCT_PIPELINE_PAUSE (or no wait at all)
else
if
(
!
mb_pre_start
[
4
]
||
buf_ready_w
)
mb_pre_start
<=
mb_pre_start
<<
1
;
mb_pre_start4_first
<=
mb_pre_start
[
3
]
;
if
(
xrst
)
dct_pipeline_delay_cntr
<=
0
;
else
if
(
mb_pre_start4_first
&&
!
buf_ready_w
)
dct_pipeline_delay_cntr
<=
DCT_PIPELINE_PAUSE
-
1
;
else
if
(
|
dct_pipeline_delay_cntr
)
dct_pipeline_delay_cntr
<=
dct_pipeline_delay_cntr
-
1
;
if
(
!
frame_en_r
)
mb_pre_start
<=
0
;
if
(
mb_pre_start_w
)
mb_pre_start
<=
1
;
else
if
(
!
mb_pre_start
[
4
]
||
(
buf_ready_w
&&
!
(
|
dct_pipeline_delay_cntr
)))
mb_pre_start
<=
mb_pre_start
<<
1
;
if
(
mb_pre_start
[
1
])
mbl_x_r
[
6
:
3
]
<=
mb_first_in_row
?
{
2'b0
,
left_marg
[
4
:
3
]
}
:
mbl_x_next_r
[
6
:
3
]
;
if
(
mb_pre_start
[
1
])
mbl_x_r
[
6
:
3
]
<=
mb_first_in_row
?
{
2'b0
,
left_marg
[
4
:
3
]
}
:
mbl_x_next_r
[
6
:
3
]
;
if
(
mb_pre_start
[
2
])
mbl_x_last_r
[
7
:
3
]
<=
{
1'b0
,
mbl_x_r
[
6
:
3
]
}
+
{
2'b0
,
mb_w_m1
[
5
:
3
]
};
if
(
mb_pre_start
[
2
])
mbl_x_last_r
[
7
:
3
]
<=
{
1'b0
,
mbl_x_r
[
6
:
3
]
}
+
{
2'b0
,
mb_w_m1
[
5
:
3
]
};
...
...
compressor_jp/jp_channel.v
View file @
530030f6
...
@@ -965,39 +965,10 @@ module jp_channel#(
...
@@ -965,39 +965,10 @@ module jp_channel#(
if
(
dct_last_in
)
first_block_dct
<=
first_block_color_after
;
if
(
dct_last_in
)
first_block_dct
<=
first_block_color_after
;
end
end
// 8x8 DCT implementing Chen algorithm and 2 passes
`ifdef
USE_OLD_XDCT393
// Each pass (1d) uses 5 DSP48E1 modules (2 multipliers and 3 SIMD (2x24) adders/subtracters)
// Needs a small (<48, but did not calculate yet) pause between block if they did not come
xdct393
xdct393_i
(
// immediately after each other. This pause is needed to restart pipeline
.
clk
(
xclk
)
,
// input
.
en
(
frame_en
)
,
// input if zero will reset transpose memory page numbers
.
start
(
dct_start
)
,
// input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.
xin
(
yc_nodc
)
,
// input[9:0]
.
last_in
(
dct_last_in
)
,
// output reg output high during input of the last of 64 pixels in a 8x8 block //
.
pre_first_out
(
dct_pre_first_out
)
,
// output 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
dv
()
,
// not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
d_out
(
dct_out
)
// output[12:0]
)
;
`else
xdct393r
xdct393_i
(
.
clk
(
xclk
)
,
// input
.
en
(
frame_en
)
,
// input if zero will reset transpose memory page numbers
.
start
(
dct_start
)
,
// input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.
xin
(
yc_nodc
)
,
// input[9:0]
.
last_in
(
dct_last_in
)
,
// output reg output high during input of the last of 64 pixels in a 8x8 block //
.
pre_first_out
(
dct_pre_first_out
)
,
// output 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
dv
()
,
// not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
d_out
(
dct_out
)
// output[12:0]
)
;
/* New DCT, now in passive mode */
// TODO: enforce minimal pause (when not butted together)
wire
dct_last_in_debug
;
wire
dct_pre_first_out_debug
;
wire
dct_dv_debug
;
wire
[
12
:
0
]
dct_dout_debug
;
dct2d8x8_chen
#(
dct2d8x8_chen
#(
.
INPUT_WIDTH
(
10
)
,
.
INPUT_WIDTH
(
10
)
,
...
@@ -1005,27 +976,23 @@ module jp_channel#(
...
@@ -1005,27 +976,23 @@ module jp_channel#(
.
STAGE1_SAFE_BITS
(
3
)
,
.
STAGE1_SAFE_BITS
(
3
)
,
.
STAGE2_SAFE_BITS
(
3
)
,
.
STAGE2_SAFE_BITS
(
3
)
,
.
TRANSPOSE_WIDTH
(
16
)
,
.
TRANSPOSE_WIDTH
(
16
)
,
.
TRIM_STAGE_1
(
0
)
,
.
TRIM_STAGE_1
(
1
)
,
.
TRIM_STAGE_2
(
2
)
,
.
TRIM_STAGE_2
(
0
)
,
.
DSP_WIDTH
(
24
)
,
.
DSP_WIDTH
(
24
)
,
.
DSP_OUT_WIDTH
(
24
)
,
.
DSP_B_WIDTH
(
18
)
,
.
DSP_B_WIDTH
(
18
)
,
.
DSP_A_WIDTH
(
25
)
,
.
DSP_A_WIDTH
(
25
)
,
.
DSP_P_WIDTH
(
48
)
,
.
DSP_P_WIDTH
(
48
)
.
DSP_M_WIDTH
(
43
)
)
dct2d8x8_chen_i
(
)
dct2d8x8_chen_i
(
.
clk
(
xclk
)
,
// input
.
clk
(
xclk
)
,
// input
.
rst
(
!
frame_en
)
,
// input
.
rst
(
!
frame_en
)
,
// input
.
start
(
dct_start
)
,
// input
.
start
(
dct_start
)
,
// input
.
xin
(
yc_nodc
)
,
// input[9:0] signed
.
xin
(
yc_nodc
)
,
// input[9:0] signed
.
last_in
(
dct_last_in
_debug
)
,
// output reg
.
last_in
(
dct_last_in
)
,
// output reg
.
pre_first_out
(
dct_pre_first_out
_debug
)
,
// output
.
pre_first_out
(
dct_pre_first_out
)
,
// output
.
dv
(
dct_dv_debug
)
,
// output
.
dv
(
)
,
// output
.
d_out
(
dct_
dout_debug
)
// output[12:0] signed
.
d_out
(
dct_
out
)
// output[12:0] signed
)
;
)
;
`endif
wire
quant_start
;
wire
quant_start
;
dly_16
#(
.
WIDTH
(
1
))
i_quant_start
(
.
clk
(
xclk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd0
)
,
.
din
(
dct_pre_first_out
)
,
.
dout
(
quant_start
))
;
// dly=0+1
dly_16
#(
.
WIDTH
(
1
))
i_quant_start
(
.
clk
(
xclk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd0
)
,
.
din
(
dct_pre_first_out
)
,
.
dout
(
quant_start
))
;
// dly=0+1
...
...
compressor_jp/xdct393r.v
deleted
100644 → 0
View file @
0e866d77
/**********************************************************************
** -----------------------------------------------------------------------------**
** xdct393r.v
**
** 8x8 discrete Cosine Transform
** adding more registers to increase bandwidth
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** xdct393r is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
** Modified by Andrey Filippov - goal to make it work in start/stop mode, using
** "start" input (going together with the first data, no restriction on the gap between 64-pixel blocks (>=0)
** Removed "RST" input ("en" is only used to reset ping-pong transpose memory address)
** Split module in 2 stages
** Also saved some area - original design compiled by XST to 865 slices (XC2S300e), this one - 780!
**
** It is based on the original design (Xilinx app. note XAPP610) by:
** Author: Latha Pillai
** Senior Applications Engineer
**
** Video Applications
** Advanced Products Group
** Xilinx, Inc.
**
** Copyright (c) 2001 Xilinx, Inc.
** All rights reserved
**
** Date: Feb. 10, 2002
**
** RESTRICTED RIGHTS LEGEND
**
** This software has not been published by the author, and
** has been disclosed to others for the purpose of enhancing
** and promoting design productivity in Xilinx products.
**
** Therefore use, duplication or disclosure, now and in the
** future should give consideration to the productivity
** enhancements afforded the user of this code by the author's
** efforts. Thank you for using our products !
**
** Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
** WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
** IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
** A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
***********************************************************************/
/*
after I added DC subtraction before DCT I got 9-bit (although not likely to go out of 8-bit range) signed data.
also increased transpose memory to 9 bits (anyway it is 16-bit wide) - see if it will help to prevent saturation
without significant increase in gates
Saturation is still visible on real pictures, but there was a bug - addsub<i>a_comp, addsub<i>b_comp were not using their
MSB. I added 1 more bit to add_sub<i>a and add_sub<i>b and fixed that bug. Only 2 more slices were used
*/
`timescale
1
ns
/
1
ps
// For xdct353 - increasing data in 9 bits -> 10 bits, out 12 bits ->13 bits
module  xdct393r (
    // Top level of the two-pass 8x8 DCT: stage 1 (first 1-D pass) writes a
    // transpose memory, stage 2 (second 1-D pass) reads it back transposed.
    // Original note: latency increased by 3 relative to the earlier version.
    input             clk,           // system clock, this level registers on posedge
    input             en,            // when low, resets transpose memory page numbers (per original note)
    input             start,         // single-cycle pulse coinciding with the first sample; the other 63 follow
    input       [9:0] xin,           // 10-bit input sample
    output reg        last_in,       // registered flag: set one cycle after tm_wa[5:0] == 48;
                                     // original note: high during input of the last of 64 pixels of a block
    output            pre_first_out, // driven by stage 2 "endv"; original note: one cycle ahead of first output
    output            dv,            // output data valid (original note: rises on the 94-th cycle after start
                                     // - not re-verified here)
    output     [12:0] d_out          // 13-bit result from stage 2
);
    // ---- stage 1 <-> transpose memory <-> stage 2 interconnect ----
    wire        stage1_done;  // stage 1 "done", used as the start of stage 2
    wire        tm_page;      // transpose memory page reported by stage 1
    wire        tm_we;        // transpose memory write enable
    wire  [6:0] tm_wa;        // transpose memory write address (from stage 1)
    wire [15:0] tm_di;        // transpose memory write data    (from stage 1)
    wire  [6:0] tm_ra;        // transpose memory read address  (from stage 2)
    wire        tm_re;        // transpose memory read enable   (from stage 2)
    wire        tm_regen;     // transpose memory output register enable (from stage 2)
    wire [15:0] tm_out;       // transpose memory read data     (to stage 2)

    // The memory instance is fed 10-bit addresses; only the low 7 bits are used.
    wire  [9:0] tm_raddr    = {3'b0, tm_ra[6:0]};
    wire  [9:0] tm_waddr    = {3'b0, tm_wa[6:0]};

    // Position inside the 64-entry page at which last_in is raised.
    wire        tm_wa_is_48 = (tm_wa[5:0] == 6'd48);

    always @(posedge clk)
        last_in <= tm_wa_is_48;

    // First 1-D pass: consumes xin, produces 16-bit words for the transpose memory.
    dct393r_stage1 i_dct_stage1 (
        .clk     (clk),
        .en      (en),
        .start   (start),
        .xin     (xin),           // [9:0]
        .we      (tm_we),         // write to transpose memory
        .wr_cntr (tm_wa),         // [6:0] transpose memory write address
        .z_out   (tm_di[15:0]),   // [15:0] data to transpose memory
        .page    (tm_page),
        .done    (stage1_done)
    );

    // Second 1-D pass: started by stage1_done, reads the page that stage 1 reported.
    dct393r_stage2 i_dct_stage2 (
        .clk      (clk),
        .en       (en),
        .start    (stage1_done),   // stage 1 finished, data available in transpose memory
        .page     (tm_page),       // transpose memory page finished, valid at start
        .rd_cntr  (tm_ra[6:0]),    // [6:0] transpose memory read address
        .ren      (tm_re),         // output
        .regen    (tm_regen),      // output reg
        .tdin     (tm_out[15:0]),  // [15:0] data from transpose memory
        .endv     (pre_first_out), // output
        .dv       (dv),            // data output valid
        .dct2_out (d_out[12:0])    // [12:0] output data
    );

    // Transpose memory, 16-bit wide on both ports, with output register enabled.
    ram18_var_w_var_r #(
        .REGISTERS    (1),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (4),
        .DUMMY        (0)
    ) i_transpose_mem (
        // read port
        .rclk     (clk),           // input
        .raddr    (tm_raddr),      // input[9:0]
        .ren      (tm_re),         // input
        .regen    (tm_regen),      // input
        .data_out (tm_out[15:0]),  // output[15:0]
        // write port
        .wclk     (clk),           // input
        .waddr    (tm_waddr),      // input[9:0]
        .we       (tm_we),         // input
        .web      (4'hf),          // input[3:0]
        .data_in  (tm_di[15:0])    // input[15:0]
    );
endmodule
// 01/24/2004: Moved all clocks in stage 1 to "negedge" to reduce current pulses
module  dct393r_stage1 (
    // First 1-D DCT pass. Original note: latency increased by 1.
    input             clk,      // system clock
    input             en,       // when low: clears the write counter and stops we/toggle sequencing
    input             start,    // single-cycle start pulse (replaces RST of the original design)
    input       [9:0] xin,      // 10-bit input sample
    output            we,       // write enable to transpose memory
    output      [6:0] wr_cntr,  // [6:0] transpose memory write address
    output reg [15:0] z_out,    // data to transpose memory
    output            page,     // transpose memory page just filled (valid @ done)
    output            done      // last cycle of writing a 64-word page to transpose memory
);
    /* Coefficient magnitudes (16-bit unsigned). They appear to be cos/sin values
       scaled by 2^16 (e.g. C4 ~ 65536*cos(pi/4)) - TODO confirm against XAPP610. */
    localparam C3 = 16'd54491;
    localparam S3 = 16'd36410;
    localparam C4 = 16'd46341;
    localparam C6 = 16'd25080;
    localparam S6 = 16'd60547;
    localparam C7 = 16'd12785;
    localparam S7 = 16'd64277;

    // Sign flag stored in bit [16] of each coefficient register
    localparam POS = 1'b0;
    localparam NEG = 1'b1;

    // ---------------- small arithmetic helpers ----------------
    // 10 -> 11 bit sign extension
    function [10:0] sext11;
        input [9:0] v;
        sext11 = {v[9], v};
    endfunction

    // 18 -> 19 bit sign extension
    function [18:0] sext19;
        input [17:0] v;
        sext19 = {v[17], v};
    endfunction

    // Absolute value of an 11-bit two's complement number, low 10 bits kept
    function [9:0] mag10;
        input [10:0] v;
        reg   [10:0] t;
        begin
            t     = v[10] ? (-v) : v;
            mag10 = t[9:0];
        end
    endfunction

    // Conditionally negate an 18-bit value
    function [17:0] apply_sign;
        input        neg;
        input [17:0] m;
        apply_sign = neg ? (-m) : m;
    endfunction

    // ---------------- declarations ----------------
    reg  [16:0] memory1a, memory2a, memory3a, memory4a;   // {sign, magnitude} of current coefficients
    reg   [9:0] xa0_in,  xa1_in,  xa2_in,  xa3_in,  xa4_in,  xa5_in,  xa6_in,  xa7_in;   // input shift register
    reg   [9:0] xa0_reg, xa1_reg, xa2_reg, xa3_reg, xa4_reg, xa5_reg, xa6_reg, xa7_reg;  // snapshot of 8 samples
    reg  [10:0] add_sub1a, add_sub2a, add_sub3a, add_sub4a;           // butterfly sums/differences
    reg   [9:0] addsub1a_comp, addsub2a_comp, addsub3a_comp, addsub4a_comp; // their magnitudes
    reg         save_sign1a, save_sign2a, save_sign3a, save_sign4a;   // their signs
    wire [35:0] p1a_all, p2a_all, p3a_all, p4a_all;                   // unsigned products
    reg  [17:0] p1a_all_r, p2a_all_r, p3a_all_r, p4a_all_r;           // registered product slices
    reg         p1a_sig, p2a_sig, p3a_sig, p4a_sig;                   // product signs
    reg  [17:0] p1a, p2a, p3a, p4a;                                   // signed products
    reg  [18:0] z_out_int1, z_out_int2;                               // partial sums
    reg  [18:0] z_out_int;                                            // full sum
    wire [15:0] z_out_prelatch;
    reg   [2:0] indexi;             // selects the coefficient set
    reg         toggleA;            // 1: butterflies add, 0: butterflies subtract
    reg         enable_toggle;      // gates toggleA/indexi activity (original note: to conserve energy)
    reg   [6:0] wr_cntr_prelatch;
    reg         done_prelatch;
    reg         we_prelatch;
    reg         page_prelatch;
    reg         sxregs;             // strobe: copy the shift register into xa*_reg
    wire        pre_sxregs;
    wire        sxregs_d8;
    wire        enwe;

    // TODO (from original): see if negedge is needed.
    // Everything except the output latch and i_delayed_outs is clocked from the inverted clock.
    wire        nclk = ~clk;

    // End of a 64-word page in the write counter
    wire        last_wr = (wr_cntr_prelatch[5:0] == 6'h3f);

    // ---------------- fixed delays (per original notes dly_16 delays by dly+1) ----------------
    dly_16 #(.WIDTH(1)) i_sxregs_d8 (   // dly=7+1
        .clk  (nclk),
        .rst  (1'b0),
        .dly  (4'd7),
        .din  (sxregs),
        .dout (sxregs_d8)
    );
    dly_16 #(.WIDTH(1)) i_pre_sxregs (  // dly=6+1
        .clk  (nclk),
        .rst  (1'b0),
        .dly  (4'd6),
        .din  (start),
        .dout (pre_sxregs)
    );
    dly_16 #(.WIDTH(1)) i_enwe (        // dly=5+1
        .clk  (nclk),
        .rst  (1'b0),
        .dly  (4'd5),
        .din  (pre_sxregs),
        .dout (enwe)
    );

    // ---------------- sequencing ----------------
    always @(posedge nclk) begin
        // toggling window opens on sxregs and closes on the delayed strobe (or when disabled)
        enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));

        // page bookkeeping
        done_prelatch <= last_wr;
        if (last_wr) page_prelatch <= wr_cntr_prelatch[6];

        // write enable: starts on enwe, holds through the page while enabled
        we_prelatch <= enwe || (en && we_prelatch && !last_wr);

        // write address
        if      (!en)         wr_cntr_prelatch <= 7'b0;
        else if (we_prelatch) wr_cntr_prelatch <= wr_cntr_prelatch + 1;

        // sample-capture strobe: delayed start, then once per row except after the last row
        sxregs <= pre_sxregs ||
                  ((wr_cntr_prelatch[2:0] == 3'h1) && (wr_cntr_prelatch[5:3] != 3'h7));

        toggleA <= sxregs || (enable_toggle && (~toggleA));

        if      (sxregs)        indexi <= 3'h7;
        else if (enable_toggle) indexi <= indexi + 1;
    end

    /* 1D-DCT BEGIN */
    // Coefficient set for the four multipliers, selected by indexi.
    // Each entry is {sign, magnitude}; listed in the order memory1a..memory4a.
    always @(posedge nclk) begin
        case (indexi)
            3'd0: {memory1a, memory2a, memory3a, memory4a} <= {POS,C4, POS,C4, POS,C4, POS,C4};
            3'd1: {memory1a, memory2a, memory3a, memory4a} <= {POS,S7, POS,C3, POS,S3, POS,C7};
            3'd2: {memory1a, memory2a, memory3a, memory4a} <= {POS,S6, POS,C6, NEG,C6, NEG,S6};
            3'd3: {memory1a, memory2a, memory3a, memory4a} <= {POS,C3, NEG,C7, NEG,S7, NEG,S3};
            3'd4: {memory1a, memory2a, memory3a, memory4a} <= {POS,C4, NEG,C4, NEG,C4, POS,C4};
            3'd5: {memory1a, memory2a, memory3a, memory4a} <= {POS,S3, NEG,S7, POS,C7, POS,C3};
            3'd6: {memory1a, memory2a, memory3a, memory4a} <= {POS,C6, NEG,S6, POS,S6, NEG,C6};
            3'd7: {memory1a, memory2a, memory3a, memory4a} <= {POS,C7, NEG,S3, POS,C3, NEG,S7};
        endcase
    end

    // Input samples shifted through an 8-deep register chain; on sxregs the whole
    // chain is copied into xa*_reg and held for the following computations.
    always @(posedge nclk) begin
        xa0_in <= xin;
        xa1_in <= xa0_in;
        xa2_in <= xa1_in;
        xa3_in <= xa2_in;
        xa4_in <= xa3_in;
        xa5_in <= xa4_in;
        xa6_in <= xa5_in;
        xa7_in <= xa6_in;
        if (sxregs) begin
            xa0_reg <= xa0_in;
            xa1_reg <= xa1_in;
            xa2_reg <= xa2_in;
            xa3_reg <= xa3_in;
            xa4_reg <= xa4_in;
            xa5_reg <= xa5_in;
            xa6_reg <= xa6_in;
            xa7_reg <= xa7_in;
        end
    end

    // Butterfly adder/subtractor on mirrored sample pairs (7,0) (6,1) (5,2) (4,3);
    // operands are sign-extended by one bit so the result cannot overflow.
    always @(negedge clk) begin
        if (toggleA == 1'b1) begin
            add_sub1a <= sext11(xa7_reg) + sext11(xa0_reg);
            add_sub2a <= sext11(xa6_reg) + sext11(xa1_reg);
            add_sub3a <= sext11(xa5_reg) + sext11(xa2_reg);
            add_sub4a <= sext11(xa4_reg) + sext11(xa3_reg);
        end else begin
            add_sub1a <= sext11(xa7_reg) - sext11(xa0_reg);
            add_sub2a <= sext11(xa6_reg) - sext11(xa1_reg);
            add_sub3a <= sext11(xa5_reg) - sext11(xa2_reg);
            add_sub4a <= sext11(xa4_reg) - sext11(xa3_reg);
        end
    end

    // Unsigned multiplication of magnitudes; coefficient bit [16] is the sign and is excluded.
    assign p1a_all = addsub1a_comp * memory1a[15:0];
    assign p2a_all = addsub2a_comp * memory2a[15:0];
    assign p3a_all = addsub3a_comp * memory3a[15:0];
    assign p4a_all = addsub4a_comp * memory4a[15:0];

    // Sign/magnitude pipeline: split -> multiply -> re-apply sign -> sum of four products
    always @(posedge nclk) begin
        // split butterfly results into sign and magnitude
        save_sign1a   <= add_sub1a[10];
        save_sign2a   <= add_sub2a[10];
        save_sign3a   <= add_sub3a[10];
        save_sign4a   <= add_sub4a[10];
        addsub1a_comp <= mag10(add_sub1a);
        addsub2a_comp <= mag10(add_sub2a);
        addsub3a_comp <= mag10(add_sub3a);
        addsub4a_comp <= mag10(add_sub4a);

        // register the used slice of each product together with the resulting sign
        p1a_all_r <= p1a_all[26:9];
        p2a_all_r <= p2a_all[26:9];
        p3a_all_r <= p3a_all[26:9];
        p4a_all_r <= p4a_all[26:9];
        p1a_sig   <= save_sign1a ^ memory1a[16];
        p2a_sig   <= save_sign2a ^ memory2a[16];
        p3a_sig   <= save_sign3a ^ memory3a[16];
        p4a_sig   <= save_sign4a ^ memory4a[16];

        // back to two's complement
        p1a <= apply_sign(p1a_sig, p1a_all_r);
        p2a <= apply_sign(p2a_sig, p2a_all_r);
        p3a <= apply_sign(p3a_sig, p3a_all_r);
        p4a <= apply_sign(p4a_sig, p4a_all_r);

        // final adder tree
        z_out_int1 <= sext19(p1a) + sext19(p2a);
        z_out_int2 <= sext19(p3a) + sext19(p4a);
        z_out_int  <= z_out_int1 + z_out_int2;
    end

    // Drop 3 LSBs, adding the highest dropped bit for rounding
    assign z_out_prelatch[15:0] = z_out_int[18:3] + z_out_int[2];

    // Output data is re-registered on the rising edge of clk (crossing from the nclk domain)
    always @(posedge clk)
        z_out[15:0] <= z_out_prelatch[15:0];

    // Control outputs are passed through a dly_16 clocked by clk (dly=1)
    dly_16 #(.WIDTH(10)) i_delayed_outs (
        .clk  (clk),
        .rst  (1'b0),
        .dly  (4'd1),
        .din  ({wr_cntr_prelatch[6:0], done_prelatch, we_prelatch, page_prelatch}),
        .dout ({wr_cntr[6:0],          done,          we,          page})
    );
    /* 1D-DCT END */
endmodule
module
dct393r_stage2
(
// increased latency by 2 clocks
input
clk
,
// system clock, posedge
input
en
,
input
start
,
// stage 1 finished, data available in transpose memory
input
page
,
// transpose memory page finished, valid at start
output
[
6
:
0
]
rd_cntr
,
// [6:0] transpose memory read address
output
ren
,
// read enable transpose memory
output
reg
regen
,
// register enable in transpose memory
input
[
15
:
0
]
tdin
,
// [15:0] - data from transpose memory, added 6 bit fractional part
output
reg
endv
,
// one cycle ahead of starting (continuing) dv
output
reg
dv
,
// data output valid
output
reg
[
12
:
0
]
dct2_out
)
;
// [8:0]output data
/* constants */
localparam
C3
=
16'd54491
;
localparam
S3
=
16'd36410
;
localparam
C4
=
16'd46341
;
localparam
C6
=
16'd25080
;
localparam
S6
=
16'd60547
;
localparam
C7
=
16'd12785
;
localparam
S7
=
16'd64277
;
reg
[
16
:
0
]
memory1a
,
memory2a
,
memory3a
,
memory4a
;
reg
[
2
:
0
]
indexi
;
/* 2D section */
reg
[
15
:
0
]
xb0_in
,
xb1_in
,
xb2_in
,
xb3_in
,
xb4_in
,
xb5_in
,
xb6_in
,
xb7_in
;
reg
[
15
:
0
]
xb0_reg
,
xb1_reg
,
xb2_reg
,
xb3_reg
,
xb4_reg
,
xb5_reg
,
xb6_reg
,
xb7_reg
;
reg
[
16
:
0
]
add_sub1b
,
add_sub2b
,
add_sub3b
,
add_sub4b
;
reg
[
15
:
0
]
addsub1b_comp
,
addsub2b_comp
,
addsub3b_comp
,
addsub4b_comp
;
reg
save_sign1b
,
save_sign2b
,
save_sign3b
,
save_sign4b
;
reg
[
18
:
0
]
p1b
,
p2b
,
p3b
,
p4b
;
wire
[
35
:
0
]
p1b_all
,
p2b_all
,
p3b_all
,
p4b_all
;
reg
toggleB
;
reg
[
19
:
0
]
dct2d_int1
,
dct2d_int2
;
reg
[
20
:
0
]
dct_2d_int
;
wire
[
12
:
0
]
dct_2d_rnd
;
// transpose memory read address
reg
[
5
:
0
]
rd_cntrs
;
reg
rd_page
;
// start with the same as stage1
wire
sxregs
;
// to conserve energy by disabling toggleB
wire
sxregs_d8
;
reg
enable_toggle
;
reg
en_started
;
wire
pre2_endv
;
wire
pre2_disdv
;
// AF2015: was missing
reg
pre_endv
;
reg
pre_disdv
;
reg
pre_dv
;
// SRL16 i_endv (.Q(endv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(start)); // dly=14+1
// dly_16 #(.WIDTH(1)) i_endv(.clk(clk),.rst(1'b0), .dly(4'd14), .din(start), .dout(endv)); // dly=14+1
dly_16
#(
.
WIDTH
(
1
))
i_pre2_endv
(
.
clk
(
clk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd15
)
,
.
din
(
start
)
,
.
dout
(
pre2_endv
))
;
// dly=15+1
// SRL16 i_disdv (.Q(disdv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(rd_cntrs[5:0]==6'h3f)); // dly=14+1
// dly_16 #(.WIDTH(1)) i_disdv(.clk(clk),.rst(1'b0), .dly(4'd14), .din(rd_cntrs[5:0]==6'h3f), .dout(disdv)); // dly=14+1
dly_16
#(
.
WIDTH
(
1
))
i_pre2_disdv
(
.
clk
(
clk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd15
)
,
.
din
(
rd_cntrs
[
5
:
0
]
==
6'h3f
)
,
.
dout
(
pre2_disdv
))
;
// dly=15+1
// SRL16 i_sxregs (.Q(sxregs), .A0(1'b0), .A1(1'b0), .A2(1'b0), .A3(1'b1), .CLK(clk),.D((rd_cntr[5:3]==3'h0) && en_started)); // dly=8+1
// dly_16 #(.WIDTH(1)) i_sxregs(.clk(clk),.rst(1'b0), .dly(4'd8), .din((rd_cntr[5:3]==3'h0) && en_started), .dout(sxregs)); // dly=8+1
dly_16
#(
.
WIDTH
(
1
))
i_sxregs
(
.
clk
(
clk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd9
)
,
.
din
((
rd_cntrs
[
2
:
0
]
==
3'h0
)
&&
en_started
)
,
.
dout
(
sxregs
))
;
// dly=9+1
// SRL16 i_sxregs_d8 (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs && en_started)); // dly=7+1
dly_16
#(
.
WIDTH
(
1
))
i_sxregs_d8
(
.
clk
(
clk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd7
)
,
.
din
(
sxregs
&&
en_started
)
,
.
dout
(
sxregs_d8
))
;
// dly=7+1
assign
ren
=
en_started
;
always
@
(
posedge
clk
)
begin
enable_toggle
<=
en
&&
(
sxregs
||
(
enable_toggle
&&
!
sxregs_d8
))
;
// en_started <= en && (start || en_started);
if
(
!
en
)
en_started
<=
0
;
else
if
(
start
)
en_started
<=
1
;
else
if
(
rd_cntrs
[
5
:
0
]
==
6'h3f
)
en_started
<=
0
;
// should be after (start) as they happen simultaneously
regen
<=
en_started
;
pre_endv
<=
pre2_endv
;
endv
<=
pre_endv
;
// output reg
pre_disdv
<=
pre2_disdv
;
pre_dv
<=
en
&&
(
pre_endv
||
(
pre_dv
&&
~
pre_disdv
))
;
// dv <= en && (endv || (dv && ~disdv));
dv
<=
en
&&
pre_dv
;
// output reg
toggleB
<=
sxregs
||
(
enable_toggle
&&
(
~
toggleB
))
;
if
(
sxregs
)
indexi
<=
3'h7
;
else
if
(
enable_toggle
)
indexi
<=
indexi
+
1
;
if
(
start
)
rd_page
<=
page
;
if
(
start
)
rd_cntrs
[
5
:
0
]
<=
6'b0
;
// will always count, but that does not matter- What about saving energy ;-) ? Saved...
else
if
(
rd_cntrs
[
5
:
0
]
!=
6'h3f
)
rd_cntrs
[
5
:
0
]
<=
rd_cntrs
[
5
:
0
]
+
1
;
end
assign
rd_cntr
[
6
:
0
]
=
{
rd_page
,
rd_cntrs
[
2
:
0
]
,
rd_cntrs
[
5
:
3
]
};
// transposed counter
// duplicate memory<i>a from stage 1
// store 1D-DCT constant coefficient values for multipliers
always
@
(
posedge
clk
)
begin
case
(
indexi
)
0
:
begin
memory1a
<=
{
1'b0
,
C4
};
//8'd91
memory2a
<=
{
1'b0
,
C4
};
//8'd91
memory3a
<=
{
1'b0
,
C4
};
//8'd91
memory4a
<=
{
1'b0
,
C4
};
//8'd91
end
1
:
begin
memory1a
<=
{
1'b0
,
S7
};
//8'd126;
memory2a
<=
{
1'b0
,
C3
};
//8'd106;
memory3a
<=
{
1'b0
,
S3
};
//8'd71;
memory4a
<=
{
1'b0
,
C7
};
//8'd25;
end
2
:
begin
memory1a
<=
{
1'b0
,
S6
};
//8'd118;
memory2a
<=
{
1'b0
,
C6
};
//8'd49;
memory3a
<=
{
1'b1
,
C6
};
//-8'd49;
memory4a
<=
{
1'b1
,
S6
};
//-8'd118
end
3
:
begin
memory1a
<=
{
1'b0
,
C3
};
// 8'd106;
memory2a
<=
{
1'b1
,
C7
};
//-8'd25;
memory3a
<=
{
1'b1
,
S7
};
//-8'd126;
memory4a
<=
{
1'b1
,
S3
};
//-8'd71;
end
4
:
begin
memory1a
<=
{
1'b0
,
C4
};
// 8'd91;
memory2a
<=
{
1'b1
,
C4
};
//-8'd91;
memory3a
<=
{
1'b1
,
C4
};
//-8'd91;
memory4a
<=
{
1'b0
,
C4
};
// 8'd91;
end
5
:
begin
memory1a
<=
{
1'b0
,
S3
};
// 8'd71;
memory2a
<=
{
1'b1
,
S7
};
//-8'd126;
memory3a
<=
{
1'b0
,
C7
};
// 8'd25;
memory4a
<=
{
1'b0
,
C3
};
// 8'd106;
end
6
:
begin
memory1a
<=
{
1'b0
,
C6
};
// 8'd49;
memory2a
<=
{
1'b1
,
S6
};
//-8'd118;
memory3a
<=
{
1'b0
,
S6
};
// 8'd118;
memory4a
<=
{
1'b1
,
C6
};
//-8'd49;
end
7
:
begin
memory1a
<=
{
1'b0
,
C7
};
// 8'd25;
memory2a
<=
{
1'b1
,
S3
};
//-8'd71;
memory3a
<=
{
1'b0
,
C3
};
// 8'd106;
memory4a
<=
{
1'b1
,
S7
};
//-8'd126;
end
endcase
end
always
@
(
posedge
clk
)
begin
xb0_in
<=
tdin
;
xb1_in
<=
xb0_in
;
xb2_in
<=
xb1_in
;
xb3_in
<=
xb2_in
;
xb4_in
<=
xb3_in
;
xb5_in
<=
xb4_in
;
xb6_in
<=
xb5_in
;
xb7_in
<=
xb6_in
;
end
/* register inputs, inputs read in every eighth clk*/
always
@
(
posedge
clk
)
if
(
sxregs
)
begin
xb0_reg
<=
xb0_in
;
xb1_reg
<=
xb1_in
;
xb2_reg
<=
xb2_in
;
xb3_reg
<=
xb3_in
;
xb4_reg
<=
xb4_in
;
xb5_reg
<=
xb5_in
;
xb6_reg
<=
xb6_in
;
xb7_reg
<=
xb7_in
;
end
always
@
(
posedge
clk
)
if
(
toggleB
==
1'b1
)
begin
add_sub1b
<=
{
xb7_reg
[
15
]
,
xb7_reg
[
15
:
0
]
}
+
{
xb0_reg
[
15
]
,
xb0_reg
[
15
:
0
]
};
add_sub2b
<=
{
xb6_reg
[
15
]
,
xb6_reg
[
15
:
0
]
}
+
{
xb1_reg
[
15
]
,
xb1_reg
[
15
:
0
]
};
add_sub3b
<=
{
xb5_reg
[
15
]
,
xb5_reg
[
15
:
0
]
}
+
{
xb2_reg
[
15
]
,
xb2_reg
[
15
:
0
]
};
add_sub4b
<=
{
xb4_reg
[
15
]
,
xb4_reg
[
15
:
0
]
}
+
{
xb3_reg
[
15
]
,
xb3_reg
[
15
:
0
]
};
end
else
begin
add_sub1b
<=
{
xb7_reg
[
15
]
,
xb7_reg
[
15
:
0
]
}
-
{
xb0_reg
[
15
]
,
xb0_reg
[
15
:
0
]
};
add_sub2b
<=
{
xb6_reg
[
15
]
,
xb6_reg
[
15
:
0
]
}
-
{
xb1_reg
[
15
]
,
xb1_reg
[
15
:
0
]
};
add_sub3b
<=
{
xb5_reg
[
15
]
,
xb5_reg
[
15
:
0
]
}
-
{
xb2_reg
[
15
]
,
xb2_reg
[
15
:
0
]
};
add_sub4b
<=
{
xb4_reg
[
15
]
,
xb4_reg
[
15
:
0
]
}
-
{
xb3_reg
[
15
]
,
xb3_reg
[
15
:
0
]
};
end
// Adding these wires to get rid of the MSB that is always 0
// Magnitude of each add/sub result: when bit 16 (the sign) is set the value is
// negated, otherwise it is passed through unchanged.
wire [16:0] addsub1b_comp_w;
wire [16:0] addsub2b_comp_w;
wire [16:0] addsub3b_comp_w;
wire [16:0] addsub4b_comp_w;

assign addsub1b_comp_w = add_sub1b[16] ? (-add_sub1b) : add_sub1b;
assign addsub2b_comp_w = add_sub2b[16] ? (-add_sub2b) : add_sub2b;
assign addsub3b_comp_w = add_sub3b[16] ? (-add_sub3b) : add_sub3b;
assign addsub4b_comp_w = add_sub4b[16] ? (-add_sub4b) : add_sub4b;
// Sign/magnitude split, registered on every clk edge:
//   addsubNb_comp <- low 16 bits of the magnitude wire addsubNb_comp_w
//   save_signNb   <- bit 16 (sign) of the corresponding add_subNb result
always @(posedge clk) begin
    // magnitudes
    addsub1b_comp <= addsub1b_comp_w[15:0];
    addsub2b_comp <= addsub2b_comp_w[15:0];
    addsub3b_comp <= addsub3b_comp_w[15:0];
    addsub4b_comp <= addsub4b_comp_w[15:0];

    // signs, kept so they can be re-applied after the multiplication
    save_sign1b <= add_sub1b[16];
    save_sign2b <= add_sub2b[16];
    save_sign3b <= add_sub3b[16];
    save_sign4b <= add_sub4b[16];
end
// Unsigned products of each registered magnitude with the low 16 bits of the
// matching coefficient register. Bit 16 of memoryNa carries the coefficient
// sign and is deliberately left out of the multiplication.
assign p1b_all = addsub1b_comp * memory1a[15:0];
assign p2b_all = addsub2b_comp * memory2a[15:0];
assign p3b_all = addsub3b_comp * memory3a[15:0];
assign p4b_all = addsub4b_comp * memory4a[15:0];
// Registered, truncated products (19 bits each).
reg [18:0] p1b_all_r, p2b_all_r, p3b_all_r, p4b_all_r;
// Sign to apply to each registered product.
reg        p1b_sig;
reg        p2b_sig;
reg        p3b_sig;
reg        p4b_sig;
// Pipeline register after the multipliers.
//   pNb_all_r <- bits [32:14] of the product pNb_all (the 14 LSBs are dropped)
//   pNb_sig   <- sign of the product: saved operand sign XOR coefficient sign
//                (bit 16 of memoryNa)
always @(posedge clk) begin
    p1b_sig   <= (save_sign1b ^ memory1a[16]);
    p1b_all_r <= p1b_all[32:14];

    p2b_sig   <= (save_sign2b ^ memory2a[16]);
    p2b_all_r <= p2b_all[32:14];

    p3b_sig   <= (save_sign3b ^ memory3a[16]);
    p3b_all_r <= p3b_all[32:14];

    p4b_sig   <= (save_sign4b ^ memory4a[16]);
    p4b_all_r <= p4b_all[32:14];
end
// Re-apply the sign to each registered product magnitude: the value is negated
// when pNb_sig is set and passed through unchanged otherwise.
always @(posedge clk) begin
    p4b[18:0] <= p4b_sig ? (-p4b_all_r) : (p4b_all_r);
    p3b[18:0] <= p3b_sig ? (-p3b_all_r) : (p3b_all_r);
    p2b[18:0] <= p2b_sig ? (-p2b_all_r) : (p2b_all_r);
    p1b[18:0] <= p1b_sig ? (-p1b_all_r) : (p1b_all_r);
end
/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
/* Final adder. Adding the outputs of the 4 multipliers */
// Two-level registered adder tree over the four signed products, followed by
// the output register.
//   level 1: dct2d_int1 = p1b + p2b, dct2d_int2 = p3b + p4b
//   level 2: dct_2d_int = dct2d_int1 + dct2d_int2
// Before each addition both operands are widened by one bit by repeating their
// top bit (sign extension).
// Output: when pre_dv is high, dct2_out takes dct_2d_rnd plus bit 7 of
// dct_2d_int as a rounding increment; otherwise dct2_out keeps its value.
always @(posedge clk) begin
    // level 1
    dct2d_int1 <= ({p1b[18], p1b[18:0]} + {p2b[18], p2b[18:0]});
    dct2d_int2 <= ({p3b[18], p3b[18:0]} + {p4b[18], p4b[18:0]});

    // level 2
    dct_2d_int <= ({dct2d_int1[19], dct2d_int1[19:0]} + {dct2d_int2[19], dct2d_int2[19:0]});

    // rounded result, updated only while pre_dv is asserted
    if (pre_dv) begin
        dct2_out[12:0] <= dct_2d_rnd[12:0] + dct_2d_int[7];
    end
end
// Truncated adder-tree result: the 13 bits [20:8] of dct_2d_int (the 8 LSBs
// are dropped here).
assign dct_2d_rnd[12:0] = dct_2d_int[20:8];
// assign dct2_out[12:0] = dct_2d_rnd[12:0] + dct_2d_int[7];
endmodule
dsp/dct1d_chen.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen
* <b>Module:</b>dct1d_chen
* @file dct1d_chen.v
* @file dct1d_chen.v
* @date
:
2016-06-05
* @date
2016-06-05
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
1d 8-point DCT based on Chen algorithm
* @brief 1d 8-point DCT based on Chen algorithm
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,17 +35,19 @@
...
@@ -35,17 +35,19 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dct1d_chen
#(
module
dct1d_chen
#(
parameter
WIDTH
=
24
,
parameter
WIDTH
=
24
,
parameter
OUT_WIDTH
=
24
,
parameter
OUT_WIDTH
=
16
,
parameter
B_WIDTH
=
18
,
parameter
B_WIDTH
=
18
,
parameter
A_WIDTH
=
25
,
parameter
A_WIDTH
=
25
,
parameter
P_WIDTH
=
48
,
parameter
P_WIDTH
=
48
,
parameter
M_WIDTH
=
43
,
// actual multiplier width (== (A_WIDTH +B_WIDTH)
// parameter M_WIDTH = 43, // actual multiplier width (== (A_WIDTH +B_WIDTH)
parameter
ROUND_OUT
=
8
,
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
parameter
COSINE_SHIFT
=
17
,
parameter
COSINE_SHIFT
=
17
,
parameter
COS_1_16
=
128553
,
// (1<<17) * cos(1*pi/16)
parameter
COS_1_16
=
128553
,
// (1<<17) * cos(1*pi/16)
parameter
COS_2_16
=
121095
,
// (1<<17) * cos(2*pi/16)
parameter
COS_2_16
=
121095
,
// (1<<17) * cos(2*pi/16)
parameter
COS_3_16
=
108982
,
// (1<<17) * cos(3*pi/16)
parameter
COS_3_16
=
108982
,
// (1<<17) * cos(3*pi/16)
...
@@ -59,11 +61,13 @@ module dct1d_chen#(
...
@@ -59,11 +61,13 @@ module dct1d_chen#(
input
en
,
input
en
,
input
[
2
*
WIDTH
-
1
:
0
]
d10_32_76_54
,
// Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]}
input
[
2
*
WIDTH
-
1
:
0
]
d10_32_76_54
,
// Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]}
input
start
,
// {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]}
input
start
,
// {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]}
output
[
WIDTH
-
1
:
0
]
dout
,
output
[
OUT_WIDTH
-
1
:
0
]
dout
,
output
reg
pre2_start_out
,
// 2 clock cycle before F4 output, full dout sequence
output
reg
pre2_start_out
,
// 2 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
output
reg
en_out
// valid at the same time slot as pre2_start_out (goes active with pre2_start_out)
output
reg
en_out
// valid at the same time slot as pre2_start_out (goes active with pre2_start_out)
)
;
)
;
localparam
TOTAL_RSHIFT
=
COSINE_SHIFT
+
ROUND_OUT
;
localparam
BEFORE_SAT_WIDTH
=
P_WIDTH
-
TOTAL_RSHIFT
;
reg
signed
[
B_WIDTH
-
1
:
0
]
dsp_ma_bin
;
reg
signed
[
B_WIDTH
-
1
:
0
]
dsp_ma_bin
;
wire
dsp_ma_ceb1_1
;
// load b1 register
wire
dsp_ma_ceb1_1
;
// load b1 register
wire
dsp_ma_ceb2_1
;
// load b2 register
wire
dsp_ma_ceb2_1
;
// load b2 register
...
@@ -94,6 +98,7 @@ module dct1d_chen#(
...
@@ -94,6 +98,7 @@ module dct1d_chen#(
wire
dsp_ma_neg_m_2
;
// 1 - negate multiplier result
wire
dsp_ma_neg_m_2
;
// 1 - negate multiplier result
wire
dsp_ma_accum_2
;
// 0 - use multiplier result, 1 add to accumulator
wire
dsp_ma_accum_2
;
// 0 - use multiplier result, 1 add to accumulator
wire
signed
[
P_WIDTH
-
1
:
0
]
dsp_ma_p_2
;
wire
signed
[
P_WIDTH
-
1
:
0
]
dsp_ma_p_2
;
wire
signed
[
P_WIDTH
-
1
:
0
]
dsp_ma_p_mux
;
// Multiplier A/D inputs before shift
// Multiplier A/D inputs before shift
wire
signed
[
WIDTH
-
1
:
0
]
dsp_ma_ain24_1
;
wire
signed
[
WIDTH
-
1
:
0
]
dsp_ma_ain24_1
;
...
@@ -142,10 +147,25 @@ module dct1d_chen#(
...
@@ -142,10 +147,25 @@ module dct1d_chen#(
reg
[
7
:
0
]
phase
;
reg
[
7
:
0
]
phase
;
reg
[
2
:
0
]
phase_cnt
;
reg
[
2
:
0
]
phase_cnt
;
reg
[
OUT_WIDTH
-
1
:
0
]
dout_r
;
reg
[
OUT_WIDTH
-
1
:
0
]
dout_r
;
wire
[
OUT_WIDTH
-
1
:
0
]
dout1_w
;
// wire [OUT_WIDTH -1:0] dout1_w;
wire
[
OUT_WIDTH
-
1
:
0
]
dout2_w
;
// wire [OUT_WIDTH -1:0] dout2_w;
wire
dout_round_c
;
wire
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round_w
;
// after rounding, before (optional) saturation
reg
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round_r
;
// after rounding, before (optional) saturation
wire
[
OUT_WIDTH
-
1
:
0
]
dout_sat_w
;
wire
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round
;
// after rounding, before (optional) saturation
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
// Temporarily adding 1 extra latency cycle for rounding/saturation. TODO: Remove when moved to DSP itself
reg
pre3_start_out
;
// 3 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
reg
pre_en_out
;
// one cycle ahead of en_out: goes active with pre3_start_out
// .ain ({simd_a1,simd_a0}), // input[47:0]
// .ain ({simd_a1,simd_a0}), // input[47:0]
// .bin ({simd_b1,simd_b0}), // input[47:0]
// .bin ({simd_b1,simd_b0}), // input[47:0]
// dsp_addsub_simd1_i input connections
// dsp_addsub_simd1_i input connections
...
@@ -233,7 +253,7 @@ module dct1d_chen#(
...
@@ -233,7 +253,7 @@ module dct1d_chen#(
assign
dsp_ma_ced_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_ced_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_sela_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_sela_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_seld_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
5
]
|
phase
[
7
]
;
assign
dsp_ma_seld_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
5
]
|
phase
[
7
]
;
assign
dsp_ma_neg_m_2
=
phase
[
6
]
;
assign
dsp_ma_neg_m_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_accum_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
4
]
|
phase
[
6
]
;
assign
dsp_ma_accum_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
4
]
|
phase
[
6
]
;
// dsp_ma2_i data input connections
// dsp_ma2_i data input connections
assign
dsp_ma_ain24_2
=
simd_p5
;
assign
dsp_ma_ain24_2
=
simd_p5
;
...
@@ -255,10 +275,37 @@ module dct1d_chen#(
...
@@ -255,10 +275,37 @@ module dct1d_chen#(
// assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign
dout1_w
=
dsp_ma_p_1
[
COSINE_SHIFT
+:
WIDTH
]
;
// adding one bit for adder (two MPY outputs are added)
assign
dsp_ma_p_mux
=
phase_cnt
[
0
]
?
dsp_ma_p_1
:
dsp_ma_p_2
;
assign
dout2_w
=
dsp_ma_p_2
[
COSINE_SHIFT
+:
WIDTH
]
;
// adding one bit for adder (two MPY outputs are added)
// assign dout1_w = dsp_ma_p_1[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign
dout_round_c
=
dsp_ma_p_mux
[
TOTAL_RSHIFT
-
1
]
;
assign
dout_round_w
=
dsp_ma_p_mux
[
TOTAL_RSHIFT
+:
BEFORE_SAT_WIDTH
]
+
dout_round_c
;
// Saturation (only if BEFORE_SAT_WIDTH > OUT_WIDTH)
localparam
TRIM_MSB
=
BEFORE_SAT_WIDTH
-
OUT_WIDTH
;
generate
if
(
TRIM_MSB
<
0
)
begin
// should never happen
assign
dout_sat_w
=
{
{
(
-
TRIM_MSB
)
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}},
dout_round
};
end
else
if
(
TRIM_MSB
==
0
)
begin
assign
dout_sat_w
=
dout_round
[
0
+:
OUT_WIDTH
]
;
end
else
begin
//! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign
dout_sat_w
=
(
dout_round
[
BEFORE_SAT_WIDTH
-
1
-:
TRIM_MSB
]
==
{
TRIM_MSB
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}}
)
?
dout_round
[
0
+:
OUT_WIDTH
]
:
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
,
{
OUT_WIDTH
-
1
{~
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}}};
end
endgenerate
// to possibly remove registers with generate
assign
dout_round
=
dout_round_r
;
//BEFORE_SAT_WIDTH
// wire dout_round_c;
// wire [OUT_WIDTH -1:0] dout_round_w;
//ROUND_OUT
//phase_cnt[0] ? dout1_w : dout2_w;
assign
dout
=
dout_r
;
assign
dout
=
dout_r
;
always
@
(
posedge
clk
)
begin
always
@
(
posedge
clk
)
begin
...
@@ -284,16 +331,24 @@ module dct1d_chen#(
...
@@ -284,16 +331,24 @@ module dct1d_chen#(
3'h6
:
dsp_ma_bin
<=
COS_4_16
;
3'h6
:
dsp_ma_bin
<=
COS_4_16
;
3'h7
:
dsp_ma_bin
<=
COS_6_16
;
3'h7
:
dsp_ma_bin
<=
COS_6_16
;
endcase
endcase
dout_r
<=
phase_cnt
[
0
]
?
dout1_w
:
dout2_w
;
// dout_r <= phase_cnt[0] ? dout1_w : dout2_w;
dout_round_r
<=
dout_round_w
;
dout_r
<=
dout_sat_w
;
if
(
rst
)
pre2_start_out
<=
0
;
if
(
rst
)
pre3_start_out
<=
0
;
else
pre2_start_out
<=
(
per_type
==
2
)
&&
phase
[
3
]
;
else
pre3_start_out
<=
(
per_type
==
2
)
&&
phase
[
3
]
;
pre2_start_out
<=
pre3_start_out
;
if
(
rst
||
!
(
en
||
(
|
phase
)))
en_out
<=
0
;
if
(
rst
||
!
(
en
||
(
|
phase
)))
pre_en_out
<=
0
;
else
if
(
phase
[
3
])
begin
else
if
(
phase
[
3
])
begin
if
(
per_type
==
2
)
en_out
<=
1
;
if
(
per_type
==
2
)
pre_en_out
<=
1
;
else
if
(
per_type
[
2
])
en_out
<=
0
;
else
if
(
per_type
[
2
])
pre_en_out
<=
0
;
end
end
en_out
<=
pre_en_out
;
end
end
dsp_addsub_simd
#(
dsp_addsub_simd
#(
...
...
dsp/dct1d_chen_reorder_in.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen_reorder_in
* <b>Module:</b>dct1d_chen_reorder_in
* @file dct1d_chen_reorder_in.v
* @file dct1d_chen_reorder_in.v
* @date
:
2016-06-08
* @date
2016-06-08
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
Reorder scan-line pixel stream for dct1d_chen module
* @brief Reorder scan-line pixel stream for dct1d_chen module
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dct1d_chen_reorder_in
#(
module
dct1d_chen_reorder_in
#(
...
...
dsp/dct1d_chen_reorder_out.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen_reorder_out
* <b>Module:</b>dct1d_chen_reorder_out
* @file dct1d_chen_reorder_out.v
* @file dct1d_chen_reorder_out.v
* @date
:
2016-06-08
* @date
2016-06-08
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
Reorder data from dct1d_chen output to natural sequence
* @brief Reorder data from dct1d_chen output to natural sequence
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dct1d_chen_reorder_out
#(
module
dct1d_chen_reorder_out
#(
...
@@ -62,6 +62,7 @@ module dct1d_chen_reorder_out#(
...
@@ -62,6 +62,7 @@ module dct1d_chen_reorder_out#(
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
reg
start_out_r
;
reg
start_out_r
;
reg
en_out_r
;
reg
en_out_r
;
wire
stop_out
;
// qualify with en
assign
dout
=
dout_r
;
assign
dout
=
dout_r
;
assign
start_out
=
start_out_r
;
assign
start_out
=
start_out_r
;
assign
en_out
=
en_out_r
;
assign
en_out
=
en_out_r
;
...
@@ -98,16 +99,30 @@ module dct1d_chen_reorder_out#(
...
@@ -98,16 +99,30 @@ module dct1d_chen_reorder_out#(
if
((
per_type
==
2
)
&&
(
cntr_in
==
1
))
raddr
<=
{~
cntr_in
[
3
]
,
3'b0
};
if
((
per_type
==
2
)
&&
(
cntr_in
==
1
))
raddr
<=
{~
cntr_in
[
3
]
,
3'b0
};
else
if
((
raddr
[
2
:
0
]
!=
0
)
||
(
per_type
!=
0
))
raddr
<=
raddr
+
1
;
else
if
((
raddr
[
2
:
0
]
!=
0
)
||
(
per_type
!=
0
))
raddr
<=
raddr
+
1
;
dout_r
<=
reord_buf_ram
[
raddr
]
;
if
(
en_out_r
)
dout_r
<=
reord_buf_ram
[
raddr
]
;
start_out_r
<=
(
per_type
==
2
)
&&
(
cntr_in
==
1
)
;
start_out_r
<=
(
per_type
==
2
)
&&
(
cntr_in
==
1
)
;
if
(
rst
||
(
per_type
==
0
)
)
en_out_r
<=
0
;
if
(
rst
||
(
per_type
==
0
)
)
en_out_r
<=
0
;
else
if
(
cntr_in
==
1
)
en_out_r
<=
(
per_type
==
2
)
||
!
per_type
[
2
]
;
// else if (cntr_in == 1) en_out_r <= (per_type == 2) || !per_type[2];
else
if
((
cntr_in
==
1
)
&&
(
per_type
==
2
))
en_out_r
<=
1
;
if
(
rst
)
dv
<=
0
;
else
if
(
stop_out
&&
!
en
)
en_out_r
<=
0
;
else
if
(
start_out_r
)
dv
<=
1
;
//stop_out
else
if
((
raddr
[
2
:
0
]
==
0
)
&&
!
en_out_r
)
dv
<=
0
;
dv
<=
en_out_r
;
// if (rst) dv <= 0;
// else if (start_out_r) dv <= 1;
// else if ((raddr[2:0] == 0) && !en_out_r) dv <= 0;
end
end
dly01_16
dly01_16_i
(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
dly
(
4'd8
)
,
// input[3:0]
.
din
((
&
cntr_in
[
2
:
0
])
&&
!
en
)
,
// input
.
dout
(
stop_out
)
// output
)
;
endmodule
endmodule
dsp/dct2d8x8_chen.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct2d8x8_chen
* <b>Module:</b>dct2d8x8_chen
* @file dct2d8x8_chen.v
* @file dct2d8x8_chen.v
* @date
:
2016-06-10
* @date
2016-06-10
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
2-d DCT implementation of Chen algorithm
* @brief 2-d DCT implementation of Chen algorithm
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dct2d8x8_chen
#(
module
dct2d8x8_chen
#(
...
@@ -45,13 +45,13 @@ module dct2d8x8_chen#(
...
@@ -45,13 +45,13 @@ module dct2d8x8_chen#(
parameter
STAGE2_SAFE_BITS
=
3
,
// leave this number of extra bits on DCT1D input to prevent output saturation
parameter
STAGE2_SAFE_BITS
=
3
,
// leave this number of extra bits on DCT1D input to prevent output saturation
parameter
TRANSPOSE_WIDTH
=
16
,
// transpose memory width
parameter
TRANSPOSE_WIDTH
=
16
,
// transpose memory width
parameter
TRIM_STAGE_1
=
1
,
// Trim these MSBs from the stage1 results (1 - matches old DCT)
parameter
TRIM_STAGE_1
=
1
,
// Trim these MSBs from the stage1 results (1 - matches old DCT)
parameter
TRIM_STAGE_2
=
2
,
// Trim these MSBs from the stage2 results TODO: put real value
parameter
TRIM_STAGE_2
=
0
,
// Trim these MSBs from the stage2 results
parameter
DSP_WIDTH
=
24
,
parameter
DSP_WIDTH
=
24
,
parameter
DSP_OUT_WIDTH
=
24
,
//
parameter DSP_OUT_WIDTH = 24,
parameter
DSP_B_WIDTH
=
18
,
parameter
DSP_B_WIDTH
=
18
,
parameter
DSP_A_WIDTH
=
25
,
parameter
DSP_A_WIDTH
=
25
,
parameter
DSP_P_WIDTH
=
48
,
parameter
DSP_P_WIDTH
=
48
parameter
DSP_M_WIDTH
=
43
// actual multiplier width (== (A_WIDTH +B_WIDTH)
//
parameter DSP_M_WIDTH = 43 // actual multiplier width (== (A_WIDTH +B_WIDTH)
)
(
)
(
input
clk
,
/// system clock, posedge
input
clk
,
/// system clock, posedge
input
rst
,
// sync reset
input
rst
,
// sync reset
...
@@ -68,6 +68,8 @@ module dct2d8x8_chen#(
...
@@ -68,6 +68,8 @@ module dct2d8x8_chen#(
localparam
REPLICATE_IN_STAGE2
=
STAGE2_SAFE_BITS
;
localparam
REPLICATE_IN_STAGE2
=
STAGE2_SAFE_BITS
;
localparam
PAD_IN_STAGE2
=
DSP_WIDTH
-
TRANSPOSE_WIDTH
-
STAGE2_SAFE_BITS
;
localparam
PAD_IN_STAGE2
=
DSP_WIDTH
-
TRANSPOSE_WIDTH
-
STAGE2_SAFE_BITS
;
localparam
ROUND_STAGE1
=
DSP_WIDTH
-
TRANSPOSE_WIDTH
-
TRIM_STAGE_1
;
localparam
ROUND_STAGE2
=
DSP_WIDTH
-
OUTPUT_WIDTH
-
TRIM_STAGE_2
;
reg
signed
[
INPUT_WIDTH
-
1
:
0
]
xin_r
;
reg
signed
[
INPUT_WIDTH
-
1
:
0
]
xin_r
;
...
@@ -82,7 +84,7 @@ module dct2d8x8_chen#(
...
@@ -82,7 +84,7 @@ module dct2d8x8_chen#(
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_l
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_l
;
wire
signed
[
DSP_OUT
_WIDTH
-
1
:
0
]
dct1_out
;
wire
signed
[
TRANSPOSE
_WIDTH
-
1
:
0
]
dct1_out
;
wire
stage1_pre2_start_out
;
wire
stage1_pre2_start_out
;
// wire stage1_pre2_en_out;
// wire stage1_pre2_en_out;
...
@@ -94,20 +96,43 @@ module dct2d8x8_chen#(
...
@@ -94,20 +96,43 @@ module dct2d8x8_chen#(
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_l
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_l
;
wire
signed
[
DSP_O
UT_WIDTH
-
1
:
0
]
dct2_out
;
wire
signed
[
OUTP
UT_WIDTH
-
1
:
0
]
dct2_out
;
wire
stage2_pre2_start_out
;
wire
stage2_pre2_start_out
;
wire
stage2_pre2_en_out
;
wire
stage2_pre2_en_out
;
wire
signed
[
OUTPUT_WIDTH
-
1
:
0
]
dct2_trimmed
;
//
wire signed [OUTPUT_WIDTH-1:0] dct2_trimmed;
assign
dct1in_pad_h
=
{{
REPLICATE_IN_STAGE1
{
dct1in_h
[
INPUT_WIDTH
-
1
]
}},
dct1in_h
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
dct1in_pad_h
=
{{
REPLICATE_IN_STAGE1
{
dct1in_h
[
INPUT_WIDTH
-
1
]
}},
dct1in_h
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
dct1in_pad_l
=
{{
REPLICATE_IN_STAGE1
{
dct1in_l
[
INPUT_WIDTH
-
1
]
}},
dct1in_l
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
dct1in_pad_l
=
{{
REPLICATE_IN_STAGE1
{
dct1in_l
[
INPUT_WIDTH
-
1
]
}},
dct1in_l
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
transpose_din
=
dct1_out
[
DSP_OUT_WIDTH
-
1
-
TRIM_STAGE_1
-:
TRANSPOSE_WIDTH
]
;
assign
transpose_din
=
dct1_out
;
/*
generate
if (TRIM_STAGE_1 == 0) begin
assign transpose_din = dct1_out[DSP_OUT_WIDTH-1 -:TRANSPOSE_WIDTH];
end else begin //! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign transpose_din = (dct1_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_1] == {TRIM_STAGE_1{dct1_out[DSP_OUT_WIDTH-1]}})?
dct1_out[DSP_OUT_WIDTH-1-TRIM_STAGE_1 -: TRANSPOSE_WIDTH]:
{dct1_out[DSP_OUT_WIDTH-1], {TRANSPOSE_WIDTH-1{~dct1_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
assign
dct2in_pad_h
=
{{
REPLICATE_IN_STAGE2
{
transpose_douth
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_douth
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2in_pad_h
=
{{
REPLICATE_IN_STAGE2
{
transpose_douth
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_douth
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2in_pad_l
=
{{
REPLICATE_IN_STAGE2
{
transpose_doutl
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_doutl
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2in_pad_l
=
{{
REPLICATE_IN_STAGE2
{
transpose_doutl
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_doutl
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2_trimmed
=
dct2_out
[
DSP_OUT_WIDTH
-
1
-
TRIM_STAGE_2
-:
OUTPUT_WIDTH
]
;
// assign dct2_trimmed = dct2_out;
/*
generate
if (TRIM_STAGE_2 == 0) begin
assign dct2_trimmed = dct2_out[DSP_OUT_WIDTH-1 -: OUTPUT_WIDTH];
end else begin //! saturate. Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign dct2_trimmed = (dct2_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_2] == {TRIM_STAGE_2{dct2_out[DSP_OUT_WIDTH-1]}})?
dct2_out[DSP_OUT_WIDTH-1-TRIM_STAGE_2 -:OUTPUT_WIDTH]:
{dct2_out[DSP_OUT_WIDTH-1], {OUTPUT_WIDTH-1{~dct2_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
always
@
(
posedge
clk
)
begin
always
@
(
posedge
clk
)
begin
start_in_r
<=
start
;
start_in_r
<=
start
;
...
@@ -141,11 +166,11 @@ module dct2d8x8_chen#(
...
@@ -141,11 +166,11 @@ module dct2d8x8_chen#(
wire
dbg_stage1_pre2_en_out
;
wire
dbg_stage1_pre2_en_out
;
dct1d_chen
#(
dct1d_chen
#(
.
WIDTH
(
DSP_WIDTH
)
,
.
WIDTH
(
DSP_WIDTH
)
,
.
OUT_WIDTH
(
DSP_OUT_WIDTH
)
,
.
OUT_WIDTH
(
TRANSPOSE_WIDTH
)
,
//
DSP_OUT_WIDTH),
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
M_WIDTH
(
DSP_M_WIDTH
)
.
ROUND_OUT
(
ROUND_STAGE1
)
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
)
dct1d_chen_stage1_i
(
)
dct1d_chen_stage1_i
(
.
clk
(
clk
)
,
// input
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
rst
(
rst
)
,
// input
...
@@ -170,12 +195,12 @@ module dct2d8x8_chen#(
...
@@ -170,12 +195,12 @@ module dct2d8x8_chen#(
)
;
)
;
dct1d_chen
#(
dct1d_chen
#(
.
WIDTH
(
DSP_WIDTH
)
,
.
WIDTH
(
DSP_WIDTH
)
,
.
OUT_WIDTH
(
DSP_O
UT_WIDTH
)
,
.
OUT_WIDTH
(
OUTP
UT_WIDTH
)
,
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
M_WIDTH
(
DSP_M_WIDTH
)
.
ROUND_OUT
(
ROUND_STAGE2
)
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
)
dct1d_chen_stage2_i
(
)
dct1d_chen_stage2_i
(
.
clk
(
clk
)
,
// input
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
rst
(
rst
)
,
// input
...
@@ -193,7 +218,7 @@ module dct2d8x8_chen#(
...
@@ -193,7 +218,7 @@ module dct2d8x8_chen#(
.
clk
(
clk
)
,
// input
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
rst
(
rst
)
,
// input
.
en
(
stage2_pre2_en_out
)
,
// input
.
en
(
stage2_pre2_en_out
)
,
// input
.
din
(
dct2_
trimmed
)
,
// input[23:0]
.
din
(
dct2_
out
)
,
// input[23:0]
.
pre2_start
(
stage2_pre2_start_out
)
,
// input
.
pre2_start
(
stage2_pre2_start_out
)
,
// input
.
dout
(
d_out
)
,
// output[23:0]
.
dout
(
d_out
)
,
// output[23:0]
.
start_out
(
pre_first_out
)
,
// output reg
.
start_out
(
pre_first_out
)
,
// output reg
...
@@ -202,13 +227,16 @@ module dct2d8x8_chen#(
...
@@ -202,13 +227,16 @@ module dct2d8x8_chen#(
)
;
)
;
// Just for debugging/comparing with old 1-d DCT:
// Just for debugging/comparing with old 1-d DCT:
wire
[
DSP_WIDTH
-
1
:
0
]
dbg_d_out
;
`ifdef
SIMULATION
// no sense to synthesize it
`ifdef
DEBUG_DCT1D
wire
[
TRANSPOSE_WIDTH
-
1
:
0
]
dbg_d_out
;
//wire [15:0] dbg_d_out13=dbg_d_out[7 +: 16] ;
wire
dbg_dv
;
wire
dbg_dv
;
wire
dbg_en_out
;
wire
dbg_en_out
;
wire
dbg_pre_first_out
;
wire
dbg_pre_first_out
;
dct1d_chen_reorder_out
#(
dct1d_chen_reorder_out
#(
.
WIDTH
(
DSP
_WIDTH
)
.
WIDTH
(
TRANSPOSE
_WIDTH
)
)
dct1d_chen_reorder_out_dbg_i
(
)
dct1d_chen_reorder_out_dbg_i
(
.
clk
(
clk
)
,
// input
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
rst
(
rst
)
,
// input
...
@@ -220,5 +248,7 @@ wire dbg_pre_first_out;
...
@@ -220,5 +248,7 @@ wire dbg_pre_first_out;
.
dv
(
dbg_dv
)
,
// output reg
.
dv
(
dbg_dv
)
,
// output reg
.
en_out
(
dbg_en_out
)
// output reg
.
en_out
(
dbg_en_out
)
// output reg
)
;
)
;
`endif
`endif
endmodule
endmodule
dsp/dct_chen_transpose.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct_chen_transpose
* <b>Module:</b>dct_chen_transpose
* @file dct_chen_transpose.v
* @file dct_chen_transpose.v
* @date
:
2016-06-09
* @date
2016-06-09
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
Reorder+transpose data between two 1-d DCT passes
* @brief Reorder+transpose data between two 1-d DCT passes
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dct_chen_transpose
#(
module
dct_chen_transpose
#(
...
@@ -70,6 +70,7 @@ module dct_chen_transpose#(
...
@@ -70,6 +70,7 @@ module dct_chen_transpose#(
reg
[
2
*
WIDTH
-
1
:
0
]
ram_reg2
;
reg
[
2
*
WIDTH
-
1
:
0
]
ram_reg2
;
wire
pre_rstart_w
=
wcntr
[
5
:
0
]
==
61
;
wire
pre_rstart_w
=
wcntr
[
5
:
0
]
==
61
;
reg
[
1
:
0
]
rstop_r
;
reg
[
1
:
0
]
rstop_r
;
reg
first_after_pause
;
// first block after pause - do not write 2 items to the "past"
assign
wpage
=
wcntr
[
6
]
^
wrow_mod
[
3
]
;
// previous page for row 0, col 1 & 3
assign
wpage
=
wcntr
[
6
]
^
wrow_mod
[
3
]
;
// previous page for row 0, col 1 & 3
assign
wrow_mod
=
{
1'b0
,
wrow
}
-
wcol13
;
assign
wrow_mod
=
{
1'b0
,
wrow
}
-
wcol13
;
...
@@ -93,7 +94,7 @@ module dct_chen_transpose#(
...
@@ -93,7 +94,7 @@ module dct_chen_transpose#(
else
if
(
pre_we_r
)
wcntr
<=
wcntr
+
1
;
// including page, should be before 'if (pre2_start)'
else
if
(
pre_we_r
)
wcntr
<=
wcntr
+
1
;
// including page, should be before 'if (pre2_start)'
else
if
(
pre2_start
)
wcntr
<=
{
wcntr
[
6
]
,
6'b0
};
// if it happens during pre_we_r it is ignored; otherwise (after a pause) it zeroes the in-page address
else
if
(
pre2_start
)
wcntr
<=
{
wcntr
[
6
]
,
6'b0
};
// if it happens during pre_we_r it is ignored; otherwise (after a pause) it zeroes the in-page address
we_r
<=
pre_we_r
;
we_r
<=
pre_we_r
&&
(
!
first_after_pause
||
!
wcol13
||
(
|
wrow
))
;
// do not write first after pause to the "past"
if
(
we_r
)
transpose_ram
[
waddr
]
<=
din
;
if
(
we_r
)
transpose_ram
[
waddr
]
<=
din
;
...
@@ -118,6 +119,11 @@ module dct_chen_transpose#(
...
@@ -118,6 +119,11 @@ module dct_chen_transpose#(
if
(
rst
)
en_out
<=
0
;
if
(
rst
)
en_out
<=
0
;
else
if
(
rcntr
==
1
)
en_out
<=
1
;
else
if
(
rcntr
==
1
)
en_out
<=
1
;
else
if
(
rstop_r
[
1
])
en_out
<=
0
;
else
if
(
rstop_r
[
1
])
en_out
<=
0
;
if
(
rst
)
first_after_pause
<=
0
;
else
if
(
pre2_start
&&
!
we_r
)
first_after_pause
<=
1
;
else
if
(
&
wcntr
[
5
:
0
])
first_after_pause
<=
0
;
end
end
dly01_16
dly01_16_stop_i
(
dly01_16
dly01_16_stop_i
(
...
...
dsp/dsp_addsub_simd.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dsp_addsub_simd
* <b>Module:</b>dsp_addsub_simd
* @file dsp_addsub_simd.v
* @file dsp_addsub_simd.v
* @date
:
2016-06-05
* @date
2016-06-05
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
SIMD adder/subtracter
* @brief SIMD adder/subtracter
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dsp_addsub_simd
#(
module
dsp_addsub_simd
#(
...
@@ -70,7 +70,7 @@ module dsp_addsub_simd#(
...
@@ -70,7 +70,7 @@ module dsp_addsub_simd#(
DSP48E1
#(
DSP48E1
#(
.
ACASCREG
(
1
)
,
.
ACASCREG
(
1
)
,
.
ADREG
(
0
)
,
// (
1),
.
ADREG
(
1
)
,
.
ALUMODEREG
(
1
)
,
.
ALUMODEREG
(
1
)
,
.
AREG
(
1
)
,
// (1)
.
AREG
(
1
)
,
// (1)
.
AUTORESET_PATDET
(
"NO_RESET"
)
,
.
AUTORESET_PATDET
(
"NO_RESET"
)
,
...
@@ -81,7 +81,7 @@ module dsp_addsub_simd#(
...
@@ -81,7 +81,7 @@ module dsp_addsub_simd#(
.
CARRYINREG
(
1
)
,
.
CARRYINREG
(
1
)
,
.
CARRYINSELREG
(
1
)
,
.
CARRYINSELREG
(
1
)
,
.
CREG
(
1
)
,
//(1),
.
CREG
(
1
)
,
//(1),
.
DREG
(
0
)
,
//(
1),
.
DREG
(
1
)
,
.
INMODEREG
(
1
)
,
.
INMODEREG
(
1
)
,
.
IS_ALUMODE_INVERTED
(
4'b0
)
,
.
IS_ALUMODE_INVERTED
(
4'b0
)
,
.
IS_CARRYIN_INVERTED
(
1'b0
)
,
.
IS_CARRYIN_INVERTED
(
1'b0
)
,
...
@@ -131,7 +131,7 @@ module dsp_addsub_simd#(
...
@@ -131,7 +131,7 @@ module dsp_addsub_simd#(
.
CECTRL
(
1'b1
)
,
// input
.
CECTRL
(
1'b1
)
,
// input
.
CED
(
1'b0
)
,
// input
.
CED
(
1'b0
)
,
// input
.
CEINMODE
(
1'b1
)
,
// input
.
CEINMODE
(
1'b1
)
,
// input
.
CEM
(
1'b
1
)
,
// input
.
CEM
(
1'b
0
)
,
// input
.
CEP
(
cep
)
,
// input
.
CEP
(
cep
)
,
// input
.
CLK
(
clk
)
,
// input
.
CLK
(
clk
)
,
// input
.
D
(
25'h1ffffff
)
,
// input[24:0]
.
D
(
25'h1ffffff
)
,
// input[24:0]
...
@@ -145,9 +145,9 @@ module dsp_addsub_simd#(
...
@@ -145,9 +145,9 @@ module dsp_addsub_simd#(
.
RSTB
(
rst
)
,
// input
.
RSTB
(
rst
)
,
// input
.
RSTC
(
rst
)
,
// input
.
RSTC
(
rst
)
,
// input
.
RSTCTRL
(
rst
)
,
// input
.
RSTCTRL
(
rst
)
,
// input
.
RSTD
(
rst
)
,
// input
.
RSTD
(
1'b0
)
,
// input
.
RSTINMODE
(
rst
)
,
// input
.
RSTINMODE
(
rst
)
,
// input
.
RSTM
(
rst
)
,
// input
.
RSTM
(
1'b0
)
,
// input
.
RSTP
(
rst
)
// input
.
RSTP
(
rst
)
// input
)
;
)
;
`else
`else
...
...
dsp/dsp_ma.v
View file @
530030f6
/*
******************************************************************************
/*
!
* dsp_ma
* dsp_ma
* @file dsp_ma.v
* @file dsp_ma.v
* @date
:
2016-06-05
* @date
2016-06-05
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
DSP with multi-input multiplier and accumulator
* @brief DSP with multi-input multiplier and accumulator
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dsp_ma
#(
module
dsp_ma
#(
...
...
dsp/dsp_ma_preadd.v
View file @
530030f6
/*
******************************************************************************
/*
!
* dsp_ma_preadd
* dsp_ma_preadd
* @file dsp_ma_preadd.v
* @file dsp_ma_preadd.v
* @date
:
2016-06-05
* @date
2016-06-05
* @author
:
Andrey Filippov
* @author
Andrey Filippov
*
*
* @brief
:
DSP with multi-input multiplier and accumulator with pre-adder
* @brief DSP with multi-input multiplier and accumulator with pre-adder
*
*
* @copyright Copyright (c) 2016 Elphel, Inc.
* @copyright Copyright (c) 2016 Elphel, Inc.
*
*
...
@@ -35,7 +35,7 @@
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
`timescale
1
ns
/
1
ps
module
dsp_ma_preadd
#(
module
dsp_ma_preadd
#(
...
...
fpga_version.vh
View file @
530030f6
...
@@ -35,21 +35,23 @@
...
@@ -35,21 +35,23 @@
* contains all the components and scripts required to completely simulate it
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
* with at least one of the Free Software programs.
*/
*/
parameter FPGA_VERSION = 32'h03930096; // serial, next
parameter FPGA_VERSION = 32'h03930098; // serial, trying dct_chen - works, removing old completely
// parameter FPGA_VERSION = 32'h03930095; // parallel -0.068/-0.342/5 82.38%
// parameter FPGA_VERSION = 32'h03930097; // serial, trying dct_chen - works
// parameter FPGA_VERSION = 32'h03930094; // hispi, disabling debug -0.187/-1.252/16 84.14%
// parameter FPGA_VERSION = 32'h03930096; // serial, next (before changing DCT)
// parameter FPGA_VERSION = 32'h03930093; // hispi, masking sensor data to memory buffer, debug still on
// parameter FPGA_VERSION = 32'h03930095; // parallel -0.068/-0.342/5 82.38%
// parameter FPGA_VERSION = 32'h03930092; // hispi, even more debugging memory pages sens-> memory
// parameter FPGA_VERSION = 32'h03930094; // hispi, disabling debug -0.187/-1.252/16 84.14%
// parameter FPGA_VERSION = 32'h03930091; // hispi, more debugging memory pages sens-> memory
// parameter FPGA_VERSION = 32'h03930093; // hispi, masking sensor data to memory buffer, debug still on
// parameter FPGA_VERSION = 32'h03930090; // hispi, debugging memory pages sens-> memory (not met)
// parameter FPGA_VERSION = 32'h03930092; // hispi, even more debugging memory pages sens-> memory
// parameter FPGA_VERSION = 32'h0393008f; // parallel, all the same
// parameter FPGA_VERSION = 32'h03930091; // hispi, more debugging memory pages sens-> memory
// parameter FPGA_VERSION = 32'h0393008e; // hispi, adding i2c fifo fill, all met,83.73%
// parameter FPGA_VERSION = 32'h03930090; // hispi, debugging memory pages sens-> memory (not met)
// parameter FPGA_VERSION = 32'h0393008d; // parallel, adding i2c fifo fill max err 0.128, 82.61%
// parameter FPGA_VERSION = 32'h0393008f; // parallel, all the same
// parameter FPGA_VERSION = 32'h0393008c; // hispi, all met, 83.55%
// parameter FPGA_VERSION = 32'h0393008e; // hispi, adding i2c fifo fill, all met,83.73%
// parameter FPGA_VERSION = 32'h0393008b; // parallel, all met, 82.06% . Reran 0.051ns error, 82.02%
// parameter FPGA_VERSION = 32'h0393008d; // parallel, adding i2c fifo fill max err 0.128, 82.61%
// parameter FPGA_VERSION = 32'h0393008a; // HiSPI sensor (14 MPix) no timing errors
// parameter FPGA_VERSION = 32'h0393008c; // hispi, all met, 83.55%
// parameter FPGA_VERSION = 32'h03930089; // Auto-synchronizing i2c sequencers with the command ones
// parameter FPGA_VERSION = 32'h0393008b; // parallel, all met, 82.06% . Reran 0.051ns error, 82.02%
// parameter FPGA_VERSION = 32'h03930088; // Fixing circbuf rollover pointers bug (only one path violated)
// parameter FPGA_VERSION = 32'h0393008a; // HiSPI sensor (14 MPix) no timing errors
// parameter FPGA_VERSION = 32'h03930089; // Auto-synchronizing i2c sequencers with the command ones
// parameter FPGA_VERSION = 32'h03930088; // Fixing circbuf rollover pointers bug (only one path violated)
// parameter FPGA_VERSION = 32'h03930087; // Fixed default 90% quantization table
// parameter FPGA_VERSION = 32'h03930087; // Fixed default 90% quantization table
// parameter FPGA_VERSION = 32'h03930087; // Synchronizing i2c sequencer frame number with that of a command sequencer
// parameter FPGA_VERSION = 32'h03930087; // Synchronizing i2c sequencer frame number with that of a command sequencer
// parameter FPGA_VERSION = 32'h03930086; // Adding byte-wide JTAG read to speed-up 10359 load
// parameter FPGA_VERSION = 32'h03930086; // Adding byte-wide JTAG read to speed-up 10359 load
...
...
py393/x393_jpeg.py
View file @
530030f6
...
@@ -1104,6 +1104,10 @@ write_sensor_i2c 0 1 0 0x302e0010
...
@@ -1104,6 +1104,10 @@ write_sensor_i2c 0 1 0 0x302e0010
#Exposure 0x800 lines
#Exposure 0x800 lines
write_sensor_i2c 0 1 0 0x30120800
write_sensor_i2c 0 1 0 0x30120800
#test - running 8, 8-bit
write_sensor_i2c 0 1 0 0x30700101
################## Serial - chn3 ####################
################## Serial - chn3 ####################
cd /usr/local/verilog/; test_mcntrl.py @hargs
cd /usr/local/verilog/; test_mcntrl.py @hargs
bitstream_set_path /usr/local/verilog/x393_hispi.bit
bitstream_set_path /usr/local/verilog/x393_hispi.bit
...
@@ -1124,7 +1128,7 @@ write_sensor_i2c 3 1 0 0x3028000a
...
@@ -1124,7 +1128,7 @@ write_sensor_i2c 3 1 0 0x3028000a
write_sensor_i2c 3 1 0 0x302c000d
write_sensor_i2c 3 1 0 0x302c000d
write_sensor_i2c 3 1 0 0x302e0010
write_sensor_i2c 3 1 0 0x302e0010
#exposure
#exposure
write_sensor_i2c 3 1 0 0x30120
2
00
write_sensor_i2c 3 1 0 0x30120
8
00
compressor_control 3 2
compressor_control 3 2
...
...
system_defines.vh
View file @
530030f6
...
@@ -42,6 +42,8 @@
...
@@ -42,6 +42,8 @@
`define SYSTEM_DEFINES
`define SYSTEM_DEFINES
// TODO: Later compare instantiate/infer
// TODO: Later compare instantiate/infer
`define INSTANTIATE_DSP48E1
`define INSTANTIATE_DSP48E1
`define DEBUG_DCT1D // undefine after debugging is over
// `define USE_OLD_DCT
// Parameters from x393_sata project
// Parameters from x393_sata project
`define USE_DRP
`define USE_DRP
...
...
x393_1_7_0.Doxyfile
View file @
530030f6
...
@@ -38,7 +38,7 @@ PROJECT_NUMBER = 1.0
...
@@ -38,7 +38,7 @@ PROJECT_NUMBER = 1.0
# If a relative path is entered, it will be relative to the location
# If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used.
# where doxygen was started. If left blank the current directory will be used.
OUTPUT_DIRECTORY =
OUTPUT_DIRECTORY =
x393_docs
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# 4096 sub-directories (in 2 levels) under the output directory of each output
# 4096 sub-directories (in 2 levels) under the output directory of each output
...
@@ -616,7 +616,7 @@ RECURSIVE = YES
...
@@ -616,7 +616,7 @@ RECURSIVE = YES
# excluded from the INPUT source files. This way you can easily exclude a
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE = unisims ddr3 x353 debug helpers html py393 glbl.v IVERILOG_INCLUDE.v x393_sata/system_defines.vh x393_sata/top_tmp.v
EXCLUDE = unisims ddr3 x353 debug helpers html py393 glbl.v IVERILOG_INCLUDE.v x393_sata/system_defines.vh x393_sata/top_tmp.v
# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
# directories that are symbolic links (a Unix filesystem feature) are excluded
# directories that are symbolic links (a Unix filesystem feature) are excluded
...
...
x393_testbench04.sav
View file @
530030f6
[*]
[*]
[*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI
[*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI
[*]
Sun Jun 12 10:04:58
2016
[*]
Mon Jun 13 02:28:45
2016
[*]
[*]
[dumpfile] "/home/andrey/git/x393/simulation/x393_testbench03-20160612
033213998
.fst"
[dumpfile] "/home/andrey/git/x393/simulation/x393_testbench03-20160612
183504062
.fst"
[dumpfile_mtime] "
Sun Jun 12 09:48:19
2016"
[dumpfile_mtime] "
Mon Jun 13 00:51:06
2016"
[dumpfile_size] 85
326946
[dumpfile_size] 85
539825
[savefile] "/home/andrey/git/x393/x393_testbench04.sav"
[savefile] "/home/andrey/git/x393/x393_testbench04.sav"
[timestart]
906968
00
[timestart]
749000
00
[size] 1823 1180
[size] 1823 1180
[pos] 0 0
[pos] 0 0
*-
15.073349 90841667
209370000 209396667 209423333 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
*-
22.194141 94601000
209370000 209396667 209423333 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] x393_testbench03.
[treeopen] x393_testbench03.
[treeopen] x393_testbench03.read_compressor_frame_irq.
[treeopen] x393_testbench03.read_compressor_frame_irq.
[treeopen] x393_testbench03.read_contol_register_irq.
[treeopen] x393_testbench03.read_contol_register_irq.
...
@@ -33,6 +33,11 @@
...
@@ -33,6 +33,11 @@
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[2].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[2].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[3].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[3].
[treeopen] x393_testbench03.x393_i.compressor393_i.genblk3.
[treeopen] x393_testbench03.x393_i.compressor393_i.genblk3.
...
@@ -62,10 +67,10 @@
...
@@ -62,10 +67,10 @@
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.sensor_i2c_io_i.
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.sensor_i2c_io_i.
[sst_width]
238
[sst_width]
395
[signals_width]
260
[signals_width]
338
[sst_expanded] 1
[sst_expanded] 1
[sst_vpaned_height] 42
0
[sst_vpaned_height] 42
1
@820
@820
x393_testbench03.TEST_TITLE[639:0]
x393_testbench03.TEST_TITLE[639:0]
@c00200
@c00200
...
@@ -1600,7 +1605,164 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct
...
@@ -1600,7 +1605,164 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.dv
@420
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.d_out[12:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.d_out[12:0]
@200
-dct_chen_out
@420
[color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.d_out[12:0]
@800200
-chn1
@200
-xdct
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.tm_di[15:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.d_out[12:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct_dout_debug[12:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.dv
@200
-dct_chen
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.d_out[12:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dv
@c08420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(8)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(9)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(10)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(11)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(12)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
@1401200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dbg_stage1_pre2_en_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.pre2_start_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1_out[15:0]
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
@1401200
-group_end
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase_cnt[2:0]
@800200
-transpose
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre2_start
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@1401200
-group_end
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
@1401200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.din[15:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.dout_10_32_76_54[31:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.en_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.rstop_r[1:0]
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcol13
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow[2:0]
@1401200
-group_end
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
@1401200
-group_end
@800200
-debug_start_stop
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start4_first
@29
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.buf_ready_w
@8022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.dct_pipeline_delay_cntr[5:0]
@800022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(8)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
@1001200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_end
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_release_buf
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre2_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.yc_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_start
@200
-
@1000200
@1000200
-debug_start_stop
-transpose
-chn1
-xdct
-xdct
@800200
@800200
-dct_chen
-dct_chen
...
@@ -2169,9 +2331,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2169,9 +2331,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
(23)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.simd_b4[23:0]
(23)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.simd_b4[23:0]
@1401200
@1401200
-group_end
-group_end
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout1_w[23:0]
@1401200
-ma1
-ma1
@c00200
@c00200
-ma2
-ma2
...
@@ -2195,8 +2354,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2195,8 +2354,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_seld_2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_seld_2
@420
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_p_2[47:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_p_2[47:0]
[color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout2_w[23:0]
@800200
@800200
-dsp48e1
-dsp48e1
@420
@420
...
@@ -2243,6 +2400,25 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2243,6 +2400,25 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.en_in_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.en_in_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_en
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_en
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.pre2_start
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.cntr_in[3:0]
@800022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
@1001200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.start_out_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.en
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.en_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.per_type[2:0]
@c00200
@c00200
-reorder_in
-reorder_in
@28
@28
...
@@ -2270,15 +2446,9 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2270,15 +2446,9 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
-stage1_dbg
-stage1_dbg
@28
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout1_w[23:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout2_w[23:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.pre2_start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.pre2_start
@420
[color] 3
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.din[23:0]
@22
@22
[color] 3
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.cntr_in[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.cntr_in[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.waddr[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.waddr[3:0]
@28
@28
...
@@ -2289,13 +2459,8 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2289,13 +2459,8 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_en_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_en_out
@420
[color] 6
[color] 6
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_d_out[23:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.stage1_pre2_start_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.stage1_pre2_start_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_out[23:0]
@200
@200
-
-
@1000200
@1000200
...
@@ -2316,10 +2481,10 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2316,10 +2481,10 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r
@22
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre_rstart_w
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre_rstart_w
@
800023
@
c00022
[color] 2
[color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@2
9
@2
8
[color] 2
[color] 2
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
[color] 2
[color] 2
...
@@ -2334,7 +2499,7 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
...
@@ -2334,7 +2499,7 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
[color] 2
[color] 2
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@1
001201
@1
401200
-group_end
-group_end
@22
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wpage
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wpage
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment