Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
X
x393
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Elphel
x393
Commits
530030f6
Commit
530030f6
authored
Jun 13, 2016
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Switched to new implementation of 8x8 DCT, generated documentation
parent
0e866d77
Changes
17
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
453 additions
and
849 deletions
+453
-849
.gitignore
.gitignore
+1
-1
cmprs_macroblock_buf_iface.v
compressor_jp/cmprs_macroblock_buf_iface.v
+22
-4
jp_channel.v
compressor_jp/jp_channel.v
+15
-48
xdct393r.v
compressor_jp/xdct393r.v
+0
-660
dct1d_chen.v
dsp/dct1d_chen.v
+75
-20
dct1d_chen_reorder_in.v
dsp/dct1d_chen_reorder_in.v
+5
-5
dct1d_chen_reorder_out.v
dsp/dct1d_chen_reorder_out.v
+27
-12
dct2d8x8_chen.v
dsp/dct2d8x8_chen.v
+56
-26
dct_chen_transpose.v
dsp/dct_chen_transpose.v
+12
-6
dsp_addsub_simd.v
dsp/dsp_addsub_simd.v
+10
-10
dsp_ma.v
dsp/dsp_ma.v
+5
-5
dsp_ma_preadd.v
dsp/dsp_ma_preadd.v
+5
-5
fpga_version.vh
fpga_version.vh
+17
-15
x393_jpeg.py
py393/x393_jpeg.py
+5
-1
system_defines.vh
system_defines.vh
+2
-0
x393_1_7_0.Doxyfile
x393_1_7_0.Doxyfile
+2
-2
x393_testbench04.sav
x393_testbench04.sav
+194
-29
No files found.
.gitignore
View file @
530030f6
...
...
@@ -23,7 +23,7 @@ py393/dbg*
debug/*
html/*
man/*
x393_docs/*
includes/x393_cur_params_sim.vh
includes/x393_cur_params_target_*.vh
py393/exp_gpio.py
...
...
compressor_jp/cmprs_macroblock_buf_iface.v
View file @
530030f6
...
...
@@ -40,7 +40,14 @@
*/
`timescale
1
ns
/
1
ps
module
cmprs_macroblock_buf_iface
(
module
cmprs_macroblock_buf_iface
#(
`ifdef
USE_OLD_DCT
parameter
DCT_PIPELINE_PAUSE
=
0
// No need to delay
`else
parameter
DCT_PIPELINE_PAUSE
=
48
// TODO: find really required value (minimal), adjust counter bits (now 6)
// 48 seems to be OK (may be less)
`endif
)(
// input rst,
input
xclk
,
// global clock input, compressor single clock rate
...
...
@@ -98,6 +105,7 @@ module cmprs_macroblock_buf_iface (
wire
frame_pre_start_w
;
// start sequence for a new frame
reg
frame_pre_start_r
;
reg
[
8
:
0
]
mb_pre_start
;
// 1-hot macroblock pre start calculations - TODO: adjust width
reg
mb_pre_start4_first
;
// first cycle after mb_pre_start[3]
wire
[
2
:
0
]
buf_diff
;
// difference between page needed and next valid - should be negative to have it ready
wire
buf_ready_w
;
// External memory buffer has all the pages needed
...
...
@@ -117,6 +125,8 @@ module cmprs_macroblock_buf_iface (
reg
frame_pre_run
;
reg
[
1
:
0
]
frame_may_start
;
reg
[
5
:
0
]
dct_pipeline_delay_cntr
;
`ifdef
DEBUG_RING
assign
dbg_add_invalid
=
add_invalid
;
assign
dbg_mb_release_buf
=
mb_release_buf
;
...
...
@@ -180,9 +190,17 @@ module cmprs_macroblock_buf_iface (
// calculate before starting each macroblock (will wait if buffer is not ready) (TODO: align mb_pre_start[0] to mb_pre_end[2] - same)
//mb_pre_start_w
if
(
!
frame_en_r
)
mb_pre_start
<=
0
;
if
(
mb_pre_start_w
)
mb_pre_start
<=
1
;
else
if
(
!
mb_pre_start
[
4
]
||
buf_ready_w
)
mb_pre_start
<=
mb_pre_start
<<
1
;
// TODO: Here enforce minimal pause (if not zero) for the DCT pipeline to recover
// will wait for buf_ready_w, but not less than DCT_PIPELINE_PAUSE (or no wait at all)
mb_pre_start4_first
<=
mb_pre_start
[
3
]
;
if
(
xrst
)
dct_pipeline_delay_cntr
<=
0
;
else
if
(
mb_pre_start4_first
&&
!
buf_ready_w
)
dct_pipeline_delay_cntr
<=
DCT_PIPELINE_PAUSE
-
1
;
else
if
(
|
dct_pipeline_delay_cntr
)
dct_pipeline_delay_cntr
<=
dct_pipeline_delay_cntr
-
1
;
if
(
!
frame_en_r
)
mb_pre_start
<=
0
;
if
(
mb_pre_start_w
)
mb_pre_start
<=
1
;
else
if
(
!
mb_pre_start
[
4
]
||
(
buf_ready_w
&&
!
(
|
dct_pipeline_delay_cntr
)))
mb_pre_start
<=
mb_pre_start
<<
1
;
if
(
mb_pre_start
[
1
])
mbl_x_r
[
6
:
3
]
<=
mb_first_in_row
?
{
2'b0
,
left_marg
[
4
:
3
]
}
:
mbl_x_next_r
[
6
:
3
]
;
if
(
mb_pre_start
[
2
])
mbl_x_last_r
[
7
:
3
]
<=
{
1'b0
,
mbl_x_r
[
6
:
3
]
}
+
{
2'b0
,
mb_w_m1
[
5
:
3
]
};
...
...
compressor_jp/jp_channel.v
View file @
530030f6
...
...
@@ -965,39 +965,10 @@ module jp_channel#(
if
(
dct_last_in
)
first_block_dct
<=
first_block_color_after
;
end
`ifdef
USE_OLD_XDCT393
xdct393
xdct393_i
(
.
clk
(
xclk
)
,
// input
.
en
(
frame_en
)
,
// input if zero will reset transpose memory page numbers
.
start
(
dct_start
)
,
// input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.
xin
(
yc_nodc
)
,
// input[9:0]
.
last_in
(
dct_last_in
)
,
// output reg output high during input of the last of 64 pixels in a 8x8 block //
.
pre_first_out
(
dct_pre_first_out
)
,
// outpu 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
dv
()
,
// not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
d_out
(
dct_out
)
// output[12:0]
)
;
`else
xdct393r
xdct393_i
(
.
clk
(
xclk
)
,
// input
.
en
(
frame_en
)
,
// input if zero will reset transpose memory page numbers
.
start
(
dct_start
)
,
// input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.
xin
(
yc_nodc
)
,
// input[9:0]
.
last_in
(
dct_last_in
)
,
// output reg output high during input of the last of 64 pixels in a 8x8 block //
.
pre_first_out
(
dct_pre_first_out
)
,
// outpu 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
dv
()
,
// not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.
d_out
(
dct_out
)
// output[12:0]
)
;
/* New DCT, now in passive mode */
// TODO: enforce minimal pause (when not butted together)
wire
dct_last_in_debug
;
wire
dct_pre_first_out_debug
;
wire
dct_dv_debug
;
wire
[
12
:
0
]
dct_dout_debug
;
// 8x8 DCT implementing Chen algorithm and 2 passes
// Each pass (1d) uses 5 DSP48E1 modules (2 multipliers and 3 SIMD (2x24) adder/subtracters).
// Needs a small (<48, but not calculated yet) pause between blocks if they do not come
// immediately after each other. This pause is needed to restart the pipeline
dct2d8x8_chen
#(
.
INPUT_WIDTH
(
10
)
,
...
...
@@ -1005,27 +976,23 @@ module jp_channel#(
.
STAGE1_SAFE_BITS
(
3
)
,
.
STAGE2_SAFE_BITS
(
3
)
,
.
TRANSPOSE_WIDTH
(
16
)
,
.
TRIM_STAGE_1
(
0
)
,
.
TRIM_STAGE_2
(
2
)
,
.
TRIM_STAGE_1
(
1
)
,
.
TRIM_STAGE_2
(
0
)
,
.
DSP_WIDTH
(
24
)
,
.
DSP_OUT_WIDTH
(
24
)
,
.
DSP_B_WIDTH
(
18
)
,
.
DSP_A_WIDTH
(
25
)
,
.
DSP_P_WIDTH
(
48
)
,
.
DSP_M_WIDTH
(
43
)
.
DSP_P_WIDTH
(
48
)
)
dct2d8x8_chen_i
(
.
clk
(
xclk
)
,
// input
.
rst
(
!
frame_en
)
,
// input
.
start
(
dct_start
)
,
// input
.
xin
(
yc_nodc
)
,
// input[9:0] signed
.
last_in
(
dct_last_in
_debug
)
,
// output reg
.
pre_first_out
(
dct_pre_first_out
_debug
)
,
// output
.
dv
(
dct_dv_debug
)
,
// output
.
d_out
(
dct_
dout_debug
)
// output[12:0] signed
.
clk
(
xclk
)
,
// input
.
rst
(
!
frame_en
)
,
// input
.
start
(
dct_start
)
,
// input
.
xin
(
yc_nodc
)
,
// input[9:0] signed
.
last_in
(
dct_last_in
)
,
// output reg
.
pre_first_out
(
dct_pre_first_out
)
,
// output
.
dv
(
)
,
// output
.
d_out
(
dct_
out
)
// output[12:0] signed
)
;
`endif
wire
quant_start
;
dly_16
#(
.
WIDTH
(
1
))
i_quant_start
(
.
clk
(
xclk
)
,.
rst
(
1'b0
)
,
.
dly
(
4'd0
)
,
.
din
(
dct_pre_first_out
)
,
.
dout
(
quant_start
))
;
// dly=0+1
...
...
compressor_jp/xdct393r.v
deleted
100644 → 0
View file @
0e866d77
This diff is collapsed.
Click to expand it.
dsp/dct1d_chen.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen
* @file dct1d_chen.v
* @date
:
2016-06-05
* @author
:
Andrey Filippov
* @date
2016-06-05
* @author
Andrey Filippov
*
* @brief
:
1d 8-point DCT based on Chen algorithm
* @brief 1d 8-point DCT based on Chen algorithm
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,17 +35,19 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dct1d_chen
#(
parameter
WIDTH
=
24
,
parameter
OUT_WIDTH
=
24
,
parameter
OUT_WIDTH
=
16
,
parameter
B_WIDTH
=
18
,
parameter
A_WIDTH
=
25
,
parameter
P_WIDTH
=
48
,
parameter
M_WIDTH
=
43
,
// actual multiplier width (== (A_WIDTH +B_WIDTH)
// parameter M_WIDTH = 43, // actual multiplier width (== (A_WIDTH +B_WIDTH)
parameter
ROUND_OUT
=
8
,
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
parameter
COSINE_SHIFT
=
17
,
parameter
COS_1_16
=
128553
,
// (1<<17) * cos(1*pi/16)
parameter
COS_2_16
=
121095
,
// (1<<17) * cos(2*pi/16)
parameter
COS_3_16
=
108982
,
// (1<<17) * cos(3*pi/16)
...
...
@@ -59,11 +61,13 @@ module dct1d_chen#(
input
en
,
input
[
2
*
WIDTH
-
1
:
0
]
d10_32_76_54
,
// Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]}
input
start
,
// {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]}
output
[
WIDTH
-
1
:
0
]
dout
,
output
[
OUT_WIDTH
-
1
:
0
]
dout
,
output
reg
pre2_start_out
,
// 2 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
output
reg
en_out
// valid at the same time slot as pre2_start_out (goes active with pre2_start_out)
)
;
localparam
TOTAL_RSHIFT
=
COSINE_SHIFT
+
ROUND_OUT
;
localparam
BEFORE_SAT_WIDTH
=
P_WIDTH
-
TOTAL_RSHIFT
;
reg
signed
[
B_WIDTH
-
1
:
0
]
dsp_ma_bin
;
wire
dsp_ma_ceb1_1
;
// load b1 register
wire
dsp_ma_ceb2_1
;
// load b2 register
...
...
@@ -94,6 +98,7 @@ module dct1d_chen#(
wire
dsp_ma_neg_m_2
;
// 1 - negate multiplier result
wire
dsp_ma_accum_2
;
// 0 - use multiplier result, 1 add to accumulator
wire
signed
[
P_WIDTH
-
1
:
0
]
dsp_ma_p_2
;
wire
signed
[
P_WIDTH
-
1
:
0
]
dsp_ma_p_mux
;
// Multiplier A/D inputs before shift
wire
signed
[
WIDTH
-
1
:
0
]
dsp_ma_ain24_1
;
...
...
@@ -142,10 +147,25 @@ module dct1d_chen#(
reg
[
7
:
0
]
phase
;
reg
[
2
:
0
]
phase_cnt
;
reg
[
OUT_WIDTH
-
1
:
0
]
dout_r
;
wire
[
OUT_WIDTH
-
1
:
0
]
dout1_w
;
wire
[
OUT_WIDTH
-
1
:
0
]
dout2_w
;
// wire [OUT_WIDTH -1:0] dout1_w;
// wire [OUT_WIDTH -1:0] dout2_w;
wire
dout_round_c
;
wire
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round_w
;
// after rounding, before (optional) saturation
reg
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round_r
;
// after rounding, before (optional) saturation
wire
[
OUT_WIDTH
-
1
:
0
]
dout_sat_w
;
wire
[
BEFORE_SAT_WIDTH
-
1
:
0
]
dout_round
;
// after rounding, before (optional) saturation
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
// Temporarily adding 1 extra latency cycle for rounding/saturation. TODO: Remove when moved to DSP itself
reg
pre3_start_out
;
// 3 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
reg
pre_en_out
;
// valid at the same time slot as pre3_start_out (goes active with pre3_start_out); en_out follows one clock later
// .ain ({simd_a1,simd_a0}), // input[47:0]
// .bin ({simd_b1,simd_b0}), // input[47:0]
// dsp_addsub_simd1_i input connections
...
...
@@ -233,7 +253,7 @@ module dct1d_chen#(
assign
dsp_ma_ced_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_sela_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_seld_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
5
]
|
phase
[
7
]
;
assign
dsp_ma_neg_m_2
=
phase
[
6
]
;
assign
dsp_ma_neg_m_2
=
phase
[
1
]
|
phase
[
6
]
;
assign
dsp_ma_accum_2
=
phase
[
0
]
|
phase
[
2
]
|
phase
[
4
]
|
phase
[
6
]
;
// dsp_ma2_i data input connections
assign
dsp_ma_ain24_2
=
simd_p5
;
...
...
@@ -255,10 +275,37 @@ module dct1d_chen#(
// assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign
dout1_w
=
dsp_ma_p_1
[
COSINE_SHIFT
+:
WIDTH
]
;
// adding one bit for adder (two MPY outputs are added)
assign
dout2_w
=
dsp_ma_p_2
[
COSINE_SHIFT
+:
WIDTH
]
;
// adding one bit for adder (two MPY outputs are added)
assign
dsp_ma_p_mux
=
phase_cnt
[
0
]
?
dsp_ma_p_1
:
dsp_ma_p_2
;
// assign dout1_w = dsp_ma_p_1[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign
dout_round_c
=
dsp_ma_p_mux
[
TOTAL_RSHIFT
-
1
]
;
assign
dout_round_w
=
dsp_ma_p_mux
[
TOTAL_RSHIFT
+:
BEFORE_SAT_WIDTH
]
+
dout_round_c
;
// Saturation (only if BEFORE_SAT_WIDTH > OUT_WIDTH)
localparam
TRIM_MSB
=
BEFORE_SAT_WIDTH
-
OUT_WIDTH
;
generate
if
(
TRIM_MSB
<
0
)
begin
// should never happen
assign
dout_sat_w
=
{
{
(
-
TRIM_MSB
)
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}},
dout_round
};
end
else
if
(
TRIM_MSB
==
0
)
begin
assign
dout_sat_w
=
dout_round
[
0
+:
OUT_WIDTH
]
;
end
else
begin
//! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign
dout_sat_w
=
(
dout_round
[
BEFORE_SAT_WIDTH
-
1
-:
TRIM_MSB
]
==
{
TRIM_MSB
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}}
)
?
dout_round
[
0
+:
OUT_WIDTH
]
:
{
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
,
{
OUT_WIDTH
-
1
{~
dout_round
[
BEFORE_SAT_WIDTH
-
1
]
}}};
end
endgenerate
// to possibly remove registers with generate
assign
dout_round
=
dout_round_r
;
//BEFORE_SAT_WIDTH
// wire dout_round_c;
// wire [OUT_WIDTH -1:0] dout_round_w;
//ROUND_OUT
//phase_cnt[0] ? dout1_w : dout2_w;
assign
dout
=
dout_r
;
always
@
(
posedge
clk
)
begin
...
...
@@ -284,16 +331,24 @@ module dct1d_chen#(
3'h6
:
dsp_ma_bin
<=
COS_4_16
;
3'h7
:
dsp_ma_bin
<=
COS_6_16
;
endcase
dout_r
<=
phase_cnt
[
0
]
?
dout1_w
:
dout2_w
;
// dout_r <= phase_cnt[0] ? dout1_w : dout2_w;
dout_round_r
<=
dout_round_w
;
dout_r
<=
dout_sat_w
;
if
(
rst
)
pre2_start_out
<=
0
;
else
pre2_start_out
<=
(
per_type
==
2
)
&&
phase
[
3
]
;
if
(
rst
)
pre3_start_out
<=
0
;
else
pre3_start_out
<=
(
per_type
==
2
)
&&
phase
[
3
]
;
pre2_start_out
<=
pre3_start_out
;
if
(
rst
||
!
(
en
||
(
|
phase
)))
en_out
<=
0
;
if
(
rst
||
!
(
en
||
(
|
phase
)))
pre_en_out
<=
0
;
else
if
(
phase
[
3
])
begin
if
(
per_type
==
2
)
en_out
<=
1
;
else
if
(
per_type
[
2
])
en_out
<=
0
;
end
if
(
per_type
==
2
)
pre_en_out
<=
1
;
else
if
(
per_type
[
2
])
pre_en_out
<=
0
;
end
en_out
<=
pre_en_out
;
end
dsp_addsub_simd
#(
...
...
dsp/dct1d_chen_reorder_in.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen_reorder_in
* @file dct1d_chen_reorder_in.v
* @date
:
2016-06-08
* @author
:
Andrey Filippov
* @date
2016-06-08
* @author
Andrey Filippov
*
* @brief
:
Reorder scan-line pixel stream for dct1d_chen module
* @brief Reorder scan-line pixel stream for dct1d_chen module
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dct1d_chen_reorder_in
#(
...
...
dsp/dct1d_chen_reorder_out.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct1d_chen_reorder_out
* @file dct1d_chen_reorder_out.v
* @date
:
2016-06-08
* @author
:
Andrey Filippov
* @date
2016-06-08
* @author
Andrey Filippov
*
* @brief
:
Reorder data from dct1d_chen output to natural sequence
* @brief Reorder data from dct1d_chen output to natural sequence
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dct1d_chen_reorder_out
#(
...
...
@@ -62,6 +62,7 @@ module dct1d_chen_reorder_out#(
reg
[
2
:
0
]
per_type
;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
reg
start_out_r
;
reg
en_out_r
;
wire
stop_out
;
// qualify with en
assign
dout
=
dout_r
;
assign
start_out
=
start_out_r
;
assign
en_out
=
en_out_r
;
...
...
@@ -98,16 +99,30 @@ module dct1d_chen_reorder_out#(
if
((
per_type
==
2
)
&&
(
cntr_in
==
1
))
raddr
<=
{~
cntr_in
[
3
]
,
3'b0
};
else
if
((
raddr
[
2
:
0
]
!=
0
)
||
(
per_type
!=
0
))
raddr
<=
raddr
+
1
;
dout_r
<=
reord_buf_ram
[
raddr
]
;
if
(
en_out_r
)
dout_r
<=
reord_buf_ram
[
raddr
]
;
start_out_r
<=
(
per_type
==
2
)
&&
(
cntr_in
==
1
)
;
if
(
rst
||
(
per_type
==
0
)
)
en_out_r
<=
0
;
else
if
(
cntr_in
==
1
)
en_out_r
<=
(
per_type
==
2
)
||
!
per_type
[
2
]
;
if
(
rst
)
dv
<=
0
;
else
if
(
start_out_r
)
dv
<=
1
;
else
if
((
raddr
[
2
:
0
]
==
0
)
&&
!
en_out_r
)
dv
<=
0
;
if
(
rst
||
(
per_type
==
0
)
)
en_out_r
<=
0
;
// else if (cntr_in == 1) en_out_r <= (per_type == 2) || !per_type[2];
else
if
((
cntr_in
==
1
)
&&
(
per_type
==
2
))
en_out_r
<=
1
;
else
if
(
stop_out
&&
!
en
)
en_out_r
<=
0
;
//stop_out
dv
<=
en_out_r
;
// if (rst) dv <= 0;
// else if (start_out_r) dv <= 1;
// else if ((raddr[2:0] == 0) && !en_out_r) dv <= 0;
end
dly01_16
dly01_16_i
(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
dly
(
4'd8
)
,
// input[3:0]
.
din
((
&
cntr_in
[
2
:
0
])
&&
!
en
)
,
// input
.
dout
(
stop_out
)
// output
)
;
endmodule
dsp/dct2d8x8_chen.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct2d8x8_chen
* @file dct2d8x8_chen.v
* @date
:
2016-06-10
* @author
:
Andrey Filippov
* @date
2016-06-10
* @author
Andrey Filippov
*
* @brief
:
2-d DCT implementation of Chen algorithm
* @brief 2-d DCT implementation of Chen algorithm
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dct2d8x8_chen
#(
...
...
@@ -45,13 +45,13 @@ module dct2d8x8_chen#(
parameter
STAGE2_SAFE_BITS
=
3
,
// leave this number of extra bits on DCT1D input to prevent output saturation
parameter
TRANSPOSE_WIDTH
=
16
,
// transpose memory width
parameter
TRIM_STAGE_1
=
1
,
// Trim these MSBs from the stage1 results (1 - matches old DCT)
parameter
TRIM_STAGE_2
=
2
,
// Trim these MSBs from the stage2 results TODO: put real value
parameter
TRIM_STAGE_2
=
0
,
// Trim these MSBs from the stage2 results
parameter
DSP_WIDTH
=
24
,
parameter
DSP_OUT_WIDTH
=
24
,
//
parameter DSP_OUT_WIDTH = 24,
parameter
DSP_B_WIDTH
=
18
,
parameter
DSP_A_WIDTH
=
25
,
parameter
DSP_P_WIDTH
=
48
,
parameter
DSP_M_WIDTH
=
43
// actual multiplier width (== (A_WIDTH +B_WIDTH)
parameter
DSP_P_WIDTH
=
48
//
parameter DSP_M_WIDTH = 43 // actual multiplier width (== (A_WIDTH +B_WIDTH)
)
(
input
clk
,
/// system clock, posedge
input
rst
,
// sync reset
...
...
@@ -68,6 +68,8 @@ module dct2d8x8_chen#(
localparam
REPLICATE_IN_STAGE2
=
STAGE2_SAFE_BITS
;
localparam
PAD_IN_STAGE2
=
DSP_WIDTH
-
TRANSPOSE_WIDTH
-
STAGE2_SAFE_BITS
;
localparam
ROUND_STAGE1
=
DSP_WIDTH
-
TRANSPOSE_WIDTH
-
TRIM_STAGE_1
;
localparam
ROUND_STAGE2
=
DSP_WIDTH
-
OUTPUT_WIDTH
-
TRIM_STAGE_2
;
reg
signed
[
INPUT_WIDTH
-
1
:
0
]
xin_r
;
...
...
@@ -82,7 +84,7 @@ module dct2d8x8_chen#(
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct1in_pad_l
;
wire
signed
[
DSP_OUT
_WIDTH
-
1
:
0
]
dct1_out
;
wire
signed
[
TRANSPOSE
_WIDTH
-
1
:
0
]
dct1_out
;
wire
stage1_pre2_start_out
;
// wire stage1_pre2_en_out;
...
...
@@ -94,20 +96,43 @@ module dct2d8x8_chen#(
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_h
;
wire
signed
[
DSP_WIDTH
-
1
:
0
]
dct2in_pad_l
;
wire
signed
[
DSP_O
UT_WIDTH
-
1
:
0
]
dct2_out
;
wire
signed
[
OUTP
UT_WIDTH
-
1
:
0
]
dct2_out
;
wire
stage2_pre2_start_out
;
wire
stage2_pre2_en_out
;
wire
signed
[
OUTPUT_WIDTH
-
1
:
0
]
dct2_trimmed
;
//
wire signed [OUTPUT_WIDTH-1:0] dct2_trimmed;
assign
dct1in_pad_h
=
{{
REPLICATE_IN_STAGE1
{
dct1in_h
[
INPUT_WIDTH
-
1
]
}},
dct1in_h
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
dct1in_pad_l
=
{{
REPLICATE_IN_STAGE1
{
dct1in_l
[
INPUT_WIDTH
-
1
]
}},
dct1in_l
,
{
PAD_IN_STAGE1
{
1'b0
}}};
assign
transpose_din
=
dct1_out
[
DSP_OUT_WIDTH
-
1
-
TRIM_STAGE_1
-:
TRANSPOSE_WIDTH
]
;
assign
transpose_din
=
dct1_out
;
/*
generate
if (TRIM_STAGE_1 == 0) begin
assign transpose_din = dct1_out[DSP_OUT_WIDTH-1 -:TRANSPOSE_WIDTH];
end else begin //! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign transpose_din = (dct1_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_1] == {TRIM_STAGE_1{dct1_out[DSP_OUT_WIDTH-1]}})?
dct1_out[DSP_OUT_WIDTH-1-TRIM_STAGE_1 -: TRANSPOSE_WIDTH]:
{dct1_out[DSP_OUT_WIDTH-1], {TRANSPOSE_WIDTH-1{~dct1_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
assign
dct2in_pad_h
=
{{
REPLICATE_IN_STAGE2
{
transpose_douth
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_douth
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2in_pad_l
=
{{
REPLICATE_IN_STAGE2
{
transpose_doutl
[
TRANSPOSE_WIDTH
-
1
]
}},
transpose_doutl
,
{
PAD_IN_STAGE2
{
1'b0
}}};
assign
dct2_trimmed
=
dct2_out
[
DSP_OUT_WIDTH
-
1
-
TRIM_STAGE_2
-:
OUTPUT_WIDTH
]
;
// assign dct2_trimmed = dct2_out;
/*
generate
if (TRIM_STAGE_2 == 0) begin
assign dct2_trimmed = dct2_out[DSP_OUT_WIDTH-1 -: OUTPUT_WIDTH];
end else begin //! saturate. Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign dct2_trimmed = (dct2_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_2] == {TRIM_STAGE_2{dct2_out[DSP_OUT_WIDTH-1]}})?
dct2_out[DSP_OUT_WIDTH-1-TRIM_STAGE_2 -:OUTPUT_WIDTH]:
{dct2_out[DSP_OUT_WIDTH-1], {OUTPUT_WIDTH-1{~dct2_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
always
@
(
posedge
clk
)
begin
start_in_r
<=
start
;
...
...
@@ -141,11 +166,11 @@ module dct2d8x8_chen#(
wire
dbg_stage1_pre2_en_out
;
dct1d_chen
#(
.
WIDTH
(
DSP_WIDTH
)
,
.
OUT_WIDTH
(
DSP_OUT_WIDTH
)
,
.
OUT_WIDTH
(
TRANSPOSE_WIDTH
)
,
//
DSP_OUT_WIDTH),
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
M_WIDTH
(
DSP_M_WIDTH
)
.
ROUND_OUT
(
ROUND_STAGE1
)
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
)
dct1d_chen_stage1_i
(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
...
...
@@ -170,12 +195,12 @@ module dct2d8x8_chen#(
)
;
dct1d_chen
#(
.
WIDTH
(
DSP_WIDTH
)
,
.
OUT_WIDTH
(
DSP_O
UT_WIDTH
)
,
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
M_WIDTH
(
DSP_M_WIDTH
)
.
WIDTH
(
DSP_WIDTH
)
,
.
OUT_WIDTH
(
OUTP
UT_WIDTH
)
,
.
B_WIDTH
(
DSP_B_WIDTH
)
,
.
A_WIDTH
(
DSP_A_WIDTH
)
,
.
P_WIDTH
(
DSP_P_WIDTH
)
,
.
ROUND_OUT
(
ROUND_STAGE2
)
// cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
)
dct1d_chen_stage2_i
(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
...
...
@@ -193,7 +218,7 @@ module dct2d8x8_chen#(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
.
en
(
stage2_pre2_en_out
)
,
// input
.
din
(
dct2_
trimmed
)
,
// input[23:0]
.
din
(
dct2_
out
)
,
// input[23:0]
.
pre2_start
(
stage2_pre2_start_out
)
,
// input
.
dout
(
d_out
)
,
// output[23:0]
.
start_out
(
pre_first_out
)
,
// output reg
...
...
@@ -202,13 +227,16 @@ module dct2d8x8_chen#(
)
;
// Just for debugging/comparing with old 1-d DCT:
wire
[
DSP_WIDTH
-
1
:
0
]
dbg_d_out
;
`ifdef
SIMULATION
// no sense to synthesize it
`ifdef
DEBUG_DCT1D
wire
[
TRANSPOSE_WIDTH
-
1
:
0
]
dbg_d_out
;
//wire [15:0] dbg_d_out13=dbg_d_out[7 +: 16] ;
wire
dbg_dv
;
wire
dbg_en_out
;
wire
dbg_pre_first_out
;
dct1d_chen_reorder_out
#(
.
WIDTH
(
DSP
_WIDTH
)
.
WIDTH
(
TRANSPOSE
_WIDTH
)
)
dct1d_chen_reorder_out_dbg_i
(
.
clk
(
clk
)
,
// input
.
rst
(
rst
)
,
// input
...
...
@@ -220,5 +248,7 @@ wire dbg_pre_first_out;
.
dv
(
dbg_dv
)
,
// output reg
.
en_out
(
dbg_en_out
)
// output reg
)
;
`endif
`endif
endmodule
dsp/dct_chen_transpose.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dct_chen_transpose
* @file dct_chen_transpose.v
* @date
:
2016-06-09
* @author
:
Andrey Filippov
* @date
2016-06-09
* @author
Andrey Filippov
*
* @brief
:
Reorder+transpose data between two 1-d DCT passes
* @brief Reorder+transpose data between two 1-d DCT passes
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dct_chen_transpose
#(
...
...
@@ -70,6 +70,7 @@ module dct_chen_transpose#(
reg
[
2
*
WIDTH
-
1
:
0
]
ram_reg2
;
wire
pre_rstart_w
=
wcntr
[
5
:
0
]
==
61
;
reg
[
1
:
0
]
rstop_r
;
reg
first_after_pause
;
// first block after pause - do not write 2 items to the "past"
assign
wpage
=
wcntr
[
6
]
^
wrow_mod
[
3
]
;
// previous page for row 0, col 1 & 3
assign
wrow_mod
=
{
1'b0
,
wrow
}
-
wcol13
;
...
...
@@ -93,7 +94,7 @@ module dct_chen_transpose#(
else
if
(
pre_we_r
)
wcntr
<=
wcntr
+
1
;
// including page, should be before 'if (pre2_start)'
else
if
(
pre2_start
)
wcntr
<=
{
wcntr
[
6
]
,
6'b0
};
// if happens during pre_we_r - will be ignored, otherwise (after pause) will zero in-page address
we_r
<=
pre_we_r
;
we_r
<=
pre_we_r
&&
(
!
first_after_pause
||
!
wcol13
||
(
|
wrow
))
;
// do not write first after pause to the "past"
if
(
we_r
)
transpose_ram
[
waddr
]
<=
din
;
...
...
@@ -118,6 +119,11 @@ module dct_chen_transpose#(
if
(
rst
)
en_out
<=
0
;
else
if
(
rcntr
==
1
)
en_out
<=
1
;
else
if
(
rstop_r
[
1
])
en_out
<=
0
;
if
(
rst
)
first_after_pause
<=
0
;
else
if
(
pre2_start
&&
!
we_r
)
first_after_pause
<=
1
;
else
if
(
&
wcntr
[
5
:
0
])
first_after_pause
<=
0
;
end
dly01_16
dly01_16_stop_i
(
...
...
dsp/dsp_addsub_simd.v
View file @
530030f6
/*
******************************************************************************
/*
!
* <b>Module:</b>dsp_addsub_simd
* @file dsp_addsub_simd.v
* @date
:
2016-06-05
* @author
:
Andrey Filippov
* @date
2016-06-05
* @author
Andrey Filippov
*
* @brief
:
SIMD adder/subtracter
* @brief SIMD adder/subtracter
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dsp_addsub_simd
#(
...
...
@@ -70,7 +70,7 @@ module dsp_addsub_simd#(
DSP48E1
#(
.
ACASCREG
(
1
)
,
.
ADREG
(
0
)
,
// (
1),
.
ADREG
(
1
)
,
.
ALUMODEREG
(
1
)
,
.
AREG
(
1
)
,
// (1)
.
AUTORESET_PATDET
(
"NO_RESET"
)
,
...
...
@@ -81,7 +81,7 @@ module dsp_addsub_simd#(
.
CARRYINREG
(
1
)
,
.
CARRYINSELREG
(
1
)
,
.
CREG
(
1
)
,
//(1),
.
DREG
(
0
)
,
//(
1),
.
DREG
(
1
)
,
.
INMODEREG
(
1
)
,
.
IS_ALUMODE_INVERTED
(
4'b0
)
,
.
IS_CARRYIN_INVERTED
(
1'b0
)
,
...
...
@@ -131,7 +131,7 @@ module dsp_addsub_simd#(
.
CECTRL
(
1'b1
)
,
// input
.
CED
(
1'b0
)
,
// input
.
CEINMODE
(
1'b1
)
,
// input
.
CEM
(
1'b
1
)
,
// input
.
CEM
(
1'b
0
)
,
// input
.
CEP
(
cep
)
,
// input
.
CLK
(
clk
)
,
// input
.
D
(
25'h1ffffff
)
,
// input[24:0]
...
...
@@ -145,9 +145,9 @@ module dsp_addsub_simd#(
.
RSTB
(
rst
)
,
// input
.
RSTC
(
rst
)
,
// input
.
RSTCTRL
(
rst
)
,
// input
.
RSTD
(
rst
)
,
// input
.
RSTD
(
1'b0
)
,
// input
.
RSTINMODE
(
rst
)
,
// input
.
RSTM
(
rst
)
,
// input
.
RSTM
(
1'b0
)
,
// input
.
RSTP
(
rst
)
// input
)
;
`else
...
...
dsp/dsp_ma.v
View file @
530030f6
/*
******************************************************************************
/*
!
* dsp_ma
* @file dsp_ma.v
* @date
:
2016-06-05
* @author
:
Andrey Filippov
* @date
2016-06-05
* @author
Andrey Filippov
*
* @brief
:
DSP with multi-input multiplier and accumulator
* @brief DSP with multi-input multiplier and accumulator
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/
`timescale
1
ns
/
1
ps
module
dsp_ma
#(
...
...
dsp/dsp_ma_preadd.v
View file @
530030f6
/*
******************************************************************************
/*
!
* dsp_ma_preadd
* @file dsp_ma_preadd.v
* @date
:
2016-06-05
* @author
:
Andrey Filippov
* @date
2016-06-05
* @author
Andrey Filippov
*
* @brief
:
DSP with multi-input multiplier and accumulator with pre-adder
* @brief DSP with multi-input multiplier and accumulator with pre-adder
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
...
...
@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*
******************************************************************************
/
*/