diff --git a/dct_tests_01.sav b/dct_tests_01.sav
index df3e21c9c3c86c32bac58882eb8bc4ca5c36f6ee..d4720f4e320f3ec330572c668f44cf7fbe93afcc 100644
--- a/dct_tests_01.sav
+++ b/dct_tests_01.sav
@@ -1,25 +1,26 @@
[*]
-[*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI
-[*] Tue Dec 6 17:55:24 2016
+[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI
+[*] Tue Dec 13 06:32:08 2016
[*]
-[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161206105514691.fst"
-[dumpfile_mtime] "Tue Dec 6 17:55:14 2016"
-[dumpfile_size] 10348
+[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161212230744155.fst"
+[dumpfile_mtime] "Tue Dec 13 06:07:44 2016"
+[dumpfile_size] 100634
[savefile] "/home/eyesis/git/x393-neon/dct_tests_01.sav"
[timestart] 0
[size] 1814 1171
-[pos] 1937 0
-*-18.387537 1752000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+[pos] 1920 0
+*-19.687614 1195000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] dct_tests_01.
[treeopen] dct_tests_01.dct_iv8_1d_i.
[treeopen] dct_tests_01.dct_iv8_1d_i.dsp_ma_preadd_c_1_i.
+[treeopen] dct_tests_01.dct_iv_8x8_i.
[sst_width] 204
-[signals_width] 305
+[signals_width] 325
[sst_expanded] 1
[sst_vpaned_height] 344
-@800200
+@c00200
-top
-@25
+@24
dct_tests_01.i
dct_tests_01.j
@28
@@ -87,10 +88,50 @@ dct_tests_01.y_we
dct_tests_01.phase_y[3:0]
dct_tests_01.y_dct[23:0]
dct_tests_01.y_out[23:0]
-@1000200
+dct_tests_01.dct_iv8_1d_i.y_index[2:0]
+@1401200
-top
@800200
+-2d-1d
+@28
+dct_tests_01.start
+@22
+dct_tests_01.x_out[23:0]
+@8420
+dct_tests_01.x_out[23:0]
+dct_tests_01.dct_iv8_1d_i.d_in[23:0]
+dct_tests_01.dct_iv8_1d_i.dout[23:0]
+@28
+dct_tests_01.dct_iv8_1d_i.en_out
+@22
+dct_tests_01.dct_iv8_1d_i.y_index[2:0]
+@8420
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.d_in[23:0]
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.dout[24:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.en_out
+@22
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.y_index[2:0]
+@8420
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.d_in[23:0]
+@8421
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.dout[24:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.en_out
+@22
+dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.y_index[2:0]
+@200
+-
+@1000200
+-2d-1d
+@c00200
-dct_iv8_1d
+@22
+[color] 2
+dct_tests_01.dct_iv8_1d_i.phase_cnt[3:0]
+@28
+dct_tests_01.dct_iv8_1d_i.en_out
+dct_tests_01.dct_iv8_1d_i.run_out
@c08022
dct_tests_01.phase_out[3:0]
@28
@@ -105,8 +146,6 @@ dct_tests_01.dct_iv8_1d_i.start
dct_tests_01.dct_iv8_1d_i.restart
dct_tests_01.dct_iv8_1d_i.clk
@8022
-[color] 2
-dct_tests_01.dct_iv8_1d_i.phase_cnt[3:0]
dct_tests_01.dct_iv8_1d_i.d_in[23:0]
dct_tests_01.dct_iv8_1d_i.dsp_ain_1[24:0]
@28
@@ -124,8 +163,8 @@ dct_tests_01.dct_iv8_1d_i.dsp_ced_1
@22
dct_tests_01.dct_iv8_1d_i.dsp_cin_1[47:0]
@28
-dct_tests_01.dct_iv8_1d_i.dsp_cec_1
dct_tests_01.dct_iv8_1d_i.dsp_neg_m_1
+dct_tests_01.dct_iv8_1d_i.dsp_cec_1
dct_tests_01.dct_iv8_1d_i.dsp_post_add_1
dct_tests_01.dct_iv8_1d_i.dsp_accum_1
@22
@@ -238,7 +277,305 @@ dct_tests_01.dct_iv8_1d_i.pre2_start_out
dct_tests_01.dct_iv8_1d_i.rst
dct_tests_01.dct_iv8_1d_i.run_in
dct_tests_01.dct_iv8_1d_i.run_out
-@1000200
+@1401200
-dct_iv8_1d
+@800200
+-st22d_test
+@28
+dct_tests_01.CLK
+dct_tests_01.RST
+[color] 2
+dct_tests_01.start
+[color] 2
+dct_tests_01.start2
+@c00022
+dct_tests_01.x_in_2d[23:0]
+@28
+(0)dct_tests_01.x_in_2d[23:0]
+(1)dct_tests_01.x_in_2d[23:0]
+(2)dct_tests_01.x_in_2d[23:0]
+(3)dct_tests_01.x_in_2d[23:0]
+(4)dct_tests_01.x_in_2d[23:0]
+(5)dct_tests_01.x_in_2d[23:0]
+(6)dct_tests_01.x_in_2d[23:0]
+(7)dct_tests_01.x_in_2d[23:0]
+(8)dct_tests_01.x_in_2d[23:0]
+(9)dct_tests_01.x_in_2d[23:0]
+(10)dct_tests_01.x_in_2d[23:0]
+(11)dct_tests_01.x_in_2d[23:0]
+(12)dct_tests_01.x_in_2d[23:0]
+(13)dct_tests_01.x_in_2d[23:0]
+(14)dct_tests_01.x_in_2d[23:0]
+(15)dct_tests_01.x_in_2d[23:0]
+(16)dct_tests_01.x_in_2d[23:0]
+(17)dct_tests_01.x_in_2d[23:0]
+(18)dct_tests_01.x_in_2d[23:0]
+(19)dct_tests_01.x_in_2d[23:0]
+(20)dct_tests_01.x_in_2d[23:0]
+(21)dct_tests_01.x_in_2d[23:0]
+(22)dct_tests_01.x_in_2d[23:0]
+(23)dct_tests_01.x_in_2d[23:0]
+@1401200
+-group_end
+@c08420
+dct_tests_01.x_in_2d[23:0]
+@28
+(0)dct_tests_01.x_in_2d[23:0]
+(1)dct_tests_01.x_in_2d[23:0]
+(2)dct_tests_01.x_in_2d[23:0]
+(3)dct_tests_01.x_in_2d[23:0]
+(4)dct_tests_01.x_in_2d[23:0]
+(5)dct_tests_01.x_in_2d[23:0]
+(6)dct_tests_01.x_in_2d[23:0]
+(7)dct_tests_01.x_in_2d[23:0]
+(8)dct_tests_01.x_in_2d[23:0]
+(9)dct_tests_01.x_in_2d[23:0]
+(10)dct_tests_01.x_in_2d[23:0]
+(11)dct_tests_01.x_in_2d[23:0]
+(12)dct_tests_01.x_in_2d[23:0]
+(13)dct_tests_01.x_in_2d[23:0]
+(14)dct_tests_01.x_in_2d[23:0]
+(15)dct_tests_01.x_in_2d[23:0]
+(16)dct_tests_01.x_in_2d[23:0]
+(17)dct_tests_01.x_in_2d[23:0]
+(18)dct_tests_01.x_in_2d[23:0]
+(19)dct_tests_01.x_in_2d[23:0]
+(20)dct_tests_01.x_in_2d[23:0]
+(21)dct_tests_01.x_in_2d[23:0]
+(22)dct_tests_01.x_in_2d[23:0]
+(23)dct_tests_01.x_in_2d[23:0]
+@1401200
+-group_end
+@28
+dct_tests_01.pre_busy_2d
+dct_tests_01.pre_last_in_2d
+dct_tests_01.pre_first_out_2d
+dct_tests_01.dv_2d
+@22
+dct_tests_01.d_out_2d[23:0]
+@28
+dct_tests_01.dv_2dr
+@22
+dct_tests_01.d_out_2dr[23:0]
+@8420
+dct_tests_01.d_out_2dr[23:0]
+@200
+-
+@800200
+-dct_iv_8x8
+@28
+dct_tests_01.dct_iv_8x8_i.clk
+dct_tests_01.dct_iv_8x8_i.start
+dct_tests_01.dct_iv_8x8_i.pre_last_in
+dct_tests_01.dct_iv_8x8_i.pre_busy
+dct_tests_01.dct_iv_8x8_i.x_run
+@c00022
+dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+(1)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+(2)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+(3)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+(4)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+(5)dct_tests_01.dct_iv_8x8_i.x_wa[5:0]
+@1401200
+-group_end
+@28
+dct_tests_01.dct_iv_8x8_i.dcth_phin_start
+@22
+dct_tests_01.dct_iv_8x8_i.dcth_phin_run
+@c00022
+dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(1)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(2)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(3)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(4)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(5)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+(6)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0]
+@1401200
+-group_end
+@22
+dct_tests_01.dct_iv_8x8_i.x_ra0[2:0]
+dct_tests_01.dct_iv_8x8_i.x_ra1[2:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dcth_en0
+dct_tests_01.dct_iv_8x8_i.dcth_start_0_r
+@22
+dct_tests_01.dct_iv_8x8_i.dcth_xin0[23:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dcth_en_out0
+@22
+dct_tests_01.dct_iv_8x8_i.dcth_dout0[24:0]
+dct_tests_01.dct_iv_8x8_i.dcth_yindex0[2:0]
+@200
+-
+@28
+dct_tests_01.dct_iv_8x8_i.dcth_en1
+dct_tests_01.dct_iv_8x8_i.dcth_start_1_r
+@22
+dct_tests_01.dct_iv_8x8_i.dcth_xin1[23:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dcth_en_out1
+@22
+dct_tests_01.dct_iv_8x8_i.dcth_dout1[24:0]
+dct_tests_01.dct_iv_8x8_i.dcth_yindex1[2:0]
+@200
+-
+@22
+dct_tests_01.dct_iv_8x8_i.transpose_start
+@28
+dct_tests_01.dct_iv_8x8_i.transpose_in_run
+@22
+dct_tests_01.dct_iv_8x8_i.transpose_w_page[1:0]
+[color] 3
+dct_tests_01.dct_iv_8x8_i.transpose_cntr[6:0]
+@28
+dct_tests_01.dct_iv_8x8_i.transpose_wa_decr
+@22
+dct_tests_01.dct_iv_8x8_i.transpose_wa_low[2:0]
+dct_tests_01.dct_iv_8x8_i.transpose_wa_high[4:0]
+dct_tests_01.dct_iv_8x8_i.transpose_wa[7:0]
+@28
+dct_tests_01.dct_iv_8x8_i.transpose_we
+@22
+[color] 2
+dct_tests_01.dct_iv_8x8_i.transpose_debug_di[7:0]
+@28
+dct_tests_01.dct_iv_8x8_i.transpose_out_start
+@800022
+dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0]
+(1)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0]
+(2)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0]
+@1001200
+-group_end
+@c00028
+dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0]
+(1)dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0]
+@1401200
+-group_end
+@c00022
+dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(1)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(2)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(3)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(4)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(5)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+(6)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0]
+@1401200
+-group_end
+@22
+dct_tests_01.dct_iv_8x8_i.transpose_ra[7:0]
+dct_tests_01.dct_iv_8x8_i.transpose_reg[24:0]
+@8420
+dct_tests_01.dct_iv_8x8_i.transpose_out[24:0]
+@22
+dct_tests_01.dct_iv_8x8_i.transpose_out[24:0]
+dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0]
+dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0]
+@8022
+dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0]
+@22
+dct_tests_01.dct_iv_8x8_i.t_wa[3:0]
+@28
+dct_tests_01.dct_iv_8x8_i.t_we0
+dct_tests_01.dct_iv_8x8_i.t_we1
+dct_tests_01.dct_iv_8x8_i.dctv_start_0_r
+dct_tests_01.dct_iv_8x8_i.dctv_start_1_r
+dct_tests_01.dct_iv_8x8_i.dctv_en0
+dct_tests_01.dct_iv_8x8_i.dctv_en1
+dct_tests_01.dct_iv_8x8_i.dctv_phin_start
+dct_tests_01.dct_iv_8x8_i.dctv_phin_run
+@c00022
+[color] 2
+dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(1)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(2)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(3)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(4)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(5)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+(6)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0]
+@1401200
+-group_end
+@8022
+dct_tests_01.dct_iv_8x8_i.t_ra0[2:0]
+dct_tests_01.dct_iv_8x8_i.t_ra1[2:0]
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0]
+@8420
+dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0]
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_xin1[24:0]
+dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0]
+dct_tests_01.dct_iv_8x8_i.dctv_debug_xin0[7:0]
+@8022
+dct_tests_01.dct_iv_8x8_i.dctv_debug_xin0[7:0]
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_debug_xin1[7:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_start_0_r
+dct_tests_01.dct_iv_8x8_i.dctv_start_1_r
+dct_tests_01.dct_iv_8x8_i.dctv_en0
+dct_tests_01.dct_iv_8x8_i.dctv_en1
+dct_tests_01.dct_iv_8x8_i.dctv_en_out0
+dct_tests_01.dct_iv_8x8_i.dctv_en_out1
+@c00022
+dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0]
+@28
+(0)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0]
+(1)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0]
+(2)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0]
+@1401200
+-group_end
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_yindex1[2:0]
+dct_tests_01.dct_iv_8x8_i.dctv_dout0[24:0]
+dct_tests_01.dct_iv_8x8_i.dctv_dout1[24:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_out_start
+dct_tests_01.dct_iv_8x8_i.dctv_out_run
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_out_cntr[6:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_out_we_1
+dct_tests_01.dct_iv_8x8_i.dctv_out_sel
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_out_wa_1[3:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_out_start_1
+dct_tests_01.dct_iv_8x8_i.dctv_out_run_1
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1[6:0]
+dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1_w[3:0]
+dct_tests_01.dct_iv_8x8_i.dctv_out_reg_1[23:0]
+dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_1[2:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_out_we_2
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_out_wa_2[1:0]
+@28
+dct_tests_01.dct_iv_8x8_i.dctv_out_run_2
+@22
+dct_tests_01.dct_iv_8x8_i.dctv_out_ra_2[6:0]
+dct_tests_01.dct_iv_8x8_i.dctv_out_reg_2[23:0]
+dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_2[2:0]
+@1000200
+-dct_iv_8x8
+@800200
+-dct_iv_8x8r
+@200
+-
+@1000200
+-dct_iv_8x8r
+-st22d_test
[pattern_trace] 1
[pattern_trace] 0
diff --git a/dsp/dct_iv.ods b/dsp/dct_iv.ods
index 8248d0a44f3221fa05b88e9873f942ee8610c63c..6d256b7f472248552ae39627f0aa3965b407e248 100644
Binary files a/dsp/dct_iv.ods and b/dsp/dct_iv.ods differ
diff --git a/dsp/dct_iv8_1d.v b/dsp/dct_iv8_1d.v
index ea93dfcb2adc9441d7ac2a155533178ff83b7774..fdd0be20a79a838c9841e995d3979253d64f101e 100644
--- a/dsp/dct_iv8_1d.v
+++ b/dsp/dct_iv8_1d.v
@@ -71,7 +71,9 @@ module dct_iv8_1d#(
output [OUT_WIDTH -1:0] dout,
output reg pre2_start_out, // 2 clock cycle before Y0 output, full dout sequence
// start_out-x-Y0-x-Y7-x-Y4-x-Y3-x-Y1-x-Y6-x-Y2-x-Y5
- output reg en_out // valid at the same time slot as pre2_start_out (goes active with pre2_start_out)
+ output en_out, // valid at the same time slot as pre2_start_out (goes active with pre2_start_out), 1 ahead of data
+ output reg [2:0] y_index // for simulation - valid with dout - index of the data output
+
);
// X6-X7-X5-X2-X1-X3-X0-X4-*-X5-X1-X2-*-X4-X7-*
// X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-*
@@ -132,6 +134,9 @@ module dct_iv8_1d#(
reg run_in; // receiving input data
reg restart; // restarting next block if en was active at phase=14;
reg run_out; // running output data
+ reg en_out_r;
+
+ assign en_out = en_out_r;
assign dsp_ain_2 = dsp_p_1 [STAGE1_RSHIFT +: A_WIDTH];
@@ -147,6 +152,23 @@ module dct_iv8_1d#(
wire din_zero = ~(|d_in);
assign dsp_cin_1 = {{P_WIDTH-WIDTH-COSINE_SHIFT{d_in[WIDTH-1]}},d_in,~d_in[WIDTH-1]^din_zero,{COSINE_SHIFT-1{d_in[WIDTH-1]}}};
+ always @ (posedge clk) begin // simulation aid: registers the index of the Y word for the y_index port
+ if (en_out_r) begin
+ case (phase_cnt[3:1]) // one entry per output slot; slot 7 maps to Y0, then Y7,Y4,Y3,Y1,Y6,Y2,Y5
+ 3'h0: y_index <= 7;
+ 3'h1: y_index <= 4;
+ 3'h2: y_index <= 3;
+ 3'h3: y_index <= 1;
+ 3'h4: y_index <= 6;
+ 3'h5: y_index <= 2;
+ 3'h6: y_index <= 5;
+ 3'h7: y_index <= 0;
+ endcase
+ end else begin
+ y_index <= 'bx; // en_out_r is low: mark the index as unknown
+ end
+ end
+
//register files
assign dsp_din_1 = dsp_din_1_ram[dsp_din_1_ra];
@@ -173,7 +195,7 @@ module dct_iv8_1d#(
pre2_start_out <= run_out && (phase_cnt == 14);
- en_out <= run_out && !phase_cnt[0];
+ en_out_r <= run_out && !phase_cnt[0];
// Cosine table, defined to fit into 17 bits for 18-bit signed DSP B-operand
case (phase_cnt)
diff --git a/dsp/dct_iv_8x8.v b/dsp/dct_iv_8x8.v
new file mode 100644
index 0000000000000000000000000000000000000000..73ac5e832e517e94574ca32f965d36fa84b65b2f
--- /dev/null
+++ b/dsp/dct_iv_8x8.v
@@ -0,0 +1,609 @@
+/*!
+ * Module:dct_iv_8x8
+ * @file dct_iv_8x8.v
+ * @date 2016-12-08
+ * @author Andrey Filippov
+ *
+ * @brief 2-d DCT-IV implementation, 1 clock/data word. Input in scanline order, output - transposed
+ *
+ * @copyright Copyright (c) 2016 Elphel, Inc.
+ *
+ * License:
+ *
+ *dct_iv_8x8.v is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dct_iv_8x8.v is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Additional permission under GNU GPL version 3 section 7:
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with independent modules provided by the FPGA vendor only (this permission
+ * does not extend to any 3-rd party modules, "soft cores" or macros) under
+ * different license terms solely for the purpose of generating binary "bitstream"
+ * files and/or simulating the code, the copyright holders of this Program give
+ * you the right to distribute the covered work without those independent modules
+ * as long as the source code for them is available from the FPGA vendor free of
+ * charge, and there is no dependence on any encrypted modules for simulating of
+ * the combined code. This permission applies to you if the distributed code
+ * contains all the components and scripts required to completely simulate it
+ * with at least one of the Free Software programs.
+ */
+`timescale 1ns/1ps
+
+module dct_iv_8x8#(
+ parameter INPUT_WIDTH = 25, // width of xin; passed as WIDTH to both pass-1 (horizontal) dct_iv8_1d instances
+ parameter OUT_WIDTH = 25, // width of d_out; passed as OUT_WIDTH to both pass-2 (vertical) dct_iv8_1d instances
+ parameter OUT_RSHIFT1 = 3, // overall right shift of the result from input, aligned by MSB for pass1 (>=3 will never cause saturation)
+ parameter OUT_RSHIFT2 = 0, // overall right shift of the result from input, aligned by MSB for pass2 (>=3 will never cause saturation)
+ parameter TRANSPOSE_WIDTH = 25, // transpose memory width
+ parameter DSP_B_WIDTH = 18, // passed as B_WIDTH to every dct_iv8_1d instance
+ parameter DSP_A_WIDTH = 25, // passed as A_WIDTH to every dct_iv8_1d instance
+ parameter DSP_P_WIDTH = 48, // passed as P_WIDTH to every dct_iv8_1d instance
+ parameter COSINE_SHIFT= 17, // passed to every dct_iv8_1d instance; the COS_* defaults below are scaled by (1<<17)
+ parameter COS_01_32 = 130441, // int(round((1<<17) * cos( 1*pi/32)))
+ parameter COS_03_32 = 125428, // int(round((1<<17) * cos( 3*pi/32)))
+ parameter COS_04_32 = 121095, // int(round((1<<17) * cos( 4*pi/32)))
+ parameter COS_05_32 = 115595, // int(round((1<<17) * cos( 5*pi/32)))
+ parameter COS_07_32 = 101320, // int(round((1<<17) * cos( 7*pi/32)))
+ parameter COS_08_32 = 92682, // int(round((1<<17) * cos( 8*pi/32)))
+ parameter COS_09_32 = 83151, // int(round((1<<17) * cos( 9*pi/32)))
+ parameter COS_11_32 = 61787, // int(round((1<<17) * cos(11*pi/32)))
+ parameter COS_12_32 = 50159, // int(round((1<<17) * cos(12*pi/32)))
+ parameter COS_13_32 = 38048, // int(round((1<<17) * cos(13*pi/32)))
+ parameter COS_15_32 = 12847 // int(round((1<<17) * cos(15*pi/32)))
+ ) (
+ input clk, //!< system clock, posedge
+ input rst, //!< sync reset
+ input start, //!< single-cycle start pulse that goes with the first pixel data.
+ // Next data should be sent in bursts of 8, pause of 8 - total 128 cycles (NOTE(review): x_run/x_wa in this module write 64 consecutive words - confirm this note is still valid)
+ input signed [INPUT_WIDTH-1:0] xin, //!< input data
+ output pre_last_in, //!< output high during input of the pre-last of 64 pixels in a 8x8 block (next can be start)
+ output reg pre_first_out, //!< 1 cycle ahead of the first output in a 64 block
+ output reg dv, //!< data output valid. WAS: Will go high on the 94-th cycle after the start
+ output signed [OUT_WIDTH-1:0] d_out, //!< output data
+ output reg pre_busy); //!< start should come each 64-th cycle (next after pre_last_in), and not after pre_busy
+
+// 1. Two 16xINPUT_WIDTH memories to feed two of the 'horizontal' 1-dct - they should provide outputs shifted by 1 clock (x_ram0/x_ram1 below are declared with 8 words each)
+// 2. 2 of the horizontal DCTs
+// 3. common transpose memory plus 2 input reorder memory for each of the vertical DCT
+// 4. 2 of the vertical DCTs
+// 5. small memory to combine/reorder outputs (2 stages as 1 x16 memory is not enough)
+ reg x_run; // receiving an input block: set by start, cleared when x_wa reaches 63
+ reg [5:0] x_wa; // input word counter, held at 0 while x_run is low
+ wire dcth_phin_start = x_run && (x_wa[5:0] == 6); // (re)starts the horizontal-pass phase counter
+ reg dcth_phin_run;
+ reg dcth_en0; // enable for pass-1 DCT #0
+ reg dcth_en1; // enable for pass-1 DCT #1
+ reg [6:0] dcth_phin; // horizontal-pass phase counter, held at 0 while dcth_phin_run is low
+ reg [2:0] x_ra0; // read address of x_ram0 (table indexed by dcth_phin[3:0])
+ reg [2:0] x_ra1; // read address of x_ram1 (table indexed by dcth_phin[3:0])
+ reg signed [INPUT_WIDTH-1:0] x_ram0[0:7]; // input reorder memory, written when x_wa[3] == 0
+ reg signed [INPUT_WIDTH-1:0] x_ram1[0:7]; // input reorder memory, written when x_wa[3] == 1
+ reg signed [INPUT_WIDTH-1:0] dcth_xin0; // registered x_ram0 read data, d_in of pass-1 DCT #0
+ reg signed [INPUT_WIDTH-1:0] dcth_xin1; // registered x_ram1 read data, d_in of pass-1 DCT #1
+
+ wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout0;
+ wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout1;
+// wire dcth_pre2_start_out0;
+// wire dcth_pre2_start_out1;
+ wire dcth_en_out0;
+ wire dcth_en_out1;
+
+ wire dcth_start_0_w = dcth_phin_run && (dcth_phin [6:0] ==0); // start of pass-1 DCT #0, registered into dcth_start_0_r
+ wire dcth_start_1_w = dcth_phin_run && (dcth_phin [6:0] ==9); // start of pass-1 DCT #1, 9 phases after DCT #0
+
+ reg dcth_start_0_r;
+ reg dcth_start_1_r;
+
+ reg [1:0] transpose_w_page;
+ reg [6:0] transpose_cntr; // transpose memory counter, [6] == 1 when the last page is being finished
+ reg transpose_in_run;
+ wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h10);
+ reg [2:0] transpose_wa_low; // transpose memory write address, low 3 bits (table indexed by transpose_cntr[3:0]); this reg has no bit [3]
+ reg [4:0] transpose_wa_high; // high bits of transpose memory write address
+ wire [7:0] transpose_wa = {transpose_wa_high,transpose_wa_low};
+ wire transpose_wa_decr = (transpose_cntr[0] & ~transpose_cntr[3]);
+ reg transpose_we;
+ wire [TRANSPOSE_WIDTH-1:0] transpose_di = transpose_cntr[0]? dcth_dout0: dcth_dout1; // transpose_cntr[0] selects which pass-1 DCT is written
+
+ reg [TRANSPOSE_WIDTH-1:0] transpose_ram[0:255];
+ wire [2:0] dcth_yindex0;
+ wire [2:0] dcth_yindex1;
+ wire [7:0] transpose_debug_di= {transpose_wa_high, transpose_cntr[0]? dcth_yindex0: dcth_yindex1}; // debug shadow of transpose_di: high write address and y_index
+ reg [7:0] transpose_debug_ram[0:255]; // written and read with the same addresses as transpose_ram
+
+ reg [6:0] transpose_rcntr; // transpose read memory counter, [6] == 1 when the last page is being finished
+ reg [2:0] transpose_out_run; // [0] - read counter running, [2:1] - [0] delayed by 1 and 2 clocks
+ wire transpose_out_start = transpose_in_run && (transpose_cntr[6:0] == 7'h34); // 7'h33 is actual minimum
+ reg [1:0] transpose_r_page; // copy of transpose_w_page taken at transpose_out_start
+
+ reg [TRANSPOSE_WIDTH-1:0] transpose_reg; // internal BRAM register
+ reg [TRANSPOSE_WIDTH-1:0] transpose_out; // output BRAM register
+
+ reg [7:0] transpose_debug_reg; // internal BRAM register
+ reg [7:0] transpose_debug_out; // output BRAM register
+ wire [7:0] transpose_ra = {transpose_r_page, transpose_rcntr[2:0], transpose_rcntr[5:3]}; // the two 3-bit counter fields are swapped to form the read address
+ reg [3:0] t_wa; // [2:0] - write address of t_ram0/t_ram1, [3] selects which of the two is written
+ wire t_we0 = transpose_out_run[2] && !t_wa[3];
+ wire t_we1 = transpose_out_run[2] && t_wa[3];
+ reg signed [TRANSPOSE_WIDTH-1:0] t_ram0[0:7];
+ reg signed [TRANSPOSE_WIDTH-1:0] t_ram1[0:7];
+ reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin0;
+ reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin1;
+
+ reg signed [7:0] t_debug_ram0[0:7];
+ reg signed [7:0] t_debug_ram1[0:7];
+ reg signed [7:0] dctv_debug_xin0; // SuppressThisWarning VEditor - simulation only
+ reg signed [7:0] dctv_debug_xin1; // SuppressThisWarning VEditor - simulation only
+
+ wire signed [OUT_WIDTH-1:0] dctv_dout0;
+ wire signed [OUT_WIDTH-1:0] dctv_dout1;
+ wire dctv_en_out0;
+ wire dctv_en_out1;
+ wire [2:0] dctv_yindex0;
+ wire [2:0] dctv_yindex1;
+
+ wire dctv_phin_start = transpose_out_run && (transpose_rcntr[5:0] == 8); // transpose_out_run is a 3-bit vector here: && treats it as (|transpose_out_run)
+ reg dctv_phin_run;
+
+ reg dctv_en0; // enable for pass-2 DCT #0
+ reg dctv_en1; // enable for pass-2 DCT #1
+ reg [6:0] dctv_phin; // vertical-pass phase counter, held at 0 while dctv_phin_run is low
+ reg [2:0] t_ra0; // read address of t_ram0 (table indexed by dctv_phin[3:0])
+ reg [2:0] t_ra1; // read address of t_ram1 (table indexed by dctv_phin[3:0])
+ wire dctv_start_0_w = dctv_phin_run && (dctv_phin [6:0] ==0);
+ wire dctv_start_1_w = dctv_phin_run && (dctv_phin [6:0] ==9);
+ reg dctv_start_0_r;
+ reg dctv_start_1_r;
+
+ reg pre_last_in_r;
+
+ reg [6:0] dctv_out_cntr; // count output data from second (vertical) pass (bit 6 - stopping)
+ reg dctv_out_run; //
+ wire dctv_out_start = dctv_phin [6:0] == 'h10; // not qualified by dctv_phin_run; dctv_phin is held at 0 while that is low
+
+ reg [3:0] dctv_out_wa_1;
+ reg dctv_out_we_1;
+ reg dctv_out_sel; // select DCTv channel output;
+ reg signed [OUT_WIDTH-1:0] dctv_out_ram_1[0:15];
+ reg [2:0] dctv_out_debug_ram_1[0:15];
+
+ reg [6:0] dctv_out_ra_1;
+ wire [3:0] dctv_out_ra_1_w = {dctv_out_ra_1[3:1], ~dctv_out_ra_1[0]}; // stage-1 read address: counter bits [3:0] with bit 0 inverted
+ wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0c; // 'h0b;
+ reg dctv_out_run_1;
+ reg signed [OUT_WIDTH-1:0] dctv_out_reg_1;
+ reg [2:0] dctv_out_debug_reg_1;
+
+ reg signed [OUT_WIDTH-1:0] dctv_out_ram_2[0:3];
+ reg [2:0] dctv_out_debug_ram_2[0:3];
+ reg dctv_out_we_2;
+ reg [1:0] dctv_out_wa_2;
+ reg [6:0] dctv_out_ra_2;
+ wire dctv_out_start_2 = dctv_out_ra_1[6:0] == 2;
+ reg dctv_out_run_2;
+ reg signed [OUT_WIDTH-1:0] dctv_out_reg_2;
+ reg [2:0] dctv_out_debug_reg_2; // SuppressThisWarning VEditor - simulation only
+
+ assign d_out = dctv_out_reg_2;
+
+ assign pre_last_in = pre_last_in_r;
+
+ always @ (posedge clk) begin // main sequencer: input buffering, pass-1 control, transpose memory, pass-2 control, output reordering
+ if (rst) x_run <= 0;
+ else if (start) x_run <= 1;
+ else if (&x_wa[5:0]) x_run <= 0;
+
+ if (!x_run) x_wa <= 0;
+ else x_wa <= x_wa + 1;
+
+ pre_last_in_r <= x_run && (x_wa[5:0] == 'h3d);
+
+ if (rst) pre_busy <= 0;
+ else if (pre_last_in_r) pre_busy <= 1;
+ else if (dcth_phin [5:0] == 5) pre_busy <= 0; // check actual?
+
+ if (rst) dcth_phin_run <= 0;
+ else if (dcth_phin_start) dcth_phin_run <= 1;
+ else if (dcth_phin [6:0] == 7'h48) dcth_phin_run <= 0; // check actual?
+
+ if (!dcth_phin_run || dcth_phin_start) dcth_phin <= 0;
+ else dcth_phin <= dcth_phin + 1;
+
+ if (rst) dcth_en0 <= 0;
+ else if (dcth_start_0_w) dcth_en0 <= 1;
+ else if (!x_run) dcth_en0 <= 0; // maybe get rid of this signal and send start for each 8?
+
+ if (rst) dcth_en1 <= 0;
+ else if (dcth_start_1_w) dcth_en1 <= 1;
+ else if (dcth_phin [6]) dcth_en1 <= 0; // maybe get rid of this signal and send start for each 8?
+
+ //write input reorder memory
+ if (x_run && !x_wa[3]) x_ram0[x_wa[2:0]] <= xin;
+ if (x_run && x_wa[3]) x_ram1[x_wa[2:0]] <= xin;
+
+ //read input reorder memory
+ dcth_xin0 <= x_ram0[x_ra0[2:0]];
+ dcth_xin1 <= x_ram1[x_ra1[2:0]];
+
+ dcth_start_0_r <= dcth_start_0_w;
+ dcth_start_1_r <= dcth_start_1_w;
+
+ if (rst) transpose_in_run <= 0;
+ else if (transpose_start) transpose_in_run <= 1;
+ else if (transpose_cntr [6:0] == 7'h46) transpose_in_run <= 0; // check actual?
+
+ if (!transpose_in_run || transpose_start) transpose_cntr <= 0;
+ else transpose_cntr <= transpose_cntr + 1;
+
+ if (rst) transpose_w_page <= 0;
+ else if (transpose_in_run && (&transpose_cntr[5:0])) transpose_w_page <= transpose_w_page + 1;
+
+ case (transpose_cntr[3:0])
+ 4'h0: transpose_wa_low <= 0;
+ 4'h1: transpose_wa_low <= 1;
+ 4'h2: transpose_wa_low <= 7;
+ 4'h3: transpose_wa_low <= 6;
+ 4'h4: transpose_wa_low <= 4;
+ 4'h5: transpose_wa_low <= 2;
+ 4'h6: transpose_wa_low <= 3;
+ 4'h7: transpose_wa_low <= 5;
+ 4'h8: transpose_wa_low <= 1;
+ 4'h9: transpose_wa_low <= 0;
+ 4'ha: transpose_wa_low <= 6;
+ 4'hb: transpose_wa_low <= 7;
+ 4'hc: transpose_wa_low <= 2;
+ 4'hd: transpose_wa_low <= 4;
+ 4'he: transpose_wa_low <= 5;
+ 4'hf: transpose_wa_low <= 3;
+ endcase
+ transpose_wa_high <= {transpose_w_page, transpose_cntr[5:4], transpose_cntr[0]} - {transpose_wa_decr,1'b0};
+ transpose_we <= dcth_en_out0 || dcth_en_out1;
+ // Write transpose memory
+ if (transpose_we) transpose_ram[transpose_wa] <= transpose_di;
+ if (transpose_we) transpose_debug_ram[transpose_wa] <= transpose_debug_di;
+// if (transpose_we) $display("%d %d @%t",transpose_cntr, transpose_wa, $time) ;
+
+ if (rst) transpose_out_run[0] <= 0;
+ else if (transpose_out_start) transpose_out_run[0] <= 1;
+ else if (&transpose_rcntr[5:0]) transpose_out_run[0] <= 0; // check actual?
+
+ transpose_out_run[2:1] <= transpose_out_run[1:0];
+
+ if (!transpose_out_run[0] || transpose_out_start) transpose_rcntr <= 0;
+ else transpose_rcntr <= transpose_rcntr + 1;
+
+ if (transpose_out_start) transpose_r_page <= transpose_w_page;
+
+ // Read transpose memory to 2 small reorder memories, use BRAM register
+ if (transpose_out_run[0]) transpose_reg <= transpose_ram[transpose_ra];
+ if (transpose_out_run[1]) transpose_out <= transpose_reg;
+ if (transpose_out_run[0]) transpose_debug_reg <= transpose_debug_ram[transpose_ra];
+ if (transpose_out_run[1]) transpose_debug_out <= transpose_debug_reg;
+
+ if (!transpose_out_run[2]) t_wa <= 0;
+ else t_wa <= t_wa+1;
+
+ if (rst) dctv_phin_run <= 0;
+ else if (dctv_phin_start) dctv_phin_run <= 1;
+ else if (dctv_phin [6:0] == 7'h48) dctv_phin_run <= 0; // check actual?
+
+
+ if (!dctv_phin_run || dctv_phin_start) dctv_phin <= 0;
+ else dctv_phin <= dctv_phin + 1;
+
+ if (rst) dctv_en0 <= 0;
+ else if (dctv_start_0_w) dctv_en0 <= 1;
+ else if (!transpose_out_run[2]) dctv_en0 <= 0; // maybe get rid of this signal and send start for each 8?
+
+ if (rst) dctv_en1 <= 0;
+ else if (dctv_start_1_w) dctv_en1 <= 1;
+ else if (dctv_phin[6]) dctv_en1 <= 0; // maybe get rid of this signal and send start for each 8?
+
+// if (t_we0 || t_we1) $display("%d %d",transpose_rcntr-2, transpose_out) ;
+
+ //write vertical dct input reorder memory
+ if (t_we0) t_ram0[t_wa[2:0]] <= transpose_out;
+ if (t_we1) t_ram1[t_wa[2:0]] <= transpose_out;
+
+ if (t_we0) t_debug_ram0[t_wa[2:0]] <= transpose_debug_out;
+ if (t_we1) t_debug_ram1[t_wa[2:0]] <= transpose_debug_out;
+
+ //read vertical dct input reorder memory
+ dctv_xin0 <= t_ram0[t_ra0[2:0]];
+ dctv_xin1 <= t_ram1[t_ra1[2:0]];
+
+ dctv_start_0_r <= dctv_start_0_w;
+ dctv_start_1_r <= dctv_start_1_w;
+
+ dctv_debug_xin0 <= t_debug_ram0[t_ra0[2:0]];
+ dctv_debug_xin1 <= t_debug_ram1[t_ra1[2:0]];
+
+ // Reordering data from a pair of vertical DCTs - 2 steps, 1 is not enough
+ if (rst) dctv_out_run <= 0;
+ else if (dctv_out_start) dctv_out_run <= 1;
+ else if (dctv_out_cntr[6:0] == 'h47) dctv_out_run <= 0;
+
+ if (!dctv_out_run || dctv_out_start) dctv_out_cntr <= 0;
+ else dctv_out_cntr <= dctv_out_cntr + 1;
+
+ dctv_out_we_1 <= dctv_en_out0 || dctv_en_out1;
+
+ dctv_out_sel <= dctv_out_cntr[0];
+
+ case (dctv_out_cntr[3:0])
+ 4'h0: dctv_out_wa_1 <= 0;
+ 4'h1: dctv_out_wa_1 <= 9;
+ 4'h2: dctv_out_wa_1 <= 7;
+ 4'h3: dctv_out_wa_1 <= 14;
+ 4'h4: dctv_out_wa_1 <= 4;
+ 4'h5: dctv_out_wa_1 <= 10;
+ 4'h6: dctv_out_wa_1 <= 3;
+ 4'h7: dctv_out_wa_1 <= 13;
+ 4'h8: dctv_out_wa_1 <= 1;
+ 4'h9: dctv_out_wa_1 <= 8;
+ 4'ha: dctv_out_wa_1 <= 6;
+ 4'hb: dctv_out_wa_1 <= 15;
+ 4'hc: dctv_out_wa_1 <= 2;
+ 4'hd: dctv_out_wa_1 <= 12;
+ 4'he: dctv_out_wa_1 <= 5;
+ 4'hf: dctv_out_wa_1 <= 11;
+ endcase
+
+ // write first stage of output reordering
+ if (dctv_out_we_1) dctv_out_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_dout1: dctv_dout0;
+ if (dctv_out_we_1) dctv_out_debug_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_yindex1: dctv_yindex0;
+
+ if (rst) dctv_out_run_1 <= 0;
+ else if (dctv_out_start_1) dctv_out_run_1 <= 1;
+ else if (&dctv_out_ra_1[5:0]) dctv_out_run_1 <= 0;
+
+ if (!dctv_out_run_1 || dctv_out_start_1) dctv_out_ra_1 <= 0;
+ else dctv_out_ra_1 <= dctv_out_ra_1 + 1;
+ // reading first stage of output reorder RAM
+ if (dctv_out_run_1) dctv_out_reg_1 <= dctv_out_ram_1[dctv_out_ra_1_w];
+ if (dctv_out_run_1) dctv_out_debug_reg_1 <= dctv_out_debug_ram_1[dctv_out_ra_1_w];
+
+ // last stage of the output reordering - 4 register memory
+
+ dctv_out_we_2 <= dctv_out_run_1;
+ dctv_out_wa_2 <= dctv_out_ra_1_w[1:0];
+
+ // write second (last) stage of output reordering
+ if (dctv_out_we_2) dctv_out_ram_2[dctv_out_wa_2] <= dctv_out_reg_1;
+ if (dctv_out_we_2) dctv_out_debug_ram_2[dctv_out_wa_2] <= dctv_out_debug_reg_1;
+
+ if (rst) dctv_out_run_2 <= 0;
+ else if (dctv_out_start_2) dctv_out_run_2 <= 1;
+ else if (&dctv_out_ra_2[5:0]) dctv_out_run_2 <= 0;
+
+ if (!dctv_out_run_2 || dctv_out_start_2) dctv_out_ra_2 <= 0;
+ else dctv_out_ra_2 <= dctv_out_ra_2 + 1;
+
+ // reading second (last) stage of output reorder RAM
+ if (dctv_out_run_2) dctv_out_reg_2 <= dctv_out_ram_2[dctv_out_ra_2[1:0]];
+ if (dctv_out_run_2) dctv_out_debug_reg_2 <= dctv_out_debug_ram_2[dctv_out_ra_2[1:0]];
+
+ pre_first_out <= dctv_out_start_2; // same condition as before (dctv_out_ra_1 == 2), reusing the existing wire
+
+ dv <= dctv_out_run_2; // dctv_out_run_2 also gates the load of dctv_out_reg_2 (d_out)
+ end
+
+ always @ (posedge clk) begin // read-address tables for the horizontal-pass input reorder memories x_ram0/x_ram1
+ //X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* (NOTE(review): slot 4'hd below reads 6 while this sequence lists X1 - confirm which is intended)
+ case (dcth_phin[3:0])
+ 4'h0: x_ra0 <= 2;
+ 4'h1: x_ra0 <= 7;
+ 4'h2: x_ra0 <= 3;
+ 4'h3: x_ra0 <= 4;
+ 4'h4: x_ra0 <= 5;
+ 4'h5: x_ra0 <= 6;
+ 4'h6: x_ra0 <= 0;
+ 4'h7: x_ra0 <= 1;
+ 4'h8: x_ra0 <= 'bx; // '*' slot in the sequence above: address left unknown
+ 4'h9: x_ra0 <= 3;
+ 4'ha: x_ra0 <= 5;
+ 4'hb: x_ra0 <= 4;
+ 4'hc: x_ra0 <= 'bx;
+ 4'hd: x_ra0 <= 6;
+ 4'he: x_ra0 <= 7;
+ 4'hf: x_ra0 <= 'bx;
+ endcase
+ case (dcth_phin[3:0]) // same address sequence as x_ra0, rotated by 9 phases (dcth_start_1_w fires at dcth_phin == 9)
+ 4'h0: x_ra1 <= 1;
+ 4'h1: x_ra1 <= 'bx;
+ 4'h2: x_ra1 <= 3;
+ 4'h3: x_ra1 <= 5;
+ 4'h4: x_ra1 <= 4;
+ 4'h5: x_ra1 <= 'bx;
+ 4'h6: x_ra1 <= 6;
+ 4'h7: x_ra1 <= 7;
+ 4'h8: x_ra1 <= 'bx;
+ 4'h9: x_ra1 <= 2;
+ 4'ha: x_ra1 <= 7;
+ 4'hb: x_ra1 <= 3;
+ 4'hc: x_ra1 <= 4;
+ 4'hd: x_ra1 <= 5;
+ 4'he: x_ra1 <= 6;
+ 4'hf: x_ra1 <= 0;
+ endcase
+ end
+
+ always @ (posedge clk) begin // read-address tables for the vertical-pass input reorder memories t_ram0/t_ram1
+ //X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* (NOTE(review): slot 4'hd below reads 6 while this sequence lists X1 - confirm which is intended)
+ case (dctv_phin[3:0])
+ 4'h0: t_ra0 <= 2;
+ 4'h1: t_ra0 <= 7;
+ 4'h2: t_ra0 <= 3;
+ 4'h3: t_ra0 <= 4;
+ 4'h4: t_ra0 <= 5;
+ 4'h5: t_ra0 <= 6;
+ 4'h6: t_ra0 <= 0;
+ 4'h7: t_ra0 <= 1;
+ 4'h8: t_ra0 <= 'bx; // '*' slot in the sequence above: address left unknown
+ 4'h9: t_ra0 <= 3;
+ 4'ha: t_ra0 <= 5;
+ 4'hb: t_ra0 <= 4;
+ 4'hc: t_ra0 <= 'bx;
+ 4'hd: t_ra0 <= 6;
+ 4'he: t_ra0 <= 7;
+ 4'hf: t_ra0 <= 'bx;
+ endcase
+ case (dctv_phin[3:0]) // same address sequence as t_ra0, rotated by 9 phases (dctv_start_1_w fires at dctv_phin == 9)
+ 4'h0: t_ra1 <= 1;
+ 4'h1: t_ra1 <= 'bx;
+ 4'h2: t_ra1 <= 3;
+ 4'h3: t_ra1 <= 5;
+ 4'h4: t_ra1 <= 4;
+ 4'h5: t_ra1 <= 'bx;
+ 4'h6: t_ra1 <= 6;
+ 4'h7: t_ra1 <= 7;
+ 4'h8: t_ra1 <= 'bx;
+ 4'h9: t_ra1 <= 2;
+ 4'ha: t_ra1 <= 7;
+ 4'hb: t_ra1 <= 3;
+ 4'hc: t_ra1 <= 4;
+ 4'hd: t_ra1 <= 5;
+ 4'he: t_ra1 <= 6;
+ 4'hf: t_ra1 <= 0;
+ endcase
+ end
+
+ dct_iv8_1d #( // horizontal pass, 1-d DCT #0 (fed from x_ram0)
+ .WIDTH (INPUT_WIDTH),
+ .OUT_WIDTH (TRANSPOSE_WIDTH),
+ .OUT_RSHIFT (OUT_RSHIFT1),
+ .B_WIDTH (DSP_B_WIDTH),
+ .A_WIDTH (DSP_A_WIDTH),
+ .P_WIDTH (DSP_P_WIDTH),
+ .COSINE_SHIFT (COSINE_SHIFT),
+ .COS_01_32 (COS_01_32),
+ .COS_03_32 (COS_03_32),
+ .COS_04_32 (COS_04_32),
+ .COS_05_32 (COS_05_32),
+ .COS_07_32 (COS_07_32),
+ .COS_08_32 (COS_08_32),
+ .COS_09_32 (COS_09_32),
+ .COS_11_32 (COS_11_32),
+ .COS_12_32 (COS_12_32),
+ .COS_13_32 (COS_13_32),
+ .COS_15_32 (COS_15_32)
+ ) dct_iv8_1d_pass1_0_i (
+ .clk (clk), // input
+ .rst (rst), // input
+ .en (dcth_en0), // input
+ .d_in (dcth_xin0), // input[INPUT_WIDTH-1:0]
+ .start (dcth_start_0_r), // input
+ .dout (dcth_dout0), // output[TRANSPOSE_WIDTH-1:0]
+ .pre2_start_out (), // output reg, not used here
+ .en_out (dcth_en_out0), // output
+ .y_index (dcth_yindex0) // output[2:0] reg
+
+ );
+
+ dct_iv8_1d #( // horizontal pass, 1-d DCT #1 (fed from x_ram1)
+ .WIDTH (INPUT_WIDTH),
+ .OUT_WIDTH (TRANSPOSE_WIDTH),
+ .OUT_RSHIFT (OUT_RSHIFT1),
+ .B_WIDTH (DSP_B_WIDTH),
+ .A_WIDTH (DSP_A_WIDTH),
+ .P_WIDTH (DSP_P_WIDTH),
+ .COSINE_SHIFT (COSINE_SHIFT),
+ .COS_01_32 (COS_01_32),
+ .COS_03_32 (COS_03_32),
+ .COS_04_32 (COS_04_32),
+ .COS_05_32 (COS_05_32),
+ .COS_07_32 (COS_07_32),
+ .COS_08_32 (COS_08_32),
+ .COS_09_32 (COS_09_32),
+ .COS_11_32 (COS_11_32),
+ .COS_12_32 (COS_12_32),
+ .COS_13_32 (COS_13_32),
+ .COS_15_32 (COS_15_32)
+ ) dct_iv8_1d_pass1_1_i (
+ .clk (clk), // input
+ .rst (rst), // input
+ .en (dcth_en1), // input
+ .d_in (dcth_xin1), // input[INPUT_WIDTH-1:0]
+ .start (dcth_start_1_r), // input
+ .dout (dcth_dout1), // output[TRANSPOSE_WIDTH-1:0]
+ .pre2_start_out (), // output reg, not used here
+ .en_out (dcth_en_out1), // output
+ .y_index (dcth_yindex1) // output[2:0] reg
+
+ );
+//dcth_phin_run && (dcth_phin [6:0] ==9)
+
+ dct_iv8_1d #( // vertical pass, 1-d DCT #0 (fed from t_ram0)
+ .WIDTH (TRANSPOSE_WIDTH),
+ .OUT_WIDTH (OUT_WIDTH),
+ .OUT_RSHIFT (OUT_RSHIFT2),
+ .B_WIDTH (DSP_B_WIDTH),
+ .A_WIDTH (DSP_A_WIDTH),
+ .P_WIDTH (DSP_P_WIDTH),
+ .COSINE_SHIFT (COSINE_SHIFT),
+ .COS_01_32 (COS_01_32),
+ .COS_03_32 (COS_03_32),
+ .COS_04_32 (COS_04_32),
+ .COS_05_32 (COS_05_32),
+ .COS_07_32 (COS_07_32),
+ .COS_08_32 (COS_08_32),
+ .COS_09_32 (COS_09_32),
+ .COS_11_32 (COS_11_32),
+ .COS_12_32 (COS_12_32),
+ .COS_13_32 (COS_13_32),
+ .COS_15_32 (COS_15_32)
+ ) dct_iv8_1d_pass2_0_i (
+ .clk (clk), // input
+ .rst (rst), // input
+ .en (dctv_en0), // input
+ .d_in (dctv_xin0), // input[TRANSPOSE_WIDTH-1:0]
+ .start (dctv_start_0_r), // input
+ .dout (dctv_dout0), // output[OUT_WIDTH-1:0]
+ .pre2_start_out (), // output reg, not used here
+ .en_out (dctv_en_out0), // output
+ .y_index (dctv_yindex0) // output[2:0] reg
+
+ );
+
+ dct_iv8_1d #( // vertical pass, 1-d DCT #1 (fed from t_ram1)
+ .WIDTH (TRANSPOSE_WIDTH),
+ .OUT_WIDTH (OUT_WIDTH),
+ .OUT_RSHIFT (OUT_RSHIFT2),
+ .B_WIDTH (DSP_B_WIDTH),
+ .A_WIDTH (DSP_A_WIDTH),
+ .P_WIDTH (DSP_P_WIDTH),
+ .COSINE_SHIFT (COSINE_SHIFT),
+ .COS_01_32 (COS_01_32),
+ .COS_03_32 (COS_03_32),
+ .COS_04_32 (COS_04_32),
+ .COS_05_32 (COS_05_32),
+ .COS_07_32 (COS_07_32),
+ .COS_08_32 (COS_08_32),
+ .COS_09_32 (COS_09_32),
+ .COS_11_32 (COS_11_32),
+ .COS_12_32 (COS_12_32),
+ .COS_13_32 (COS_13_32),
+ .COS_15_32 (COS_15_32)
+ ) dct_iv8_1d_pass2_1_i (
+ .clk (clk), // input
+ .rst (rst), // input
+ .en (dctv_en1), // input
+ .d_in (dctv_xin1), // input[TRANSPOSE_WIDTH-1:0]
+ .start (dctv_start_1_r), // input
+ .dout (dctv_dout1), // output[OUT_WIDTH-1:0]
+ .pre2_start_out (), // output reg, not used here
+ .en_out (dctv_en_out1), // output
+ .y_index (dctv_yindex1) // output[2:0] reg
+ );
+
+endmodule
+
diff --git a/dsp/dct_tests_01.tf b/dsp/dct_tests_01.tf
index 0e6e0633c361672fd131752ec41ee59170c17e80..cde8694062d0177900ae7e565d2a4f3579f7bdbe 100644
--- a/dsp/dct_tests_01.tf
+++ b/dsp/dct_tests_01.tf
@@ -40,7 +40,7 @@
`timescale 1ns/1ps
// No saturation here, and no rounding as we do not need to match decoder (be bit-precise), skipping rounding adder
// will reduce needed resources
-//`define DCT_INPUT_UNITY
+`define DCT_INPUT_UNITY
module dct_tests_01 ();
// parameter fstname="dct_tests_01.fst";
`ifdef IVERILOG
@@ -61,11 +61,16 @@ module dct_tests_01 ();
`endif // CVC
`endif // IVERILOG
- parameter CLK_PERIOD = 10; // ns
- parameter WIDTH = 24; // input data width
-// parameter OUT_WIDTH = 16; // output data width
- parameter OUT_WIDTH = 24; // output data width
- parameter OUT_RSHIFT = 3; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation)
+ parameter CLK_PERIOD = 10; // clock period, ns
+ parameter WIDTH = 24; // input data width (also INPUT_WIDTH of the 2-D units)
+// parameter OUT_WIDTH = 16; // output data width (alternative setting, disabled)
+ parameter OUT_WIDTH = 24; // output data width
+ parameter TRANSPOSE_WIDTH = 25; // width of the transpose memory (intermediate results)
+ parameter OUT_RSHIFT = 3; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation); passed as OUT_RSHIFT1 to the 2-D units
+ parameter OUT_RSHIFT2 = 0; // overall right shift for the second (vertical) pass
+
+ parameter DCT_GAP = 16; // clock cycles the 2-D stimulus idles before starting its last (reversed-data) run
+
reg RST = 1'b1;
reg CLK = 1'b0;
@@ -83,11 +88,13 @@ module dct_tests_01 ();
wire x_we = !phase_in[3] && run_in;
reg [WIDTH-1:0] x_in;
+ reg [WIDTH-1:0] x_in_2d;
reg [WIDTH-1:0] x_out;
reg [WIDTH-1:0] x_ram[0:7];
wire [WIDTH-1:0] x_out_w = x_ram[x_ra];
reg start = 0;
+ reg start2 = 0; // second start for 2d
wire [OUT_WIDTH-1:0] y_dct; // S uppressThisWarning VEditor - simulation only
wire pre2_start_out; // S uppressThisWarning VEditor - simulation only
@@ -103,13 +110,28 @@ module dct_tests_01 ();
wire signed [OUT_WIDTH-1:0] y_out = y_ram[y_ra]; // SuppressThisWarning VEditor - simulation only
reg signed [WIDTH-1:0] data_in[0:63];
reg signed [OUT_WIDTH-1:0] data_out[0:63];
- integer i,j;
+
+ reg signed [WIDTH-1:0] d_in;
+ wire pre_last_in_2d; // dct_iv_8x8_i.pre_last_in
+ wire pre_first_out_2d; // dct_iv_8x8_i.pre_first_out; also drives dct_iv_8x8r_i.start
+ wire pre_busy_2d; // dct_iv_8x8_i.pre_busy
+ wire dv_2d; // dct_iv_8x8_i.dv
+ wire signed [OUT_WIDTH-1:0] d_out_2d; // dct_iv_8x8_i.d_out; also drives dct_iv_8x8r_i.xin
+
+ wire pre_last_in_2dr; // dct_iv_8x8r_i.pre_last_in
+ wire pre_first_out_2dr; // dct_iv_8x8r_i.pre_first_out
+ wire pre_busy_2dr; // dct_iv_8x8r_i.pre_busy
+ wire dv_2dr; // dct_iv_8x8r_i.dv
+ wire signed [OUT_WIDTH-1:0] d_out_2dr; // dct_iv_8x8r_i.d_out
+
+
+ integer i,j, i1, j1; // i1: sample index of the 2-D stimulus loops
initial begin
for (i=0; i<64; i=i+1) begin
`ifdef DCT_INPUT_UNITY
- data_in[i] = (i[2:0] == i[5:3]) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
+ data_in[i] = (i[2:0] == i[5:3]) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
`else
- data_in[i] = $random;
+ data_in[i] = $random;
`endif
end
$display("Input data in line-scan order:");
@@ -147,23 +169,6 @@ module dct_tests_01 ();
if (&i[2:0]) repeat (8) @(posedge CLK);
end
#1 x_in = 0;
-/*
- // running 'one' - just make a period == 17
- repeat (7) begin
- @(posedge CLK);
-#1 x_in = {2'b1,{WIDTH-2{1'b0}}}; // >>x_wa;
- @(posedge CLK);
-#1 x_in = 0;
- repeat (15) @(posedge CLK); // 16+1= 17, non-zero will go through all of the 8 x[i]
- end
- begin
- @(posedge CLK);
-#1 x_in = {2'b1,{WIDTH-2{1'b0}}};
- @(posedge CLK);
-#1 x_in = 0;
- en_x = 0;
- end
-*/
repeat (64) @(posedge CLK);
$display("");
@@ -173,8 +178,44 @@ module dct_tests_01 ();
data_out[i+4],data_out[i+5],data_out[i+6],data_out[i+7]);
end
+// repeat (64) @(posedge CLK);
+// $finish;
+ end
+
+ initial begin // stimulus for the 2-D units: drives x_in_2d and start2 in three 64-sample bursts
+ wait (!RST); // hold until reset is released
+ while (!start) begin // poll once per clock until 'start' is seen high
+ @(posedge CLK);
+ #1;
+ end
+ for (i1 = 0; i1 < 64; i1 = i1+1) begin // burst 1: data_in[0..63]; start2 is still 0 here
+ @(posedge CLK);
+ #1; // inputs change 1 ns after the clock edge
+ x_in_2d = data_in[i1];
+ if (i1 == 63) start2 = 1; // raise start2 together with the last sample of burst 1
+ end
+ for (i1 = 0; i1 < 64; i1 = i1+1) begin // burst 2: same data again, no gap after burst 1
+ @(posedge CLK);
+ #1;
+ start2 = 0; // drops start2 one clock after it was raised (no effect on later iterations)
+ x_in_2d = data_in[i1];
+ end
+
+ repeat (DCT_GAP) @(posedge CLK); // idle for DCT_GAP clocks before burst 3
+ #1;
+ start2 = 1; // start pulse for burst 3, cleared in the first loop iteration below
+ for (i1 = 0; i1 < 64; i1 = i1+1) begin // burst 3: data_in in reverse order (63..0)
+ @(posedge CLK);
+ #1;
+ start2 = 0;
+ x_in_2d = data_in[63-i1];
+ end
+
+ repeat (300) @(posedge CLK); // run 300 more clocks before ending the simulation
 $finish;
+
 end
+
initial j = 0;
always @ (posedge CLK) begin
@@ -285,7 +326,53 @@ module dct_tests_01 ();
.start (start), // input
.dout (y_dct), // output[15:0]
.pre2_start_out (pre2_start_out), // output reg
- .en_out (en_out) // output reg
+ .en_out (en_out), // output reg
+ .y_index () // output[2:0] reg
);
+
+
+ dct_iv_8x8 #( // dct_iv_8x8 unit fed directly by the x_in_2d stimulus
+ .INPUT_WIDTH (WIDTH),
+ .OUT_WIDTH (OUT_WIDTH),
+ .OUT_RSHIFT1 (OUT_RSHIFT),
+ .OUT_RSHIFT2 (OUT_RSHIFT2),
+ .TRANSPOSE_WIDTH (TRANSPOSE_WIDTH),
+ .DSP_B_WIDTH (18),
+ .DSP_A_WIDTH (25),
+ .DSP_P_WIDTH (48)
+ ) dct_iv_8x8_i (
+ .clk (CLK), // input
+ .rst (RST), // input
+ .start (start || start2), // input: fires on either 'start' or 'start2'
+ .xin (x_in_2d), // input signed; connected signal is WIDTH bits wide (not [24:0])
+ .pre_last_in (pre_last_in_2d), // output reg
+ .pre_first_out (pre_first_out_2d), // output
+ .dv (dv_2d), // output
+ .d_out (d_out_2d), // output signed; connected signal is OUT_WIDTH bits wide (not [24:0])
+ .pre_busy (pre_busy_2d) // output reg
+ );
+
+ dct_iv_8x8 #( // second dct_iv_8x8 unit, fed from dct_iv_8x8_i outputs; NOTE(review): presumably a round-trip check - confirm
+ .INPUT_WIDTH (WIDTH),
+ .OUT_WIDTH (OUT_WIDTH),
+ .OUT_RSHIFT1 (OUT_RSHIFT),
+ .OUT_RSHIFT2 (OUT_RSHIFT2),
+ .TRANSPOSE_WIDTH (TRANSPOSE_WIDTH),
+ .DSP_B_WIDTH (18),
+ .DSP_A_WIDTH (25),
+ .DSP_P_WIDTH (48)
+ ) dct_iv_8x8r_i (
+ .clk (CLK), // input
+ .rst (RST), // input
+ .start (pre_first_out_2d), // input: driven by pre_first_out of dct_iv_8x8_i
+ .xin (d_out_2d), // input signed; driven by d_out of dct_iv_8x8_i (OUT_WIDTH-bit signal)
+ .pre_last_in (pre_last_in_2dr), // output reg
+ .pre_first_out (pre_first_out_2dr), // output
+ .dv (dv_2dr), // output
+ .d_out (d_out_2dr), // output signed; connected signal is OUT_WIDTH bits wide (not [24:0])
+ .pre_busy (pre_busy_2dr) // output reg
+ );
+
+
endmodule