diff --git a/dct_tests_01.sav b/dct_tests_01.sav index df3e21c9c3c86c32bac58882eb8bc4ca5c36f6ee..d4720f4e320f3ec330572c668f44cf7fbe93afcc 100644 --- a/dct_tests_01.sav +++ b/dct_tests_01.sav @@ -1,25 +1,26 @@ [*] -[*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI -[*] Tue Dec 6 17:55:24 2016 +[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI +[*] Tue Dec 13 06:32:08 2016 [*] -[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161206105514691.fst" -[dumpfile_mtime] "Tue Dec 6 17:55:14 2016" -[dumpfile_size] 10348 +[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161212230744155.fst" +[dumpfile_mtime] "Tue Dec 13 06:07:44 2016" +[dumpfile_size] 100634 [savefile] "/home/eyesis/git/x393-neon/dct_tests_01.sav" [timestart] 0 [size] 1814 1171 -[pos] 1937 0 -*-18.387537 1752000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +[pos] 1920 0 +*-19.687614 1195000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 [treeopen] dct_tests_01. [treeopen] dct_tests_01.dct_iv8_1d_i. [treeopen] dct_tests_01.dct_iv8_1d_i.dsp_ma_preadd_c_1_i. +[treeopen] dct_tests_01.dct_iv_8x8_i. 
[sst_width] 204 -[signals_width] 305 +[signals_width] 325 [sst_expanded] 1 [sst_vpaned_height] 344 -@800200 +@c00200 -top -@25 +@24 dct_tests_01.i dct_tests_01.j @28 @@ -87,10 +88,50 @@ dct_tests_01.y_we dct_tests_01.phase_y[3:0] dct_tests_01.y_dct[23:0] dct_tests_01.y_out[23:0] -@1000200 +dct_tests_01.dct_iv8_1d_i.y_index[2:0] +@1401200 -top @800200 +-2d-1d +@28 +dct_tests_01.start +@22 +dct_tests_01.x_out[23:0] +@8420 +dct_tests_01.x_out[23:0] +dct_tests_01.dct_iv8_1d_i.d_in[23:0] +dct_tests_01.dct_iv8_1d_i.dout[23:0] +@28 +dct_tests_01.dct_iv8_1d_i.en_out +@22 +dct_tests_01.dct_iv8_1d_i.y_index[2:0] +@8420 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.d_in[23:0] +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.dout[24:0] +@28 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.en_out +@22 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.y_index[2:0] +@8420 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.d_in[23:0] +@8421 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.dout[24:0] +@28 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.en_out +@22 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.y_index[2:0] +@200 +- +@1000200 +-2d-1d +@c00200 -dct_iv8_1d +@22 +[color] 2 +dct_tests_01.dct_iv8_1d_i.phase_cnt[3:0] +@28 +dct_tests_01.dct_iv8_1d_i.en_out +dct_tests_01.dct_iv8_1d_i.run_out @c08022 dct_tests_01.phase_out[3:0] @28 @@ -105,8 +146,6 @@ dct_tests_01.dct_iv8_1d_i.start dct_tests_01.dct_iv8_1d_i.restart dct_tests_01.dct_iv8_1d_i.clk @8022 -[color] 2 -dct_tests_01.dct_iv8_1d_i.phase_cnt[3:0] dct_tests_01.dct_iv8_1d_i.d_in[23:0] dct_tests_01.dct_iv8_1d_i.dsp_ain_1[24:0] @28 @@ -124,8 +163,8 @@ dct_tests_01.dct_iv8_1d_i.dsp_ced_1 @22 dct_tests_01.dct_iv8_1d_i.dsp_cin_1[47:0] @28 -dct_tests_01.dct_iv8_1d_i.dsp_cec_1 dct_tests_01.dct_iv8_1d_i.dsp_neg_m_1 +dct_tests_01.dct_iv8_1d_i.dsp_cec_1 dct_tests_01.dct_iv8_1d_i.dsp_post_add_1 dct_tests_01.dct_iv8_1d_i.dsp_accum_1 @22 @@ -238,7 +277,305 @@ dct_tests_01.dct_iv8_1d_i.pre2_start_out dct_tests_01.dct_iv8_1d_i.rst 
dct_tests_01.dct_iv8_1d_i.run_in dct_tests_01.dct_iv8_1d_i.run_out -@1000200 +@1401200 -dct_iv8_1d +@800200 +-st22d_test +@28 +dct_tests_01.CLK +dct_tests_01.RST +[color] 2 +dct_tests_01.start +[color] 2 +dct_tests_01.start2 +@c00022 +dct_tests_01.x_in_2d[23:0] +@28 +(0)dct_tests_01.x_in_2d[23:0] +(1)dct_tests_01.x_in_2d[23:0] +(2)dct_tests_01.x_in_2d[23:0] +(3)dct_tests_01.x_in_2d[23:0] +(4)dct_tests_01.x_in_2d[23:0] +(5)dct_tests_01.x_in_2d[23:0] +(6)dct_tests_01.x_in_2d[23:0] +(7)dct_tests_01.x_in_2d[23:0] +(8)dct_tests_01.x_in_2d[23:0] +(9)dct_tests_01.x_in_2d[23:0] +(10)dct_tests_01.x_in_2d[23:0] +(11)dct_tests_01.x_in_2d[23:0] +(12)dct_tests_01.x_in_2d[23:0] +(13)dct_tests_01.x_in_2d[23:0] +(14)dct_tests_01.x_in_2d[23:0] +(15)dct_tests_01.x_in_2d[23:0] +(16)dct_tests_01.x_in_2d[23:0] +(17)dct_tests_01.x_in_2d[23:0] +(18)dct_tests_01.x_in_2d[23:0] +(19)dct_tests_01.x_in_2d[23:0] +(20)dct_tests_01.x_in_2d[23:0] +(21)dct_tests_01.x_in_2d[23:0] +(22)dct_tests_01.x_in_2d[23:0] +(23)dct_tests_01.x_in_2d[23:0] +@1401200 +-group_end +@c08420 +dct_tests_01.x_in_2d[23:0] +@28 +(0)dct_tests_01.x_in_2d[23:0] +(1)dct_tests_01.x_in_2d[23:0] +(2)dct_tests_01.x_in_2d[23:0] +(3)dct_tests_01.x_in_2d[23:0] +(4)dct_tests_01.x_in_2d[23:0] +(5)dct_tests_01.x_in_2d[23:0] +(6)dct_tests_01.x_in_2d[23:0] +(7)dct_tests_01.x_in_2d[23:0] +(8)dct_tests_01.x_in_2d[23:0] +(9)dct_tests_01.x_in_2d[23:0] +(10)dct_tests_01.x_in_2d[23:0] +(11)dct_tests_01.x_in_2d[23:0] +(12)dct_tests_01.x_in_2d[23:0] +(13)dct_tests_01.x_in_2d[23:0] +(14)dct_tests_01.x_in_2d[23:0] +(15)dct_tests_01.x_in_2d[23:0] +(16)dct_tests_01.x_in_2d[23:0] +(17)dct_tests_01.x_in_2d[23:0] +(18)dct_tests_01.x_in_2d[23:0] +(19)dct_tests_01.x_in_2d[23:0] +(20)dct_tests_01.x_in_2d[23:0] +(21)dct_tests_01.x_in_2d[23:0] +(22)dct_tests_01.x_in_2d[23:0] +(23)dct_tests_01.x_in_2d[23:0] +@1401200 +-group_end +@28 +dct_tests_01.pre_busy_2d +dct_tests_01.pre_last_in_2d +dct_tests_01.pre_first_out_2d +dct_tests_01.dv_2d +@22 
+dct_tests_01.d_out_2d[23:0] +@28 +dct_tests_01.dv_2dr +@22 +dct_tests_01.d_out_2dr[23:0] +@8420 +dct_tests_01.d_out_2dr[23:0] +@200 +- +@800200 +-dct_iv_8x8 +@28 +dct_tests_01.dct_iv_8x8_i.clk +dct_tests_01.dct_iv_8x8_i.start +dct_tests_01.dct_iv_8x8_i.pre_last_in +dct_tests_01.dct_iv_8x8_i.pre_busy +dct_tests_01.dct_iv_8x8_i.x_run +@c00022 +dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +(1)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +(2)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +(3)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +(4)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +(5)dct_tests_01.dct_iv_8x8_i.x_wa[5:0] +@1401200 +-group_end +@28 +dct_tests_01.dct_iv_8x8_i.dcth_phin_start +@22 +dct_tests_01.dct_iv_8x8_i.dcth_phin_run +@c00022 +dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(1)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(2)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(3)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(4)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(5)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +(6)dct_tests_01.dct_iv_8x8_i.dcth_phin[6:0] +@1401200 +-group_end +@22 +dct_tests_01.dct_iv_8x8_i.x_ra0[2:0] +dct_tests_01.dct_iv_8x8_i.x_ra1[2:0] +@28 +dct_tests_01.dct_iv_8x8_i.dcth_en0 +dct_tests_01.dct_iv_8x8_i.dcth_start_0_r +@22 +dct_tests_01.dct_iv_8x8_i.dcth_xin0[23:0] +@28 +dct_tests_01.dct_iv_8x8_i.dcth_en_out0 +@22 +dct_tests_01.dct_iv_8x8_i.dcth_dout0[24:0] +dct_tests_01.dct_iv_8x8_i.dcth_yindex0[2:0] +@200 +- +@28 +dct_tests_01.dct_iv_8x8_i.dcth_en1 +dct_tests_01.dct_iv_8x8_i.dcth_start_1_r +@22 +dct_tests_01.dct_iv_8x8_i.dcth_xin1[23:0] +@28 +dct_tests_01.dct_iv_8x8_i.dcth_en_out1 +@22 +dct_tests_01.dct_iv_8x8_i.dcth_dout1[24:0] +dct_tests_01.dct_iv_8x8_i.dcth_yindex1[2:0] +@200 +- +@22 +dct_tests_01.dct_iv_8x8_i.transpose_start +@28 +dct_tests_01.dct_iv_8x8_i.transpose_in_run +@22 +dct_tests_01.dct_iv_8x8_i.transpose_w_page[1:0] +[color] 3 +dct_tests_01.dct_iv_8x8_i.transpose_cntr[6:0] +@28 
+dct_tests_01.dct_iv_8x8_i.transpose_wa_decr +@22 +dct_tests_01.dct_iv_8x8_i.transpose_wa_low[2:0] +dct_tests_01.dct_iv_8x8_i.transpose_wa_high[4:0] +dct_tests_01.dct_iv_8x8_i.transpose_wa[7:0] +@28 +dct_tests_01.dct_iv_8x8_i.transpose_we +@22 +[color] 2 +dct_tests_01.dct_iv_8x8_i.transpose_debug_di[7:0] +@28 +dct_tests_01.dct_iv_8x8_i.transpose_out_start +@800022 +dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0] +(1)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0] +(2)dct_tests_01.dct_iv_8x8_i.transpose_out_run[2:0] +@1001200 +-group_end +@c00028 +dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0] +(1)dct_tests_01.dct_iv_8x8_i.transpose_r_page[1:0] +@1401200 +-group_end +@c00022 +dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(1)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(2)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(3)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(4)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(5)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +(6)dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] +@1401200 +-group_end +@22 +dct_tests_01.dct_iv_8x8_i.transpose_ra[7:0] +dct_tests_01.dct_iv_8x8_i.transpose_reg[24:0] +@8420 +dct_tests_01.dct_iv_8x8_i.transpose_out[24:0] +@22 +dct_tests_01.dct_iv_8x8_i.transpose_out[24:0] +dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +@8022 +dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +@22 +dct_tests_01.dct_iv_8x8_i.t_wa[3:0] +@28 +dct_tests_01.dct_iv_8x8_i.t_we0 +dct_tests_01.dct_iv_8x8_i.t_we1 +dct_tests_01.dct_iv_8x8_i.dctv_start_0_r +dct_tests_01.dct_iv_8x8_i.dctv_start_1_r +dct_tests_01.dct_iv_8x8_i.dctv_en0 +dct_tests_01.dct_iv_8x8_i.dctv_en1 +dct_tests_01.dct_iv_8x8_i.dctv_phin_start +dct_tests_01.dct_iv_8x8_i.dctv_phin_run +@c00022 +[color] 2 
+dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(1)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(2)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(3)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(4)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(5)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +(6)dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] +@1401200 +-group_end +@8022 +dct_tests_01.dct_iv_8x8_i.t_ra0[2:0] +dct_tests_01.dct_iv_8x8_i.t_ra1[2:0] +@22 +dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0] +@8420 +dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0] +@22 +dct_tests_01.dct_iv_8x8_i.dctv_xin1[24:0] +dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +dct_tests_01.dct_iv_8x8_i.dctv_debug_xin0[7:0] +@8022 +dct_tests_01.dct_iv_8x8_i.dctv_debug_xin0[7:0] +@22 +dct_tests_01.dct_iv_8x8_i.dctv_debug_xin1[7:0] +@28 +dct_tests_01.dct_iv_8x8_i.dctv_start_0_r +dct_tests_01.dct_iv_8x8_i.dctv_start_1_r +dct_tests_01.dct_iv_8x8_i.dctv_en0 +dct_tests_01.dct_iv_8x8_i.dctv_en1 +dct_tests_01.dct_iv_8x8_i.dctv_en_out0 +dct_tests_01.dct_iv_8x8_i.dctv_en_out1 +@c00022 +dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] +(1)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] +(2)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] +@1401200 +-group_end +@22 +dct_tests_01.dct_iv_8x8_i.dctv_yindex1[2:0] +dct_tests_01.dct_iv_8x8_i.dctv_dout0[24:0] +dct_tests_01.dct_iv_8x8_i.dctv_dout1[24:0] +@28 +dct_tests_01.dct_iv_8x8_i.dctv_out_start +dct_tests_01.dct_iv_8x8_i.dctv_out_run +@22 +dct_tests_01.dct_iv_8x8_i.dctv_out_cntr[6:0] +@28 +dct_tests_01.dct_iv_8x8_i.dctv_out_we_1 +dct_tests_01.dct_iv_8x8_i.dctv_out_sel +@22 +dct_tests_01.dct_iv_8x8_i.dctv_out_wa_1[3:0] +@28 +dct_tests_01.dct_iv_8x8_i.dctv_out_start_1 +dct_tests_01.dct_iv_8x8_i.dctv_out_run_1 +@22 +dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1[6:0] +dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1_w[3:0] +dct_tests_01.dct_iv_8x8_i.dctv_out_reg_1[23:0] +dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_1[2:0] +@28 
+dct_tests_01.dct_iv_8x8_i.dctv_out_we_2 +@22 +dct_tests_01.dct_iv_8x8_i.dctv_out_wa_2[1:0] +@28 +dct_tests_01.dct_iv_8x8_i.dctv_out_run_2 +@22 +dct_tests_01.dct_iv_8x8_i.dctv_out_ra_2[6:0] +dct_tests_01.dct_iv_8x8_i.dctv_out_reg_2[23:0] +dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_2[2:0] +@1000200 +-dct_iv_8x8 +@800200 +-dct_iv_8x8r +@200 +- +@1000200 +-dct_iv_8x8r +-st22d_test [pattern_trace] 1 [pattern_trace] 0 diff --git a/dsp/dct_iv.ods b/dsp/dct_iv.ods index 8248d0a44f3221fa05b88e9873f942ee8610c63c..6d256b7f472248552ae39627f0aa3965b407e248 100644 Binary files a/dsp/dct_iv.ods and b/dsp/dct_iv.ods differ diff --git a/dsp/dct_iv8_1d.v b/dsp/dct_iv8_1d.v index ea93dfcb2adc9441d7ac2a155533178ff83b7774..fdd0be20a79a838c9841e995d3979253d64f101e 100644 --- a/dsp/dct_iv8_1d.v +++ b/dsp/dct_iv8_1d.v @@ -71,7 +71,9 @@ module dct_iv8_1d#( output [OUT_WIDTH -1:0] dout, output reg pre2_start_out, // 2 clock cycle before Y0 output, full dout sequence // start_out-x-Y0-x-Y7-x-Y4-x-Y3-x-Y1-x-Y6-x-Y2-x-Y5 - output reg en_out // valid at the same time slot as pre2_start_out (goes active with pre2_start_out) + output en_out, // valid at the same time slot as pre2_start_out (goes active with pre2_start_out), 1 ahead of data + output reg [2:0] y_index // for simulation - valid with dout - index of the data output + ); // X6-X7-X5-X2-X1-X3-X0-X4-*-X5-X1-X2-*-X4-X7-* // X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* @@ -132,6 +134,9 @@ module dct_iv8_1d#( reg run_in; // receiving input data reg restart; // restarting next block if en was active at phase=14; reg run_out; // running output data + reg en_out_r; + + assign en_out = en_out_r; assign dsp_ain_2 = dsp_p_1 [STAGE1_RSHIFT +: A_WIDTH]; @@ -147,6 +152,23 @@ module dct_iv8_1d#( wire din_zero = ~(|d_in); assign dsp_cin_1 = {{P_WIDTH-WIDTH-COSINE_SHIFT{d_in[WIDTH-1]}},d_in,~d_in[WIDTH-1]^din_zero,{COSINE_SHIFT-1{d_in[WIDTH-1]}}}; + always @ (posedge clk) begin + if (en_out_r) begin + case (phase_cnt[3:1]) + 3'h0: y_index <= 7; + 
3'h1: y_index <= 4; + 3'h2: y_index <= 3; + 3'h3: y_index <= 1; + 3'h4: y_index <= 6; + 3'h5: y_index <= 2; + 3'h6: y_index <= 5; + 3'h7: y_index <= 0; + endcase + end else begin + y_index <= 'bx; + end + end + //register files assign dsp_din_1 = dsp_din_1_ram[dsp_din_1_ra]; @@ -173,7 +195,7 @@ module dct_iv8_1d#( pre2_start_out <= run_out && (phase_cnt == 14); - en_out <= run_out && !phase_cnt[0]; + en_out_r <= run_out && !phase_cnt[0]; // Cosine table, defined to fit into 17 bits for 18-bit signed DSP B-operand case (phase_cnt) diff --git a/dsp/dct_iv_8x8.v b/dsp/dct_iv_8x8.v new file mode 100644 index 0000000000000000000000000000000000000000..73ac5e832e517e94574ca32f965d36fa84b65b2f --- /dev/null +++ b/dsp/dct_iv_8x8.v @@ -0,0 +1,609 @@ +/*! + * Module:dct_iv_8x8 + * @file dct_iv_8x8.v + * @date 2016-12-08 + * @author Andrey Filippov + * + * @brief 2-d DCT-IV implementation, 1 clock/data word. Input in scanline order, output - transposed + * + * @copyright Copyright (c) 2016 Elphel, Inc. + * + * License: + * + * dct_iv_8x8.v is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dct_iv_8x8.v is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + * Additional permission under GNU GPL version 3 section 7: + * If you modify this Program, or any covered work, by linking or combining it + * with independent modules provided by the FPGA vendor only (this permission + * does not extend to any 3-rd party modules, "soft cores" or macros) under + * different license terms solely for the purpose of generating binary "bitstream" + * files and/or simulating the code, the copyright holders of this Program give + * you the right to distribute the covered work without those independent modules + * as long as the source code for them is available from the FPGA vendor free of + * charge, and there is no dependence on any encrypted modules for simulating of + * the combined code. This permission applies to you if the distributed code + * contains all the components and scripts required to completely simulate it + * with at least one of the Free Software programs. + */ +`timescale 1ns/1ps + +module dct_iv_8x8#( + parameter INPUT_WIDTH = 25, + parameter OUT_WIDTH = 25, + parameter OUT_RSHIFT1 = 3, // overall right shift of the result from input, aligned by MSB for pass1 (>=3 will never cause saturation) + parameter OUT_RSHIFT2 = 0, // overall right shift of the result from input, aligned by MSB for pass2 (>=3 will never cause saturation) + parameter TRANSPOSE_WIDTH = 25, // transpose memory width + parameter DSP_B_WIDTH = 18, + parameter DSP_A_WIDTH = 25, + parameter DSP_P_WIDTH = 48, + parameter COSINE_SHIFT= 17, + parameter COS_01_32 = 130441, // int(round((1<<17) * cos( 1*pi/32))) + parameter COS_03_32 = 125428, // int(round((1<<17) * cos( 3*pi/32))) + parameter COS_04_32 = 121095, // int(round((1<<17) * cos( 4*pi/32))) + parameter COS_05_32 = 115595, // int(round((1<<17) * cos( 5*pi/32))) + parameter COS_07_32 = 101320, // int(round((1<<17) * cos( 7*pi/32))) + parameter COS_08_32 = 92682, // int(round((1<<17) * cos( 8*pi/32))) + parameter COS_09_32 = 83151, // int(round((1<<17) * cos( 9*pi/32))) + parameter COS_11_32 
= 61787, // int(round((1<<17) * cos(11*pi/32))) + parameter COS_12_32 = 50159, // int(round((1<<17) * cos(12*pi/32))) + parameter COS_13_32 = 38048, // int(round((1<<17) * cos(13*pi/32))) + parameter COS_15_32 = 12847 // int(round((1<<17) * cos(15*pi/32))) + ) ( + input clk, //!< system clock, posedge + input rst, //!< sync reset + input start, //!< single-cycle start pulse that goes with the first pixel data. + // Next data should be sent in bursts of 8, pause of 8 - total 128 cycles + input signed [INPUT_WIDTH-1:0] xin, //!< input data + output pre_last_in, //!< output high during input of the pre-last of 64 pixels in a 8x8 block (next can be start) + output reg pre_first_out, //!< 1 cycle ahead of the first output in a 64 block + output reg dv, //!< data output valid. WAS: Will go high on the 94-th cycle after the start + output signed [OUT_WIDTH-1:0] d_out, //!< output data + output reg pre_busy); //!< start should come each 64-th cycle (next after pre_last_in), and not after pre_busy + +// 1. Two 16xINPUT_WIDTH memories to feed two of the 'horizontal' 1-dct - they should provide outputs shifted by 1 clock +// 2. 2 of the horizontal DCTs +// 3. common transpose memory plus 2 input reorder memory for each of the vertical DCT +// 4. 2 of the vertical DCTs +// 5.
small memory to combine/reorder outputs (2 stages as 1 x16 memory is not enough) + reg x_run; + reg [5:0] x_wa; + wire dcth_phin_start = x_run && (x_wa[5:0] == 6); + reg dcth_phin_run; + reg dcth_en0; + reg dcth_en1; + reg [6:0] dcth_phin; + reg [2:0] x_ra0; + reg [2:0] x_ra1; + reg signed [INPUT_WIDTH-1:0] x_ram0[0:7]; + reg signed [INPUT_WIDTH-1:0] x_ram1[0:7]; + reg signed [INPUT_WIDTH-1:0] dcth_xin0; + reg signed [INPUT_WIDTH-1:0] dcth_xin1; + + wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout0; + wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout1; +// wire dcth_pre2_start_out0; +// wire dcth_pre2_start_out1; + wire dcth_en_out0; + wire dcth_en_out1; + + wire dcth_start_0_w = dcth_phin_run && (dcth_phin [6:0] ==0); + wire dcth_start_1_w = dcth_phin_run && (dcth_phin [6:0] ==9); + + reg dcth_start_0_r; + reg dcth_start_1_r; + + reg [1:0] transpose_w_page; + reg [6:0] transpose_cntr; // transpose memory counter, [6] == 1 when the last page is being finished + reg transpose_in_run; + wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h10); + reg [2:0] transpose_wa_low; // [2:0] transpose memory low address bits, [3] - other group (of 16) + reg [4:0] transpose_wa_high; // high bits of transpose memory write address + wire [7:0] transpose_wa = {transpose_wa_high,transpose_wa_low}; + wire transpose_wa_decr = (transpose_cntr[0] & ~transpose_cntr[3]); + reg transpose_we; + wire [TRANSPOSE_WIDTH-1:0] transpose_di = transpose_cntr[0]? dcth_dout0: dcth_dout1; + + reg [TRANSPOSE_WIDTH-1:0] transpose_ram[0:255]; + wire [2:0] dcth_yindex0; + wire [2:0] dcth_yindex1; + wire [7:0] transpose_debug_di= {transpose_wa_high, transpose_cntr[0]? 
dcth_yindex0: dcth_yindex1}; + reg [7:0] transpose_debug_ram[0:255]; + + reg [6:0] transpose_rcntr; // transpose read memory counter, [6] == 1 when the last page is being finished + reg [2:0] transpose_out_run; + wire transpose_out_start = transpose_in_run && (transpose_cntr[6:0] == 7'h34); // 7'h33 is actual minimum + reg [1:0] transpose_r_page; + + reg [TRANSPOSE_WIDTH-1:0] transpose_reg; // internal BRAM register + reg [TRANSPOSE_WIDTH-1:0] transpose_out; // output BRAM register + + reg [7:0] transpose_debug_reg; // internal BRAM register + reg [7:0] transpose_debug_out; // output BRAM register + wire [7:0] transpose_ra = {transpose_r_page, transpose_rcntr[2:0], transpose_rcntr[5:3]}; + reg [3:0] t_wa; + wire t_we0 = transpose_out_run[2] && !t_wa[3]; + wire t_we1 = transpose_out_run[2] && t_wa[3]; + reg signed [TRANSPOSE_WIDTH-1:0] t_ram0[0:7]; + reg signed [TRANSPOSE_WIDTH-1:0] t_ram1[0:7]; + reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin0; + reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin1; + + reg signed [7:0] t_debug_ram0[0:7]; + reg signed [7:0] t_debug_ram1[0:7]; + reg signed [7:0] dctv_debug_xin0; // SuppressThisWarning VEditor - simulation only + reg signed [7:0] dctv_debug_xin1; // SuppressThisWarning VEditor - simulation only + + wire signed [OUT_WIDTH-1:0] dctv_dout0; + wire signed [OUT_WIDTH-1:0] dctv_dout1; + wire dctv_en_out0; + wire dctv_en_out1; + wire [2:0] dctv_yindex0; + wire [2:0] dctv_yindex1; + + wire dctv_phin_start = transpose_out_run && (transpose_rcntr[5:0] == 8); + reg dctv_phin_run; + + reg dctv_en0; + reg dctv_en1; + reg [6:0] dctv_phin; + reg [2:0] t_ra0; + reg [2:0] t_ra1; + wire dctv_start_0_w = dctv_phin_run && (dctv_phin [6:0] ==0); + wire dctv_start_1_w = dctv_phin_run && (dctv_phin [6:0] ==9); + reg dctv_start_0_r; + reg dctv_start_1_r; + + reg pre_last_in_r; + + reg [6:0] dctv_out_cntr; // count output data from second (vertical) pass (bit 6 - stopping) + reg dctv_out_run; // + wire dctv_out_start = dctv_phin [6:0] == 'h10; + + reg 
[3:0] dctv_out_wa_1; + reg dctv_out_we_1; + reg dctv_out_sel; // select DCTv channel output; + reg signed [OUT_WIDTH-1:0] dctv_out_ram_1[0:15]; + reg [2:0] dctv_out_debug_ram_1[0:15]; + + reg [6:0] dctv_out_ra_1; + wire [3:0] dctv_out_ra_1_w = {dctv_out_ra_1[3:1], ~dctv_out_ra_1[0]}; + wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0c; // 'h0b; + reg dctv_out_run_1; + reg signed [OUT_WIDTH-1:0] dctv_out_reg_1; + reg [2:0] dctv_out_debug_reg_1; + + reg signed [OUT_WIDTH-1:0] dctv_out_ram_2[0:3]; + reg [2:0] dctv_out_debug_ram_2[0:3]; + reg dctv_out_we_2; + reg [1:0] dctv_out_wa_2; + reg [6:0] dctv_out_ra_2; + wire dctv_out_start_2 = dctv_out_ra_1[6:0] == 2; + reg dctv_out_run_2; + reg signed [OUT_WIDTH-1:0] dctv_out_reg_2; + reg [2:0] dctv_out_debug_reg_2; // SuppressThisWarning VEditor - simulation only + + assign d_out = dctv_out_reg_2; + + assign pre_last_in = pre_last_in_r; + + always @ (posedge clk) begin + if (rst) x_run <= 0; + else if (start) x_run <= 1; + else if (&x_wa[5:0]) x_run <= 0; + + if (!x_run) x_wa <= 0; + else x_wa <= x_wa + 1; + + pre_last_in_r <= x_run && (x_wa[5:0] == 'h3d); + + if (rst) pre_busy <= 0; + else if (pre_last_in_r) pre_busy <= 1; + else if (dcth_phin [5:0] == 5) pre_busy <= 0; // check actual? + + if (rst) dcth_phin_run <= 0; + else if (dcth_phin_start) dcth_phin_run <= 1; + else if (dcth_phin [6:0] == 7'h48) dcth_phin_run <= 0; // check actual? + + if (!dcth_phin_run || dcth_phin_start) dcth_phin <= 0; + else dcth_phin <= dcth_phin + 1; + + if (rst) dcth_en0 <= 0; + else if (dcth_start_0_w) dcth_en0 <= 1; + else if (!x_run) dcth_en0 <= 0; // maybe get rid of this signal and send start for each 8? + + if (rst) dcth_en1 <= 0; + else if (dcth_start_1_w) dcth_en1 <= 1; + else if (dcth_phin [6]) dcth_en1 <= 0; // maybe get rid of this signal and send start for each 8? 
+ + //write input reorder memory + if (x_run && !x_wa[3]) x_ram0[x_wa[2:0]] <= xin; + if (x_run && x_wa[3]) x_ram1[x_wa[2:0]] <= xin; + + //read input reorder memory + dcth_xin0 <= x_ram0[x_ra0[2:0]]; + dcth_xin1 <= x_ram1[x_ra1[2:0]]; + + dcth_start_0_r <= dcth_start_0_w; + dcth_start_1_r <= dcth_start_1_w; + + if (rst) transpose_in_run <= 0; + else if (transpose_start) transpose_in_run <= 1; + else if (transpose_cntr [6:0] == 7'h46) transpose_in_run <= 0; // check actual? + + if (!transpose_in_run || transpose_start) transpose_cntr <= 0; + else transpose_cntr <= transpose_cntr + 1; + + if (rst) transpose_w_page <= 0; + else if (transpose_in_run && (&transpose_cntr[5:0])) transpose_w_page <= transpose_w_page + 1; + + case (transpose_cntr[3:0]) + 4'h0: transpose_wa_low <= 0; + 4'h1: transpose_wa_low <= 1; + 4'h2: transpose_wa_low <= 7; + 4'h3: transpose_wa_low <= 6; + 4'h4: transpose_wa_low <= 4; + 4'h5: transpose_wa_low <= 2; + 4'h6: transpose_wa_low <= 3; + 4'h7: transpose_wa_low <= 5; + 4'h8: transpose_wa_low <= 1; + 4'h9: transpose_wa_low <= 0; + 4'ha: transpose_wa_low <= 6; + 4'hb: transpose_wa_low <= 7; + 4'hc: transpose_wa_low <= 2; + 4'hd: transpose_wa_low <= 4; + 4'he: transpose_wa_low <= 5; + 4'hf: transpose_wa_low <= 3; + endcase + transpose_wa_high <= {transpose_w_page, transpose_cntr[5:4], transpose_cntr[0]} - {transpose_wa_decr,1'b0}; + transpose_we <= dcth_en_out0 || dcth_en_out1; + // Write transpose memory) + if (transpose_we) transpose_ram[transpose_wa] <= transpose_di; + if (transpose_we) transpose_debug_ram[transpose_wa] <= transpose_debug_di; +// if (transpose_we) $display("%d %d @%t",transpose_cntr, transpose_wa, $time) ; + + if (rst) transpose_out_run[0] <= 0; + else if (transpose_out_start) transpose_out_run[0] <= 1; + else if (&transpose_rcntr[5:0]) transpose_out_run[0] <= 0; // check actual? 
+ + transpose_out_run[2:1] <= transpose_out_run[1:0]; + + if (!transpose_out_run[0] || transpose_out_start) transpose_rcntr <= 0; + else transpose_rcntr <= transpose_rcntr + 1; + + if (transpose_out_start) transpose_r_page <= transpose_w_page; + + // Read transpose memory to 2 small reorder memories, use BRAM register + if (transpose_out_run[0]) transpose_reg <= transpose_ram[transpose_ra]; + if (transpose_out_run[1]) transpose_out <= transpose_reg; + if (transpose_out_run[0]) transpose_debug_reg <= transpose_debug_ram[transpose_ra]; + if (transpose_out_run[1]) transpose_debug_out <= transpose_debug_reg; + + if (!transpose_out_run[2]) t_wa <= 0; + else t_wa <= t_wa+1; + + if (rst) dctv_phin_run <= 0; + else if (dctv_phin_start) dctv_phin_run <= 1; + else if (dctv_phin [6:0] == 7'h48) dctv_phin_run <= 0; // check actual? + + + if (!dctv_phin_run || dctv_phin_start) dctv_phin <= 0; + else dctv_phin <= dctv_phin + 1; + + if (rst) dctv_en0 <= 0; + else if (dctv_start_0_w) dctv_en0 <= 1; + else if (!transpose_out_run[2]) dctv_en0 <= 0; // maybe get rid of this signal and send start for each 8? + + if (rst) dctv_en1 <= 0; + else if (dctv_start_1_w) dctv_en1 <= 1; + else if (dctv_phin[6]) dctv_en1 <= 0; // maybe get rid of this signal and send start for each 8?
+ + if (t_we0 || t_we1) $display("%d %d",transpose_rcntr-2, transpose_out) ; + + //write vertical dct input reorder memory + if (t_we0) t_ram0[t_wa[2:0]] <= transpose_out; + if (t_we1) t_ram1[t_wa[2:0]] <= transpose_out; + + if (t_we0) t_debug_ram0[t_wa[2:0]] <= transpose_debug_out; + if (t_we1) t_debug_ram1[t_wa[2:0]] <= transpose_debug_out; + + //read vertical dct input reorder memory + dctv_xin0 <= t_ram0[t_ra0[2:0]]; + dctv_xin1 <= t_ram1[t_ra1[2:0]]; + + dctv_start_0_r <= dctv_start_0_w; + dctv_start_1_r <= dctv_start_1_w; + + dctv_debug_xin0 <= t_debug_ram0[t_ra0[2:0]]; + dctv_debug_xin1 <= t_debug_ram1[t_ra1[2:0]]; + + // Reordering data from a pair of vertical DCTs - 2 steps, 1 is not enough + if (rst) dctv_out_run <= 0; + else if (dctv_out_start) dctv_out_run <= 1; + else if (dctv_out_cntr[6:0] == 'h47) dctv_out_run <= 0; + + if (!dctv_out_run || dctv_out_start) dctv_out_cntr <= 0; + else dctv_out_cntr <= dctv_out_cntr + 1; + + dctv_out_we_1 <= dctv_en_out0 || dctv_en_out1; + + dctv_out_sel <= dctv_out_cntr[0]; + + case (dctv_out_cntr[3:0]) + 4'h0: dctv_out_wa_1 <= 0; + 4'h1: dctv_out_wa_1 <= 9; + 4'h2: dctv_out_wa_1 <= 7; + 4'h3: dctv_out_wa_1 <= 14; + 4'h4: dctv_out_wa_1 <= 4; + 4'h5: dctv_out_wa_1 <= 10; + 4'h6: dctv_out_wa_1 <= 3; + 4'h7: dctv_out_wa_1 <= 13; + 4'h8: dctv_out_wa_1 <= 1; + 4'h9: dctv_out_wa_1 <= 8; + 4'ha: dctv_out_wa_1 <= 6; + 4'hb: dctv_out_wa_1 <= 15; + 4'hc: dctv_out_wa_1 <= 2; + 4'hd: dctv_out_wa_1 <= 12; + 4'he: dctv_out_wa_1 <= 5; + 4'hf: dctv_out_wa_1 <= 11; + endcase + + // write first stage of output reordering + if (dctv_out_we_1) dctv_out_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_dout1: dctv_dout0; + if (dctv_out_we_1) dctv_out_debug_ram_1[dctv_out_wa_1] <= dctv_out_sel? 
dctv_yindex1: dctv_yindex0; + + if (rst) dctv_out_run_1 <= 0; + else if (dctv_out_start_1) dctv_out_run_1 <= 1; + else if (&dctv_out_ra_1[5:0]) dctv_out_run_1 <= 0; + + if (!dctv_out_run_1 || dctv_out_start_1) dctv_out_ra_1 <= 0; + else dctv_out_ra_1 <= dctv_out_ra_1 + 1; + // reading first stage of output reorder RAM + if (dctv_out_run_1) dctv_out_reg_1 <= dctv_out_ram_1[dctv_out_ra_1_w]; + if (dctv_out_run_1) dctv_out_debug_reg_1 <= dctv_out_debug_ram_1[dctv_out_ra_1_w]; + + // last stage of the output reordering - 4 register memory + + dctv_out_we_2 <= dctv_out_run_1; + dctv_out_wa_2 <= dctv_out_ra_1_w[1:0]; + + // write last stage of output reordering + if (dctv_out_we_2) dctv_out_ram_2[dctv_out_wa_2] <= dctv_out_reg_1; + if (dctv_out_we_2) dctv_out_debug_ram_2[dctv_out_wa_2] <= dctv_out_debug_reg_1; + + if (rst) dctv_out_run_2 <= 0; + else if (dctv_out_start_2) dctv_out_run_2 <= 1; + else if (&dctv_out_ra_2[5:0]) dctv_out_run_2 <= 0; + + if (!dctv_out_run_2 || dctv_out_start_2) dctv_out_ra_2 <= 0; + else dctv_out_ra_2 <= dctv_out_ra_2 + 1; + + // reading last stage of output reorder RAM + if (dctv_out_run_2) dctv_out_reg_2 <= dctv_out_ram_2[dctv_out_ra_2[1:0]]; + if (dctv_out_run_2) dctv_out_debug_reg_2 <= dctv_out_debug_ram_2[dctv_out_ra_2[1:0]]; + + pre_first_out <= dctv_out_ra_1[6:0] == 2; + + dv <= dctv_out_run_2; + end + + always @ (posedge clk) begin + //X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* + case (dcth_phin[3:0]) + 4'h0: x_ra0 <= 2; + 4'h1: x_ra0 <= 7; + 4'h2: x_ra0 <= 3; + 4'h3: x_ra0 <= 4; + 4'h4: x_ra0 <= 5; + 4'h5: x_ra0 <= 6; + 4'h6: x_ra0 <= 0; + 4'h7: x_ra0 <= 1; + 4'h8: x_ra0 <= 'bx; + 4'h9: x_ra0 <= 3; + 4'ha: x_ra0 <= 5; + 4'hb: x_ra0 <= 4; + 4'hc: x_ra0 <= 'bx; + 4'hd: x_ra0 <= 6; + 4'he: x_ra0 <= 7; + 4'hf: x_ra0 <= 'bx; + endcase + case (dcth_phin[3:0]) + 4'h0: x_ra1 <= 1; + 4'h1: x_ra1 <= 'bx; + 4'h2: x_ra1 <= 3; + 4'h3: x_ra1 <= 5; + 4'h4: x_ra1 <= 4; + 4'h5: x_ra1 <= 'bx; + 4'h6: x_ra1 <= 6; + 4'h7: x_ra1 <= 7; + 4'h8: x_ra1 <=
'bx; + 4'h9: x_ra1 <= 2; + 4'ha: x_ra1 <= 7; + 4'hb: x_ra1 <= 3; + 4'hc: x_ra1 <= 4; + 4'hd: x_ra1 <= 5; + 4'he: x_ra1 <= 6; + 4'hf: x_ra1 <= 0; + endcase + end + + always @ (posedge clk) begin + //X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* + case (dctv_phin[3:0]) + 4'h0: t_ra0 <= 2; + 4'h1: t_ra0 <= 7; + 4'h2: t_ra0 <= 3; + 4'h3: t_ra0 <= 4; + 4'h4: t_ra0 <= 5; + 4'h5: t_ra0 <= 6; + 4'h6: t_ra0 <= 0; + 4'h7: t_ra0 <= 1; + 4'h8: t_ra0 <= 'bx; + 4'h9: t_ra0 <= 3; + 4'ha: t_ra0 <= 5; + 4'hb: t_ra0 <= 4; + 4'hc: t_ra0 <= 'bx; + 4'hd: t_ra0 <= 6; + 4'he: t_ra0 <= 7; + 4'hf: t_ra0 <= 'bx; + endcase + case (dctv_phin[3:0]) + 4'h0: t_ra1 <= 1; + 4'h1: t_ra1 <= 'bx; + 4'h2: t_ra1 <= 3; + 4'h3: t_ra1 <= 5; + 4'h4: t_ra1 <= 4; + 4'h5: t_ra1 <= 'bx; + 4'h6: t_ra1 <= 6; + 4'h7: t_ra1 <= 7; + 4'h8: t_ra1 <= 'bx; + 4'h9: t_ra1 <= 2; + 4'ha: t_ra1 <= 7; + 4'hb: t_ra1 <= 3; + 4'hc: t_ra1 <= 4; + 4'hd: t_ra1 <= 5; + 4'he: t_ra1 <= 6; + 4'hf: t_ra1 <= 0; + endcase + end + + dct_iv8_1d #( + .WIDTH (INPUT_WIDTH), + .OUT_WIDTH (TRANSPOSE_WIDTH), + .OUT_RSHIFT (OUT_RSHIFT1), + .B_WIDTH (DSP_B_WIDTH), + .A_WIDTH (DSP_A_WIDTH), + .P_WIDTH (DSP_P_WIDTH), + .COSINE_SHIFT (COSINE_SHIFT), + .COS_01_32 (COS_01_32), + .COS_03_32 (COS_03_32), + .COS_04_32 (COS_04_32), + .COS_05_32 (COS_05_32), + .COS_07_32 (COS_07_32), + .COS_08_32 (COS_08_32), + .COS_09_32 (COS_09_32), + .COS_11_32 (COS_11_32), + .COS_12_32 (COS_12_32), + .COS_13_32 (COS_13_32), + .COS_15_32 (COS_15_32) + ) dct_iv8_1d_pass1_0_i ( + .clk (clk), // input + .rst (rst), // input + .en (dcth_en0), // input + .d_in (dcth_xin0), // input[23:0] + .start (dcth_start_0_r), // input + .dout (dcth_dout0), // output[23:0] + .pre2_start_out (), // output reg + .en_out (dcth_en_out0), // output reg + .y_index (dcth_yindex0) // output[2:0] reg + + ); + + dct_iv8_1d #( + .WIDTH (INPUT_WIDTH), + .OUT_WIDTH (TRANSPOSE_WIDTH), + .OUT_RSHIFT (OUT_RSHIFT1), + .B_WIDTH (DSP_B_WIDTH), + .A_WIDTH (DSP_A_WIDTH), + .P_WIDTH (DSP_P_WIDTH), + 
.COSINE_SHIFT (COSINE_SHIFT), + .COS_01_32 (COS_01_32), + .COS_03_32 (COS_03_32), + .COS_04_32 (COS_04_32), + .COS_05_32 (COS_05_32), + .COS_07_32 (COS_07_32), + .COS_08_32 (COS_08_32), + .COS_09_32 (COS_09_32), + .COS_11_32 (COS_11_32), + .COS_12_32 (COS_12_32), + .COS_13_32 (COS_13_32), + .COS_15_32 (COS_15_32) + ) dct_iv8_1d_pass1_1_i ( + .clk (clk), // input + .rst (rst), // input + .en (dcth_en1), // input + .d_in (dcth_xin1), // input[23:0] + .start (dcth_start_1_r), // input + .dout (dcth_dout1), // output[23:0] + .pre2_start_out (), // output reg + .en_out (dcth_en_out1), // output reg + .y_index (dcth_yindex1) // output[2:0] reg + + ); +//dcth_phin_run && (dcth_phin [6:0] ==9) + + dct_iv8_1d #( + .WIDTH (TRANSPOSE_WIDTH), + .OUT_WIDTH (OUT_WIDTH), + .OUT_RSHIFT (OUT_RSHIFT2), + .B_WIDTH (DSP_B_WIDTH), + .A_WIDTH (DSP_A_WIDTH), + .P_WIDTH (DSP_P_WIDTH), + .COSINE_SHIFT (COSINE_SHIFT), + .COS_01_32 (COS_01_32), + .COS_03_32 (COS_03_32), + .COS_04_32 (COS_04_32), + .COS_05_32 (COS_05_32), + .COS_07_32 (COS_07_32), + .COS_08_32 (COS_08_32), + .COS_09_32 (COS_09_32), + .COS_11_32 (COS_11_32), + .COS_12_32 (COS_12_32), + .COS_13_32 (COS_13_32), + .COS_15_32 (COS_15_32) + ) dct_iv8_1d_pass2_0_i ( + .clk (clk), // input + .rst (rst), // input + .en (dctv_en0), // input + .d_in (dctv_xin0), // input[23:0] + .start (dctv_start_0_r), // input + .dout (dctv_dout0), // output[23:0] + .pre2_start_out (), // output reg + .en_out (dctv_en_out0), // output reg + .y_index (dctv_yindex0) // output[2:0] reg + + ); + + dct_iv8_1d #( + .WIDTH (TRANSPOSE_WIDTH), + .OUT_WIDTH (OUT_WIDTH), + .OUT_RSHIFT (OUT_RSHIFT2), + .B_WIDTH (DSP_B_WIDTH), + .A_WIDTH (DSP_A_WIDTH), + .P_WIDTH (DSP_P_WIDTH), + .COSINE_SHIFT (COSINE_SHIFT), + .COS_01_32 (COS_01_32), + .COS_03_32 (COS_03_32), + .COS_04_32 (COS_04_32), + .COS_05_32 (COS_05_32), + .COS_07_32 (COS_07_32), + .COS_08_32 (COS_08_32), + .COS_09_32 (COS_09_32), + .COS_11_32 (COS_11_32), + .COS_12_32 (COS_12_32), + .COS_13_32 (COS_13_32), 
+ .COS_15_32 (COS_15_32) + ) dct_iv8_1d_pass2_1_i ( + .clk (clk), // input + .rst (rst), // input + .en (dctv_en1), // input + .d_in (dctv_xin1), // input[23:0] + .start (dctv_start_1_r), // input + .dout (dctv_dout1), // output[23:0] + .pre2_start_out (), // output reg + .en_out (dctv_en_out1), // output reg + .y_index (dctv_yindex1) // output[2:0] reg + ); + +endmodule + diff --git a/dsp/dct_tests_01.tf b/dsp/dct_tests_01.tf index 0e6e0633c361672fd131752ec41ee59170c17e80..cde8694062d0177900ae7e565d2a4f3579f7bdbe 100644 --- a/dsp/dct_tests_01.tf +++ b/dsp/dct_tests_01.tf @@ -40,7 +40,7 @@ `timescale 1ns/1ps // No saturation here, and no rounding as we do not need to match decoder (be bit-precise), skipping rounding adder // will reduce needed resources -//`define DCT_INPUT_UNITY +`define DCT_INPUT_UNITY module dct_tests_01 (); // parameter fstname="dct_tests_01.fst"; `ifdef IVERILOG @@ -61,11 +61,16 @@ module dct_tests_01 (); `endif // CVC `endif // IVERILOG - parameter CLK_PERIOD = 10; // ns - parameter WIDTH = 24; // input data width -// parameter OUT_WIDTH = 16; // output data width - parameter OUT_WIDTH = 24; // output data width - parameter OUT_RSHIFT = 3; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation) + parameter CLK_PERIOD = 10; // ns + parameter WIDTH = 24; // input data width +// parameter OUT_WIDTH = 16; // output data width + parameter OUT_WIDTH = 24; // output data width + parameter TRANSPOSE_WIDTH = 25; // width of the transpose memory (intermediate results) + parameter OUT_RSHIFT = 3; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation) + parameter OUT_RSHIFT2 = 0; // overall right shift for the second (vertical) pass + + parameter DCT_GAP = 16; // between runs + reg RST = 1'b1; reg CLK = 1'b0; @@ -83,11 +88,13 @@ module dct_tests_01 (); wire x_we = !phase_in[3] && run_in; reg [WIDTH-1:0] x_in; + reg [WIDTH-1:0] x_in_2d; reg [WIDTH-1:0] x_out; reg 
[WIDTH-1:0] x_ram[0:7]; wire [WIDTH-1:0] x_out_w = x_ram[x_ra]; reg start = 0; + reg start2 = 0; // second start for 2d wire [OUT_WIDTH-1:0] y_dct; // S uppressThisWarning VEditor - simulation only wire pre2_start_out; // S uppressThisWarning VEditor - simulation only @@ -103,13 +110,28 @@ module dct_tests_01 (); wire signed [OUT_WIDTH-1:0] y_out = y_ram[y_ra]; // SuppressThisWarning VEditor - simulation only reg signed [WIDTH-1:0] data_in[0:63]; reg signed [OUT_WIDTH-1:0] data_out[0:63]; - integer i,j; + + reg signed [WIDTH-1:0] d_in; + wire pre_last_in_2d; + wire pre_first_out_2d; + wire pre_busy_2d; + wire dv_2d; + wire signed [OUT_WIDTH-1:0] d_out_2d; + + wire pre_last_in_2dr; + wire pre_first_out_2dr; + wire pre_busy_2dr; + wire dv_2dr; + wire signed [OUT_WIDTH-1:0] d_out_2dr; + + + integer i,j, i1, j1; initial begin for (i=0; i<64; i=i+1) begin `ifdef DCT_INPUT_UNITY - data_in[i] = (i[2:0] == i[5:3]) ? {2'b1,{WIDTH-2{1'b0}}} : 0; + data_in[i] = (i[2:0] == i[5:3]) ? {2'b1,{WIDTH-2{1'b0}}} : 0; `else - data_in[i] = $random; + data_in[i] = $random; `endif end $display("Input data in line-scan order:"); @@ -147,23 +169,6 @@ module dct_tests_01 (); if (&i[2:0]) repeat (8) @(posedge CLK); end #1 x_in = 0; -/* - // running 'one' - just make a period == 17 - repeat (7) begin - @(posedge CLK); -#1 x_in = {2'b1,{WIDTH-2{1'b0}}}; // >>x_wa; - @(posedge CLK); -#1 x_in = 0; - repeat (15) @(posedge CLK); // 16+1= 17, non-zero will go through all of the 8 x[i] - end - begin - @(posedge CLK); -#1 x_in = {2'b1,{WIDTH-2{1'b0}}}; - @(posedge CLK); -#1 x_in = 0; - en_x = 0; - end -*/ repeat (64) @(posedge CLK); $display(""); @@ -173,8 +178,44 @@ module dct_tests_01 (); data_out[i+4],data_out[i+5],data_out[i+6],data_out[i+7]); end +// repeat (64) @(posedge CLK); +// $finish; + end + + initial begin + wait (!RST); + while (!start) begin + @(posedge CLK); + #1; + end + for (i1 = 0; i1 < 64; i1 = i1+1) begin + @(posedge CLK); + #1; + x_in_2d = data_in[i1]; + if (i1 == 63) start2 = 
1; + end + for (i1 = 0; i1 < 64; i1 = i1+1) begin + @(posedge CLK); + #1; + start2 = 0; + x_in_2d = data_in[i1]; + end + + repeat (DCT_GAP) @(posedge CLK); + #1; + start2 = 1; + for (i1 = 0; i1 < 64; i1 = i1+1) begin + @(posedge CLK); + #1; + start2 = 0; + x_in_2d = data_in[63-i1]; + end + + repeat (300) @(posedge CLK); $finish; + end + initial j = 0; always @ (posedge CLK) begin @@ -285,7 +326,53 @@ module dct_tests_01 (); .start (start), // input .dout (y_dct), // output[15:0] .pre2_start_out (pre2_start_out), // output reg - .en_out (en_out) // output reg + .en_out (en_out), // output reg + .y_index () // output[2:0] reg ); + + + dct_iv_8x8 #( + .INPUT_WIDTH (WIDTH), + .OUT_WIDTH (OUT_WIDTH), + .OUT_RSHIFT1 (OUT_RSHIFT), + .OUT_RSHIFT2 (OUT_RSHIFT2), + .TRANSPOSE_WIDTH (TRANSPOSE_WIDTH), + .DSP_B_WIDTH (18), + .DSP_A_WIDTH (25), + .DSP_P_WIDTH (48) + ) dct_iv_8x8_i ( + .clk (CLK), // input + .rst (RST), // input + .start (start || start2), // input + .xin (x_in_2d), // input[24:0] signed + .pre_last_in (pre_last_in_2d), // output reg + .pre_first_out (pre_first_out_2d), // output + .dv (dv_2d), // output + .d_out (d_out_2d), // output[24:0] signed + .pre_busy (pre_busy_2d) // output reg + ); + + dct_iv_8x8 #( + .INPUT_WIDTH (WIDTH), + .OUT_WIDTH (OUT_WIDTH), + .OUT_RSHIFT1 (OUT_RSHIFT), + .OUT_RSHIFT2 (OUT_RSHIFT2), + .TRANSPOSE_WIDTH (TRANSPOSE_WIDTH), + .DSP_B_WIDTH (18), + .DSP_A_WIDTH (25), + .DSP_P_WIDTH (48) + ) dct_iv_8x8r_i ( + .clk (CLK), // input + .rst (RST), // input + .start (pre_first_out_2d), // input + .xin (d_out_2d), // input[24:0] signed + .pre_last_in (pre_last_in_2dr), // output reg + .pre_first_out (pre_first_out_2dr), // output + .dv (dv_2dr), // output + .d_out (d_out_2dr), // output[24:0] signed + .pre_busy (pre_busy_2dr) // output reg + ); + + endmodule