From a6f8bc04fbb7df79bac049035821a9f15b770468 Mon Sep 17 00:00:00 2001 From: AndreyFilippov Date: Tue, 13 Dec 2016 12:24:05 -0700 Subject: [PATCH] twice dct-iv-2d returned original --- dct_tests_01.sav | 110 +++++++++++++++++++++++++++++++++----------- dsp/dct_iv8_1d.v | 23 +++++---- dsp/dct_iv_8x8.v | 29 ++++++------ dsp/dct_tests_01.tf | 2 +- 4 files changed, 114 insertions(+), 50 deletions(-) diff --git a/dct_tests_01.sav b/dct_tests_01.sav index d4720f4..fb63551 100644 --- a/dct_tests_01.sav +++ b/dct_tests_01.sav @@ -1,21 +1,23 @@ [*] [*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI -[*] Tue Dec 13 06:32:08 2016 +[*] Tue Dec 13 19:22:06 2016 [*] -[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161212230744155.fst" -[dumpfile_mtime] "Tue Dec 13 06:07:44 2016" -[dumpfile_size] 100634 +[dumpfile] "/home/eyesis/git/x393-neon/simulation/dct_tests_01-20161213121815935.fst" +[dumpfile_mtime] "Tue Dec 13 19:18:19 2016" +[dumpfile_size] 101333 [savefile] "/home/eyesis/git/x393-neon/dct_tests_01.sav" [timestart] 0 [size] 1814 1171 -[pos] 1920 0 -*-19.687614 1195000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +[pos] 1912 0 +*-19.687614 1470200 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 [treeopen] dct_tests_01. [treeopen] dct_tests_01.dct_iv8_1d_i. [treeopen] dct_tests_01.dct_iv8_1d_i.dsp_ma_preadd_c_1_i. [treeopen] dct_tests_01.dct_iv_8x8_i. +[treeopen] dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i. +[treeopen] dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i. [sst_width] 204 -[signals_width] 325 +[signals_width] 264 [sst_expanded] 1 [sst_vpaned_height] 344 @c00200 @@ -99,24 +101,52 @@ dct_tests_01.start dct_tests_01.x_out[23:0] @8420 dct_tests_01.x_out[23:0] +@28 +dct_tests_01.dct_iv8_1d_i.start +@22 +dct_tests_01.dct_iv8_1d_i.phase_cnt[3:0] +@8420 dct_tests_01.dct_iv8_1d_i.d_in[23:0] +@28 +dct_tests_01.dct_iv8_1d_i.pre2_start_out +@8420 +[color] 7 dct_tests_01.dct_iv8_1d_i.dout[23:0] +dct_tests_01.y_dct[23:0] +@22 +dct_tests_01.y_wa[2:0] @28 +dct_tests_01.y_we dct_tests_01.dct_iv8_1d_i.en_out +dct_tests_01.dct_iv8_1d_i.en_out_r @22 dct_tests_01.dct_iv8_1d_i.y_index[2:0] +dct_tests_01.y_ra[2:0] +@28 +dct_tests_01.y_dv +@8420 +dct_tests_01.y_out[23:0] +@28 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.start +@22 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.phase_cnt[3:0] @8420 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.d_in[23:0] -dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.dout[24:0] +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.dout[23:0] @28 +[color] 7 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.en_out @22 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_0_i.y_index[2:0] +@28 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.start +@22 +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.phase_cnt[3:0] @8420 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.d_in[23:0] -@8421 -dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.dout[24:0] +dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.dout[23:0] @28 +[color] 7 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.en_out @22 dct_tests_01.dct_iv_8x8_i.dct_iv8_1d_pass1_1_i.y_index[2:0] @@ -407,8 +437,8 @@ dct_tests_01.dct_iv_8x8_i.dcth_xin0[23:0] @28 dct_tests_01.dct_iv_8x8_i.dcth_en_out0 @22 -dct_tests_01.dct_iv_8x8_i.dcth_dout0[24:0] dct_tests_01.dct_iv_8x8_i.dcth_yindex0[2:0] +dct_tests_01.dct_iv_8x8_i.dcth_dout0[23:0] @200 - @28 @@ -419,8 +449,8 @@ dct_tests_01.dct_iv_8x8_i.dcth_xin1[23:0] @28 dct_tests_01.dct_iv_8x8_i.dcth_en_out1 @22 -dct_tests_01.dct_iv_8x8_i.dcth_dout1[24:0] dct_tests_01.dct_iv_8x8_i.dcth_yindex1[2:0] +dct_tests_01.dct_iv_8x8_i.dcth_dout1[23:0] @200 - @22 @@ -438,7 +468,7 @@ dct_tests_01.dct_iv_8x8_i.transpose_wa_low[2:0] dct_tests_01.dct_iv_8x8_i.transpose_wa_high[4:0] dct_tests_01.dct_iv_8x8_i.transpose_wa[7:0] @28 -dct_tests_01.dct_iv_8x8_i.transpose_we +(0)dct_tests_01.dct_iv_8x8_i.transpose_we[1:0] @22 [color] 2 dct_tests_01.dct_iv_8x8_i.transpose_debug_di[7:0] @@ -472,16 +502,39 @@ dct_tests_01.dct_iv_8x8_i.transpose_rcntr[6:0] @1401200 -group_end @22 +[color] 5 dct_tests_01.dct_iv_8x8_i.transpose_ra[7:0] -dct_tests_01.dct_iv_8x8_i.transpose_reg[24:0] @8420 -dct_tests_01.dct_iv_8x8_i.transpose_out[24:0] -@22 -dct_tests_01.dct_iv_8x8_i.transpose_out[24:0] +dct_tests_01.dct_iv_8x8_i.transpose_reg[23:0] +dct_tests_01.dct_iv_8x8_i.transpose_out[23:0] +@c00022 dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(1)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(2)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(3)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(4)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(5)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(6)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +(7)dct_tests_01.dct_iv_8x8_i.transpose_debug_reg[7:0] +@1401200 +-group_end +@22 dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] -@8022 +@c08022 dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +@28 +(0)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(1)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(2)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(3)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(4)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(5)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(6)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +(7)dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] +@1401200 +-group_end @22 dct_tests_01.dct_iv_8x8_i.t_wa[3:0] @28 @@ -510,11 +563,6 @@ dct_tests_01.dct_iv_8x8_i.dctv_phin[6:0] dct_tests_01.dct_iv_8x8_i.t_ra0[2:0] dct_tests_01.dct_iv_8x8_i.t_ra1[2:0] @22 -dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0] -@8420 -dct_tests_01.dct_iv_8x8_i.dctv_xin0[24:0] -@22 -dct_tests_01.dct_iv_8x8_i.dctv_xin1[24:0] dct_tests_01.dct_iv_8x8_i.transpose_debug_out[7:0] dct_tests_01.dct_iv_8x8_i.dctv_debug_xin0[7:0] @8022 @@ -528,6 +576,8 @@ dct_tests_01.dct_iv_8x8_i.dctv_en0 dct_tests_01.dct_iv_8x8_i.dctv_en1 dct_tests_01.dct_iv_8x8_i.dctv_en_out0 dct_tests_01.dct_iv_8x8_i.dctv_en_out1 +@8420 +dct_tests_01.dct_iv_8x8_i.dctv_dout0[23:0] @c00022 dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] @28 @@ -536,17 +586,17 @@ dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] (2)dct_tests_01.dct_iv_8x8_i.dctv_yindex0[2:0] @1401200 -group_end +@8420 +dct_tests_01.dct_iv_8x8_i.dctv_dout1[23:0] @22 dct_tests_01.dct_iv_8x8_i.dctv_yindex1[2:0] -dct_tests_01.dct_iv_8x8_i.dctv_dout0[24:0] -dct_tests_01.dct_iv_8x8_i.dctv_dout1[24:0] @28 dct_tests_01.dct_iv_8x8_i.dctv_out_start dct_tests_01.dct_iv_8x8_i.dctv_out_run @22 dct_tests_01.dct_iv_8x8_i.dctv_out_cntr[6:0] @28 -dct_tests_01.dct_iv_8x8_i.dctv_out_we_1 +(0)dct_tests_01.dct_iv_8x8_i.dctv_out_we_1[1:0] dct_tests_01.dct_iv_8x8_i.dctv_out_sel @22 dct_tests_01.dct_iv_8x8_i.dctv_out_wa_1[3:0] @@ -556,7 +606,11 @@ dct_tests_01.dct_iv_8x8_i.dctv_out_run_1 @22 dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1[6:0] dct_tests_01.dct_iv_8x8_i.dctv_out_ra_1_w[3:0] +@420 +dct_tests_01.dct_iv_8x8_i.dctv_out_reg_1[23:0] +@8420 dct_tests_01.dct_iv_8x8_i.dctv_out_reg_1[23:0] +@22 dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_1[2:0] @28 dct_tests_01.dct_iv_8x8_i.dctv_out_we_2 @@ -566,7 +620,11 @@ dct_tests_01.dct_iv_8x8_i.dctv_out_wa_2[1:0] dct_tests_01.dct_iv_8x8_i.dctv_out_run_2 @22 dct_tests_01.dct_iv_8x8_i.dctv_out_ra_2[6:0] +@420 dct_tests_01.dct_iv_8x8_i.dctv_out_reg_2[23:0] +@8421 +dct_tests_01.dct_iv_8x8_i.dctv_out_reg_2[23:0] +@22 dct_tests_01.dct_iv_8x8_i.dctv_out_debug_reg_2[2:0] @1000200 -dct_iv_8x8 diff --git a/dsp/dct_iv8_1d.v b/dsp/dct_iv8_1d.v index fdd0be2..548c981 100644 --- a/dsp/dct_iv8_1d.v +++ b/dsp/dct_iv8_1d.v @@ -71,7 +71,7 @@ module dct_iv8_1d#( output [OUT_WIDTH -1:0] dout, output reg pre2_start_out, // 2 clock cycle before Y0 output, full dout sequence // start_out-x-Y0-x-Y7-x-Y4-x-Y3-x-Y1-x-Y6-x-Y2-x-Y5 - output en_out, // valid at the same time slot as pre2_start_out (goes active with pre2_start_out), 1 ahead of data + output en_out, // valid at the same time slot as pre2_start_out (goes active with pre2_start_out), 2 ahead of data output reg [2:0] y_index // for simulation - valid with dout - index of the data output ); @@ -135,6 +135,8 @@ module dct_iv8_1d#( reg restart; // restarting next block if en was active at phase=14; reg run_out; // running output data reg en_out_r; + reg en_out_r2; + assign en_out = en_out_r; @@ -153,16 +155,17 @@ module dct_iv8_1d#( assign dsp_cin_1 = {{P_WIDTH-WIDTH-COSINE_SHIFT{d_in[WIDTH-1]}},d_in,~d_in[WIDTH-1]^din_zero,{COSINE_SHIFT-1{d_in[WIDTH-1]}}}; always @ (posedge clk) begin - if (en_out_r) begin + en_out_r2 <= en_out_r; + if (en_out_r2) begin case (phase_cnt[3:1]) - 3'h0: y_index <= 7; - 3'h1: y_index <= 4; - 3'h2: y_index <= 3; - 3'h3: y_index <= 1; - 3'h4: y_index <= 6; - 3'h5: y_index <= 2; - 3'h6: y_index <= 5; - 3'h7: y_index <= 0; + 3'h0: y_index <= 0; + 3'h1: y_index <= 7; + 3'h2: y_index <= 4; + 3'h3: y_index <= 3; + 3'h4: y_index <= 1; + 3'h5: y_index <= 6; + 3'h6: y_index <= 2; + 3'h7: y_index <= 5; endcase end else begin y_index <= 'bx; diff --git a/dsp/dct_iv_8x8.v b/dsp/dct_iv_8x8.v index 73ac5e8..28a74bc 100644 --- a/dsp/dct_iv_8x8.v +++ b/dsp/dct_iv_8x8.v @@ -106,12 +106,14 @@ module dct_iv_8x8#( reg [1:0] transpose_w_page; reg [6:0] transpose_cntr; // transpose memory counter, [6] == 1 when the last page is being finished reg transpose_in_run; - wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h10); +// wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h10); + wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h11); +// wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h12); reg [2:0] transpose_wa_low; // [2:0] transpose memory low address bits, [3] - other group (of 16) reg [4:0] transpose_wa_high; // high bits of transpose memory write address wire [7:0] transpose_wa = {transpose_wa_high,transpose_wa_low}; wire transpose_wa_decr = (transpose_cntr[0] & ~transpose_cntr[3]); - reg transpose_we; + reg [1:0] transpose_we; // [1] wire [TRANSPOSE_WIDTH-1:0] transpose_di = transpose_cntr[0]? dcth_dout0: dcth_dout1; reg [TRANSPOSE_WIDTH-1:0] transpose_ram[0:255]; @@ -125,8 +127,8 @@ module dct_iv_8x8#( wire transpose_out_start = transpose_in_run && (transpose_cntr[6:0] == 7'h34); // 7'h33 is actual minimum reg [1:0] transpose_r_page; - reg [TRANSPOSE_WIDTH-1:0] transpose_reg; // internal BRAM register - reg [TRANSPOSE_WIDTH-1:0] transpose_out; // output BRAM register + reg signed [TRANSPOSE_WIDTH-1:0] transpose_reg; // internal BRAM register + reg signed [TRANSPOSE_WIDTH-1:0] transpose_out; // output BRAM register reg [7:0] transpose_debug_reg; // internal BRAM register reg [7:0] transpose_debug_out; // output BRAM register @@ -168,10 +170,11 @@ module dct_iv_8x8#( reg [6:0] dctv_out_cntr; // count output data from second (vertical) pass (bit 6 - stopping) reg dctv_out_run; // - wire dctv_out_start = dctv_phin [6:0] == 'h10; +// wire dctv_out_start = dctv_phin [6:0] == 'h10; + wire dctv_out_start = dctv_phin [6:0] == 'h11; reg [3:0] dctv_out_wa_1; - reg dctv_out_we_1; + reg [1:0] dctv_out_we_1; reg dctv_out_sel; // select DCTv channel output; reg signed [OUT_WIDTH-1:0] dctv_out_ram_1[0:15]; reg [2:0] dctv_out_debug_ram_1[0:15]; @@ -266,11 +269,11 @@ module dct_iv_8x8#( 4'hf: transpose_wa_low <= 3; endcase transpose_wa_high <= {transpose_w_page, transpose_cntr[5:4], transpose_cntr[0]} - {transpose_wa_decr,1'b0}; - transpose_we <= dcth_en_out0 || dcth_en_out1; + transpose_we <= {transpose_we[0],dcth_en_out0 | dcth_en_out1}; // Write transpose memory) - if (transpose_we) transpose_ram[transpose_wa] <= transpose_di; - if (transpose_we) transpose_debug_ram[transpose_wa] <= transpose_debug_di; -// if (transpose_we) $display("%d %d @%t",transpose_cntr, transpose_wa, $time) ; + if (transpose_we[1]) transpose_ram[transpose_wa] <= transpose_di; + if (transpose_we[1]) transpose_debug_ram[transpose_wa] <= transpose_debug_di; +// if (transpose_we[1]) $display("%d %d @%t",transpose_cntr, transpose_wa, $time) ; if (rst) transpose_out_run[0] <= 0; else if (transpose_out_start) transpose_out_run[0] <= 1; @@ -335,7 +338,7 @@ module dct_iv_8x8#( if (!dctv_out_run || dctv_out_start) dctv_out_cntr <= 0; else dctv_out_cntr <= dctv_out_cntr + 1; - dctv_out_we_1 <= dctv_en_out0 || dctv_en_out1; + dctv_out_we_1 <= {dctv_out_we_1[0], dctv_en_out0 | dctv_en_out1}; dctv_out_sel <= dctv_out_cntr[0]; @@ -359,8 +362,8 @@ module dct_iv_8x8#( endcase // write first stage of output reordering - if (dctv_out_we_1) dctv_out_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_dout1: dctv_dout0; - if (dctv_out_we_1) dctv_out_debug_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_yindex1: dctv_yindex0; + if (dctv_out_we_1[1]) dctv_out_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_dout1: dctv_dout0; + if (dctv_out_we_1[1]) dctv_out_debug_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_yindex1: dctv_yindex0; if (rst) dctv_out_run_1 <= 0; else if (dctv_out_start_1) dctv_out_run_1 <= 1; diff --git a/dsp/dct_tests_01.tf b/dsp/dct_tests_01.tf index cde8694..b660324 100644 --- a/dsp/dct_tests_01.tf +++ b/dsp/dct_tests_01.tf @@ -65,7 +65,7 @@ module dct_tests_01 (); parameter WIDTH = 24; // input data width // parameter OUT_WIDTH = 16; // output data width parameter OUT_WIDTH = 24; // output data width - parameter TRANSPOSE_WIDTH = 25; // width of the transpose memory (intermediate results) + parameter TRANSPOSE_WIDTH = 24; // width of the transpose memory (intermediate results) parameter OUT_RSHIFT = 3; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation) parameter OUT_RSHIFT2 = 0; // overall right shift for the second (vertical) pass -- 2.18.1